├── .bumpversion.cfg
├── .github
│   └── workflows
│       ├── build.yml
│       └── publish.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── MANIFEST.in
├── Pipfile
├── Pipfile.lock
├── README.md
├── build.sh
├── docs
│   └── orca.png
├── orca
│   ├── __init__.py
│   ├── find_cpes.py
│   ├── lib
│   │   ├── __init__.py
│   │   ├── analyzer.py
│   │   ├── apk.py
│   │   ├── ascii_checkers.py
│   │   ├── bin_checkers.py
│   │   ├── bin_checkers_cpe.py
│   │   ├── composer.py
│   │   ├── cpe2cve.py
│   │   ├── dnf.py
│   │   ├── dockerfile.py
│   │   ├── dpkg.py
│   │   ├── golang.py
│   │   ├── jar.py
│   │   ├── logger.py
│   │   ├── package_json.py
│   │   ├── path.py
│   │   ├── path_checkers.py
│   │   ├── perl.py
│   │   ├── pkgconfig.py
│   │   ├── python.py
│   │   ├── rpm_packages.py
│   │   ├── spdx.py
│   │   ├── test_apk.py
│   │   ├── test_ascii_checkers.py
│   │   ├── test_bin_checkers.py
│   │   ├── test_bin_checkers_cpe.py
│   │   ├── test_composer.py
│   │   ├── test_dockerfile.py
│   │   ├── test_jar.py
│   │   ├── test_path.py
│   │   ├── test_path_checkers.py
│   │   ├── test_perl.py
│   │   ├── test_pkgconfig.py
│   │   ├── test_python.py
│   │   ├── types.py
│   │   └── utils.py
│   ├── main.py
│   └── rpm_checker
│       ├── __init__.py
│       ├── go.mod
│       ├── go.sum
│       └── main.go
├── requirements.txt
└── setup.py

--------------------------------------------------------------------------------
/.bumpversion.cfg:
--------------------------------------------------------------------------------
[bumpversion]
current_version = 0.1.20
commit = True
tag = True
tag_name = v{new_version}

[bumpversion:file:setup.py]

[bumpversion:file:Dockerfile]

--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
name: Build ORCA

on:
  push:
    branches:
      - main

permissions:
  contents: write

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install Pipenv
        run: pip install pipenv

      - name: Install dependencies
        run: pipenv install --dev

      - name: Run tests with coverage
        run: pipenv run pytest --cov=. --cov-report=json

      - name: Update Coverage Badge
        # https://stackoverflow.com/questions/64781462/github-actions-default-branch-variable
        if: github.ref == format('refs/heads/{0}', github.event.repository.default_branch)
        uses: we-cli/coverage-badge-action@main

--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
name: Publish ORCA

on:
  push:
    tags:
      - 'v*' # Triggers only when tags starting with 'v' are pushed (e.g., v1.0.0)

permissions:
  contents: write
  packages: write

jobs:
  build-and-publish:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install Pipenv
        run: pip install pipenv

      - name: Install dependencies
        run: |
          pipenv install --dev

      - name: Run tests
        run: |
          pipenv run pytest

      - name: Build the package
        run: pipenv run python setup.py sdist

      - name: Smoke test
        run: |
          TAG_NAME=${GITHUB_REF#refs/tags/v}
          pip install dist/orca-${TAG_NAME}.tar.gz
          orca busybox

      - name: Create Release
        id: create_release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ github.ref }} # Use the tag that triggered the workflow
          files: dist/* # Attach all files in the dist directory

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v4
        with:
          context: .
          push: true
          tags: ghcr.io/${{ github.repository }}:latest

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
**/__pycache__/**
model_output/**
diagrams/**
inputs/**
tmpdir/**
.DS_Store
logs/**
results/**
dist/**

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
ARG ORCA_VERSION=0.1.20

FROM golang:1.21 AS gobuilder

WORKDIR /app

COPY orca /app/orca
WORKDIR /app/orca/rpm_checker
RUN go vet
RUN CGO_ENABLED=0 GOOS=linux go build -o rpm_checker main.go

FROM python:3.12-slim AS pythonbuild

WORKDIR /app
COPY . .
RUN pip install --no-cache-dir -r requirements.txt

RUN python setup.py sdist

FROM python:3.12-slim

RUN apt update && apt install golang -y
WORKDIR /app
ENV ORCA_VERSION=0.1.20
COPY --from=pythonbuild /app/dist/orca-${ORCA_VERSION}.tar.gz /app
COPY --from=gobuilder /app/orca/rpm_checker /bin/
COPY requirements.txt .
RUN pip install orca-${ORCA_VERSION}.tar.gz


ENTRYPOINT [ "orca" ]

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2025 kube-security

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
include requirements.txt
include orca/rpm_checker/*

--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[packages]
packaging = "*"
requests = "*"
docker = "*"
natsort = "*"
python-debian = "*"
pykg-config = "*"
rpmfile = "*"
rpm = "*"
spdx-tools = "*"
packageurl-python = "*"
setuptools = "*"
validators = "*"
pytest-xdist = "*"
pytest-cov = "*"
pip-audit = "*"

[dev-packages]
bump2version = "*"
pytest = "*"

[requires]
python_version = "3.12"

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# ORCA (Obfuscation-Resilient Container Analysis)

[![CI Pipeline](https://github.com/kube-security/orca/actions/workflows/publish.yml/badge.svg)](https://github.com/kube-security/orca/actions)
[![License](https://img.shields.io/github/license/kube-security/orca)](https://github.com/kube-security/orca/blob/main/LICENSE)
[![Latest Release](https://img.shields.io/github/v/release/kube-security/orca?sort=semver)](https://github.com/kube-security/orca/releases)

![ORCA logo](docs/orca.png)

ORCA is a tool designed to analyze obfuscated or obscure container images, providing reliable Software Bills of Materials (SBOMs) even when traditional tools fail. It addresses the challenge of container image obfuscation and empowers developers and security teams to better manage and secure containerized environments.

[![asciicast](https://asciinema.org/a/iqqpx2iHC5BZM10hscxn7cayl.png)](https://asciinema.org/a/iqqpx2iHC5BZM10hscxn7cayl)

## Motivation

Containers often undergo obfuscation or contain altered content, making it difficult for standard tools to generate accurate SBOMs. ORCA scans all container layers and analyzes as many files as possible, even inspecting Dockerfile content for hidden commands.

## Installation

The easiest way to use ORCA is via the `kube-security/orca` container image that is available on the [GitHub Container Registry (GHCR)](https://github.com/kube-security/orca/pkgs/container/orca).

You can use the image as follows:

```bash
docker run -v /var/run/docker.sock:/var/run/docker.sock \
    -v $(pwd)/results:/app/results \
    -v $(pwd)/logs:/app/logs \
    -it ghcr.io/kube-security/orca:latest \
    <containers>
```

### From source

1. **Download the package archive** from the [releases](https://github.com/kube-security/orca/releases) page.

2. **Install the package**:
   ```bash
   pip install dist/orca-<version>.tar.gz
   ```

3. **Build the necessary Go binary** (ORCA includes Go code that needs to be compiled):
   ```bash
   tar -xvf orca-<version>.tar.gz
   cd orca-<version>/orca/rpm_checker
   go build -o rpm_checker main.go
   mv rpm_checker /usr/local/bin # should be in PATH
   ```

## Usage

Once installed, ORCA can be used to scan container images.
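For example, the following invocation (image names are illustrative) scans two images and also stores a CSV summary of the discovered packages:

```bash
orca --csv alpine:3.19,nginx:1.25
```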
The full option list is available via `--help`:

```bash
orca --help
usage: orca [-h] [-d DIR] [--csv] [-b] [-c] containers

Software composition analysis for containers

positional arguments:
  containers           Comma separated list of containers to analyze

options:
  -h, --help           show this help message and exit
  -d DIR, --dir DIR    Folder where to store results *without ending /*
  --csv                Store also a csv file with package information
  -b, --with-binaries  Analyze every binary file (slower). Go binaries are always analyzed
  -c, --complete       Generate complete SPDX report with relationships (>200MB file is generated)
```

Example usage: `orca alpine:latest`


## Results

Some basic statistics are shown in the terminal. Additionally, two folders, `results` and `logs`, should appear; they contain the SPDX documents and additional logs.


## Development

1. **Clone the repository**:
   ```bash
   git clone https://github.com/kube-security/orca.git
   cd orca
   ```

2. **Install dependencies** using `pip` or `Pipenv`:
   ```bash
   pip install -r requirements.txt
   ```
   or, if using Pipenv:
   ```bash
   pipenv install
   ```

### Obfuscation benchmark dataset

You can compare the results of ORCA against other container scanning tools using our [container obfuscation benchmark](https://github.com/kube-security/container-obfuscation-benchmark).

### Contributing

Contributions are welcome!


## License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
pipenv run python setup.py sdist

--------------------------------------------------------------------------------
/docs/orca.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kube-security/orca/7a6f756bfbf70d7031ff1810c1c5c414747ac63d/docs/orca.png

--------------------------------------------------------------------------------
/orca/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kube-security/orca/7a6f756bfbf70d7031ff1810c1c5c414747ac63d/orca/__init__.py

--------------------------------------------------------------------------------
/orca/find_cpes.py:
--------------------------------------------------------------------------------
import argparse
import datetime
import json
import os
from typing import List, Optional, Set

from orca.lib.apk import get_apk
from orca.lib.ascii_checkers import parse_gemspec
from orca.lib.bin_checkers import check_binaries
from orca.lib.composer import get_composer
from orca.lib.cpe2cve import cpe2cve
from orca.lib.dnf import get_dnf
from orca.lib.dpkg import get_dpkg
from orca.lib.golang import extract_go_dependencies, get_gomod
from orca.lib.jar import get_jar
from orca.lib.package_json import get_package_json
from orca.lib.path import get_filepaths
from orca.lib.perl import get_perl
from orca.lib.pkgconfig import get_pkgconfig
from orca.lib.python import extract_python_dependencies
from orca.lib.rpm_packages import get_rpm
from orca.lib.logger import logger
from orca.lib.types import VulnerabilityReport

unuseful_extensions = [".php", ".h", ".c", ".xml", ".png", ".csv", ".js", ".css", ".jar"]



def is_executable(file_path):
    return os.path.isfile(file_path) and os.access(file_path, os.X_OK)


def get_executables(files, directory) -> List[str]:
    no_ext = filter(lambda x: "." not in x.split("/")[-1], files)  # first, files without an extension
    no_ext_executable = filter(lambda x: is_executable(os.path.join(directory, x)), no_ext)

    no_ext_binary = list(filter(lambda x: is_binary_executable(os.path.join(directory, x)), no_ext_executable))
    libs = list(filter(lambda x: x.endswith(".so"), files))
    return no_ext_binary + libs

def split_executable_files(files, directory):
    executables = []
    non_executables = []
    for path in files:
        file = path.split("/")[-1]
        real_path = os.path.join(directory, path)
        if any([file.endswith(ext) for ext in unuseful_extensions]):
            non_executables.append(path)
        elif file.startswith("lib") and ".so" in file:
            continue
        elif is_binary_executable(real_path):
            executables.append(path)
        elif os.path.isdir(real_path):
            continue
        else:
            non_executables.append(path)
    return executables, non_executables


def is_binary_executable(file_path):
    if not os.path.isfile(file_path):
        return False
    try:
        with open(file_path, "rb") as f:
            magic = f.read(4)
    except Exception:
        return False

    return magic == b"\x7fELF"  # Check for ELF magic number

def add_duplicate_links(directory, paths, files):
    fcopy = set()
    for file in paths.union(files):
        if len(file) < 2:
            continue
        orig_path = directory + "/" + file
        realpath = os.path.realpath(orig_path)
        cleanpath = realpath.replace(directory + "/", "")

        if orig_path != realpath and (
            (cleanpath in files and file not in files)
            or (cleanpath in paths and file in files)
        ):
            fcopy.add(cleanpath)
            fcopy.add(file)
    return fcopy

def remove_links(directory, paths):
    real_paths = set()
    for file in paths:
        if len(file) < 2:
            continue
        orig_path = directory + "/" + file
        realpath = os.path.realpath(orig_path)
        cleanpath = realpath.replace(directory + "/", "")

        if orig_path != realpath:
            real_paths.add(cleanpath)
        else:
            real_paths.add(file)
    return real_paths

def maybe_binary(file: str):
    end = file.split("/")[-1]
    return ("." not in file or ".so" in file) and end.lower() == end

def scan_os(paths: List[str], directory: str) -> Optional[dict]:
    OS_INFOS = ["etc/os-release", "etc-release", "usr/lib/os-release", "etc/debian_version"]
    os_relevant_paths = [path for path in paths if path in OS_INFOS]
    if len(os_relevant_paths) == 0:
        logger.warning("Could not find os information")
    else:
        osinfo = {}
        for path in os_relevant_paths:
            content = open(os.path.join(directory, path)).read()
            if "debian_version" in path:
                osinfo["version"] = content.strip().split("/")[0]
            data = {}
            for line in content.split("\n"):
                # Strip the line of extra whitespace and ignore comments or empty lines
                line = line.strip()
                if line and not line.startswith('#'):
                    # Split the line by '=' to separate key and value
                    try:
                        key, value = line.split('=', 1)
                    except Exception:
                        break
                    # Remove surrounding quotes if the value is quoted
                    value = value.strip('"')
                    # Add to the dictionary
                    data[key] = value
            if "NAME" in data:
                osinfo["name"] = data.get("NAME")
                osinfo["major"] = data.get("VERSION_ID")
                osinfo["codename"] = data.get("VERSION_CODENAME")
                osinfo["cpe"] = data.get("CPE_NAME")
                osinfo["prettyname"] = data.get("PRETTY_NAME")


        return osinfo
    return None

def scan_filesystem(directory: str, files, analyze_binaries=False, accurate=False) -> VulnerabilityReport:
    """
    Scans the filesystem to identify and analyze files, extract dependencies, and generate a vulnerability report.
    Args:
        directory (str): The root directory to scan for files. This directory does not contain links or devices.
        files (list): A list of files to analyze. This also includes links and devices.
        analyze_binaries (bool, optional): Whether to analyze binary files for dependencies. Defaults to False.
        accurate (bool, optional): Whether to perform additional steps to remove duplicate files for more accurate results. Defaults to False.
    Returns:
        VulnerabilityReport: A report containing information about identified vulnerabilities, packages, and remaining files.
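
    Example (illustrative):
        report = scan_filesystem("/tmp/rootfs", files, analyze_binaries=True)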
    """
    paths: Set[str] = get_filepaths(directory)


    report: VulnerabilityReport = VulnerabilityReport(paths, files)

    osinfo = scan_os(report.remaining_files, directory)
    if osinfo is not None:
        report.os = osinfo

    # OS-packages
    logger.info(f"Initial files {len(paths)}")

    logger.info("Parsing executables")
    executable = get_executables(report.remaining_files, directory)
    logger.info(f"Found {len(executable)} executables")

    # Go binaries carry embedded module information, so they are always analyzed
    go = extract_go_dependencies(executable, directory)
    report.add_package_files(go)


    # Optional extra pass that resolves symlinks to drop duplicate files
    if accurate:
        logger.info("Removing duplicates")
        duplicates = add_duplicate_links(directory, paths, report.analyzed_files)
        report.analyzed_files.update(duplicates)
        report.remaining_files = report.remaining_files.difference(duplicates)


    logger.info("Parsing language-specific packages")

    report.add_package_files(extract_python_dependencies(paths, directory))
    report.add_package_files(get_jar(report.remaining_files, directory))
    report.add_package_files(get_package_json(report.remaining_files, directory))
    report.add_package_files(get_composer(report.remaining_files, directory))
    report.add_package_files(get_perl(report.remaining_files, directory))
    report.add_package_files(parse_gemspec(report.remaining_files, directory))
    report.add_package_files(get_gomod(report.remaining_files, directory))



    logger.info("Parsing OS package managers")
    report.add_package_files(get_dpkg(report.remaining_files, directory))
    report.add_package_files(get_rpm(report.remaining_files, directory))
    report.add_package_files(get_apk(report.remaining_files, directory))
    report.add_package_files(get_dnf(report.remaining_files, directory))
    report.add_package_files(get_pkgconfig(report.remaining_files, directory))

    if analyze_binaries:
        binaries = check_binaries(directory, executable)
        report.add_package_files(binaries)


    logger.info(f"Files not indexed {len(report.remaining_files)}")
    logger.info(f"Total Packages {len(report.packages)}")
    return report


def get_cpes(directory: str, analyze_binaries=False, store_cpes=True, store_cpe_files=True, accurate=False, analyze_cves=False):

    # NOTE: callers that track the full file list of the image (including links
    # and devices) should pass it as `files`; here the directory listing is reused.
    report: VulnerabilityReport = scan_filesystem(directory, get_filepaths(directory), analyze_binaries=analyze_binaries, accurate=accurate)
    pkgset = list(set(report.packages))

    if store_cpes:
        with open("result.csv", "w") as fp:
            fp.write("product,version,vendor\n")
            for pkg in pkgset:
                fp.write(pkg.to_csv_entry() + "\n")

    if store_cpe_files:
        with open("cpe_files.json", "w") as fp:
            json.dump(report.to_json(), fp, indent="\t")

    if analyze_cves:
        cpeset = set([cpe.to_cpe() for cpe in report.packages])
        total_cves = set()
        for cpe in cpeset:
            cves = cpe2cve(cpe)
            total_cves.update(cves)
            for cve in cves:
                logger.error(cve)
        logger.error(f"Found {len(total_cves)} CVEs")



if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="""Scans for CPEs in a given directory.
        The program scans each immediate subfolder of the given directory and reports the CPEs found therein."""
    )
    parser.add_argument(
        "-d", "--directory", type=str, help="Directory to analyze", required=True
    )
    parser.add_argument(
        "--store-cpes", action=argparse.BooleanOptionalAction, help="Store cpes to file (result.csv)", required=False, default=True
    )
    parser.add_argument(
        "--store-cpe-files", action=argparse.BooleanOptionalAction, help="Store cpe-related files to file (cpe_files.json)", required=False, default=True
    )
    parser.add_argument(
        "--analyze-cves", action=argparse.BooleanOptionalAction, help="Scan for CVEs", required=False, default=False
    )
    args = parser.parse_args()

    path: str = args.directory
    store_cpes = args.store_cpes
    store_cpe_files = args.store_cpe_files
    analyze_cves = args.analyze_cves
    subdirs = [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))]
    for subdir in subdirs:
        directory = os.path.join(path, subdir)
        start = datetime.datetime.now()
        get_cpes(directory, analyze_binaries=False, store_cpes=store_cpes, store_cpe_files=store_cpe_files, analyze_cves=analyze_cves)
        end = datetime.datetime.now()
        logger.info(f"Elapsed time: {(end-start).total_seconds() * 1000} ms")
        logger.debug("------END------")

--------------------------------------------------------------------------------
/orca/lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kube-security/orca/7a6f756bfbf70d7031ff1810c1c5c414747ac63d/orca/lib/__init__.py

--------------------------------------------------------------------------------
/orca/lib/analyzer.py:
--------------------------------------------------------------------------------
from .utils import calculate_sha256
from .logger import logger
import base64
import os
import requests
from .types import PackageRecord, to_record
import re
from packaging.version import Version


def search_vulnerabilities(name: str, version: str):
    """
    Code adapted from: https://github.com/pypa/packaging/blob/main/src/packaging/version.py
    """
    url = f"https://pypi.org/pypi/{name}/{version}/json"
    response: requests.Response = requests.get(url, timeout=10)
    # TODO: error handling
    response_json = response.json()
    results: list = []
    vulns = response_json.get("vulnerabilities")

    # No `vulnerabilities` key means that there are no vulnerabilities for any version
    if vulns is None:
        return results
    for v in vulns:
        id = v["id"]

        # If the vulnerability has been withdrawn, we skip it entirely.
        withdrawn_at = v.get("withdrawn")
        if withdrawn_at is not None:
            logger.debug(f"PyPI vuln entry '{id}' marked as withdrawn at {withdrawn_at}")
            continue

        # Put together the fix versions list; skip the entry if PyPI sent malformed data
        try:
            fix_versions = [Version(fixed_in) for fixed_in in v["fixed_in"]]
        except Exception:
            logger.error(f'Received malformed version from PyPI: {v["fixed_in"]}')
            continue

        # The ranges aren't guaranteed to come in chronological order
        fix_versions.sort()

        description = v.get("summary")
        if description is None:
            description = v.get("details")

        if description is None:
            description = "N/A"

        # The "summary" field should be a single line, but "details" might
        # be multiple (Markdown-formatted) lines. So, we normalize our
        # description into a single line (and potentially break the Markdown
        # formatting in the process).
        description = description.replace("\n", " ")

        results.append({
            'id': id,
            'description': description,
            'fix_versions': fix_versions,
            'aliases': set(v["aliases"])
        })
    return results



def analyze_record(directory: str, record: PackageRecord) -> bool:
    if not os.path.exists(f"{directory}/{record.path}") and record.nlines is not None:
        logger.error(f"Path does not exist {directory}/{record.path}")
        return False
    if record.nlines is not None:
        fp = open(f"{directory}/{record.path}", "rb")
        content = fp.read()
        if len(content) != record.nlines:
            logger.error(f"File {directory}/{record.path} has incorrect number of bytes: expected {record.nlines}, actual {len(content)}")
            return False
        assert record.hashtype == "sha256"
        hash = calculate_sha256(f"{directory}/{record.path}")
        digest = base64.urlsafe_b64encode(hash).decode()
        if digest[-1] == "=":
            digest = digest[:-1]
        if digest != record.hash:
            logger.error(f"Hash value does not match for file: {directory}/{record.path}")
            return False


    return True

def get_package_version(directory: str, package: str) -> tuple:
    version_regex = r'\nVersion: (.*)'
    assert os.path.exists(f"{directory}/{package}/METADATA")
    fp = open(f"{directory}/{package}/METADATA")
    content = fp.read()
    matches = re.findall(version_regex, content)
    assert len(matches) == 1
    version = matches[0]
    package_name = package.split("-")[0]
    return (package_name, version)


def analyze_package(directory: str, package: str):
    # Read the content of the RECORD file
    assert os.path.exists(f"{directory}/{package}/RECORD")
    fp = open(f"{directory}/{package}/RECORD")
    content = fp.read()
    logger.info(f"Analysing package {package}")
    integrity = []
    for record_item in content.split("\n"):  # Package integrity
        if len(record_item) > 1:
            record = to_record(record_item)
            integrity.append(analyze_record(directory, record))
    (name, version) = get_package_version(directory, package)
    results = search_vulnerabilities(name, version)
    if results != []:
        for result in results:
            logger.error(f"Vulnerability {result['id']} found on dependency")
        return False
    # The package is intact only if every RECORD entry verified successfully
    return all(integrity)
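
# For reference, each line of a dist-info RECORD file has the form (digest is illustrative):
#   <path>,<hashtype>=<urlsafe-b64-digest>,<size-in-bytes>
# e.g. "orca/__init__.py,sha256=AbC123...,42"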

--------------------------------------------------------------------------------
/orca/lib/apk.py:
--------------------------------------------------------------------------------

import os
from typing import Dict, List

from . import logger
from .types import PackageInfo, PackageInfoType



def read_apk_db(db_path, path) -> Dict[PackageInfo, List[str]]:
    fs = open(db_path).read()
    cpeMap = {}
    for entry in fs.split("\n\n"):
        package = ""
        version = ""
        last_folder = ""
        files = set()
        for line in entry.split("\n"):
            if line.startswith("P:"):
                package = line[2:]
            elif line.startswith("V:"):
                version = line[2:]
            elif line.startswith("F:"):
                last_folder = line[2:]
            elif line.startswith("R:"):
                files.add(last_folder + "/" + line[2:])
        if package == "":
            continue
        files.add(path)
        package = PackageInfo(package, version, None, PackageInfoType.APK)
        cpeMap[package] = files
    return cpeMap

def read_world_file(db_path, path) -> Dict[PackageInfo, List[str]]:
    lines = open(db_path).readlines()
    cpeMap = {}
    files = set()
    files.add(path)
    for entry in lines:
        package = PackageInfo(entry.strip(), None, None, PackageInfoType.APK)
        cpeMap[package] = files
    return cpeMap

def get_apk(paths: List[str], directory: str) -> Dict[PackageInfo, List[str]]:
    apks = [p for p in paths if "apk/db/installed" in p or "apk/world" in p]  # or "apk/db/names" in p
    total_pkgs = {}
    for path in apks:
        if "installed" in path:
            packages = read_apk_db(os.path.join(directory, path), path)
            total_pkgs.update(packages)
        elif "world" in path:
            packages = read_world_file(os.path.join(directory, path), path)
            total_pkgs.update(packages)


    if len(total_pkgs.keys()):
        logger.logger.info(f"APKs: {len(total_pkgs.keys())}")
    return total_pkgs
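
# An "apk/db/installed" entry is a blank-line-separated block of tagged lines, e.g. (illustrative):
#   P:musl
#   V:1.2.4-r2
#   F:lib
#   R:ld-musl-x86_64.so.1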

--------------------------------------------------------------------------------
/orca/lib/ascii_checkers.py:
--------------------------------------------------------------------------------


import os
import re
from typing import Dict, List

from . import logger
from .types import PackageInfo, PackageInfoType

GOSUM = re.compile(r'(\S+)\s+(\S+)\s+h1:(\S+)')

def parse_gemspec(paths: List[str], directory: str) -> Dict[PackageInfo, List[str]]:
    files = [f for f in paths if f.endswith(".gemspec")]

    patterns = {
        'name': r'\.name\s*=\s*["\']([^"\']+)["\']',
        'version': r'\.version\s*=\s*["\']([^"\']+)["\']',
    }
    packages: Dict[PackageInfo, List[str]] = {}
    for filename in files:
        try:
            file = open(os.path.join(directory, filename), 'r')
        except Exception as e:
            logger.logger.error(f"[GEM] could not open file {filename} - {e}")
            continue
        content = file.read()

        spec_blocks = re.findall(r'Gem::Specification\.new do (.+?)end', content, re.DOTALL)
        for block in spec_blocks:
            gemspec_data = {}
            for key, pattern in patterns.items():
                match = re.search(pattern, block)
                if match:
                    gemspec_data[key] = match.group(1)
            if "version" not in gemspec_data:
                #gemspec_data['version'] = ""
                continue
            if "name" not in gemspec_data:
                continue
            p = PackageInfo(gemspec_data['name'], gemspec_data['version'], None, PackageInfoType.GEM)

            if p in packages:
                packages[p].append(filename)
            else:
                packages[p] = [filename]
    if len(packages.keys()):
        logger.logger.info(f"Gemspec: {len(packages)}")
    return packages


def parse_gosum(filepath):
    cpes = []
    with open(filepath, 'r') as file:
        lines = file.readlines()
        for line in lines:
            match = GOSUM.match(line)
            if match:
                module_path = match.group(1)
                version = match.group(2)[1:]
                org = module_path.split("/")[-2]
                module = module_path.split("/")[-1]
                version = version if "/go.mod" not in version else version.split("/")[0]
                cpes.append(f"cpe:2.3:a:{org}:{module}:{version}:*:*:*:*:*:*:*")
    return list(set(cpes))
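
# A go.sum line matched by GOSUM has the form "module version h1:digest", e.g. (illustrative):
#   github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=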

--------------------------------------------------------------------------------
/orca/lib/bin_checkers.py:
--------------------------------------------------------------------------------


import os
import re
from typing import List

from . import logger
from .types import PackageInfo

zlib = re.compile(r'inflate\s\(.*\)\s([0-9]+\.[0-9]+\.[0-9]+)')
GCC_re = re.compile(r'GCC:\s\(.*\)\s([0-9]+\.[0-9]+\.[0-9]+)')
GCC2_re = re.compile(r'gcc\s([0-9]+\.[0-9]+\.[0-9]+)')
openssl_re = re.compile(r'.*OpenSSL\s([0-9]+\.[0-9]+\.[0-9]+)')
pg_re = re.compile(r'.*\(PostgreSQL\)\s([0-9]+\.[0-9]+\.[0-9]+)')

def check_gcc(strings: List[str]):

    for string in strings:
        match = GCC_re.search(string)
        if match:
            version = match.group(1)
            return PackageInfo("gcc", version, "gnu", None)
    return None

def check_gcc2(strings: List[str]):
    for string in strings:
        match = GCC2_re.search(string)
        if match:
            version = match.group(1)
            return PackageInfo("gcc", version, "gnu", None)
    return None

def check_openssl(strings: List[str]):
    for string in strings:
        match = openssl_re.search(string)
        if match:
            version = match.group(1)
            return PackageInfo("openssl", version, "openssl", None)
    return None

def check_postgres(strings: List[str]):
    for string in strings:
        match = pg_re.search(string)
        if match:
            version = match.group(1)
            return PackageInfo("postgresql", version, "postgresql", None)
    return None

def check_zlib(strings: List[str]):

    for string in strings:
        match = zlib.search(string)
        if match:
            version = match.group(1)
            return PackageInfo("zlib", version, "zlib", None)
    return None

def check_self(strings: List[str], binary_name):
    if len(binary_name) == 1:
        return None
    pattern = r'{binary_name}\s(v?[0-9]+\.[0-9]+\.[0-9]+)'.format(binary_name=binary_name)
    try:
        selfbin = re.compile(pattern)
    except Exception as e:
        logger.logger.info(f"Could not compile regex for {binary_name} {e}")
        return None
    for string in strings:
        match = selfbin.search(string)
        if match:
            version = match.group(1)
            return PackageInfo(binary_name, version, None, None)
    return None


def extract_strings(filename, min_length=4):
    thestrings = []
    with open(filename, 'rb') as file:
        data = file.read()

    # Use a regex to find sequences of printable characters of at least `min_length`
    pattern = re.compile(b'[\x20-\x7E]{' + str(min_length).encode() + b',}')
    strings = pattern.findall(data)

    for s in strings:
        thestrings.append(s.decode('ascii'))
    return thestrings

def check_binaries(directory, executables):
    results = {}
    for exec_file in executables:
        cpes = static_check_cpes(os.path.join(directory, exec_file))
        if len(cpes):
            for cpe in cpes:
                if cpe in results:
                    results[cpe].append(exec_file)
                else:
                    results[cpe] = [exec_file]
    if len(results):
        logger.logger.info(f"Binaries {len(results)}")
    return results
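
# check_binaries maps each detected package to the binaries it was found in, e.g. (illustrative):
#   {PackageInfo("openssl", "3.0.13", "openssl", None): ["usr/bin/openssl"]}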

def static_check_cpes(filepath):
    """
    This function extracts strings from a file and
    applies regexes to find known applications and versions

    ---
    Returns: List of CPEs
    """
    strings = set(extract_strings(filepath, 4))
    cpes = []
    gcc_ver = check_gcc(strings)
    if gcc_ver is not None:
        cpes.append(gcc_ver)
    gcc_ver2 = check_gcc2(strings)
    if gcc_ver2 is not None:
        cpes.append(gcc_ver2)
    ssl_ver = check_openssl(strings)
    if ssl_ver is not None:
        cpes.append(ssl_ver)
    zlib_ver = check_zlib(strings)
    if zlib_ver is not None:
        cpes.append(zlib_ver)
    pg = check_postgres(strings)
    if pg is not None:
        cpes.append(pg)
    self_ver = check_self(strings, filepath.split("/")[-1].strip())
    if self_ver is not None:
        cpes.append(self_ver)
    return cpes

--------------------------------------------------------------------------------
/orca/lib/bin_checkers_cpe.py:
--------------------------------------------------------------------------------


import re
from typing import List

zlib = re.compile(r'inflate\s\(.*\)\s([0-9]+\.[0-9]+\.[0-9]+)')
GCC_re = re.compile(r'GCC:\s\(.*\)\s([0-9]+\.[0-9]+\.[0-9]+)')
GCC2_re = re.compile(r'gcc\s([0-9]+\.[0-9]+\.[0-9]+)')
openssl_re = re.compile(r'.*OpenSSL\s([0-9]+\.[0-9]+\.[0-9]+)')
pg_re = re.compile(r'.*\(PostgreSQL\)\s([0-9]+\.[0-9]+\.[0-9]+)')

def check_gcc(strings: List[str]):

    for string in strings:
        match = GCC_re.search(string)
        if match:
            version = match.group(1)
            return f"cpe:2.3:a:gnu:gcc:{version}:*:*:*:*:*:*:*", string
    return None, None

def check_gcc2(strings: List[str]):
    for string in strings:
        match = GCC2_re.search(string)
        if match:
            version = match.group(1)
            return f"cpe:2.3:a:gnu:gcc:{version}:*:*:*:*:*:*:*", string
    return None, None

def check_openssl(strings: List[str]):
    for string in strings:
        match = openssl_re.search(string)
        if match:
            version = match.group(1)
            return f"cpe:2.3:a:openssl:openssl:{version}:*:*:*:*:*:*:*", string
    return None, None

def check_postgres(strings: List[str]):
    for string in strings:
        match = pg_re.search(string)
        if match:
            version = match.group(1)
            return f"cpe:2.3:a:postgresql:postgresql:{version}:*:*:*:*:*:*:*", string
    return None, None

def check_zlib(strings: List[str]):

    for string in strings:
        match = zlib.search(string)
        if match:
            version = match.group(1)
            return f"cpe:2.3:a:zlib:zlib:{version}:*:*:*:*:*:*:*", string
    return None, None

def check_self(strings: List[str], binary_name):
    pattern = r'{binary_name}\s(v?[0-9]+\.[0-9]+\.[0-9]+)'.format(binary_name=binary_name)
    selfbin = re.compile(pattern)
    for string in strings:
        match = selfbin.search(string)
        if match:
            version = match.group(1)
            return f"cpe:2.3:a:*:{binary_name}:{version}:*:*:*:*:*:*:*", string
    return None, None


def extract_strings(filename, min_length=4):
    thestrings = []
    with open(filename, 'rb') as file:
        data = file.read()

    # Use a regex to find sequences of printable characters of at least `min_length`
    pattern = re.compile(b'[\x20-\x7E]{' + str(min_length).encode() + b',}')
    strings = pattern.findall(data)

    for s in strings:
        thestrings.append(s.decode('ascii'))
    return thestrings

def static_check_cpes(filepath):
    """
    This function extracts strings from a file and
    applies regexes to find known applications and versions

    ---
    Returns: List of CPEs
    """
    strings = set(extract_strings(filepath, 4))
    cpes = []
    gcc_ver, gcc_str = check_gcc(strings)
    if gcc_ver is not None:
        strings.remove(gcc_str)
        cpes.append(gcc_ver)

    gcc_ver2, gcc_str2 = check_gcc2(strings)
    if gcc_ver2 is not None:
        strings.remove(gcc_str2)
        cpes.append(gcc_ver2)

    ssl_ver, ssl_str = check_openssl(strings)
    if ssl_ver is not None:
        strings.remove(ssl_str)
        cpes.append(ssl_ver)

    zlib_ver, zlib_str = check_zlib(strings)
    if zlib_ver is not None:
        strings.remove(zlib_str)
        cpes.append(zlib_ver)

    pg, pg_str = check_postgres(strings)
    if pg is not None:
        strings.remove(pg_str)
        cpes.append(pg)

    self_ver, self_str = check_self(strings, filepath.split("/")[-1].strip())
    if self_ver is not None:
        strings.remove(self_str)
        cpes.append(self_ver)

    return list(set(cpes))

--------------------------------------------------------------------------------
/orca/lib/composer.py:
--------------------------------------------------------------------------------

import os
from typing import List
import json

from . import logger
from .types import PackageInfo, PackageInfoType


def parse_composer_lock(paths, directory, filename):
    composer_lock = json.load(open(directory + "/" + filename))
    packages = []
    accessed_paths = []
    files = [filename]
    basepath = os.path.dirname(filename)
    for package in composer_lock["packages"]:
        name = package["name"]
        version = package["version"]
        pkg = PackageInfo(name.split("/")[1], version, name.split("/")[0], PackageInfoType.COMPOSER)
        packages.append(pkg)

    if "autoload" in composer_lock:
        for key, value in composer_lock["autoload"].items():
            if "psr" in key:
                v = list(value.values())
                if type(v[0]) is list:
                    accessed_paths.extend(v[0])
                else:
                    accessed_paths.extend(v)
    if "autoload-dev" in composer_lock:
        for key, value in composer_lock["autoload-dev"].items():
            if "psr" in key:
                v = list(value.values())
                if type(v[0]) is list:
                    accessed_paths.extend(v[0])
                else:
                    accessed_paths.extend(v)
    for path in accessed_paths:
        baseinfo = os.path.join(basepath, path)
        for imagepath in paths:
            if baseinfo in imagepath:
                files.append(baseinfo)

    return {pkg: files for pkg in packages}

def parse_composer(paths, directory, filename):
    try:
        composer = json.load(open(directory + "/" + filename))
    except Exception as e:
        logger.logger.error(f"[COMPOSER] Could not open file {filename} -- {e}")
        return {}

    if "name" in composer:
        name = composer["name"]
        version = composer["version"] if "version" in composer else None
        pkg = PackageInfo(name.split("/")[1], version, name.split("/")[0], PackageInfoType.COMPOSER)
        accessed_paths = []
        files = [filename]
        basepath = os.path.dirname(filename)
        if "autoload" in composer:
            for key, value in composer["autoload"].items():
                if "psr" in key:
                    values = value.values()
                    for v in values:
                        if type(v) is list:
                            accessed_paths.extend(v)
                        else:
                            accessed_paths.append(v)
        if "autoload-dev" in composer:
            for key, value in composer["autoload-dev"].items():
                if "psr" in key:
                    values = value.values()
                    for v in values:
                        if type(v) is list:
                            accessed_paths.extend(v)
                        else:
                            accessed_paths.append(v)
        for path in accessed_paths:
            baseinfo = os.path.join(basepath, path)
            for imagepath in paths:
                if baseinfo in imagepath:
                    files.append(baseinfo)

        return {pkg: files}
    return {}
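
# A composer.lock "packages" entry minimally contains (illustrative):
#   {"name": "vendor/pkg", "version": "1.2.3",
#    "autoload": {"psr-4": {"Vendor\\Pkg\\": "src/"}}}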

def get_composer(paths: List[str], directory: str):  # Assuming only one composer per container
    packages = {}
    files = set()
    composer_lock = sorted([path for path in paths if "composer.lock" in path], key=len)
    composer_json = sorted([path for path in paths if "composer.json" in path], key=len)
    if len(composer_lock) == 0:
        return {}
    files.update(composer_json)
    files.update(composer_lock)
    raw_packages = []
    # Start by root composer.lock
    root_composer_lock = json.load(open(directory + "/" + composer_lock[0]))
    #root_composer_json = json.load(open(directory + "/" + composer_json[0]))
    for package in root_composer_lock["packages"]:
        name = package["name"]
        version = package["version"]
        pkg = PackageInfo(name.split("/")[1], version, name.split("/")[0], PackageInfoType.COMPOSER)
        packages[pkg] = []
        raw_packages.append(name)

    basepath = os.path.dirname(composer_json[0])
    for package in raw_packages:
        composer_lock = [x for x in composer_lock if package not in x]
        composer_json = [x for x in composer_json if package not in x]

    accessed_paths = []
    if "autoload" in root_composer_lock:
        for key, value in root_composer_lock["autoload"].items():
            if "psr" in key:
                v = list(value.values())
                if type(v[0]) is list:
                    accessed_paths.extend(v[0])
                else:
                    accessed_paths.extend(v)
    if "autoload-dev" in root_composer_lock:
        for key, value in root_composer_lock["autoload-dev"].items():
            if "psr" in key:
                v = list(value.values())
                if type(v[0]) is list:
                    accessed_paths.extend(v[0])
                else:
                    accessed_paths.extend(v)
    for path in paths:
        for accessed_path in accessed_paths:
            if f"{basepath}/{accessed_path}" in path:
                files.add(path)
        if "vendor" in path:
            upath = path.replace(f"{basepath}/vendor/", "")
            try:
                upathsplit = upath.split("/")
                final_package = f"{upathsplit[0]}/{upathsplit[1]}"
                if final_package in raw_packages:
                    files.add(path)
            except Exception:
                pass  # probably a folder



    for package in packages:
        packages[package] = files

    for composer in composer_json:
        packages.update(parse_composer(paths, directory, composer))
    if len(packages):
        logger.logger.info(f"PHP composer: {len(packages)}")
    return packages

--------------------------------------------------------------------------------
/orca/lib/cpe2cve.py:
--------------------------------------------------------------------------------
# Original Author: Matteo (xonoxitron) Pisani
# Description: Given a CPE, this script returns all related CVE, ordered by severity (desc)
# Usage: python3 cpe2cve.py -c cpe:2.3:a:apache:http_server:2.4.54

# Import necessary modules
import os

import requests

CPES_URL = "https://services.nvd.nist.gov/rest/json/cpes/2.0"

# Function to retrieve CVE data for a given CPE
def get_cve_data(session: requests.Session, cpe: str):
    base_url = "https://services.nvd.nist.gov/rest/json/cves/2.0?cpeName="
    response = session.get(base_url + cpe)
    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        try:
            cve_data = response.json()

            return cve_data.get("vulnerabilities", [])
        except Exception:
            print(response.text)
            return []
    else:
        print(f"Error in HTTP request: {response.status_code}")
        print(response.text)
        return []
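
# A successful NVD 2.0 response has the shape (illustrative):
#   {"vulnerabilities": [{"cve": {"id": "CVE-2023-XXXXX", ...}}, ...]}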

# Function to retrieve the CVE ID from a CVE object
def get_cve_id(cve):
    try:
        return cve["cve"]["CVE_data_meta"]["ID"]
    except (KeyError, TypeError, ValueError):
        # In case of missing data, mark the entry as non-evaluable
        return "N/A"


# Function to retrieve metric version
def get_cve_metric_version(cve):
    if "baseMetricV4" in cve["impact"]:
        return "4"
    if "baseMetricV3" in cve["impact"]:
        return "3"
    if "baseMetricV2" in cve["impact"]:
        return "2"
    if "baseMetricV1" in cve["impact"]:
        return "1"
    return "N/A"


# Function to retrieve the score from a CVE object
def get_cve_score(cve):
    try:
        v = get_cve_metric_version(cve)
        return float(cve["impact"]["baseMetricV" + v]["cvssV" + v]["baseScore"])
    except (KeyError, TypeError, ValueError):
        # In case of missing or non-numeric data, assign a high value for non-evaluability
        return float("inf")


# Function to retrieve the severity from a CVE object
def get_cve_severity(cve):
    v = get_cve_metric_version(cve)
    cvss = cve["impact"]["baseMetricV" + v]
    if "severity" in cvss:
        return cvss["severity"]
    if "baseSeverity" in cvss["cvssV" + v]:
        return cvss["cvssV" + v]["baseSeverity"]
    return "N/A"


def create_session():
    s = requests.Session()
    # Read the NVD API key from the environment instead of hard-coding a secret
    # (the variable name NVD_API_KEY is a convention chosen here).
    api_key = os.environ.get("NVD_API_KEY")
    if api_key:
        s.headers.update({"apiKey": api_key})
    return s

def search_cpe(session: requests.Session, cpe: str):
    response = session.get(f"{CPES_URL}?cpeMatchString={cpe}")
    cpes = []
    if response.status_code != 200:
        return []
    json_response = response.json()
    products = json_response.get("products", [])
    for product in products:
        cpeName = product["cpe"]["cpeName"]
        cpes.append(cpeName)
    return cpes




# Main function: resolve the CPE to concrete cpeNames, then collect all related CVE ids
def cpe2cve(cpe: str):
    session = create_session()
    cpeNames = search_cpe(session, cpe)
    cves = []
    for cpeName in cpeNames:
        # Retrieve CVE data for the given CPE
        cve_data = get_cve_data(session, cpeName)
        for item in cve_data:
            cves.append(item["cve"]["id"])

    return cves

--------------------------------------------------------------------------------
/orca/lib/dnf.py:
--------------------------------------------------------------------------------

import os
from typing import Dict, List

from . import logger
from .types import PackageInfo, PackageInfoType
import sqlite3


def read_dnf_db(db_path, path) -> Dict[PackageInfo, List[str]]:
    c = sqlite3.connect(db_path)
    cur = c.cursor()
    res = cur.execute("SELECT name,version from rpm")
    packagesMap = {}
    for entry in res.fetchall():
        package = PackageInfo(entry[0], entry[1], None, PackageInfoType.RPM)
        packagesMap[package] = [path]
    c.close()
    return packagesMap


def get_dnf(paths: List[str], directory: str) -> Dict[PackageInfo, List[str]]:
    if "var/lib/dnf/history.sqlite" in paths:
        packages = read_dnf_db(os.path.join(directory, "var/lib/dnf/history.sqlite"), "var/lib/dnf/history.sqlite")

        if len(packages.keys()):
            logger.logger.info(f"DNFs: {len(packages.keys())}")
        return packages

    return {}

--------------------------------------------------------------------------------
/orca/lib/dockerfile.py:
--------------------------------------------------------------------------------
import re
from typing import List, Tuple
import validators

from orca.lib.types import PackageInfo, PackageInfoType, VulnerabilityReport

def extract_urls(text):
    """
    This function extracts all the http and https urls from the list of commands
    """
    url_pattern = re.compile(r'https?://[^\s";|()\']+(?:\([^)]*\))?')
    return url_pattern.findall(text)


def replace_curly_variables(url, line, env_variables=""):
    """
    This function searches for user-defined variables in the Dockerfile.
    TODO: track variables defined earlier in the Dockerfile; right now only
    variables on the same line (and in the ENV entries) are resolved.
    """
    variables = re.findall(r'\$\{[^}]+\}', url)
    env_var_map = {}
    for var in env_variables.split("\n"):
        if "=" in var:
            key, value = var.split("=", 1)
            env_var_map[key] = value
    # Refactor line
    for k, v in env_var_map.items():
        line = line.replace(f"${{{k}}}", v)
    if variables:
        for variable in variables:
            var_name = variable.strip("${}()")
            var_pattern = re.compile(rf'{var_name}=(\S+)')
            match_line = var_pattern.search(line)
            match_env = var_pattern.search(env_variables)
            if match_line:
                url = url.replace(variable, match_line.group(1))
            elif match_env:
                url = url.replace(variable, match_env.group(1))

    array_pattern = re.compile(r'for\s+(\w+)\s+in\s+"\${(\w+)\[@\]}"')
    array_match = array_pattern.search(line)
    if array_match:
        urls = []
        component = array_match.group(1)
        array_name = array_match.group(2)
        array_pattern = re.compile(rf'{array_name}\s*=\s*\(([^)]+)\)')
        array_match = array_pattern.search(line)
        if array_match:
            array_values = array_match.group(1).split()
            for value in array_values:
                urls.append(url.replace(f"${{{component}}}", value).replace("\"", ""))
        return urls

    return [url]

def replace_dollar_variables(url, line, env_variables=""):
    """
    This function searches for user-defined variables in the Dockerfile
    and replaces them with their values. It checks for variables in the same
    line and in the environment variables.
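
    Example (illustrative):
        >>> replace_dollar_variables("https://example.com/v$VER/app.tar.gz", "VER=1.2.3 wget https://example.com/v$VER/app.tar.gz")
        'https://example.com/v1.2.3/app.tar.gz'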
    """
    variables = re.findall(r'\$[a-zA-Z_][a-zA-Z0-9_]*', url)
    if variables:
        for variable in variables:
            var_name = variable.strip("$")
            var_pattern = re.compile(rf'{var_name}=(\S+)')
            match_line = var_pattern.search(line)
            match_env = var_pattern.search(env_variables)

            if match_line:
                url = url.replace(variable, match_line.group(1))
            elif match_env:
                url = url.replace(variable, match_env.group(1))
    return url

def interpolate_variables(dockerfile_config):
    extracted_urls = []
    urls = []
    configurations = ""

    if dockerfile_config['config'].get('Env'):
        configurations = '\n'.join(dockerfile_config['config']['Env'])

    if len(dockerfile_config['history']) == 1 and 'created_by' in dockerfile_config['history'][0] and "crane" in dockerfile_config['history'][0]['created_by']:
        item = dockerfile_config['history'][0]
        comments = item["comment"]
        for comment in comments:
            if 'created_by' not in comment:
                continue
            line = comment['created_by']
            if "LABEL" in line or "http" not in line:
                continue
            else:
                ex_u = extract_urls(line)
                extracted_urls.extend(ex_u)
                for url in ex_u:
                    replaced_url = replace_curly_variables(url, line, configurations)
                    urls.append(replaced_url)

    else:
        for history_line in dockerfile_config['history']:
            if 'created_by' not in history_line:
                continue
            line = history_line['created_by']
            if "LABEL" in line or "http" not in line:
                continue
            else:
                ex_u = extract_urls(line)
                extracted_urls.extend(ex_u)
                for url in ex_u:
                    replaced_url = replace_curly_variables(url, line, configurations)
                    urls.append(replaced_url)
    return urls


def github_to_cpe(urls) -> List[Tuple[PackageInfo, str]]:
    # Now find github stuff
    found_cpes = []
    github_pattern = re.compile(r'https://github\.com/([^/]+)/([^/]+)/releases/download/(v?\d+(\.\d+)*(\+\d+)?)/[^/]+')
    github_urls = [url for url in urls if github_pattern.match(url)]
    for github_url in github_urls:
        match = github_pattern.match(github_url)
        if match:
            author = match.group(1)
            name = match.group(2)
            version = match.group(3)
            found_cpes.append((PackageInfo(name, version, author, PackageInfoType.GITHUB), github_url))
    return found_cpes
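
# Example of a URL matched by github_pattern (illustrative):
#   https://github.com/docker/compose/releases/download/v2.24.0/docker-compose-linux-x86_64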
github_content_pattern.match(url) 153 | github_archive_pattern_match = github_archive_pattern.match(url) 154 | gradle_pattern_match = gradle_pattern.match(url) 155 | postgresql_pattern_match = postgresql_pattern.match(url) 156 | bitnami_pattern_match = bitnami_pattern.match(url) 157 | generic_compressed_app_pattern_match = generic_compressed_app_pattern.match(url) 158 | if match_rust: 159 | cpes.append((PackageInfo("rust",match_rust.group(1),"rust",type=PackageInfoType.RUST),url)) 160 | elif github_content_pattern_match: 161 | cpes.append((PackageInfo(github_content_pattern_match.group(1),github_content_pattern_match.group(3), 162 | github_content_pattern_match.group(2),type=PackageInfoType.GITHUB),url)) 163 | elif github_archive_pattern_match: 164 | #print(github_archive_pattern_match.groups()) 165 | cpes.append((PackageInfo(github_archive_pattern_match.group(1),github_archive_pattern_match.group(3), 166 | github_archive_pattern_match.group(2),type=PackageInfoType.GITHUB),url)) 167 | elif gradle_pattern_match: 168 | cpes.append((PackageInfo("gradle",gradle_pattern_match.group(1),"gradle",PackageInfoType.GRADLE),url)) 169 | elif postgresql_pattern_match: 170 | cpes.append((PackageInfo("postgresql",postgresql_pattern_match.group(1),"postgresql"),url)) 171 | elif bitnami_pattern_match: 172 | regex = r"^([a-zA-Z0-9-]+)-([\d.]+-\d+)-linux-(amd64)-debian-(\d+)" 173 | match = re.match(regex, bitnami_pattern_match.group(1)) 174 | if match: 175 | name, version, arch, distro = match.groups() 176 | pkg = PackageInfo(name,version,"bitnami",arch=arch,type=PackageInfoType.BITNAMI) 177 | #purl = f"pkg:bitnami/{name}@{version}?arch={arch}&distro=debian-{distro}" 178 | cpes.append((pkg,url)) 179 | elif generic_compressed_app_pattern_match: # TODO: this should probably be separated into a different function 180 | pkg = PackageInfo(generic_compressed_app_pattern_match.group(1),generic_compressed_app_pattern_match.group(2),generic_compressed_app_pattern_match.group(1)) 181 | cpes.append((pkg,url)) 182 | 183 | return cpes 184 | 185 | def extract_cpes_from_dockerfile(dockerfile_config): 186 | # Those are all the urls + the ones that have been interpolated with the env variables and other variables in the Dockerfile line. 187 | urls = [u.rstrip() for theurls in interpolate_variables(dockerfile_config) for u in theurls] 188 | 189 | useful_urls = [u for u in urls if "$(" not in u]# TO be removed. 
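# Illustrative example (hypothetical image, not taken from the source): a history line such as
# `RUN curl -LO https://example.org/tool-$(uname -m).tar.gz` yields a URL that still contains "$(",
# an unresolved shell command substitution, so it is dropped here before the pattern matching below.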
190 | found_cpes = github_to_cpe(useful_urls) 191 | found_cpes.extend(selected_websites_to_cpe(useful_urls)) 192 | 193 | files_with_cpe = [cpe[1] for cpe in found_cpes] 194 | non_cpes = list(set(urls).difference(set(files_with_cpe))) 195 | 196 | 197 | return found_cpes,non_cpes 198 | 199 | def extract_cpes_from_dockerfile_with_validation(dockerfile_config) -> VulnerabilityReport: 200 | report = VulnerabilityReport({"Dockerfile"}) 201 | cpes,non_cpes = extract_cpes_from_dockerfile(dockerfile_config) 202 | new_cpes = [] 203 | packagefiles = {} 204 | for cpe in cpes: 205 | if not validators.url(cpe[1]): 206 | non_cpes.append(cpe[1]) 207 | else: 208 | new_cpes.append(cpe[0]) 209 | packagefiles[cpe[0]] = ["Dockerfile"] 210 | 211 | report.add_package_files(packagefiles) 212 | return report -------------------------------------------------------------------------------- /orca/lib/dpkg.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import Dict, List 3 | import debian.deb822 4 | from .logger import logger 5 | import os 6 | from .types import PackageInfo, PackageInfoType 7 | 8 | def parse_dpkg_status(file_path): 9 | with open(file_path, "r", encoding="utf-8") as file: 10 | status_file = debian.deb822.Deb822.iter_paragraphs(file) 11 | packages = [dict(pkg) for pkg in status_file] 12 | pp = [] 13 | for package in packages: 14 | version = package["Version"] 15 | epoch = None 16 | 17 | if len(version.split(":")) > 1: 18 | epoch = version.split(":")[0] 19 | version = version.split(":")[1] 20 | pp.append(PackageInfo(package["Package"],version,None,PackageInfoType.DEBIAN,package["Architecture"],epoch)) 21 | if "python-" in package["Package"]: 22 | pp.append(PackageInfo(package["Package"].replace("python-",""),version,None,PackageInfoType.PYPI,package["Architecture"],epoch)) 23 | elif "python3-" in package["Package"]: 24 | pp.append(PackageInfo(package["Package"].replace("python3-",""),version,None,PackageInfoType.PYPI,package["Architecture"],epoch)) 25 | if "Source" in package: 26 | pp.append(PackageInfo(package["Source"].split(" ")[0],version,None,PackageInfoType.DEBIAN,package["Architecture"],epoch)) 27 | return pp 28 | 29 | installed_bins = {"coreutils": 30 | ["arch","base64","basename","cat","chcon","chgrp","chmod","chown","chroot","cksum","comm","cp","csplit","cut","date","dd","df","dir","dircolors","dirname","du","echo","env","expand","expr","factor","false","flock","fmt","fold","groups","head","hostid","id","install","join","link","ln","logname","ls","md5sum","mkdir","mkfifo","mknod","mktemp","mv","nice","nl","nohup","nproc","numfmt","od","paste","pathchk","pinky","pr","printenv","printf","ptx","pwd","readlink","realpath","rm","rmdir","runcon","sha1sum","shasum","sha256sum","sha384sum","sha224sum","sha512sum","seq","shred","sleep","sort","split","stat","stty","sum","sync","tac","tail","tee","test","timeout","touch","tr","true","truncate","tsort","tty","uname","unexpand","uniq","unlink","users","vdir","wc","who","whoami","yes"], 31 | 32 | "findutils": ["find","xargs"], 33 | 34 | "procps": ["ee","kill","pkill","pgrep","pmap","ps","pwdx","skill","slabtop","snice","sysctl","tload","top","uptime","vmstat","w","watch"], 35 | "bsdutils": ["logger", "renice", "script", "scriptlive", "scriptreplay","wall"], 36 | "debianutils": ["add-shell", "installkernel", "ischroot", "remove-shell", "run-parts", "savelog","update-shells", "which"], 37 | "libc-bin": ["getconf","getent","iconv","ldd","lddconfig","locale","localedef","tzselect","zdump","zic"] 38 | }
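# A minimal sketch of how the table above is consumed by parse_dpkg_from_status below:
# binaries that dpkg does not index per-file are credited to their owning package under bin/.
# Hypothetical usage (illustrative only):
#
#     files_checked = ["bin/" + binary for binary in installed_bins.get("coreutils", [])]
#     # -> ["bin/arch", "bin/base64", "bin/basename", ...]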
39 | 40 | additional_files = [".preinst",".prerm",".postrm",".postinst",".list",".md5sums",".shlibs",".symbols",".triggers",".conffiles",".templates",".config"] 41 | 42 | 43 | def find_individual_packages(paths: List[str],directory: str)-> Dict[PackageInfo,List[str]]: 44 | packagesMap = {} 45 | for path in paths: 46 | if "var/lib/dpkg/status.d/" in path and "." not in path.split("/")[-1]: 47 | packages = parse_dpkg_status(directory + "/" +path) 48 | for package in packages: 49 | packagesMap[package] = [path] 50 | elif "var/lib/dpkg/status.d/" in path and os.path.isfile(path): 51 | for package in packages: 52 | packagesMap[package].add(path) 53 | return packagesMap 54 | 55 | def parse_dpkg_from_status(paths,directory,status) -> Dict[PackageInfo,List[str]]: 56 | package_dict = dict() 57 | os_pkgs = parse_dpkg_status(directory + "/" + status) 58 | for package in os_pkgs: 59 | files_checked = [] 60 | target_file = "var/lib/dpkg/info/" + package.name + ".list" 61 | if target_file in paths: 62 | content = open(directory + "/" + target_file).readlines() 63 | content = [ c.replace("\n","")[1:] if c[0] == "/" else c.replace("\n","") for c in content] 64 | files_checked.extend(content) 65 | for f in additional_files: 66 | fname = "var/lib/dpkg/info/" + package.name + f 67 | if fname in paths: 68 | files_checked.append(fname) 69 | else: 70 | target_file = "var/lib/dpkg/info/" + package.name + ":amd64.list" 71 | try: 72 | content = open(directory + "/" + target_file).readlines() 73 | content = [ c.replace("\n","")[1:] if c[0] == "/" else c.replace("\n","") for c in content] 74 | files_checked.extend(content) 75 | for f in additional_files: 76 | fname = "var/lib/dpkg/info/" + package.name + ":amd64" + f 77 | if fname in paths: 78 | files_checked.append(fname) 79 | except Exception: 80 | #logger.debug(f"DPKG indexed file not found: {target_file}") 81 | pass 82 | # Check binaries 83 | if package.name in installed_bins: 84 | for f in installed_bins[package.name]: 85 | files_checked.append("bin/"+f) 86 | 87 | files_checked.append("var/lib/dpkg/status") 88 | if package in package_dict: 89 | package_dict[package] = list(set([*package_dict[package],*files_checked])) 90 | else: 91 | package_dict[package] = files_checked 92 | return package_dict 93 | 94 | def get_dpkg(paths: List[str],directory: str)-> Dict[PackageInfo,List[str]]: 95 | status = [path for path in paths if path.endswith("dpkg/status")] 96 | others = [path for path in paths if "var/lib/dpkg" in path] 97 | 98 | assert len(status) < 2 99 | packages = {} 100 | 101 | if len(status) == 1: 102 | packages.update(parse_dpkg_from_status(paths, directory, status[0])) 103 | if len(packages.keys()): 104 | logger.info(f"DPKGS: {len(packages.keys())}") 105 | for package in packages.keys(): 106 | packages[package].extend(others) 107 | 108 | packages.update(find_individual_packages(paths,directory)) 109 | if len(packages.keys()): 110 | logger.info(f"DPKGS: {len(packages.keys())}") 111 | for package in packages.keys(): 112 | packages[package].extend(others) 113 | 114 | return packages 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /orca/lib/golang.py: -------------------------------------------------------------------------------- 1 | import re 2 | import subprocess 3 | import os 4 | from typing import Dict, List 5 | from .types import PackageInfo, PackageInfoType 6 | from .logger import logger 7 | def extract_go_dependencies(go_binary_path,directory: str): 8 | results = {} 9 | for path in 
go_binary_path: 10 | result = extract_dependency(os.path.join(directory,path)) 11 | for res in result: 12 | results[res] = [path] 13 | if len(results): 14 | logger.info(f"GO executables {len(results)}") 15 | return results 16 | 17 | def extract_dependency(go_binary_path): 18 | packages = [] 19 | # Use go list to get package dependencies 20 | deps_process = subprocess.Popen(['go', 'version',"-m" ,go_binary_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE) 21 | deps_output, deps_error = deps_process.communicate() 22 | 23 | if deps_process.returncode != 0 or len(deps_error) > 1: 24 | return [] 25 | lines = deps_output.decode('utf-8').splitlines() 26 | try: 27 | version = lines[0].split(" ")[1] 28 | except Exception as e: 29 | logger.warning(f"Go binary {go_binary_path} is too old to be analyzed {e}") 30 | return packages 31 | pkg = PackageInfo("stdlib", version[2:],None,PackageInfoType.GOLANG) 32 | packages.append(pkg) 33 | 34 | dep_lines = [line for line in lines if "dep" in line or "=>" in line or "mod" in line] 35 | for line in dep_lines: 36 | dep_split = line.split("\t") 37 | if len(dep_split) < 4: 38 | logger.error(f"[GO] Could not parse: {line}") 39 | continue 40 | 41 | packages.append(PackageInfo(dep_split[2], dep_split[3],None,PackageInfoType.GOLANG)) 42 | for build_line in [line for line in lines if "build" in line]: 43 | last_item = build_line.split("\t")[-1] 44 | 45 | if "-X " in last_item in last_item: 46 | #print(last_item) 47 | flags = last_item.split("-X ")[1:] 48 | found = False 49 | for flag in flags: 50 | f = flag.split(" ")[0] 51 | if "version.Version" in f: 52 | found = True 53 | split = flag.split("/version.Version=") 54 | if len(split) < 2 : 55 | continue 56 | p = PackageInfo(split[0], split[1],None,PackageInfoType.GOLANG) 57 | packages.append(p) 58 | if not found: 59 | f = flags[0].split(" ")[0] 60 | name = "/".join(f.split("/")[:-1]) 61 | if name is not None and "/" in name: 62 | p = PackageInfo(name, "unknown",None,PackageInfoType.GOLANG) 63 | packages.append(p) 64 | 65 | 66 | #imported_symbols = [line.strip() for line in objdump_output.decode('utf-8').splitlines() if 'imported symbol' in line] 67 | return packages#lines#, imported_symbols 68 | 69 | 70 | go_version_pattern = r'^go\s+(\d+\.\d+)' 71 | require_pattern = r'require\s+\(\s*([^)]*)\s*\)' # for multiline `require` 72 | single_require_pattern = r'require\s+([^\s]+)\s+([^\s]+)' # for single line `require` 73 | 74 | 75 | def get_gomod(paths: List[str],directory: str) -> Dict[PackageInfo,List[str]]: 76 | gomods = [path for path in paths if path.endswith("/go.mod")] 77 | 78 | packages = {} 79 | for gomod in gomods: 80 | file_content = open(os.path.join(directory,gomod)).read() 81 | 82 | match = re.search(go_version_pattern, file_content, re.MULTILINE) 83 | if match: 84 | go_version = match.group(1) 85 | p = PackageInfo("go",go_version,None) 86 | if p in packages: 87 | packages[p].append(gomod) 88 | else: 89 | packages[p] = [gomod] 90 | 91 | 92 | require_block = re.search(require_pattern, file_content, re.DOTALL) 93 | if require_block: 94 | # Extract dependencies from a multiline require block 95 | modules = require_block.group(1).strip().splitlines() 96 | for module in modules: 97 | module_info = module.strip().split() 98 | if len(module_info) == 2: 99 | name, version = module_info 100 | p = PackageInfo(name,version,None) 101 | if p in packages: 102 | packages[p].append(gomod) 103 | else: 104 | packages[p] = [gomod] 105 | # TODO: Add also gofiles here 106 | 107 | if len(packages): 108 | logger.info(f"GoMOD : 
{len(packages)}") 109 | return packages 110 | 111 | -------------------------------------------------------------------------------- /orca/lib/jar.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import re 4 | from typing import List 5 | import zipfile 6 | from . import logger 7 | from.types import PackageInfo, PackageInfoType 8 | 9 | # TODO: fix this 10 | TMP_DIR = f"{os.getcwd()}/tmpdir" 11 | 12 | 13 | def parse_pom_properties(jar: zipfile.ZipFile,content: str): 14 | packages = [] 15 | package_info = {} 16 | data = jar.open(content).readlines() 17 | for line in data: 18 | sline = line.decode() 19 | if "=" in sline: 20 | kv = sline.replace("\n","").replace("\r","").split("=") 21 | package_info[kv[0]] = kv[1] 22 | try: 23 | packages.append(PackageInfo(package_info["artifactId"],package_info["version"],package_info["groupId"],PackageInfoType.MAVEN)) 24 | except Exception as e : 25 | logger.logger.warn(f"{jar.filename} - {package_info.keys()} - {data} - {e}") 26 | pass 27 | return packages 28 | 29 | 30 | def list_jar_props(jar_path,directory): 31 | packages = [] 32 | try: 33 | with zipfile.ZipFile(os.path.join(directory,jar_path), 'r') as jar: 34 | contents = jar.namelist() 35 | real_contents = [content for content in contents if content.endswith("pom.properties") ] 36 | nested_jars = [content for content in contents if content.endswith(".jar")] 37 | 38 | for nested_jar in nested_jars: 39 | jar.extract(nested_jar,os.path.join(TMP_DIR,nested_jar[:-4])) 40 | packages.extend(list_jar_props(nested_jar[:-4],TMP_DIR)) 41 | 42 | 43 | for content in real_contents: 44 | packages.extend(parse_pom_properties(jar,content)) 45 | return packages 46 | except Exception as _: 47 | return packages 48 | 49 | def extract_jar(input_string: str): 50 | dots = input_string.split(".") 51 | 52 | for idx, dot in enumerate(dots): 53 | if len(dot) > 2 and dot[-2] == "-" and dot[-1].isdigit(): 54 | author = ".".join(dots[:idx]) 55 | name = dot[:-2] 56 | version =dot[-1] + "." + ".".join(dots[idx+1:]) 57 | return {"author": author,"name": name, "version": version} 58 | elif len(dot) > 2 and dot[-3] == "-" and dot[-2].isdigit() and dot[-1].isdigit(): 59 | author = ".".join(dots[:idx]) 60 | name = dot[:-3] 61 | version =dot[-2] + dot[-1] +"." 
+ ".".join(dots[idx+1:]) 62 | return {"author": author,"name": name, "version": version} 63 | return None 64 | 65 | def get_jar(paths: List[str],directory: str): 66 | jars = [path for path in paths if path.endswith(".jar") ] 67 | packages = {} 68 | for jar in jars: 69 | basename = os.path.basename(jar).split(".jar")[0] 70 | tokens = basename.split("-") 71 | dots = basename.split(".") 72 | #print(basename) 73 | # AwsJavaSdk-CognitoIdentityProvider-2.0.jar 74 | if len(tokens) > 2 and len(dots) < 4: 75 | #print("first") 76 | version = tokens[-1] 77 | pattern = re.compile(r"^([a-zA-Z0-9\-_]+?)-(\d+\.\d+(?:\.\d+)?)(?:[-_][a-zA-Z0-9\-._]+)?$") 78 | match = pattern.match(basename) 79 | if match: 80 | name, version = match.groups() 81 | package = PackageInfo(name,version,name,PackageInfoType.MAVEN) 82 | packages[package] = [basename] 83 | else: 84 | result = extract_jar(basename) 85 | if result is None: 86 | continue 87 | name = result["name"] 88 | version = result["version"] 89 | author = result["author"] 90 | package = PackageInfo(name,version,author,PackageInfoType.MAVEN) 91 | packages[package] = [basename] 92 | 93 | 94 | 95 | for jar in jars: 96 | pkgs = list_jar_props(jar,directory) 97 | basepath = os.path.dirname(jar) 98 | files = list(filter(lambda x: basepath in x, paths)) 99 | for pkg in pkgs: 100 | packages[pkg] = files 101 | if len(packages): 102 | logger.logger.info(f"JARs: {len(packages)}") 103 | 104 | return packages 105 | 106 | -------------------------------------------------------------------------------- /orca/lib/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | LOG_LEVEL = os.getenv("LOG_LEVEL",logging.WARNING) 4 | 5 | class CustomFormatter(logging.Formatter): 6 | 7 | grey = "\x1b[38;20m" 8 | yellow = "\x1b[33;20m" 9 | red = "\x1b[31;20m" 10 | bold_red = "\x1b[31;1m" 11 | reset = "\x1b[0m" 12 | blue = "\x1b[34m" 13 | debug_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s (%(filename)s:%(lineno)d)" 14 | format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" 15 | 16 | FORMATS = { 17 | logging.DEBUG: grey + format + reset, 18 | logging.INFO: blue + format + reset, 19 | logging.WARNING: yellow + debug_format + reset, 20 | logging.ERROR: red + debug_format + reset, 21 | logging.CRITICAL: bold_red + debug_format + reset 22 | } 23 | 24 | def format(self, record): 25 | log_fmt = self.FORMATS.get(record.levelno) 26 | formatter = logging.Formatter(log_fmt) 27 | return formatter.format(record) 28 | 29 | def setup_logger(): 30 | logger = logging.getLogger('ORCA') 31 | logger.setLevel(logging.DEBUG) 32 | 33 | ch = logging.StreamHandler() 34 | ch.setLevel(logging.DEBUG) 35 | 36 | ch.setFormatter(CustomFormatter()) 37 | 38 | logger.addHandler(ch) 39 | 40 | 41 | return logger 42 | 43 | # Setup logger for the entire project 44 | logger: logging.Logger = setup_logger() -------------------------------------------------------------------------------- /orca/lib/package_json.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | from typing import Dict, List 4 | import json 5 | 6 | from.logger import logger 7 | from.types import PackageInfo, PackageInfoType 8 | #import rpm 9 | 10 | 11 | def parse_package_json(paths,enclosing_dir,file: str): 12 | try: 13 | content = json.load(open(file)) 14 | except Exception: 15 | logger.error(f"[JS] Could not parse {file}") 16 | return {} 17 | if "name" not in content: 18 | return {} 19 | name = content["name"] 20 
| files = set([path for path in paths if enclosing_dir in path]) 21 | main_package = PackageInfo(name,"","npm") 22 | packages = {} 23 | if "version" in content: 24 | main_package = PackageInfo(name,content["version"],None,PackageInfoType.NPM) 25 | else: 26 | logger.info(f"Could not parse version from package.json at {file}") 27 | 28 | if "dependencies" not in content: 29 | return {main_package: list(files)} 30 | else: 31 | # TODO: Maybe we should also add dev-packages 32 | for dependency,version in content["dependencies"].items(): 33 | if type(version) is dict: 34 | version = version['version'] 35 | package = PackageInfo(dependency,version.split(" ")[0].replace("<","").replace(">","").replace("=",""),None,PackageInfoType.NPM) 36 | files_to_add = set([path for path in paths if os.path.join(enclosing_dir,"node_modules","package") in path]) 37 | packages[package] = list(files_to_add) 38 | files.difference_update(files_to_add) 39 | packages[main_package] = list(files) 40 | 41 | return packages 42 | 43 | 44 | def parse_package_lock(paths,enclosing_dir,file: str): 45 | packages = {} 46 | content = json.load(open(file)) 47 | name_author = content["name"].split("/") 48 | author = "npm" 49 | name = "" 50 | if len(name_author) > 1: 51 | author = name_author[0].replace("@","") 52 | name = name_author[1] 53 | else: 54 | name = name_author[0] 55 | if "version" in content: 56 | packages[PackageInfo(name,content["version"],author)] = [path for path in paths if enclosing_dir in path] 57 | key = "packages" if "packages" in content else "dependencies" 58 | for pkgname,package in content[key].items(): 59 | if pkgname == "": 60 | continue 61 | if "node_modules" in pkgname: 62 | pkg = pkgname.split("node_modules/")[-1] 63 | if "version" not in package: 64 | continue 65 | if "/" in pkg: 66 | pkg_split = pkg.replace("@","").split("/") 67 | packages[PackageInfo(pkg_split[1],package["version"],pkg_split[0],PackageInfoType.NPM)] = [enclosing_dir + "/package_lock.json",enclosing_dir + "/package.json"] 68 | else: 69 | packages[PackageInfo(pkg,package["version"],"npm",PackageInfoType.NPM)] = [path for path in paths if enclosing_dir in path] 70 | else: 71 | if "/" in pkgname: 72 | pkg_split = pkgname.replace("@","").split("/") 73 | packages[PackageInfo(pkg_split[1],package["version"],pkg_split[0])] = [enclosing_dir + "/package_lock.json",enclosing_dir + "/package.json"] 74 | else: 75 | packages[PackageInfo(pkgname,package["version"],"npm",PackageInfoType.NPM)] = [enclosing_dir + "/package_lock.json",enclosing_dir + "/package.json"] 76 | 77 | return packages 78 | 79 | def parse_library_packages(directory,paths,package_jsons)-> Dict[PackageInfo,List[str]]: 80 | packageMap = {} 81 | for file in package_jsons: 82 | pmap = parse_package_json(paths,os.path.dirname(file),os.path.join(directory,file)) 83 | packageMap.update(pmap) 84 | return packageMap 85 | 86 | 87 | def get_package_json(paths: List[str],directory: str): 88 | total_packages = {} 89 | 90 | package_json_node_modules = [path for path in paths if path.endswith("package.json") or path.endswith("package-lock.json")] 91 | package_lock = sorted([path for path in package_json_node_modules if "node_modules" not in path ],key=len) 92 | 93 | package_json = sorted([path for path in paths if path.endswith("package.json") and "node_modules" not in path ],key=len) 94 | 95 | if len(package_json_node_modules) > 200: # Number can be changes 96 | logger.warning(f"Discovered {len(package_json_node_modules)} package modules. 
Analyzing all of these files will take time") 97 | 98 | total_packages = parse_library_packages(directory,paths,package_json_node_modules) 99 | 100 | if len(package_lock) == 0 and len(package_json) == 0: 101 | if len(package_json_node_modules) == 0: 102 | return {} 103 | else: 104 | if len(total_packages.keys()): 105 | logger.info(f"JS packages: {len(total_packages.keys())}") 106 | return total_packages 107 | else: 108 | biggest = max(package_json,package_lock,key=len) 109 | for item in biggest: 110 | basepath = os.path.dirname(item) 111 | if basepath + "/package.json" in package_json and basepath + "/package-lock.json" in package_lock: 112 | total_packages.update(parse_package_lock(paths,basepath,os.path.join(directory,basepath,"package-lock.json"))) 113 | 114 | elif basepath + "/package.json" in package_json and basepath + "/package-lock.json" not in package_lock: 115 | pmap = parse_package_json(paths,basepath,os.path.join(directory,basepath,"package.json")) 116 | total_packages.update(pmap) 117 | 118 | else: 119 | continue 120 | #files.update(package_json) 121 | #files.update(package_lock) 122 | 123 | 124 | if len(total_packages.keys()): 125 | logger.info(f"JS packages: {len(total_packages.keys())}") 126 | return total_packages -------------------------------------------------------------------------------- /orca/lib/path.py: -------------------------------------------------------------------------------- 1 | import glob 2 | from typing import Set 3 | 4 | def remove_folders(paths): 5 | dir_set = set() 6 | result = [] 7 | 8 | for path in paths: 9 | parts = path.split("/") 10 | for i in range(1, len(parts)): 11 | dir_set.add("/".join(parts[:i])) 12 | 13 | for path in paths: 14 | if path not in dir_set and len(path) > 2: 15 | result.append(path) 16 | 17 | return result 18 | 19 | def get_filepaths(directory:str) -> Set[str]: 20 | paths = filter(lambda path: len(path) > 2 21 | and "etc/ssl/certs/" not in path 22 | and "usr/share/zoneinfo" not in path 23 | and "etc/nginx/" not in path, 24 | glob.glob(directory + "/**", recursive=True,include_hidden=True)) 25 | mapped_paths = map(lambda path: path.replace(directory + "/",""),paths) 26 | return set(mapped_paths)#set(remove_folders(paths)) -------------------------------------------------------------------------------- /orca/lib/path_checkers.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import os 4 | import re 5 | from typing import List 6 | from .types import PackageInfo 7 | 8 | python_dist_regex = re.compile(r'.*python(\d\.\d+)\/site-packages\/(([a-zA-Z0-9_\-]+)\/)?([a-zA-Z0-9]+)-(\d+\.\d+\.?\d*)\.dist-info') 9 | def check_python_from_path_once(filename: str,directory: str): 10 | result = re.match(python_dist_regex,filename) 11 | files = [filename] 12 | if result: 13 | pkg = [PackageInfo("python", result.group(1),None)] 14 | if result.group(3) is not None: 15 | package = f"{result.group(3)}-{result.group(4)}" 16 | version = result.group(5) 17 | pkg.append(PackageInfo(package,version,None)) 18 | else: 19 | package = result.group(4) 20 | version = result.group(5) 21 | pkg.append(PackageInfo(package,version,None)) 22 | if filename.endswith("RECORD"): 23 | record = open(os.path.join(directory,filename)).readlines() 24 | basepath = "/".join(filename.split("/")[:-1]) 25 | files = [basepath + "/" + line.split(",")[0] for line in record] 26 | files.append(filename) 27 | 28 | return pkg,files 29 | return None,files 30 | 31 | 32 | def check_python_from_path(paths: List[str],directory: str): 33 | files 
= set() 34 | cpes = [] 35 | for path in [p for p in paths if ".dist-info" in p]: 36 | res,fn = check_python_from_path_once(path,directory) 37 | if res: 38 | cpes.extend(res) 39 | files.update(fn) 40 | return cpes,files -------------------------------------------------------------------------------- /orca/lib/perl.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import List 3 | import re 4 | import os 5 | 6 | from . import logger 7 | from .types import PackageInfo, PackageInfoType 8 | 9 | package_regex = r'package\s+([^\s;]+)' 10 | # Regex for extracting the version 11 | version_regex = r'\$VERSION\s*=\s*\'([^\']+)\'' 12 | 13 | 14 | 15 | 16 | def parse_module(filepath): 17 | try: 18 | content = open(filepath).read() 19 | except Exception as _: 20 | return "","" 21 | # Extract package name 22 | package_match = re.search(package_regex, content) 23 | if package_match: 24 | package_name = package_match.group(1) 25 | version_match = re.search(version_regex, content) 26 | if version_match: 27 | version = version_match.group(1) 28 | return package_name,version 29 | return "","" 30 | 31 | def get_perl(paths: List[str],directory: str): 32 | packages = {} 33 | perl_modules = [path for path in paths if path.endswith(".pm") and "perl" in path] 34 | for module in perl_modules: 35 | package,version = parse_module(os.path.join(directory,module)) 36 | if len(package) > 0 and len(package.split("::")) < 3: 37 | packages[PackageInfo(package,version,None,PackageInfoType.PERL)] = [module] 38 | if len(packages): 39 | logger.logger.info(f"Perl: {len(packages)}") 40 | return packages 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /orca/lib/pkgconfig.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import Dict, List 3 | import pykg_config 4 | import pykg_config.pcfile 5 | 6 | from . import logger 7 | from .types import PackageInfo 8 | 9 | def get_pkgconfig(paths: List[str],directory: str) -> Dict[PackageInfo,List[str]]: 10 | pkgs = filter(lambda path: "pkgconfig" in path and path.endswith(".pc"), paths) 11 | pkgmap = {} 12 | for pkg in pkgs: 13 | name = pkg.split("/")[-1] 14 | if name not in pkgmap: 15 | pkgmap[name] = pkg 16 | pkg_dir = {} 17 | for pkg in pkgmap.values(): 18 | directories = [] 19 | pc_file_path = directory + "/" + pkg 20 | vars = {} 21 | props = {} 22 | try: 23 | _, vars, props = pykg_config.pcfile.read_pc_file(pc_file_path,{}) 24 | except Exception as _: 25 | logger.logger.warning(f"Could not parse pkgconfig file {pc_file_path}") 26 | continue 27 | version = props.get("version") 28 | if "." 
not in version: 29 | version = vars.get("abiver") 30 | 31 | package = PackageInfo(props.get("name"),version,None,None) 32 | 33 | directories.append(pkg) 34 | if vars.get("exec_prefix") is not None: 35 | directories.append(vars.get("exec_prefix")[1:]) 36 | if props.get("libdir") is not None: 37 | directories.append(props.get("libdir")[1:]) 38 | 39 | if package in pkg_dir: 40 | pkg_dir[package] = [*pkg_dir[package],*directories] 41 | else: 42 | pkg_dir[package] = directories 43 | 44 | package_files = {} 45 | for package,dirs in pkg_dir.items(): 46 | for directory in list(set(dirs)): 47 | files_found = [] 48 | for path in paths: 49 | if directory in path: 50 | files_found.append(path) 51 | package_files[package] = files_found 52 | return package_files 53 | -------------------------------------------------------------------------------- /orca/lib/python.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | from .types import PackageInfo, PackageInfoType 4 | from .logger import logger 5 | from email.parser import Parser 6 | from packaging.requirements import Requirement 7 | 8 | import re 9 | python_dist_regex = re.compile( 10 | r'.*python(\d\.\d+)\/(?:site|dist)-packages\/(([a-zA-Z0-9_\-]+)\/)?([a-zA-Z0-9]+)-(\d+\.\d+\.?\d*)\.dist-info' 11 | ) 12 | 13 | def check_python_from_path_once(paths,filename: str,directory: str): 14 | filenamenopath = [split for split in filename.split("/") if "-info" in split] 15 | if len(filenamenopath) == 0: 16 | return {} 17 | filenamenopath = filenamenopath[0] 18 | basename = os.path.dirname(filename) 19 | files = list(filter(lambda x: basename in x,paths)) 20 | if filenamenopath.endswith(".dist-info") or filenamenopath.endswith(".egg-info"): 21 | file = filenamenopath.replace(".dist-info","").replace(".egg-info","") 22 | splits = file.split("-") 23 | package = "-".join(splits[:-1]).replace(".wh.","") 24 | version = splits[-1].replace(".dist","") 25 | if package is None or package== "": 26 | return {} 27 | pkg = PackageInfo(package,version,None,PackageInfoType.PYPI) 28 | if filename.endswith("RECORD"): 29 | record = open(os.path.join(directory,filename)).readlines() 30 | basepath = "/".join(filename.split("/")[:-2]) 31 | files.extend([basepath + "/" + line.split(",")[0] for line in record]) 32 | 33 | return {pkg: files} 34 | return {} 35 | 36 | 37 | def check_python_from_path(paths: List[str],directory: str): 38 | packages = {} 39 | all_dist_info_records = [p for p in paths if ".dist-info" in p or "egg" in p] 40 | 41 | for path in all_dist_info_records: 42 | for k,v in check_python_from_path_once(paths,path,directory).items(): 43 | if k in packages: 44 | packages[k] = list(set([*packages[k],*v])) 45 | else: 46 | packages[k] = v 47 | return packages 48 | 49 | def extract_egg_dependencies(depfile): 50 | packages = [] 51 | pkg_info_content = open(depfile, 'r').read() 52 | 53 | pkg_info = Parser().parsestr(pkg_info_content) 54 | 55 | # Access general metadata fields 56 | package_name = pkg_info.get('Name') 57 | package_version = pkg_info.get('Version').replace(".dist","") 58 | author = pkg_info.get('Author') 59 | packages.append(PackageInfo(package_name,package_version,author,PackageInfoType.PYPI)) 60 | requires_dist = pkg_info.get_all('Requires-Dist') 61 | if requires_dist: 62 | for requirement in requires_dist: 63 | req = Requirement(requirement) 64 | name = req.name.replace(".wh.","") 65 | version = 
req.specifier.__str__().replace("!","").replace(">","").replace("<","").replace("=","").split(",")[0].replace(".dist","") 66 | if req.marker and "runtime" not in req.marker.__str__(): 67 | continue 68 | packages.append(PackageInfo(name,version,None,PackageInfoType.PYPI)) 69 | return packages 70 | 71 | def get_egg_files(file:str,sources: str): 72 | basepath = "/".join(file.split("/")[:-2]) 73 | if not os.path.exists(sources): 74 | return [] 75 | lines = open(sources).readlines() 76 | return [basepath + "/"+line.replace("\n","") for line in lines] 77 | 78 | def get_record_files(file:str,sources: str): 79 | basepath = "/".join(file.split("/")[:-2]) 80 | lines = open(sources).readlines() 81 | return [basepath + "/"+line.replace("\n","").split(",")[0] for line in lines] 82 | 83 | def parse_egg_info(paths,file,dirpath: str): 84 | packagesMap = {} 85 | packages = extract_egg_dependencies(os.path.join(dirpath,"PKG-INFO")) 86 | basename = os.path.dirname(file) 87 | for package in packages: 88 | packagesMap[package] = [*get_egg_files(file,dirpath + "SOURCES.txt"),*list(filter(lambda x: basename in x, paths))] 89 | return packagesMap 90 | 91 | def parse_metadata(paths,file,dirpath: str): 92 | packagesMap = {} 93 | packages = extract_egg_dependencies(dirpath + "METADATA") 94 | basename = os.path.dirname(file) 95 | for package in packages: 96 | packagesMap[package] = [*get_record_files(file,dirpath + "RECORD"),*list(filter(lambda x: basename in x, paths))] 97 | return packagesMap 98 | 99 | def extract_python_dependencies(paths,directory: str): 100 | interesting_paths = [p for p in paths if "dist-info" in p or "site-packages" in p or "dist-packages" in p] 101 | total_packages = {} 102 | total_packages.update(check_python_from_path(interesting_paths,directory)) 103 | 104 | for path in interesting_paths: 105 | if path.endswith(".egg-info") or path.endswith(".dist-info"): 106 | # pygpgme-0.3-py2.7.egg-info 107 | path.replace(".egg-info","").replace(".dist-info","") 108 | stuff = path.split("/")[-1] 109 | tokens = stuff.split("-") 110 | version = tokens[1].replace(".egg","").replace(".dist","") 111 | pkg = PackageInfo(tokens[0].replace(".wh.",""),version,None,PackageInfoType.PYPI) 112 | if pkg in total_packages: 113 | total_packages[pkg] = [*total_packages[pkg],path] 114 | else: 115 | total_packages[pkg] = [path] 116 | 117 | pkginfo = [path for path in interesting_paths if ".egg-info/PKG-INFO" in path] 118 | records = [path for path in interesting_paths if ".dist-info/RECORD" in path] 119 | 120 | 121 | for eggpkg in pkginfo: 122 | pakagesegg = parse_egg_info(interesting_paths,eggpkg,os.path.join(directory,eggpkg).replace("PKG-INFO","")) 123 | for k,v in pakagesegg.items(): 124 | if k in total_packages: 125 | total_packages[k].extend(v) 126 | else: 127 | total_packages[k] = v 128 | total_packages.update(pakagesegg) 129 | for record in records: 130 | pakagesegg = parse_metadata(interesting_paths,record,os.path.join(directory,record).replace("RECORD","")) 131 | for k,v in pakagesegg.items(): 132 | if k in total_packages: 133 | total_packages[k].extend(v) 134 | else: 135 | total_packages[k] = v 136 | if len(total_packages): 137 | logger.info(f"Python: {len(total_packages)}") 138 | return total_packages 139 | 140 | -------------------------------------------------------------------------------- /orca/lib/rpm_packages.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | import os 4 | import re 5 | import subprocess 6 | from typing import Dict, List 7 | 8 | 
from . import logger 9 | from .types import PackageInfo, PackageInfoType 10 | 11 | installed_bins = {"coreutils": 12 | ["arch","base64","basename","cat","chcon","chgrp","chmod","chown","chroot","cksum","comm","cp","csplit","cut","date","dd","df","dir","dircolors","dirname","du","echo","env","expand","expr","factor","false","flock","fmt","fold","groups","head","hostid","id","install","join","link","ln","logname","ls","md5sum","mkdir","mkfifo","mknod","mktemp","mv","nice","nl","nohup","nproc","numfmt","od","paste","pathchk","pinky","pr","printenv","printf","ptx","pwd","readlink","realpath","rm","rmdir","runcon","sha1sum","shasum","sha256sum","sha384sum","sha224sum","sha512sum","seq","shred","sleep","sort","split","stat","stty","sum","sync","tac","tail","tee","test","timeout","touch","tr","true","truncate","tsort","tty","uname","unexpand","uniq","unlink","users","vdir","wc","who","whoami","yes"], 13 | 14 | "findutils": ["find","xargs"], 15 | 16 | "procps": ["ee","kill","pkill","pgrep","pmap","ps","pwdx","skill","slabtop","snice","sysctl","tload","top","uptime","vmstat","w","watch"], 17 | "bsdutils": ["logger", "renice", "script", "scriptlive", "scriptreplay","wall"], 18 | "debianutils": ["add-shell", "installkernel", "ischroot", "remove-shell", "run-parts", "savelog","update-shells", "which"], 19 | "libc-bin": ["getconf","getent","iconv","ldd","lddconfig","locale","localedef","tzselect","zdump","zic"] 20 | } 21 | 22 | additional_files = [".preinst",".prerm",".postrm",".postinst",".list",".md5sums",".shlibs",".symbols",".triggers",".conffiles",".templates",".config"] 23 | 24 | def get_author(author): 25 | if "Red" in author: 26 | return "redhat" 27 | elif "Amazon" in author: 28 | return "amazonlinux" 29 | elif "suse" in author.lower(): 30 | return "suse" 31 | else: 32 | return author.lower() 33 | 34 | 35 | def read_rpm_db(directory,path)->Dict[PackageInfo,List[str]]: 36 | packages_dict = {} 37 | try: 38 | # Run the rpm command with --dbpath to list installed packages from the specified database 39 | result = subprocess.run(['rpm_checker', '--dbpath', os.path.join(directory,path),], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) 40 | 41 | # Check for errors 42 | if result.returncode != 0: 43 | print(f"Error reading RPM database: {result.stderr} - {path}") 44 | return 45 | 46 | # Print the list of installed packages 47 | packages_raw = result.stdout.splitlines()[0] 48 | json_data = json.loads(packages_raw) 49 | 50 | for item in json_data: 51 | author = get_author(item['author']) 52 | 53 | 54 | 55 | package = PackageInfo(item["package"],item["version"],author,PackageInfoType.RPM) 56 | packages_dict[package] = [*item["files"],path] 57 | if author == "amazonlinux": 58 | pattern = re.compile(r"^([a-zA-Z0-9\-_]+)-(\d+\.\d+(?:\.\d+)?)-") 59 | # Process each package 60 | match = pattern.match(item['rpm']) 61 | if match: 62 | name, version = match.groups() 63 | package = PackageInfo(name,version,author,PackageInfoType.RPM) 64 | if name.startswith("python-") or name.startswith("python3-"): 65 | split= version.split("-") 66 | if len(split) <=1: 67 | continue 68 | pythonp = split[1] 69 | ppkg = PackageInfo(pythonp,version,None,PackageInfoType.PYPI) 70 | packages_dict[ppkg] = [*item["files"],path] 71 | packages_dict[package] = [*item["files"],path] 72 | 73 | return packages_dict 74 | 75 | except Exception as e: 76 | print(f"An error occurred: {e.with_traceback()}") 77 | 78 | 79 | def get_rpm(paths: List[str],directory: str)-> Dict[PackageInfo,List[str]]: 80 | additional_files = [file for file in 
paths if "var/lib/yum" in file or "var/cache/yum/" in file or "etc/yum.repos.d/" in file or "var/log/yum" in file] 81 | total_packages = {} 82 | for path in paths: 83 | if "rpm/Packages" in path or path.endswith( "rpmdb.sqlite"): 84 | packages = read_rpm_db(directory,path) 85 | if packages and len(packages.keys()): 86 | logger.logger.info(f"RPMs: {len(packages.keys())}") 87 | if len(additional_files): 88 | for package in packages.keys(): 89 | packages[package].extend(additional_files) 90 | total_packages.update(packages) 91 | 92 | 93 | 94 | 95 | return total_packages -------------------------------------------------------------------------------- /orca/lib/spdx.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | from datetime import datetime 3 | from typing import Dict, List 4 | from spdx_tools.spdx.model import (Document,CreationInfo,Package,SpdxNone,Actor,ActorType,ExternalPackageRef,ExternalPackageRefCategory,PackagePurpose,Relationship,RelationshipType,File,Checksum,ChecksumAlgorithm) 5 | from spdx_tools.spdx.writer.write_anything import write_file 6 | import base64 7 | from.types import PackageInfo, PackageInfoType, VulnerabilityReport 8 | from packageurl import PackageURL 9 | 10 | 11 | 12 | 13 | def getpurl(packageInfo: PackageInfo) -> str: 14 | if packageInfo.type == "debian": 15 | return f"pkg:deb/debian/{packageInfo.name.lower()}@{packageInfo.version}" 16 | elif packageInfo.type == "pypi": 17 | return f"pkg:pypi/{packageInfo.name.lower()}@{packageInfo.version}" 18 | else: 19 | return f"pkg:generic/{packageInfo.name.lower()}@{packageInfo.version}" 20 | 21 | def create_anchore_purl(osinfo,packageInfo: PackageInfo): 22 | 23 | if packageInfo.type is None: 24 | purl = PackageURL(type="generic",name=packageInfo.name.lower(),version=packageInfo.version) 25 | elif packageInfo.type == PackageInfoType.DEBIAN: 26 | qualifiers = { 27 | "arch": packageInfo.arch, 28 | "distro": "debian-"+osinfo["version"], 29 | } 30 | if packageInfo.epoch is not None: 31 | qualifiers["epoch"] = packageInfo.epoch 32 | 33 | if "name" in osinfo and osinfo["name"].lower().rstrip() == "ubuntu": 34 | purl = PackageURL(type="deb",namespace="ubuntu",name=packageInfo.name.lower(),version=packageInfo.version,qualifiers=qualifiers) 35 | else: 36 | purl = PackageURL(type="deb",namespace="debian",name=packageInfo.name.lower(),version=packageInfo.version,qualifiers=qualifiers) 37 | 38 | elif packageInfo.type == PackageInfoType.RPM: 39 | purl = PackageURL(type="rpm",name=packageInfo.name.lower(),version=packageInfo.version,namespace=packageInfo.author) 40 | elif packageInfo.type == PackageInfoType.APK: 41 | purl = PackageURL(type="apk",name=packageInfo.name.lower(),version=packageInfo.version,namespace="alpine") 42 | elif packageInfo.type == PackageInfoType.NPM: 43 | purl = PackageURL(type="npm",name=packageInfo.name.lower(),version=packageInfo.version) 44 | elif packageInfo.type == PackageInfoType.PYPI: 45 | purl = PackageURL(type="pypi",name=packageInfo.name.lower(),version=packageInfo.version) 46 | elif packageInfo.type == PackageInfoType.PERL: 47 | purl = PackageURL(type="perl",name=packageInfo.name.lower(),version=packageInfo.version) 48 | elif packageInfo.type == PackageInfoType.MAVEN: 49 | purl = PackageURL(type="maven",name=packageInfo.name.lower(),version=packageInfo.version,namespace=packageInfo.author) 50 | elif packageInfo.type == PackageInfoType.GOLANG: # This should probably be edited 51 | path = packageInfo.name.lower() 52 | path_split = path.split("/") 53 
| name = path 54 | other = None 55 | if len(path_split) > 3: 56 | name = "/".join(path_split[:3]) 57 | other = "/".join(path_split[3:]) 58 | purl = PackageURL(type="golang",name=name,version=packageInfo.version,namespace=packageInfo.author,subpath=other) 59 | elif packageInfo.type == PackageInfoType.COMPOSER: 60 | purl = PackageURL(type="composer",name=packageInfo.name.lower(),version=packageInfo.version,namespace=packageInfo.author) 61 | elif packageInfo.type == PackageInfoType.GEM: 62 | purl = PackageURL(type="gem",name=packageInfo.name.lower(),version=packageInfo.version) 63 | elif packageInfo.type == PackageInfoType.GITHUB: 64 | purl = PackageURL(type="github",name=packageInfo.name.lower(),version=packageInfo.version,namespace=packageInfo.author) 65 | elif packageInfo.type == PackageInfoType.BITNAMI: 66 | purl = PackageURL(type="bitnami",name=packageInfo.name.lower(),version=packageInfo.version,qualifiers={"arch":packageInfo.arch}) 67 | elif packageInfo.type == PackageInfoType.RUST: 68 | purl = PackageURL(type="cargo",name=packageInfo.name.lower(),version=packageInfo.version) 69 | elif packageInfo.type == PackageInfoType.GRADLE: 70 | purl = PackageURL(type="gradle",name=packageInfo.name.lower(),version=packageInfo.version) 71 | else: 72 | purl = PackageURL(type="generic",name=packageInfo.name.lower(),version=packageInfo.version) 73 | return purl 74 | 75 | 76 | def map_package(osinfo,index: int,packageInfo: PackageInfo) -> Package: 77 | def getid(): 78 | return f"SPDXRef-PACKAGE-{base64.b64encode(bytes(f"{packageInfo.name} {packageInfo.version} {packageInfo.author} {packageInfo.arch}",'utf-8')).decode("utf-8",errors="ignore").replace("=","").replace("+","")}" 79 | external_refs = [] 80 | anchore_purl = create_anchore_purl(osinfo,packageInfo) 81 | external_refs = [ 82 | #ExternalPackageRef(ExternalPackageRefCategory.PACKAGE_MANAGER,reference_type="purl",locator=generic_purl+f"?arch=allu0026distro=debian-{osinfo['version']}"), 83 | ExternalPackageRef(ExternalPackageRefCategory.PACKAGE_MANAGER,reference_type="purl",locator=anchore_purl.to_string()) 84 | #ExternalPackageRef(ExternalPackageRefCategory.PACKAGE_MANAGER,reference_type="purl",locator=generic_purl+f"?os_distro={osinfo['codename']}&os_name=debian&os_version={osinfo['version']}") 85 | ] 86 | 87 | package: Package = Package( 88 | name=packageInfo.name, 89 | version=packageInfo.version, 90 | download_location=SpdxNone(), 91 | license_concluded=SpdxNone(), 92 | license_declared=SpdxNone(), 93 | primary_package_purpose=PackagePurpose.LIBRARY, 94 | spdx_id=getid(), 95 | copyright_text=SpdxNone(), 96 | external_references=external_refs) 97 | return package 98 | 99 | 100 | def generateSPDXFromCPE(containerImage: str,inputPackages: List[PackageInfo],output_filename: str): 101 | 102 | containerPackage: Package = Package(name=containerImage, download_location=SpdxNone(),license_concluded=SpdxNone(),license_declared=SpdxNone(),spdx_id="SPDXRef-ContainerImage",copyright_text=SpdxNone(),primary_package_purpose=PackagePurpose.CONTAINER) 103 | 104 | 105 | 106 | creation_info = CreationInfo(spdx_version="SPDX-2.3",spdx_id="SPDXRef-DOCUMENT",name="CPE Finder",created=datetime.now(),creators=[Actor(ActorType.ORGANIZATION,"CNAM"),Actor(ActorType.TOOL,"CPE finder")],document_namespace="http://example.com") 107 | 108 | packages = [map_package(idx,p) for idx,p in enumerate(inputPackages)] 109 | 110 | relationships = [] 111 | relationships.append(Relationship("SPDXRef-DOCUMENT",RelationshipType.DESCRIBES,"SPDXRef-ContainerImage")) 112 | 113 | 
packages.append(containerPackage) 114 | doc = Document(creation_info,packages=packages,relationships=relationships) 115 | write_file(doc, output_filename,validate=True) 116 | 117 | 118 | def generateFileMappingReport(reportMap: Dict[str,VulnerabilityReport])-> Dict[str,File]: 119 | filemap: Dict[str,File] = dict() 120 | for layer,report in reportMap.items(): 121 | layer_id = layer.split("/")[-1] 122 | 123 | for file in report.initial_files: 124 | fid = file.replace("/","-").replace("_","").replace(" ","") 125 | sid = f"SPDXRef-File-{layer_id}-{fid}" 126 | if sid in filemap: 127 | filemap[sid].comment += f"\n Layer: {layer_id}" 128 | else: 129 | checksum = Checksum(ChecksumAlgorithm.SHA1,hashlib.sha1("testme".encode()).hexdigest()) 130 | 131 | filemap[sid] = File(name=file,spdx_id=sid,comment=f"Layer: {layer}",checksums=[checksum]) 132 | return filemap 133 | 134 | 135 | def getOsInfo(reportMap: Dict[str,VulnerabilityReport]) -> Dict[str,str]: 136 | osinfo = None 137 | for layer,report in reportMap.items(): 138 | if report.os is not None: 139 | if osinfo is not None: 140 | print(f"Received multiple entries of os. The latest one is: {report.os} \nOld one was: {osinfo} \n Merging them") 141 | for k,v in report.os.items(): 142 | osinfo[k] = v 143 | else: 144 | osinfo = report.os 145 | return osinfo 146 | 147 | def getTotalCPE(reportMap: Dict[str,VulnerabilityReport]) -> List[PackageInfo]: 148 | total_cpe = set() 149 | for layer,report in reportMap.items(): 150 | if len(report.packages) == 1 and report.packages[0] == (None,None): 151 | continue 152 | total_cpe.update(report.packages) 153 | return list(total_cpe) 154 | 155 | def generateRelationships(reportMap: Dict[str,VulnerabilityReport],filemap: Dict[str,File],packagesmap: Dict[str,Package],osinfo: Dict[str,str]) -> List[Relationship]: 156 | relmap = {} 157 | 158 | for layer,report in reportMap.items(): 159 | layer_id = layer.split("/")[-1] 160 | 161 | for package,files in report.package_files.items(): 162 | for file in files: 163 | fid = file.replace("/","-").replace("_","").replace(" ","") 164 | sid = f"SPDXRef-File-{layer_id}-{fid}" 165 | filemapid = filemap.get(sid).spdx_id if sid in filemap else None 166 | if filemapid is None: 167 | # TODO: this is the case where a file is recorded by ORCA but does not exist in the layer (e.g., updates to a dpkg/status). 
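# Skipping here only drops the package-to-file CONTAINS relationship for this layer;
# the package itself and its other files are still written to the SPDX document.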
168 | continue 169 | customid = f"{packagesmap.get(package).spdx_id}{filemapid}" 170 | if customid in relmap: 171 | continue 172 | relmap[customid] = Relationship(packagesmap.get(package).spdx_id,RelationshipType.CONTAINS,filemap.get(sid).spdx_id) 173 | relmap["root"] = Relationship("SPDXRef-DOCUMENT",RelationshipType.DESCRIBES,"SPDXRef-ContainerImage") 174 | return relmap 175 | 176 | 177 | 178 | def generateSPDXFromReportMap(containerImage: str,reportMap: Dict[str,VulnerabilityReport],output_filename: str,complete_report: bool): 179 | 180 | osinfo = getOsInfo(reportMap) 181 | if osinfo is not None and "version" not in osinfo: 182 | osinfo = None 183 | 184 | total_cpe = getTotalCPE(reportMap) 185 | filemap = generateFileMappingReport(reportMap) 186 | packagesmap = {p:map_package(osinfo,idx,p) for idx,p in enumerate(list(total_cpe))} 187 | 188 | containerPackage: Package = Package(name=containerImage, download_location=SpdxNone(),license_concluded=SpdxNone(),license_declared=SpdxNone(),spdx_id="SPDXRef-ContainerImage",copyright_text=SpdxNone(),primary_package_purpose=PackagePurpose.CONTAINER) 189 | creation_info = CreationInfo(spdx_version="SPDX-2.3",spdx_id="SPDXRef-DOCUMENT",name="CPE Finder",created=datetime.now(),creators=[Actor(ActorType.ORGANIZATION,"CNAM"),Actor(ActorType.TOOL,"CPE finder")],document_namespace="http://example.com") 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | packages = list(packagesmap.values()) 198 | 199 | packages.append(containerPackage) 200 | if osinfo is not None: 201 | osPackage: Package = Package(name=osinfo["name"].split(" ")[0].lower(), 202 | version=osinfo["version"], 203 | download_location=SpdxNone(),license_concluded=SpdxNone(),license_declared=SpdxNone(),spdx_id="SPDXRef-OperatingSystem",copyright_text=SpdxNone(),primary_package_purpose=PackagePurpose.OPERATING_SYSTEM) 204 | packages.append(osPackage) 205 | 206 | files = list(filemap.values()) 207 | 208 | doc = None 209 | if complete_report: 210 | relmap = generateRelationships(reportMap,filemap,packagesmap,osinfo) 211 | relationships = list(relmap.values()) 212 | doc = Document(creation_info,packages=packages,relationships=relationships,files=files) 213 | else: 214 | doc = Document(creation_info,packages=packages,files=files) 215 | 216 | write_file(doc, output_filename,validate=False) 217 | #write_file(doc, output_filename,validate=True) 218 | -------------------------------------------------------------------------------- /orca/lib/test_apk.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from unittest.mock import patch 4 | from orca.lib.apk import get_apk, read_apk_db, read_world_file 5 | from orca.lib.types import PackageInfo, PackageInfoType 6 | 7 | 8 | class TestApk: 9 | @patch("orca.lib.apk.open", create=True) 10 | def test_read_apk_db(self, mock_open): 11 | # Mock the file content 12 | file_content = """P:test_package 13 | V:1.2.3 14 | F:lib 15 | R:test.so 16 | 17 | P:another_package 18 | V:4.5.6 19 | F:usr/bin 20 | R:executable 21 | """ 22 | mock_open.return_value.read.return_value = file_content 23 | mock_open.return_value.__enter__.return_value = mock_open.return_value 24 | 25 | # Call the function 26 | db_path = "fake_path" 27 | path = "actual_path" 28 | result = read_apk_db(db_path, path) 29 | 30 | # Assert the result 31 | expected_package1 = PackageInfo( 32 | "test_package", "1.2.3", None, PackageInfoType.APK 33 | ) 34 | expected_package2 = PackageInfo( 35 | "another_package", "4.5.6", None, PackageInfoType.APK 36 
| ) 37 | assert expected_package1 in result 38 | assert expected_package2 in result 39 | assert result[expected_package1] == {"lib/test.so", "actual_path"} 40 | assert result[expected_package2] == {"usr/bin/executable", "actual_path"} 41 | 42 | @patch("orca.lib.apk.open", create=True) 43 | def test_read_world_file(self, mock_open): 44 | # Mock the file content 45 | file_content = "package1\npackage2\n" 46 | mock_open.return_value.readlines.return_value = file_content.splitlines() 47 | mock_open.return_value.__enter__.return_value = mock_open.return_value 48 | 49 | # Call the function 50 | db_path = "fake_path" 51 | path = "actual_path" 52 | result = read_world_file(db_path, path) 53 | 54 | # Assert the result 55 | expected_package1 = PackageInfo("package1", None, None, PackageInfoType.APK) 56 | expected_package2 = PackageInfo("package2", None, None, PackageInfoType.APK) 57 | assert expected_package1 in result 58 | assert expected_package2 in result 59 | assert result[expected_package1] == {"actual_path"} 60 | assert result[expected_package2] == {"actual_path"} 61 | 62 | @patch("orca.lib.apk.read_apk_db") 63 | @patch("orca.lib.apk.read_world_file") 64 | def test_get_apk(self, mock_read_world_file, mock_read_apk_db): 65 | # Mock the paths 66 | paths = ["path/to/apk/db/installed", "path/to/apk/world"] 67 | directory = "test_dir" 68 | 69 | # Mock the return values of read_apk_db and read_world_file 70 | mock_read_apk_db.return_value = { 71 | PackageInfo("package1", "1.0", None, PackageInfoType.APK): {"file1"} 72 | } 73 | mock_read_world_file.return_value = { 74 | PackageInfo("package2", None, None, PackageInfoType.APK): {"file2"} 75 | } 76 | 77 | # Call the function 78 | result = get_apk(paths, directory) 79 | 80 | # Assert the calls and the result 81 | mock_read_apk_db.assert_called_once_with( 82 | os.path.join(directory, paths[0]), paths[0] 83 | ) 84 | mock_read_world_file.assert_called_once_with( 85 | os.path.join(directory, paths[1]), paths[1] 86 | ) 87 | expected_result = { 88 | PackageInfo("package1", "1.0", None, PackageInfoType.APK): {"file1"}, 89 | PackageInfo("package2", None, None, PackageInfoType.APK): {"file2"}, 90 | } 91 | assert result == expected_result 92 | 93 | def test_get_apk_no_apks(self): 94 | # Test when there are no apk files in the paths 95 | paths = ["path/to/some/other/file"] 96 | directory = "test_dir" 97 | result = get_apk(paths, directory) 98 | assert result == {} 99 | 100 | @patch("orca.lib.apk.logger") 101 | @patch("orca.lib.apk.read_apk_db") 102 | def test_get_apk_logging(self, mock_read_apk_db, mock_logger): 103 | # Mock the paths 104 | paths = ["path/to/apk/db/installed"] 105 | directory = "test_dir" 106 | 107 | # Mock the return values of read_apk_db 108 | mock_read_apk_db.return_value = { 109 | PackageInfo("package1", "1.0", None, PackageInfoType.APK): {"file1"} 110 | } 111 | 112 | # Call the function 113 | get_apk(paths, directory) 114 | 115 | # Assert that the logger was called 116 | mock_logger.logger.info.assert_called_with("APKs: 1") 117 | -------------------------------------------------------------------------------- /orca/lib/test_ascii_checkers.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from unittest.mock import patch 3 | import os 4 | from orca.lib import ascii_checkers 5 | from orca.lib.types import PackageInfo, PackageInfoType 6 | 7 | class TestAsciiCheckers: 8 | 9 | def test_parse_gemspec_empty(self): 10 | paths = [] 11 | directory = "" 12 | result = 
ascii_checkers.parse_gemspec(paths, directory) 13 | assert result == {} 14 | 15 | def test_parse_gemspec_no_gemspec(self): 16 | paths = ["test.txt"] 17 | directory = "" 18 | result = ascii_checkers.parse_gemspec(paths, directory) 19 | assert result == {} 20 | 21 | @patch("orca.lib.ascii_checkers.logger.logger") 22 | def test_parse_gemspec_file_not_found(self, mock_logger): 23 | paths = ["test.gemspec"] 24 | directory = "" 25 | result = ascii_checkers.parse_gemspec(paths, directory) 26 | mock_logger.error.assert_called() 27 | assert result == {} 28 | 29 | def test_parse_gemspec_ok(self): 30 | # Create a dummy gemspec file 31 | gemspec_content = """ 32 | Gem::Specification.new do |s| 33 | s.name = 'test_gem' 34 | s.version = '1.2.3' 35 | end 36 | """ 37 | with open("test.gemspec", "w") as f: 38 | f.write(gemspec_content) 39 | 40 | paths = ["test.gemspec"] 41 | directory = "" 42 | result = ascii_checkers.parse_gemspec(paths, directory) 43 | expected_package_info = PackageInfo("test_gem", "1.2.3", None, PackageInfoType.GEM) 44 | assert list(result.keys())[0] == expected_package_info 45 | assert result[expected_package_info] == ["test.gemspec"] 46 | 47 | # Clean up the dummy file 48 | os.remove("test.gemspec") 49 | 50 | def test_parse_gemspec_no_version(self): 51 | # Create a dummy gemspec file 52 | gemspec_content = """ 53 | Gem::Specification.new do |s| 54 | s.name = 'test_gem' 55 | end 56 | """ 57 | with open("test.gemspec", "w") as f: 58 | f.write(gemspec_content) 59 | 60 | paths = ["test.gemspec"] 61 | directory = "" 62 | result = ascii_checkers.parse_gemspec(paths, directory) 63 | assert result == {} 64 | 65 | # Clean up the dummy file 66 | os.remove("test.gemspec") 67 | 68 | def test_parse_gemspec_no_name(self): 69 | # Create a dummy gemspec file 70 | gemspec_content = """ 71 | Gem::Specification.new do |s| 72 | s.version = '1.2.3' 73 | end 74 | """ 75 | with open("test.gemspec", "w") as f: 76 | f.write(gemspec_content) 77 | 78 | paths = ["test.gemspec"] 79 | directory = "" 80 | result = ascii_checkers.parse_gemspec(paths, directory) 81 | assert result == {} 82 | 83 | # Clean up the dummy file 84 | os.remove("test.gemspec") 85 | 86 | def test_parse_gosum_empty(self): 87 | # Create a dummy go.sum file 88 | gosum_content = "" 89 | with open("go.sum", "w") as f: 90 | f.write(gosum_content) 91 | 92 | result = ascii_checkers.parse_gosum("go.sum") 93 | assert result == [] 94 | 95 | # Clean up the dummy file 96 | os.remove("go.sum") 97 | 98 | def test_parse_gosum_ok(self): 99 | # Create a dummy go.sum file 100 | gosum_content = """ 101 | github.com/test/module v1.2.3 h1:abcdefg 102 | """ 103 | with open("go.sum", "w") as f: 104 | f.write(gosum_content) 105 | 106 | result = ascii_checkers.parse_gosum("go.sum") 107 | assert result == ['cpe:2.3:a:test:module:1.2.3:*:*:*:*:*:*:*'] 108 | 109 | # Clean up the dummy file 110 | os.remove("go.sum") 111 | 112 | def test_parse_gosum_multiple(self): 113 | # Create a dummy go.sum file 114 | gosum_content = """ 115 | github.com/test/module v1.2.3 h1:abcdefg 116 | github.com/test/module v1.2.4 h1:abcdefg 117 | """ 118 | with open("go.sum", "w") as f: 119 | f.write(gosum_content) 120 | 121 | result = ascii_checkers.parse_gosum("go.sum") 122 | assert set(result) == {'cpe:2.3:a:test:module:1.2.3:*:*:*:*:*:*:*', 'cpe:2.3:a:test:module:1.2.4:*:*:*:*:*:*:*'} 123 | 124 | # Clean up the dummy file 125 | os.remove("go.sum") 126 | 127 | def test_parse_gosum_file_not_found(self): 128 | with pytest.raises(FileNotFoundError): 129 | 
ascii_checkers.parse_gosum("nonexistent_file.sum") -------------------------------------------------------------------------------- /orca/lib/test_bin_checkers.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from unittest.mock import patch 4 | from orca.lib import bin_checkers 5 | from orca.lib.types import PackageInfo 6 | 7 | def test_check_gcc(): 8 | strings = ["GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0"] 9 | expected = PackageInfo("gcc", "9.4.0", "gnu", None) 10 | assert bin_checkers.check_gcc(strings) == expected 11 | 12 | strings = ["Some other string", "GCC: (GNU) 7.5.0"] 13 | expected = PackageInfo("gcc", "7.5.0", "gnu", None) 14 | assert bin_checkers.check_gcc(strings) == expected 15 | 16 | strings = ["No match here"] 17 | assert bin_checkers.check_gcc(strings) is None 18 | 19 | def test_check_gcc2(): 20 | strings = ["gcc 4.8.5"] 21 | expected = PackageInfo("gcc", "4.8.5", "gnu", None) 22 | assert bin_checkers.check_gcc2(strings) == expected 23 | 24 | strings = ["Some other string", "gcc 5.4.0"] 25 | expected = PackageInfo("gcc", "5.4.0", "gnu", None) 26 | assert bin_checkers.check_gcc2(strings) == expected 27 | 28 | strings = ["No match here"] 29 | assert bin_checkers.check_gcc2(strings) is None 30 | 31 | def test_check_openssl(): 32 | strings = ["OpenSSL 1.1.1f 31 Mar 2020"] 33 | expected = PackageInfo("openssl", "1.1.1", "openssl", None) 34 | assert bin_checkers.check_openssl(strings) == expected 35 | 36 | strings = ["Some other string", "* OpenSSL 1.0.2g 1 Mar 2016"] 37 | expected = PackageInfo("openssl", "1.0.2", "openssl", None) 38 | assert bin_checkers.check_openssl(strings) == expected 39 | 40 | strings = ["No match here"] 41 | assert bin_checkers.check_openssl(strings) is None 42 | 43 | def test_check_postgres(): 44 | expected = PackageInfo("postgresql", "12.3.4", "postgresql", None) 45 | assert bin_checkers.check_postgres(["(PostgreSQL) 12.3.4"]) == expected 46 | 47 | expected = PackageInfo("postgresql", "9.6.17", "postgresql", None) 48 | assert bin_checkers.check_postgres(["Some other string", "(PostgreSQL) 9.6.17"]) == expected 49 | 50 | assert bin_checkers.check_postgres(["No match here"]) is None 51 | 52 | def test_check_zlib(): 53 | strings = ["inflate (zlib v1.2.11) 1.2.11"] 54 | expected = PackageInfo("zlib", "1.2.11", "zlib", None) 55 | assert bin_checkers.check_zlib(strings) == expected 56 | 57 | strings = ["Some other string", "inflate (zlib v1.2.8) 1.2.8"] 58 | expected = PackageInfo("zlib", "1.2.8", "zlib", None) 59 | assert bin_checkers.check_zlib(strings) == expected 60 | 61 | strings = ["No match here"] 62 | assert bin_checkers.check_zlib(strings) is None 63 | 64 | def test_check_self(): 65 | strings = ["mybinary v1.2.3"] 66 | expected = PackageInfo("mybinary", "v1.2.3", None, None) 67 | assert bin_checkers.check_self(strings, "mybinary") == expected 68 | 69 | strings = ["Some other string", "anotherbin 2.0.0"] 70 | expected = PackageInfo("anotherbin", "2.0.0", None, None) 71 | assert bin_checkers.check_self(strings, "anotherbin") == expected 72 | 73 | strings = ["No match here"] 74 | assert bin_checkers.check_self(strings, "testbin") is None 75 | 76 | # Test with binary name of length 1 77 | assert bin_checkers.check_self(strings, "a") is None 78 | 79 | @patch('orca.lib.bin_checkers.logger.logger.info') 80 | def test_check_self_regex_error(mock_info): 81 | strings = ["test v1.2.3"] 82 | # Force a regex error by using an invalid binary name 83 | result = 
bin_checkers.check_self(strings, "*invalid*") 84 | assert result == (None,None) 85 | mock_info.assert_called() 86 | 87 | def test_extract_strings(): 88 | # Create a dummy file for testing 89 | with open("test_file.txt", "wb") as f: 90 | f.write(b"This is a test\n") 91 | f.write(b"with some strings\n") 92 | f.write(b"and some non-ascii: \x80\x81\x82\n") # Include some non-ASCII bytes 93 | f.write(b"short\n") 94 | f.write(b"toolongstringgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg") 95 | 96 | # Test code for the "static_check_cpes" function 97 | # Test with an empty file 98 | with open("test_file.txt", "w" ) as f: 99 | f.write("") 100 | 101 | assert bin_checkers.static_check_cpes("test_file.txt") == [] 102 | 103 | # Clean up the dummy file 104 | os.remove("test_file.txt") 105 | 106 | # Test for check_binaries 107 | @pytest.mark.skip(reason="no way to test this functionality yet") 108 | def test_check_binaries(): 109 | # Create a dummy directory and files for testing 110 | os.makedirs("test_dir", exist_ok=True) 111 | with open("test_dir/file1.txt", "w") as f: 112 | f.write("gcc 1.2.3") 113 | with open("test_dir/file2.txt", "w") as f: 114 | f.write("zlib v1.2.8") 115 | 
116 | executables = ["file1.txt", "file2.txt"] 117 | # Call check_binaries 118 | results = bin_checkers.check_binaries("test_dir", executables) 119 | 120 | # Assert the expected results 121 | assert len(results) == 2 122 | assert "gcc" in results 123 | assert "zlib" in results 124 | 125 | # Clean up the dummy directory and files 126 | os.remove("test_dir/file1.txt") 127 | os.remove("test_dir/file2.txt") 128 | os.rmdir("test_dir") -------------------------------------------------------------------------------- /orca/lib/test_bin_checkers_cpe.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from orca.lib import bin_checkers_cpe as bcc 3 | import os 4 | 5 | 6 | 7 | def test_check_gcc(): 8 | strings = ["GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0"] 9 | cpe, string = bcc.check_gcc(strings) 10 | assert cpe == "cpe:2.3:a:gnu:gcc:9.4.0:*:*:*:*:*:*:*" 11 | assert string == "GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0" 12 | 13 | strings = ["Some other string", "GCC: (GNU) 7.5.0"] 14 | cpe, string = bcc.check_gcc(strings) 15 | assert cpe == "cpe:2.3:a:gnu:gcc:7.5.0:*:*:*:*:*:*:*" 16 | assert string == "GCC: (GNU) 7.5.0" 17 | 18 | strings = ["No match here"] 19 | cpe, string = bcc.check_gcc(strings) 20 | assert cpe is None 21 | assert string is None 22 | 23 | def test_check_gcc2(): 24 | strings = ["gcc 5.4.0"] 25 | cpe, string = bcc.check_gcc2(strings) 26 | assert cpe == "cpe:2.3:a:gnu:gcc:5.4.0:*:*:*:*:*:*:*" 27 | assert string == "gcc 5.4.0" 28 | 29 | strings = ["Another string", "gcc 4.9.3"] 30 | cpe, string = bcc.check_gcc2(strings) 31 | assert cpe == "cpe:2.3:a:gnu:gcc:4.9.3:*:*:*:*:*:*:*" 32 | assert string == "gcc 4.9.3" 33 | 34 | strings = ["No match"] 35 | cpe, string = bcc.check_gcc2(strings) 36 | assert cpe is None 37 | assert string is None 38 | 39 | def test_check_openssl(): 40 | strings = ["OpenSSL 1.1.1f 31 Mar 2020"] 41 | cpe, string = bcc.check_openssl(strings) 42 | assert cpe == "cpe:2.3:a:openssl:openssl:1.1.1:*:*:*:*:*:*:*" 43 | assert string == "OpenSSL 1.1.1f 31 Mar 2020" 44 | 45 | strings = ["Some text", "OpenSSL 1.0.2k-fips 26 Jan 2017"] 46 | cpe, string = bcc.check_openssl(strings) 47 | assert cpe == "cpe:2.3:a:openssl:openssl:1.0.2:*:*:*:*:*:*:*" 48 | assert string == "OpenSSL 1.0.2k-fips 26 Jan 2017" 49 | 50 | strings = ["No OpenSSL here"] 51 | cpe, string = bcc.check_openssl(strings) 52 | assert cpe is None 53 | assert string is None 54 | 55 | def test_check_postgres(): 56 | strings = ["(PostgreSQL) 12.3.2"] 57 | cpe, string = bcc.check_postgres(strings) 58 | assert cpe == "cpe:2.3:a:postgresql:postgresql:12.3.2:*:*:*:*:*:*:*" 59 | assert string == "(PostgreSQL) 12.3.2" 60 | 61 | strings = ["Other stuff", "(PostgreSQL) 9.6.17"] 62 | cpe, string = bcc.check_postgres(strings) 63 | assert cpe == "cpe:2.3:a:postgresql:postgresql:9.6.17:*:*:*:*:*:*:*" 64 | assert string == "(PostgreSQL) 9.6.17" 65 | 66 | strings = ["No PostgreSQL"] 67 | cpe, string = bcc.check_postgres(strings) 68 | assert cpe is None 69 | assert string is None 70 | 71 | def test_check_zlib(): 72 | strings = ["inflate (zlib) 1.2.11"] 73 | cpe, string = bcc.check_zlib(strings) 74 | assert cpe == "cpe:2.3:a:zlib:zlib:1.2.11:*:*:*:*:*:*:*" 75 | assert string == "inflate (zlib) 1.2.11" 76 | 77 | strings = ["Another string", "inflate (zlib) 1.2.8"] 78 | cpe, string = bcc.check_zlib(strings) 79 | assert cpe == "cpe:2.3:a:zlib:zlib:1.2.8:*:*:*:*:*:*:*" 80 | assert string == "inflate (zlib) 1.2.8" 81 | 82 | strings = ["No zlib here"] 83 | cpe, 
string = bcc.check_zlib(strings) 84 | assert cpe is None 85 | assert string is None 86 | 87 | def test_check_self(): 88 | strings = ["mybinary v1.0.0"] 89 | cpe, string = bcc.check_self(strings, "mybinary") 90 | assert cpe == "cpe:2.3:a:*:mybinary:v1.0.0:*:*:*:*:*:*:*" 91 | assert string == "mybinary v1.0.0" 92 | 93 | strings = ["Some other string", "anotherbin 2.5.1"] 94 | cpe, string = bcc.check_self(strings, "anotherbin") 95 | assert cpe == "cpe:2.3:a:*:anotherbin:2.5.1:*:*:*:*:*:*:*" 96 | assert string == "anotherbin 2.5.1" 97 | 98 | strings = ["No match for this binary"] 99 | cpe, string = bcc.check_self(strings, "nonexistent") 100 | assert cpe is None 101 | assert string is None 102 | 103 | def test_extract_strings(): 104 | # Create a dummy file for testing 105 | with open("test_file.txt", "wb") as f: 106 | f.write(b"This is a test file.\n") 107 | f.write(b"It contains some strings.\n") 108 | f.write(b"Short: abc\n") 109 | f.write(b"Longer: abcdefg\n") 110 | f.write(b"\x01\x02\x03BinaryData\x04\x05\x06\n") 111 | 112 | strings = bcc.extract_strings("test_file.txt", min_length=4) 113 | assert "This is a test file." in strings 114 | assert "abc" not in strings # Shorter than min_length 115 | 116 | # Clean up the dummy file 117 | os.remove("test_file.txt") 118 | 119 | def test_static_check_cpes(): 120 | # Create a dummy file with strings that match known CPE patterns 121 | with open("test_binary", "wb") as f: 122 | f.write(b"GCC: (GNU) 7.5.0\n") 123 | f.write(b"OpenSSL 1.1.1f 31 Mar 2020\n") 124 | f.write(b"test_binary v2.0.1\n") 125 | 126 | cpes = bcc.static_check_cpes("test_binary") 127 | assert "cpe:2.3:a:gnu:gcc:7.5.0:*:*:*:*:*:*:*" in cpes 128 | assert "cpe:2.3:a:openssl:openssl:1.1.1:*:*:*:*:*:*:*" in cpes 129 | assert "cpe:2.3:a:*:test_binary:v2.0.1:*:*:*:*:*:*:*" in cpes 130 | 131 | # Clean up the dummy file 132 | os.remove("test_binary") 133 | 134 | def test_static_check_cpes_empty(): 135 | # Create an empty dummy file 136 | with open("empty_binary", "wb") as f: 137 | pass 138 | 139 | cpes = bcc.static_check_cpes("empty_binary") 140 | assert cpes == [] 141 | 142 | # Clean up the dummy file 143 | os.remove("empty_binary") -------------------------------------------------------------------------------- /orca/lib/test_composer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from orca.lib import composer 4 | from orca.lib.types import PackageInfo, PackageInfoType 5 | 6 | def test_parse_composer_lock_empty(tmp_path): 7 | d = tmp_path / "sub" 8 | d.mkdir() 9 | p = d / "composer.lock" 10 | p.write_text(json.dumps({"packages": []})) 11 | 12 | result = composer.parse_composer_lock([], str(d), "composer.lock") 13 | assert result == {} 14 | 15 | def test_parse_composer_lock_basic(tmp_path): 16 | d = tmp_path / "sub" 17 | d.mkdir() 18 | p = d / "composer.lock" 19 | p.write_text(json.dumps({ 20 | "packages": [ 21 | {"name": "vendor/package1", "version": "1.0.0"}, 22 | {"name": "vendor2/package2", "version": "2.0.0"} 23 | ] 24 | })) 25 | 26 | result = composer.parse_composer_lock([], str(d), "composer.lock") 27 | 28 | assert len(result) == 2 29 | 30 | expected_package1 = PackageInfo("package1", "1.0.0", "vendor", PackageInfoType.COMPOSER) 31 | expected_package2 = PackageInfo("package2", "2.0.0", "vendor2", PackageInfoType.COMPOSER) 32 | 33 | assert expected_package1 in result 34 | assert expected_package2 in result 35 | 36 | assert result[expected_package1] == ["composer.lock"] 37 | assert result[expected_package2] == 
["composer.lock"] 38 | 39 | def test_parse_composer_empty(tmp_path): 40 | d = tmp_path / "sub" 41 | d.mkdir() 42 | p = d / "composer.json" 43 | p.write_text(json.dumps({})) 44 | 45 | result = composer.parse_composer([], str(d), "composer.json") 46 | assert result == {} 47 | 48 | def test_parse_composer_basic(tmp_path): 49 | d = tmp_path / "sub" 50 | d.mkdir() 51 | p = d / "composer.json" 52 | p.write_text(json.dumps({ 53 | "name": "vendor/package1", 54 | "version": "1.0.0" 55 | })) 56 | 57 | result = composer.parse_composer([], str(d), "composer.json") 58 | 59 | assert len(result) == 1 60 | 61 | expected_package = PackageInfo("package1", "1.0.0", "vendor", PackageInfoType.COMPOSER) 62 | 63 | assert expected_package in result 64 | assert result[expected_package] == ["composer.json"] 65 | 66 | def test_parse_composer_no_version(tmp_path): 67 | d = tmp_path / "sub" 68 | d.mkdir() 69 | p = d / "composer.json" 70 | p.write_text(json.dumps({ 71 | "name": "vendor/package1", 72 | })) 73 | 74 | result = composer.parse_composer([], str(d), "composer.json") 75 | 76 | assert len(result) == 1 77 | 78 | expected_package = PackageInfo("package1", None, "vendor", PackageInfoType.COMPOSER) 79 | 80 | assert expected_package in result 81 | assert result[expected_package] == ["composer.json"] 82 | 83 | def test_get_composer_no_composer_files(tmp_path): 84 | d = tmp_path / "sub" 85 | d.mkdir() 86 | 87 | result = composer.get_composer([], str(d)) 88 | assert result == {} 89 | 90 | def test_get_composer_only_composer_json(tmp_path): 91 | d = tmp_path / "sub" 92 | d.mkdir() 93 | p = d / "composer.json" 94 | p.write_text(json.dumps({"name": "vendor/package1", "version": "1.0.0"})) 95 | 96 | result = composer.get_composer([str(p)], str(d)) 97 | assert result == {} # Because it requires composer.lock 98 | 99 | def test_parse_composer_exception(tmp_path, caplog): 100 | d = tmp_path / "sub" 101 | d.mkdir() 102 | p = d / "composer.json" 103 | p.write_text("Invalid JSON") 104 | 105 | result = composer.parse_composer([], str(d), "composer.json") 106 | assert result == {} 107 | assert "Could not open file composer.json" in caplog.text -------------------------------------------------------------------------------- /orca/lib/test_dockerfile.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from orca.lib.types import PackageInfo, PackageInfoType 3 | from orca.lib.dockerfile import ( 4 | extract_urls, 5 | replace_curly_variables, 6 | replace_dollar_variables, 7 | interpolate_variables, 8 | github_to_cpe, 9 | selected_websites_to_cpe, 10 | extract_cpes_from_dockerfile 11 | ) 12 | 13 | def test_extract_urls(): 14 | text = "RUN curl https://example.com/file.tar.gz && wget http://test.org/pkg.zip" 15 | urls = extract_urls(text) 16 | assert urls == ["https://example.com/file.tar.gz", "http://test.org/pkg.zip"] 17 | 18 | def test_replace_curly_variables(): 19 | url = "https://example.com/${VERSION}/file.tar.gz" 20 | line = "VERSION=1.2.3 curl ${VERSION}" 21 | result = replace_curly_variables(url, line) 22 | assert result == ["https://example.com/1.2.3/file.tar.gz"] 23 | 24 | def test_replace_dollar_variables(): 25 | url = "https://example.com/$VERSION/file.tar.gz" 26 | line = "VERSION=1.2.3" 27 | result = replace_dollar_variables(url, line) 28 | assert result == "https://example.com/1.2.3/file.tar.gz" 29 | 30 | def test_github_to_cpe(): 31 | urls = ["https://github.com/user/repo/releases/download/v1.2.3/file.tar.gz"] 32 | result = github_to_cpe(urls) 33 | expected = 
[(PackageInfo("repo", "v1.2.3", "user", PackageInfoType.GITHUB), urls[0])] 34 | assert result == expected 35 | 36 | def test_selected_websites_to_cpe(): 37 | urls = [ 38 | "https://static.rust-lang.org/rustup/archive/1.2.3/", 39 | "https://services.gradle.org/distributions/gradle-7.0-bin.zip", 40 | "https://ftp.postgresql.org/pub/source/v12.0" 41 | ] 42 | result = selected_websites_to_cpe(urls) 43 | expected = [ 44 | (PackageInfo("rust", "1.2.3", "rust", type=PackageInfoType.RUST), urls[0]), 45 | (PackageInfo("gradle", "7.0", "gradle", PackageInfoType.GRADLE), urls[1]), 46 | (PackageInfo("postgresql", "v12.0", "postgresql"), urls[2]) 47 | ] 48 | assert result == expected 49 | -------------------------------------------------------------------------------- /orca/lib/test_jar.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | from .jar import extract_jar, list_jar_props, parse_pom_properties, get_jar 4 | from .types import PackageInfo, PackageInfoType 5 | import zipfile 6 | 7 | class TestJar(unittest.TestCase): 8 | def test_extract_jar_simple(self): 9 | # Test simple version format 10 | result = extract_jar("org.apache.commons.text-1.9") 11 | self.assertEqual(result["author"], "org.apache.commons") 12 | self.assertEqual(result["name"], "text") 13 | self.assertEqual(result["version"], "1.9") 14 | 15 | def test_extract_jar_complex(self): 16 | # Test complex version format 17 | result = extract_jar("com.google.guava-31.jar") 18 | self.assertEqual(result["author"], "com.google") 19 | self.assertEqual(result["name"], "guava") 20 | self.assertEqual(result["version"], "31.jar") 21 | 22 | def test_extract_jar_invalid(self): 23 | # Test invalid format 24 | result = extract_jar("invalid_format") 25 | self.assertIsNone(result) 26 | 27 | def test_get_jar(self): 28 | test_paths = [ 29 | "test.jar", 30 | "commons-text-1.9.jar", 31 | "guava-31.0.jar" 32 | ] 33 | packages = get_jar(test_paths, ".") 34 | self.assertIsInstance(packages, dict) 35 | 36 | def test_list_jar_props_empty(self): 37 | # Test with non-existent jar 38 | packages = list_jar_props("nonexistent.jar", ".") 39 | self.assertEqual(packages, []) 40 | 41 | if __name__ == '__main__': 42 | unittest.main() -------------------------------------------------------------------------------- /orca/lib/test_path.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from orca.lib.path import remove_folders, get_filepaths 3 | import os 4 | import tempfile 5 | 6 | def test_remove_folders_empty(): 7 | assert remove_folders([]) == [] 8 | 9 | def test_remove_folders_single_file(): 10 | assert remove_folders(["file.txt"]) == ["file.txt"] 11 | 12 | def test_get_filepaths(): 13 | with tempfile.TemporaryDirectory() as tmp_dir: 14 | # Create test directory structure 15 | os.makedirs(os.path.join(tmp_dir, "a/b")) 16 | os.makedirs(os.path.join(tmp_dir, "etc/ssl/certs")) 17 | os.makedirs(os.path.join(tmp_dir, "usr/share/zoneinfo")) 18 | os.makedirs(os.path.join(tmp_dir, "etc/nginx")) 19 | 20 | # Create some test files 21 | open(os.path.join(tmp_dir, "a/b/test.txt"), "w").close() 22 | open(os.path.join(tmp_dir, "file.txt"), "w").close() 23 | open(os.path.join(tmp_dir, "etc/ssl/certs/cert.pem"), "w").close() 24 | 25 | paths = get_filepaths(tmp_dir) 26 | 27 | assert "a/b/test.txt" in paths 28 | assert "file.txt" in paths 29 | assert "etc/ssl/certs/cert.pem" not in paths 30 | 
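A minimal usage sketch for the path helpers exercised above; the layer directory name is illustrative, and a layer is assumed to be already unpacked on disk:

    from orca.lib.path import get_filepaths, remove_folders

    # get_filepaths walks the extracted layer and returns layer-relative paths,
    # filtering out noise such as certificate stores (see the assertions above).
    paths = get_filepaths("tmpdir/layer0_layer")
    files = remove_folders(paths)  # keep only file entries
    print(f"{len(files)} candidate files to fingerprint")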
-------------------------------------------------------------------------------- /orca/lib/test_path_checkers.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from .path_checkers import check_python_from_path_once, check_python_from_path 3 | from .types import PackageInfo 4 | import os 5 | 6 | class TestPathCheckers(unittest.TestCase): 7 | def test_check_python_from_path_once_basic(self): 8 | filename = "python3.8/site-packages/requests-2.25.1.dist-info" 9 | result, files = check_python_from_path_once(filename, "") 10 | self.assertEqual(len(result), 2) 11 | self.assertEqual(result[0], PackageInfo("python", "3.8", None)) 12 | self.assertEqual(result[1], PackageInfo("requests", "2.25.1", None)) 13 | self.assertEqual(files, [filename]) 14 | 15 | def test_check_python_from_path_once_nested(self): 16 | filename = "python3.9/site-packages/urllib3/urllib3-1.26.4.dist-info" 17 | result, files = check_python_from_path_once(filename, "") 18 | self.assertEqual(len(result), 2) 19 | self.assertEqual(result[0], PackageInfo("python", "3.9", None)) 20 | self.assertEqual(result[1], PackageInfo("urllib3-urllib3", "1.26.4", None)) 21 | self.assertEqual(files, [filename]) 22 | 23 | def test_check_python_from_path_once_invalid(self): 24 | filename = "invalid/path/format" 25 | result, files = check_python_from_path_once(filename, "") 26 | self.assertIsNone(result) 27 | self.assertEqual(files, [filename]) 28 | 29 | def test_check_python_from_path_multiple(self): 30 | paths = [ 31 | "python3.8/site-packages/requests-2.25.1.dist-info", 32 | "python3.8/site-packages/urllib3/urllib3-1.26.4.dist-info", 33 | "not/a/valid/path" 34 | ] 35 | result, files = check_python_from_path(paths, "") 36 | self.assertEqual(len(result), 4) 37 | self.assertEqual(len(files), 2) 38 | 39 | def test_check_python_from_path_empty(self): 40 | paths = ["not/a/valid/path"] 41 | result, files = check_python_from_path(paths, "") 42 | self.assertEqual(len(result), 0) 43 | self.assertEqual(len(files), 0) -------------------------------------------------------------------------------- /orca/lib/test_perl.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .perl import parse_module, get_perl 3 | from .types import PackageInfo, PackageInfoType 4 | import pytest 5 | 6 | class TestPerl: 7 | @pytest.fixture(autouse=True) 8 | def setup(self): 9 | self.test_dir = os.path.dirname(os.path.abspath(__file__)) 10 | 11 | def test_parse_module_empty(self): 12 | result = parse_module("nonexistent_file.pm") 13 | assert result == ("", "") 14 | 15 | def test_parse_module_valid(self, tmp_path): 16 | test_content = """ 17 | package Test::Module; 18 | $VERSION = '1.2.3'; 19 | """ 20 | test_file = tmp_path / "test.pm" 21 | test_file.write_text(test_content) 22 | 23 | package, version = parse_module(str(test_file)) 24 | assert package == "Test::Module" 25 | assert version == "1.2.3" 26 | 27 | def test_parse_module_no_version(self, tmp_path): 28 | test_content = """ 29 | package Test::Module; 30 | """ 31 | test_file = tmp_path / "test.pm" 32 | test_file.write_text(test_content) 33 | 34 | package, version = parse_module(str(test_file)) 35 | assert package == "" 36 | assert version == "" 37 | 38 | def test_get_perl_empty(self): 39 | result = get_perl([], "") 40 | assert result == {} 41 | -------------------------------------------------------------------------------- /orca/lib/test_pkgconfig.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | from .pkgconfig import get_pkgconfig 3 | from .types import PackageInfo 4 | 5 | def test_get_pkgconfig_empty(): 6 | paths = [] 7 | directory = "/test" 8 | result = get_pkgconfig(paths, directory) 9 | assert result == {} 10 | 11 | def test_get_pkgconfig_no_pc_files(): 12 | paths = ["/test/lib/file.so", "/test/include/header.h"] 13 | directory = "/test" 14 | result = get_pkgconfig(paths, directory) 15 | assert result == {} 16 | 17 | def test_get_pkgconfig_invalid_pc(): 18 | paths = ["/test/usr/lib/pkgconfig/invalid.pc"] 19 | directory = "/test" 20 | 21 | def mock_read_pc_file(path, vars): 22 | raise Exception("Invalid PC file") 23 | 24 | import pykg_config.pcfile 25 | pykg_config.pcfile.read_pc_file = mock_read_pc_file 26 | 27 | result = get_pkgconfig(paths, directory) 28 | assert result == {} -------------------------------------------------------------------------------- /orca/lib/test_python.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from orca.lib.types import PackageInfo, PackageInfoType 3 | import os 4 | 5 | from orca.lib.python import ( 6 | check_python_from_path_once, 7 | check_python_from_path, 8 | extract_egg_dependencies, 9 | get_egg_files, 10 | get_record_files, 11 | parse_egg_info, 12 | parse_metadata, 13 | extract_python_dependencies, 14 | ) 15 | 16 | def test_check_python_from_path_once_dist_info(): 17 | paths = ["/path/to/package-1.0.dist-info/METADATA", "/path/to/package-1.0.dist-info/RECORD"] 18 | filename = "/path/to/package-1.0.dist-info" 19 | directory = "/path/to" 20 | result = check_python_from_path_once(paths, filename, directory) 21 | expected_package = PackageInfo("package",'1.0',None,PackageInfoType.PYPI) 22 | 23 | assert len(result) == 1 24 | assert expected_package == list(result.keys())[0] 25 | assert len(result[expected_package]) == 2 26 | 27 | def test_check_python_from_path_once_egg_info(): 28 | paths = ["/path/to/package-1.0.egg-info/PKG-INFO", "/path/to/package-1.0.egg-info/SOURCES.txt"] 29 | filename = "/path/to/package-1.0.egg-info" 30 | directory = "/path/to" 31 | result = check_python_from_path_once(paths, filename, directory) 32 | expected_package = PackageInfo("package",'1.0',None,PackageInfoType.PYPI) 33 | 34 | assert len(result) == 1 35 | assert expected_package == list(result.keys())[0] 36 | assert len(result[expected_package]) == 2 37 | 38 | def skip_test_check_python_from_path_once_record(): 39 | paths = ["/path/to/package-1.0.dist-info/METADATA", "/path/to/package-1.0.dist-info/RECORD", "/path/to/package/file1.py", "/path/to/package/file2.py"] 40 | filename = "/path/to/package-1.0.dist-info/RECORD" 41 | directory = "/path/to" 42 | result = check_python_from_path_once(paths, filename, directory) 43 | assert len(result) == 1 44 | package_info = list(result.keys())[0] 45 | assert package_info.name == "package" 46 | assert package_info.version == "1.0" 47 | assert result[package_info] == ['/path/to/package/file1.py', '/path/to/package/file2.py'] 48 | 49 | def test_check_python_from_path(): 50 | paths = ["/path/to/package1-1.0.dist-info/METADATA", 51 | "/path/to/package2-2.0.egg-info/PKG-INFO", "/path/to/package2-2.0.egg-info/SOURCES.txt"] 52 | directory = "/path/to" 53 | result = check_python_from_path(paths, directory) 54 | assert isinstance(result, dict) 55 | 56 | def test_extract_egg_dependencies(tmpdir): 57 | depfile_content = """Name: test_package 58 | Version: 1.2.3 59 | Author: 
Test Author 60 | Requires-Dist: requests 61 | Requires-Dist: flask""" 62 | depfile = tmpdir.join("PKG-INFO") 63 | depfile.write(depfile_content) 64 | packages = extract_egg_dependencies(str(depfile)) 65 | assert len(packages) == 3 66 | assert packages[0].name == "test_package" 67 | assert packages[0].version == "1.2.3" 68 | 69 | def skip_test_get_egg_files(tmpdir): 70 | sources_content = """file1.py 71 | file2.py 72 | """ 73 | sources = tmpdir.join("SOURCES.txt") 74 | sources.write(sources_content) 75 | file = "/path/to/package-1.0.egg-info" 76 | result = get_egg_files(file, str(sources)) 77 | assert result == ['/path/to/file1.py', '/path/to/file2.py'] 78 | 79 | def skip_test_get_record_files(tmpdir): 80 | record_content = """file1.py,sha256=abc,100 81 | file2.py,sha256=def,200 82 | """ 83 | record = tmpdir.join("RECORD") 84 | record.write(record_content) 85 | file = "/path/to/package-1.0.dist-info" 86 | result = get_record_files(file, str(record)) 87 | assert result == ['/path/to/file1.py', '/path/to/file2.py'] 88 | 89 | def skip_test_parse_egg_info(tmpdir): 90 | pkg_info_content = """Metadata-Version: 2.1 91 | Name: test_package 92 | Version: 1.2.3 93 | Author: Test Author 94 | Requires-Dist: requests""" 95 | sources_content = "file1.py\nfile2.py\n" 96 | pkg_info = tmpdir.join("PKG-INFO") 97 | sources = tmpdir.join("SOURCES.txt") 98 | pkg_info.write(pkg_info_content) 99 | sources.write(sources_content) 100 | paths = ["/path/to/file1.py", "/path/to/file2.py", "/path/to/package-1.0.egg-info/PKG-INFO"] 101 | file = "/path/to/package-1.0.egg-info" 102 | dirpath = str(tmpdir) + "/" 103 | result = parse_egg_info(paths, file, dirpath) 104 | assert len(result) == 2 105 | package_info = list(result.keys())[0] 106 | assert package_info.name == "test_package" 107 | assert package_info.version == "1.2.3" 108 | assert len(result[package_info]) == 3 109 | 110 | def skip_test_parse_metadata(tmpdir): 111 | metadata_content = """Metadata-Version: 2.1 112 | Name: test_package 113 | Version: 1.2.3 114 | Author: Test Author 115 | Requires-Dist: requests""" 116 | record_content = "file1.py,sha256=abc,100\nfile2.py,sha256=def,200\n" 117 | metadata = tmpdir.join("METADATA") 118 | record = tmpdir.join("RECORD") 119 | metadata.write(metadata_content) 120 | record.write(record_content) 121 | paths = ["/path/to/file1.py", "/path/to/file2.py", "/path/to/package-1.0.dist-info/METADATA"] 122 | file = "/path/to/package-1.0.dist-info" 123 | dirpath = str(tmpdir) + "/" 124 | result = parse_metadata(paths, file, dirpath) 125 | assert len(result) == 2 126 | package_info = list(result.keys())[0] 127 | assert package_info.name == "test_package" 128 | assert package_info.version == "1.2.3" 129 | assert len(result[package_info]) == 3 130 | 131 | def skip_test_extract_python_dependencies(tmpdir): 132 | # Create dummy files and directories 133 | dist_info_dir = tmpdir.mkdir("test_package-1.0.dist-info") 134 | dist_info_dir.join("METADATA").write("Metadata-Version: 2.1\nName: test_package\nVersion: 1.0") 135 | dist_info_dir.join("RECORD").write("file1.py,,\nfile2.py,,\n") 136 | 137 | egg_info_dir = tmpdir.mkdir("another_package-2.0.egg-info") 138 | egg_info_dir.join("PKG-INFO").write("Metadata-Version: 2.1\nName: another_package\nVersion: 2.0") 139 | egg_info_dir.join("SOURCES.txt").write("file3.py\nfile4.py\n") 140 | 141 | # Define paths 142 | paths = [str(dist_info_dir.join("METADATA")), str(dist_info_dir.join("RECORD")), 143 | str(egg_info_dir.join("PKG-INFO")), str(egg_info_dir.join("SOURCES.txt"))] 144 | directory = 
str(tmpdir) 145 | 146 | # Call the function 147 | dependencies = extract_python_dependencies(paths, directory) 148 | 149 | # Assertions 150 | assert len(dependencies) == 2 151 | assert PackageInfo("test_package", "1.0", None, PackageInfoType.PYPI) in dependencies 152 | assert PackageInfo("another_package", "2.0", None, PackageInfoType.PYPI) in dependencies -------------------------------------------------------------------------------- /orca/lib/types.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Dict, List, Optional, Set 3 | from enum import Enum 4 | 5 | class LayerAction(Enum): 6 | ADDED = "added" 7 | REPLACED = "replaced" 8 | DELETED = "deleted" 9 | 10 | @dataclass 11 | class PackageRecord: 12 | path: str 13 | hashtype: Optional[str] 14 | hash: Optional[str] 15 | nlines: Optional[int] 16 | 17 | 18 | def to_record(record_item: str) -> PackageRecord: 19 | split = record_item.split(',') 20 | if len(split) < 2: 21 | return PackageRecord(split[0],None,None,None) 22 | if len(split[1]) < 5: 23 | htype = None 24 | hash = None 25 | nlines = None 26 | else: 27 | htype = split[1].split("=")[0] 28 | hash = split[1].split("=")[1] 29 | nlines = int(split[2]) 30 | return PackageRecord(split[0],htype,hash,nlines) 31 | 32 | @dataclass 33 | class LayerChangeRecord: 34 | action: LayerAction 35 | layer: str 36 | 37 | class PackageInfoType(Enum): 38 | DEBIAN = "debian", 39 | PYPI = "pypi", 40 | NPM = "npm", 41 | MAVEN = "maven", 42 | GOLANG = "golang", 43 | APK = "apk", 44 | COMPOSER = "composer", 45 | RPM = "rpm", 46 | GEM = "gem", 47 | PERL = "perl", 48 | GITHUB = "github", 49 | BITNAMI = "bitnami", 50 | RUST="rust", 51 | GRADLE="gradle", 52 | 53 | 54 | 55 | @dataclass(frozen=True) 56 | class PackageInfo: 57 | name: str 58 | version: str 59 | author: Optional[str] 60 | type: Optional[PackageInfoType] = None 61 | arch: Optional[str] = None 62 | epoch: Optional[str] = None 63 | 64 | def to_cpe(self): 65 | return f"cpe:2.3:a:{self.author if self.author is not None and "Amazon" not in self.author else "*"}:{self.name}:{self.version}:*:*:*:*:*:*:*" 66 | 67 | def to_csv_entry(self): 68 | author = "unknown" if self.author is None else self.author 69 | author = author if "Amazon" not in author else "unknown" 70 | 71 | return f"{self.name},{self.version},{author}" 72 | 73 | class VulnerabilityReport: 74 | def __init__(self,paths: Set[str],files=None): 75 | if files is not None: 76 | self.original_files = files 77 | else: 78 | self.original_files = paths 79 | self.initial_files = paths 80 | self.remaining_files = paths 81 | assert isinstance(self.remaining_files, set), "remaining_files must be a set" 82 | self.packages: List[PackageInfo] = [] 83 | self.package_files: Dict[PackageInfo,List[str]] = {} 84 | self.analyzed_files: Set[str] = set() 85 | self.os = None 86 | 87 | def add_package_files(self,package_files: Dict[PackageInfo,List[str]]): 88 | self.packages.extend(package_files.keys()) 89 | self.package_files.update({pkg: files for pkg, files in package_files.items() if any(f in self.initial_files for f in files)}) # TODO: probably add the other files to another dict 90 | fs = [file for file_list in package_files.values() for file in file_list ] 91 | fs_in_initial = [f for f in fs if f in self.initial_files] 92 | self.analyzed_files.update(fs_in_initial) 93 | self.remaining_files = self.remaining_files.difference(fs_in_initial) 94 | 95 | def to_json(self): 96 | json_dict = {} 97 | for k,v in 
self.package_files.items(): 98 | json_dict[f"{k.name}_{k.version}_{k.author}"] = list(v) 99 | return json_dict 100 | 101 | def to_json_all(self): 102 | json_dict = {'package_files': {}, 'analyzed_files': [], 'remaining_files': []} 103 | for k, v in self.package_files.items(): 104 | if isinstance(k, PackageInfo): 105 | json_dict['package_files'][f"{k.name}_{k.version}_{k.author}"] = { 106 | "type": k.type.name if k.type is not None else None, 107 | "list_files": list(v) 108 | } 109 | json_dict['analyzed_files'] = list(set(self.analyzed_files)) 110 | json_dict['remaining_files'] = list(set(self.remaining_files)) 111 | return json_dict 112 | 113 | def summary(self) -> str: 114 | return f"Found {len(self.packages)} packages. Indexed {len(self.analyzed_files)} files over a total of {len(self.original_files)} - Remaining files {len(self.original_files) - len(self.analyzed_files)}" 115 | 116 | -------------------------------------------------------------------------------- /orca/lib/utils.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | def calculate_sha256(file_path): 3 | try: 4 | # Open the file in binary mode 5 | with open(file_path, 'rb') as file: 6 | # Initialize the SHA-256 hash object 7 | sha256_hash = hashlib.sha256() 8 | # Read the file in chunks to efficiently handle large files 9 | for chunk in iter(lambda: file.read(4096), b''): 10 | # Update the hash object with the current chunk 11 | sha256_hash.update(chunk) 12 | # Return the raw digest bytes of the hash 13 | hash_value = sha256_hash.digest() 14 | return hash_value 15 | except FileNotFoundError: 16 | return None 17 | 18 | def map_container_id(container_id: str): 19 | return container_id.replace(":", "twodots").replace("/", "slash") 20 | -------------------------------------------------------------------------------- /orca/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import datetime 3 | import json 4 | import shutil 5 | from typing import Dict, List 6 | import docker 7 | import docker.errors 8 | from orca.find_cpes import scan_filesystem 9 | from orca.lib.dockerfile import extract_cpes_from_dockerfile_with_validation 10 | from orca.lib.logger import logger 11 | import tarfile 12 | import os 13 | 14 | from orca.lib.spdx import generateSPDXFromReportMap 15 | from orca.lib.types import VulnerabilityReport 16 | from orca.lib.utils import map_container_id 17 | 18 | TMP_DIR = f"{os.getcwd()}/tmpdir" 19 | 20 | 21 | 22 | def tar_remove_links(file: tarfile.TarInfo,path): 23 | if not file.islnk() and not file.issym() and not file.isdev() and not file.isdir(): 24 | return file 25 | return None 26 | 27 | 28 | def save_image(client:docker.DockerClient,container:str,filepath:str): 29 | try: 30 | image = client.images.get(container) 31 | except docker.errors.ImageNotFound as _: 32 | logger.info(f"Image {container} not found") 33 | logger.info(f"Pulling image {container}") 34 | image = client.images.pull(container) 35 | except Exception as e: 36 | logger.error(e) 37 | raise 38 | 39 | shutil.rmtree(TMP_DIR,ignore_errors=True) 40 | 41 | os.mkdir(TMP_DIR,mode=0o755) 42 | 43 | 44 | logger.info(f"Saving image {container} to {filepath}") 45 | f = open(filepath, 'wb') 46 | for chunk in image.save(named=False): 47 | f.write(chunk) 48 | f.close() 49 | return 50 | 51 | 52 | def extract_config(config_path: str): 53 | config_file = json.load(open(config_path)) 54 | data = config_file['history'] 55 | if len(data) > 1: 56 | return config_file 57 | # 
Compressed images with crane 58 | for item in config_file['history']: 59 | if "comment" in item: 60 | try: 61 | x = json.loads(item["comment"]) 62 | item["comment"] = x 63 | except json.JSONDecodeError: 64 | break 65 | #print(f"Error parsing nested JSON - {item}") 66 | #exit() 67 | if 'comment' not in data[0]: 68 | return config_file 69 | config_file['history'] = data[0]['comment'] 70 | return config_file 71 | 72 | 73 | 74 | def extract_with_config_and_layers(image_location:str): 75 | tarf = tarfile.open(image_location) 76 | manifests = [x for x in tarf.getmembers() if x.name == "manifest.json"] 77 | assert len(manifests) == 1 78 | manifest = manifests[0] 79 | tarf.extract(manifest,path=f"{TMP_DIR}",set_attrs=False,filter=tar_remove_links) 80 | manifestFile = json.load(open(f"{TMP_DIR}/manifest.json")) 81 | layers = manifestFile[0]['Layers'] 82 | config_path = manifestFile[0]['Config'] 83 | tarf.extract(config_path,path=f"{TMP_DIR}",set_attrs=False,filter=tar_remove_links) 84 | config = extract_config(f"{TMP_DIR}/{config_path}") 85 | return tarf,config,layers 86 | 87 | def scan_tar(image_tar:str,client:docker.DockerClient,binary_analysis:bool): 88 | layers_archive,config,layers = extract_with_config_and_layers(image_tar) 89 | 90 | report_by_layer: Dict[str,VulnerabilityReport] = {} 91 | for layer in layers: 92 | logger.info(f"Analyzing layer {layer}") 93 | layers_archive.extract(layer,f"{TMP_DIR}",set_attrs=False,filter=tar_remove_links) 94 | if not os.path.exists(f"{TMP_DIR}/{layer}"): 95 | logger.error(f"Layer {layer} does not exist on container {image_tar}") 96 | continue 97 | image_layer = tarfile.open(f"{TMP_DIR}/{layer}") 98 | image_layer.extractall(f"{TMP_DIR}/{layer}_layer",filter=tar_remove_links,numeric_owner=True) 99 | image_files = image_layer.getnames() 100 | report = scan_filesystem(f"{TMP_DIR}/{layer}_layer",image_files,binary_analysis,False) 101 | report_by_layer[layer] = report 102 | # Add dockerfile: 103 | logger.info(report.summary()) 104 | 105 | cpes = extract_cpes_from_dockerfile_with_validation(config) 106 | # FIXME: this is a hack to make the report work with the dockerfile. Obviously Dockerfile commands are not files. 
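# Emptying the three file sets below keeps the per-layer accounting honest: packages inferred from Dockerfile commands have no backing files in any layer, so they must not inflate the analyzed/remaining totals reported by summary() and write_logfile().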
107 | cpes.remaining_files = set() 108 | cpes.initial_files = set() 109 | cpes.original_files = set() 110 | report_by_layer["Dockerfile"] = cpes 111 | 112 | 113 | # Cleanup: TODO: probably should be done in a separate function 114 | shutil.rmtree(TMP_DIR,ignore_errors=True) 115 | return report_by_layer 116 | 117 | def scan_image(container:str,client:docker.DockerClient,binary_analysis:bool): 118 | image_tar = f'{TMP_DIR}/container.tar' 119 | save_image(client,container,image_tar) 120 | layers_archive,config,layers = extract_with_config_and_layers(image_tar) 121 | 122 | report_by_layer: Dict[str,VulnerabilityReport] = {} 123 | for layer in layers: 124 | logger.info(f"Analyzing layer {layer}") 125 | layers_archive.extract(layer,f"{TMP_DIR}",set_attrs=False,filter=tar_remove_links) 126 | if not os.path.exists(f"{TMP_DIR}/{layer}"): 127 | logger.error(f"Layer {layer} does not exist on container {container}") 128 | continue 129 | image_layer = tarfile.open(f"{TMP_DIR}/{layer}") 130 | image_layer.extractall(f"{TMP_DIR}/{layer}_layer",filter=tar_remove_links) 131 | image_files = image_layer.getnames() 132 | report = scan_filesystem(f"{TMP_DIR}/{layer}_layer",image_files, binary_analysis,False) 133 | report_by_layer[layer] = report 134 | 135 | logger.info(report.summary()) 136 | 137 | cpes = extract_cpes_from_dockerfile_with_validation(config) 138 | report_by_layer["Dockerfile"] = cpes 139 | # FIXME: this is a hack to make the report work with the dockerfile. Obviously Dockerfile commands are not files. 140 | cpes.remaining_files = set() 141 | cpes.initial_files = set() 142 | cpes.original_files = set() 143 | 144 | # Cleanup: TODO: probably should be done in a separate function 145 | shutil.rmtree(TMP_DIR,ignore_errors=True) 146 | return report_by_layer 147 | 148 | def write_logfile(report_by_layer: dict[str, VulnerabilityReport],container:str,container_name:str,elapsed:int)->None: 149 | total_files = set() 150 | total_files_duplicates = [] 151 | analyzed_files = set() 152 | analyzed_files_duplicates = [] 153 | for _layer,report in report_by_layer.items(): 154 | total_files.update(report.initial_files) 155 | total_files_duplicates.extend(report.initial_files) 156 | analyzed_files.update(report.analyzed_files) 157 | analyzed_files_duplicates.extend(report.analyzed_files) 158 | 159 | loginfo = { 160 | "analyzed_files":len(analyzed_files), 161 | "analyzed_files_duplicates":len(analyzed_files_duplicates), 162 | "container": container, 163 | "container_usable_name": container_name, 164 | "total_files": len(total_files), 165 | "total_files_duplicates": len(total_files_duplicates), 166 | "elapsed_time":elapsed 167 | } 168 | with open(f"logs/orca-{container_name}_logs.json","w") as fp: 169 | json.dump(loginfo,fp) 170 | 171 | 172 | def orca(client: docker.DockerClient,output_folder: str,csv:bool,binary_analysis:bool,with_complete_report:bool,containers: List[str]): 173 | 174 | if not os.path.exists("logs/"): 175 | os.mkdir("logs",mode=0o755) 176 | if output_folder == "results" and not os.path.exists("results"): 177 | os.mkdir("results",mode=0o755) 178 | 179 | for container in containers: 180 | start = datetime.datetime.now() 181 | container_usable_name = map_container_id(container) 182 | 183 | if not container.endswith(".tar"): 184 | report_by_layer = scan_image(container,client,binary_analysis) 185 | else: 186 | report_by_layer = scan_tar(container,client,binary_analysis) 187 | 188 | end = datetime.datetime.now() 189 | 190 | elapsed 
= (end-start).total_seconds() * 1000 191 | total_cpe = set() 192 | for layer,report in report_by_layer.items(): 193 | logger.info(f"{layer} - {report.summary()}") 194 | if len(report.packages) == 1 and report.packages[0] == (None,None): 195 | continue 196 | total_cpe.update(report.packages) 197 | 198 | print(f"[{container}] Total packages identified {len(total_cpe)}") 199 | logger.info(f"Elapsed time: {elapsed} ms") 200 | write_logfile(report_by_layer,container,container_usable_name,elapsed) 201 | 202 | if len(total_cpe) == 0: 203 | continue 204 | if csv: 205 | with open(f"{output_folder}/{container_usable_name}_packages.csv","w") as fp: 206 | fp.write("product,version,vendor\n") 207 | for pkg in total_cpe: 208 | fp.write(pkg.to_csv_entry() + "\n") 209 | fp.close() 210 | 211 | generateSPDXFromReportMap(container,report_by_layer,f"{output_folder}/orca-{container_usable_name}.json",with_complete_report) 212 | 213 | 214 | def main(): 215 | 216 | parser = argparse.ArgumentParser( 217 | prog="orca", 218 | description="""Software composition analysis for containers""" 219 | ) 220 | 221 | parser.add_argument( 222 | "-d","--dir", type=str, help="Folder where to store results *without ending /*",default="results") 223 | 224 | parser.add_argument( 225 | "--csv", action='store_true', help="Store also a csv file with package information",default=False) 226 | 227 | parser.add_argument( 228 | "-b","--with-binaries", action='store_true', help="Analyze every binary file (slower). Go binaries are always analyzed",default=False) 229 | 230 | parser.add_argument( 231 | "-c","--complete", action='store_true', help="Generate complete SPDX report with relationships (>200MB file is generated)", default=False) 232 | 233 | parser.add_argument( 234 | "containers", type=str, help="Comma separated list of containers to analyze") 235 | 236 | args = parser.parse_args() 237 | client = docker.from_env(timeout=900) # TODO: if scanning a tar there is no reason to access the docker engine 238 | output = args.dir 239 | csv = args.csv 240 | with_bin = args.with_binaries 241 | with_complete_report = args.complete 242 | containers = args.containers.split(",") 243 | orca(client,output,csv,with_bin,with_complete_report,containers) 244 | 245 | if __name__ == "__main__": 246 | main() -------------------------------------------------------------------------------- /orca/rpm_checker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kube-security/orca/7a6f756bfbf70d7031ff1810c1c5c414747ac63d/orca/rpm_checker/__init__.py -------------------------------------------------------------------------------- /orca/rpm_checker/go.mod: -------------------------------------------------------------------------------- 1 | module test.me/rpm 2 | 3 | go 1.20 4 | 5 | require github.com/knqyf263/go-rpmdb v0.1.1 6 | 7 | require ( 8 | github.com/dustin/go-humanize v1.0.1 // indirect 9 | github.com/google/uuid v1.6.0 // indirect 10 | github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect 11 | github.com/mattn/go-isatty v0.0.20 // indirect 12 | github.com/mattn/go-sqlite3 v1.14.22 // indirect 13 | github.com/ncruces/go-strftime v0.1.9 // indirect 14 | github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect 15 | golang.org/x/sys v0.22.0 // indirect 16 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect 17 | modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 // indirect 18 | modernc.org/libc v1.55.3 // indirect 19 | modernc.org/mathutil 
v1.6.0 // indirect 20 | modernc.org/memory v1.8.0 // indirect 21 | modernc.org/sqlite v1.31.1 // indirect 22 | modernc.org/strutil v1.2.0 // indirect 23 | modernc.org/token v1.1.0 // indirect 24 | ) 25 | -------------------------------------------------------------------------------- /orca/rpm_checker/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= 2 | github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= 3 | github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= 4 | github.com/glebarez/go-sqlite v1.20.3 h1:89BkqGOXR9oRmG58ZrzgoY/Fhy5x0M+/WV48U5zVrZ4= 5 | github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= 6 | github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= 7 | github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 8 | github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= 9 | github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= 10 | github.com/knqyf263/go-rpmdb v0.1.1 h1:oh68mTCvp1XzxdU7EfafcWzzfstUZAEa3MW0IJye584= 11 | github.com/knqyf263/go-rpmdb v0.1.1/go.mod h1:9LQcoMCMQ9vrF7HcDtXfvqGO4+ddxFQ8+YF/0CVGDww= 12 | github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng= 13 | github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= 14 | github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 15 | github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= 16 | github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= 17 | github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4= 18 | github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= 19 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 20 | github.com/remyoudompheng/bigfft v0.0.0-20230126093431-47fa9a501578 h1:VstopitMQi3hZP0fzvnsLmzXZdQGc4bEcgu24cp+d4M= 21 | github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= 22 | github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= 23 | github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= 24 | golang.org/x/sys v0.4.0 h1:Zr2JFtRQNX3BCZ8YtxRE9hNJYC8J6I1MVbMg6owUp18= 25 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 26 | golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= 27 | golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 28 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= 29 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 30 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= 31 | modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 h1:5D53IMaUuA5InSeMu9eJtlQXS2NxAhyWQvkKEgXZhHI= 32 | modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6/go.mod h1:Qz0X07sNOR1jWYCrJMEnbW/X55x206Q7Vt4mz6/wHp4= 33 | modernc.org/libc v1.22.2 h1:4U7v51GyhlWqQmwCHj28Rdq2Yzwk55ovjFrdPjs8Hb0= 34 | modernc.org/libc 
v1.55.3 h1:AzcW1mhlPNrRtjS5sS+eW2ISCgSOLLNyFzRh/V3Qj/U= 35 | modernc.org/libc v1.55.3/go.mod h1:qFXepLhz+JjFThQ4kzwzOjA/y/artDeg+pcYnY+Q83w= 36 | modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ= 37 | modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4= 38 | modernc.org/mathutil v1.6.0/go.mod h1:Ui5Q9q1TR2gFm0AQRqQUaBWFLAhQpCwNcuhBOSedWPo= 39 | modernc.org/memory v1.5.0 h1:N+/8c5rE6EqugZwHii4IFsaJ7MUhoWX07J5tC/iI5Ds= 40 | modernc.org/memory v1.8.0 h1:IqGTL6eFMaDZZhEWwcREgeMXYwmW83LYW8cROZYkg+E= 41 | modernc.org/memory v1.8.0/go.mod h1:XPZ936zp5OMKGWPqbD3JShgd/ZoQ7899TUuQqxY+peU= 42 | modernc.org/sqlite v1.20.3 h1:SqGJMMxjj1PHusLxdYxeQSodg7Jxn9WWkaAQjKrntZs= 43 | modernc.org/sqlite v1.31.1 h1:XVU0VyzxrYHlBhIs1DiEgSl0ZtdnPtbLVy8hSkzxGrs= 44 | modernc.org/sqlite v1.31.1/go.mod h1:UqoylwmTb9F+IqXERT8bW9zzOWN8qwAIcLdzeBZs4hA= 45 | modernc.org/strutil v1.2.0 h1:agBi9dp1I+eOnxXeiZawM8F4LawKv4NzGWSaLfyeNZA= 46 | modernc.org/strutil v1.2.0/go.mod h1:/mdcBmfOibveCTBxUl5B5l6W+TTH1FXPLHZE6bTosX0= 47 | modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= 48 | modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= 49 | -------------------------------------------------------------------------------- /orca/rpm_checker/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "flag" 6 | "fmt" 7 | "log" 8 | 9 | rpmdb "github.com/knqyf263/go-rpmdb/pkg" 10 | _ "github.com/mattn/go-sqlite3" 11 | _ "modernc.org/sqlite" 12 | ) 13 | 14 | type PackageInfo struct { 15 | Package string `json:"package"` 16 | Version string `json:"version"` 17 | Author string `json:"author"` 18 | Files []string `json:"files"` 19 | SourceRpm string `json:"rpm"` 20 | } 21 | 22 | func main() { 23 | 24 | if len(flag.Args()) == 0 { 25 | fmt.Println("Usage: rpm_checker -dbpath=") 26 | fmt.Println("Example: rpm_checker -dbpath=./Packages") 27 | return 28 | } 29 | // Define flags for the database path and package names 30 | dbPath := flag.String("dbpath", "./Packages", "Path to the RPM database") 31 | flag.Parse() 32 | 33 | if err := run(*dbPath); err != nil { 34 | log.Fatal(err) 35 | } 36 | } 37 | 38 | func run(dbPath string) error { 39 | db, err := rpmdb.Open(dbPath) 40 | if err != nil { 41 | return err 42 | } 43 | defer db.Close() 44 | 45 | packages := []PackageInfo{} 46 | pkgList, err := db.ListPackages() 47 | if err != nil { 48 | return err 49 | } 50 | 51 | for _, pkg := range pkgList { 52 | files := []string{} 53 | fileinfo, _ := pkg.InstalledFiles() 54 | 55 | for _, f := range fileinfo { 56 | files = append(files, f.Path[1:]) 57 | } 58 | 59 | packages = append(packages, PackageInfo{pkg.Name, pkg.Version, pkg.Vendor, files, pkg.SourceRpm}) 60 | } 61 | res, _ := json.Marshal(packages) 62 | fmt.Println(string(res)) 63 | return nil 64 | } 65 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | astroid==2.11.7 2 | attrs==25.3.0 3 | bandit==1.8.3 4 | beartype==0.20.2 5 | blinker==1.9.0 6 | boolean.py==4.0 7 | bump2version==1.0.1 8 | CacheControl==0.14.2 9 | certifi==2025.1.31 10 | chardet==5.2.0 11 | charset-normalizer==3.4.1 12 | click==8.1.8 13 | coverage==7.7.1 14 | cyclonedx-python-lib==8.9.0 15 | defusedxml==0.7.1 16 | dill==0.3.9 17 | docker==7.1.0 18 | execnet==2.1.1 19 | filelock==3.18.0 20 | Flask==3.1.0 21 | 
Flask-Admin==1.6.1 22 | flask-cors==5.0.1 23 | Flask-Login==0.6.3 24 | Flask-SQLAlchemy==3.1.1 25 | gevent==24.11.1 26 | greenlet==3.1.1 27 | gunicorn==23.0.0 28 | idna==3.10 29 | iniconfig==2.1.0 30 | isort==5.13.2 31 | itsdangerous==2.2.0 32 | Jinja2==3.1.6 33 | lazy-object-proxy==1.10.0 34 | license-expression==30.4.1 35 | markdown-it-py==3.0.0 36 | MarkupSafe==3.0.2 37 | mccabe==0.7.0 38 | mdurl==0.1.2 39 | msgpack==1.1.0 40 | natsort==8.4.0 41 | packageurl-python==0.16.0 42 | packaging==24.2 43 | pbr==6.1.1 44 | pip-api==0.0.34 45 | pip-requirements-parser==32.0.1 46 | pip_audit==2.8.0 47 | platformdirs==4.3.7 48 | pluggy==1.5.0 49 | ply==3.11 50 | py==1.11.0 51 | py-serializable==1.1.2 52 | Pygments==2.19.1 53 | pykg-config==1.3.0 54 | pylint==2.13.9 55 | pyparsing==3.2.3 56 | pytest==8.3.5 57 | pytest-cov==6.0.0 58 | pytest-xdist==3.6.1 59 | python-debian==1.0.1 60 | python-dotenv==0.20.0 61 | PyYAML==6.0.2 62 | rdflib==7.1.3 63 | requests==2.32.3 64 | rich==13.9.4 65 | rpm==0.3.1 66 | rpmfile==2.1.0 67 | semantic-version==2.10.0 68 | setuptools==78.1.0 69 | sortedcontainers==2.4.0 70 | spdx-tools==0.8.3 71 | SQLAlchemy==2.0.39 72 | SQLAlchemy-Utils==0.41.2 73 | stevedore==5.4.1 74 | toml==0.10.2 75 | tomli==2.2.1 76 | tomlkit==0.13.2 77 | typing_extensions==4.12.2 78 | uritools==4.0.3 79 | urllib3==2.3.0 80 | validators==0.34.0 81 | Werkzeug==3.1.3 82 | wrapt==1.17.2 83 | WTForms==3.2.1 84 | xmltodict==0.14.2 85 | zope.event==5.0 86 | zope.interface==7.2 87 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | 4 | 5 | 6 | def read_requirements(filename="requirements.txt"): 7 | with open(filename, "r") as f: 8 | return [line.strip() for line in f if line.strip() and not line.startswith("#")] 9 | 10 | setup( 11 | name='orca', 12 | version='0.1.20', 13 | packages=find_packages(), 14 | install_requires=read_requirements(), 15 | entry_points={ 16 | 'console_scripts': [ 17 | 'orca=orca.main:main', 18 | ], 19 | }, 20 | include_package_data=True, 21 | ) 22 | --------------------------------------------------------------------------------
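A minimal sketch of driving the scanner programmatically, mirroring what the `orca` console script wired up above does; the image name is illustrative and a reachable Docker daemon is assumed:

    import docker
    from orca.main import orca

    # Equivalent CLI: orca --csv -b nginx:latest
    client = docker.from_env(timeout=900)
    orca(client, output_folder="results", csv=True, binary_analysis=True,
         with_complete_report=False, containers=["nginx:latest"])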