├── .bumpversion.cfg
├── .github
│   └── workflows
│       ├── build.yml
│       └── publish.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── MANIFEST.in
├── Pipfile
├── Pipfile.lock
├── README.md
├── build.sh
├── docs
│   └── orca.png
├── orca
│   ├── __init__.py
│   ├── find_cpes.py
│   ├── lib
│   │   ├── __init__.py
│   │   ├── analyzer.py
│   │   ├── apk.py
│   │   ├── ascii_checkers.py
│   │   ├── bin_checkers.py
│   │   ├── bin_checkers_cpe.py
│   │   ├── composer.py
│   │   ├── cpe2cve.py
│   │   ├── dnf.py
│   │   ├── dockerfile.py
│   │   ├── dpkg.py
│   │   ├── golang.py
│   │   ├── jar.py
│   │   ├── logger.py
│   │   ├── package_json.py
│   │   ├── path.py
│   │   ├── path_checkers.py
│   │   ├── perl.py
│   │   ├── pkgconfig.py
│   │   ├── python.py
│   │   ├── rpm_packages.py
│   │   ├── spdx.py
│   │   ├── test_apk.py
│   │   ├── test_ascii_checkers.py
│   │   ├── test_bin_checkers.py
│   │   ├── test_bin_checkers_cpe.py
│   │   ├── test_composer.py
│   │   ├── test_dockerfile.py
│   │   ├── test_jar.py
│   │   ├── test_path.py
│   │   ├── test_path_checkers.py
│   │   ├── test_perl.py
│   │   ├── test_pkgconfig.py
│   │   ├── test_python.py
│   │   ├── types.py
│   │   └── utils.py
│   ├── main.py
│   └── rpm_checker
│       ├── __init__.py
│       ├── go.mod
│       ├── go.sum
│       └── main.go
├── requirements.txt
└── setup.py
/.bumpversion.cfg:
--------------------------------------------------------------------------------
1 | [bumpversion]
2 | current_version = 0.1.20
3 | commit = True
4 | tag = True
5 | tag_name = v{new_version}
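# releases are cut with bump2version (see the Pipfile dev-packages), e.g.: pipenv run bump2version patch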
6 |
7 | [bumpversion:file:setup.py]
8 |
9 | [bumpversion:file:Dockerfile]
10 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: Build ORCA
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 |
8 | permissions:
9 | contents: write
10 |
11 | jobs:
12 | build:
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - name: Checkout
17 | uses: actions/checkout@v3
18 |
19 | - name: Set up Python
20 | uses: actions/setup-python@v4
21 | with:
22 | python-version: '3.12'
23 |
24 | - name: Install Pipenv
25 | run: pip install pipenv
26 |
27 | - name: Install dependencies
28 | run: pipenv install --dev
29 |
30 | - name: Run tests with coverage
31 | run: pipenv run pytest --cov=. --cov-report=json
32 |
33 | - name: Update Coverage Badge
34 | # https://stackoverflow.com/questions/64781462/github-actions-default-branch-variable
35 | if: github.ref == format('refs/heads/{0}', github.event.repository.default_branch)
36 | uses: we-cli/coverage-badge-action@main
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: Publish ORCA
2 |
3 | on:
4 | push:
5 | tags:
6 | - 'v*' # Triggers only when tags starting with 'v' are pushed (e.g., v1.0.0)
7 |
8 | permissions:
9 | contents: write
10 | packages: write
11 |
12 | jobs:
13 | build-and-publish:
14 | runs-on: ubuntu-latest
15 |
16 | steps:
17 | - name: Checkout
18 | uses: actions/checkout@v3
19 |
20 | - name: Set up Python
21 | uses: actions/setup-python@v4
22 | with:
23 | python-version: '3.12'
24 |
25 | - name: Install Pipenv
26 | run: pip install pipenv
27 |
28 | - name: Install dependencies
29 | run: |
30 | pipenv install --dev
31 |
32 | - name: Run tests
33 | run: |
34 | pipenv run pytest
35 |
36 | - name: Build the package
37 | run: pipenv run python setup.py sdist
38 |
39 | - name: Smoke test
40 | run: |
41 | TAG_NAME=${GITHUB_REF#refs/tags/v}
42 | pip install dist/orca-${TAG_NAME}.tar.gz
43 | orca busybox
44 |
45 | - name: Create Release
46 | id: create_release
47 | uses: softprops/action-gh-release@v2
48 | with:
49 | tag_name: ${{ github.ref }} # Use the tag that triggered the workflow
50 | files: dist/* # Attach all files in the dist directory
51 |
52 | - name: Set up Docker Buildx
53 | uses: docker/setup-buildx-action@v2
54 |
55 | - name: Login to GitHub Container Registry
56 | uses: docker/login-action@v2
57 | with:
58 | registry: ghcr.io
59 | username: ${{ github.actor }}
60 | password: ${{ secrets.GITHUB_TOKEN }}
61 |
62 | - name: Build and push Docker image
63 | uses: docker/build-push-action@v4
64 | with:
65 | context: .
66 | push: true
67 | tags: ghcr.io/${{ github.repository }}:latest
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/__pycache__/**
2 | model_output/**
3 | diagrams/**
4 | inputs/**
5 | tmpdir/**
6 | .DS_Store
7 | logs/**
8 | results/**
9 | .DS_Store
10 | dist/**
11 |
12 | # Byte-compiled / optimized / DLL files
13 | __pycache__/
14 | *.py[cod]
15 | *$py.class
16 |
17 | # C extensions
18 | *.so
19 |
20 | # Distribution / packaging
21 | .Python
22 | build/
23 | develop-eggs/
24 | dist/
25 | downloads/
26 | eggs/
27 | .eggs/
28 | lib64/
29 | parts/
30 | sdist/
31 | var/
32 | wheels/
33 | share/python-wheels/
34 | *.egg-info/
35 | .installed.cfg
36 | *.egg
37 | MANIFEST
38 |
39 | # PyInstaller
40 | # Usually these files are written by a python script from a template
41 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
42 | *.manifest
43 | *.spec
44 |
45 | # Installer logs
46 | pip-log.txt
47 | pip-delete-this-directory.txt
48 |
49 | # Unit test / coverage reports
50 | htmlcov/
51 | .tox/
52 | .nox/
53 | .coverage
54 | .coverage.*
55 | .cache
56 | nosetests.xml
57 | coverage.xml
58 | *.cover
59 | *.py,cover
60 | .hypothesis/
61 | .pytest_cache/
62 | cover/
63 |
64 | # Translations
65 | *.mo
66 | *.pot
67 |
68 | # Django stuff:
69 | *.log
70 | local_settings.py
71 | db.sqlite3
72 | db.sqlite3-journal
73 |
74 | # Flask stuff:
75 | instance/
76 | .webassets-cache
77 |
78 | # Scrapy stuff:
79 | .scrapy
80 |
81 | # Sphinx documentation
82 | docs/_build/
83 |
84 | # PyBuilder
85 | .pybuilder/
86 | target/
87 |
88 | # Jupyter Notebook
89 | .ipynb_checkpoints
90 |
91 | # IPython
92 | profile_default/
93 | ipython_config.py
94 |
95 | # pyenv
96 | # For a library or package, you might want to ignore these files since the code is
97 | # intended to run in multiple environments; otherwise, check them in:
98 | # .python-version
99 |
100 | # pipenv
101 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
102 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
103 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
104 | # install all needed dependencies.
105 | #Pipfile.lock
106 |
107 | # poetry
108 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
109 | # This is especially recommended for binary packages to ensure reproducibility, and is more
110 | # commonly ignored for libraries.
111 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
112 | #poetry.lock
113 |
114 | # pdm
115 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
116 | #pdm.lock
117 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
118 | # in version control.
119 | # https://pdm.fming.dev/#use-with-ide
120 | .pdm.toml
121 |
122 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
123 | __pypackages__/
124 |
125 | # Celery stuff
126 | celerybeat-schedule
127 | celerybeat.pid
128 |
129 | # SageMath parsed files
130 | *.sage.py
131 |
132 | # Environments
133 | .env
134 | .venv
135 | env/
136 | venv/
137 | ENV/
138 | env.bak/
139 | venv.bak/
140 |
141 | # Spyder project settings
142 | .spyderproject
143 | .spyproject
144 |
145 | # Rope project settings
146 | .ropeproject
147 |
148 | # mkdocs documentation
149 | /site
150 |
151 | # mypy
152 | .mypy_cache/
153 | .dmypy.json
154 | dmypy.json
155 |
156 | # Pyre type checker
157 | .pyre/
158 |
159 | # pytype static type analyzer
160 | .pytype/
161 |
162 | # Cython debug symbols
163 | cython_debug/
164 |
165 | # PyCharm
166 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
167 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
168 | # and can be added to the global gitignore or merged into this file. For a more nuclear
169 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
170 | #.idea/
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG ORCA_VERSION=0.1.20
2 |
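# Stage 1: compile the rpm_checker helper as a static Go binary (CGO disabled)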
3 | FROM golang:1.21 AS gobuilder
4 |
5 | WORKDIR /app
6 |
7 | COPY orca /app/orca
8 | WORKDIR /app/orca/rpm_checker
9 | RUN go vet
10 | RUN CGO_ENABLED=0 GOOS=linux go build -o rpm_checker main.go
11 |
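# Stage 2: build the orca source distribution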
12 | FROM python:3.12-slim AS pythonbuild
13 |
14 | WORKDIR /app
15 | COPY . .
16 | RUN pip install --no-cache-dir -r requirements.txt
17 |
18 | RUN python setup.py sdist
19 |
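# Stage 3: runtime image; installs the sdist and ships the Go helper on PATH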
20 | FROM python:3.12-slim
21 |
22 | RUN apt-get update && apt-get install -y golang
23 | WORKDIR /app
24 | ENV ORCA_VERSION=0.1.20
25 | COPY --from=pythonbuild /app/dist/orca-${ORCA_VERSION}.tar.gz /app
26 | COPY --from=gobuilder /app/orca/rpm_checker /bin/
27 | COPY requirements.txt .
28 | RUN pip install orca-${ORCA_VERSION}.tar.gz
29 |
30 |
31 | ENTRYPOINT [ "orca" ]
32 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 kube-security
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include requirements.txt
2 | include orca/rpm_checker/*
--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | url = "https://pypi.org/simple"
3 | verify_ssl = true
4 | name = "pypi"
5 |
6 | [packages]
7 | packaging = "*"
8 | requests = "*"
9 | docker = "*"
10 | natsort = "*"
11 | python-debian = "*"
12 | pykg-config = "*"
13 | rpmfile = "*"
14 | rpm = "*"
15 | spdx-tools = "*"
16 | packageurl-python = "*"
17 | setuptools = "*"
18 | validators = "*"
19 | pytest-xdist = "*"
20 | pytest-cov = "*"
21 | pip-audit = "*"
22 |
23 | [dev-packages]
24 | bump2version = "*"
25 | pytest = "*"
26 |
27 | [requires]
28 | python_version = "3.12"
29 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ORCA (Obfuscation-Resilient Container Analysis)
2 |
3 | [](https://github.com/kube-security/orca/actions)
4 | [](https://github.com/kube-security/orca/blob/main/LICENSE)
5 | [](https://github.com/kube-security/orca/releases)
6 |
7 | ![ORCA logo](docs/orca.png)
8 |
9 | ORCA is a tool designed to analyze obfuscated or obscure container images, providing reliable Software Bill of Materials (SBOMs) even when traditional tools fail. It addresses the challenge of container image obfuscation and empowers developers and security teams to better manage and secure containerized environments.
10 |
11 | [](https://asciinema.org/a/iqqpx2iHC5BZM10hscxn7cayl)
12 |
13 | ## Motivation
14 |
15 | Containers often undergo obfuscation or contain altered content, making it difficult for standard tools to generate accurate SBOMs. ORCA scans all container layers and analyzes as many files as possible, even inspecting Dockerfile content for hidden commands.
16 |
17 | ## Installation
18 |
19 | The easiest way to use ORCA is via the `kube-security/orca` container image, available on the [GitHub Container Registry (GHCR)](https://github.com/kube-security/orca/pkgs/container/orca).
20 |
21 | You can run the image as follows:
22 |
23 | ```bash
24 | docker run -v /var/run/docker.sock:/var/run/docker.sock \
25 | -v $(pwd)/results:/app/results \
26 | -v $(pwd)/logs:/app/logs \
27 |     -it ghcr.io/kube-security/orca:latest <image-to-analyze>
28 |
29 | ```
30 |
31 | ### From source
32 |
33 | 1. **Download the package archive** from the [releases](https://github.com/kube-security/orca/releases) page.
34 |
35 | 2. **Install the package**:
36 | ```bash
37 | pip install dist/orca-<version>.tar.gz
38 | ```
39 |
40 | 3. **Build the necessary Go binary** (ORCA includes Go code that needs to be compiled):
41 | ```bash
42 | tar -xvf orca-<version>.tar.gz
43 | cd orca-<version>/orca/rpm_checker
44 | go build -o rpm_checker main.go
45 | mv rpm_checker /usr/local/bin # should be in PATH
46 | ```
47 |
48 | ## Usage
49 |
50 | Once installed, ORCA can be used to scan container images.
51 |
52 | ```bash
53 | orca --help
54 | usage: orca [-h] [-d DIR] [--csv] [-b] [-c] containers
55 |
56 | Software composition analysis for containers
57 |
58 | positional arguments:
59 | containers Comma separated list of containers to analyze
60 |
61 | options:
62 | -h, --help show this help message and exit
63 | -d DIR, --dir DIR Folder where to store results *without ending /*
64 | --csv Store also a csv file with package information
65 | -b, --with-binaries Analyze every binary file (slower). Go binaries are always analyzed
66 | -c, --complete Generate complete SPDX report with relationships (>200MB file is generated)
67 | ```
68 |
69 | Example usage: `orca alpine:latest`
70 |
71 |
72 | ## Results
73 |
74 | Basic statistics are printed to the terminal. In addition, two folders, `results` and `logs`, are created; they contain the generated SPDX documents and the analysis logs.
75 |
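To post-process a report programmatically, something like the following works (a minimal sketch: it assumes your scan wrote an SPDX JSON document under `results/`; adjust the filename to the one your run produced):

```python
import json

# Hypothetical file name; use the document your scan actually wrote to results/.
with open("results/alpine_latest.spdx.json") as fp:
    doc = json.load(fp)

# SPDX JSON documents list the discovered packages under the "packages" key.
for pkg in doc.get("packages", []):
    print(pkg.get("name"), pkg.get("versionInfo"))
```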
76 |
77 | ## Development
78 |
79 | 1. **Clone the repository**:
80 | ```bash
81 | git clone https://github.com/kube-security/orca.git
82 | cd orca
83 | ```
84 |
85 | 2. **Install dependencies** using `pip` or `Pipenv`:
86 | ```bash
87 | pip install -r requirements.txt
88 | ```
89 | or, if using Pipenv:
90 | ```bash
91 | pipenv install
92 | ```
93 |
94 | ### Obfuscation benchmark dataset
95 |
96 | You can compare the results of ORCA against other container-scanning tools using our [container obfuscation benchmark](https://github.com/kube-security/container-obfuscation-benchmark).
97 |
98 | ### Contributing
99 |
100 | Contributions are welcome!
101 |
102 | ## License
103 |
104 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
105 |
--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | pipenv run python setup.py sdist
--------------------------------------------------------------------------------
/docs/orca.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kube-security/orca/7a6f756bfbf70d7031ff1810c1c5c414747ac63d/docs/orca.png
--------------------------------------------------------------------------------
/orca/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kube-security/orca/7a6f756bfbf70d7031ff1810c1c5c414747ac63d/orca/__init__.py
--------------------------------------------------------------------------------
/orca/find_cpes.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import datetime
3 | import json
4 | import os
5 | from typing import List, Optional, Set
6 |
7 | from orca.lib.apk import get_apk
8 | from orca.lib.ascii_checkers import parse_gemspec
9 | from orca.lib.bin_checkers import check_binaries
10 | from orca.lib.composer import get_composer
11 | from orca.lib.cpe2cve import cpe2cve
12 | from orca.lib.dnf import get_dnf
13 | from orca.lib.dpkg import get_dpkg
14 | from orca.lib.golang import extract_go_dependencies, get_gomod
15 | from orca.lib.jar import get_jar
16 | from orca.lib.package_json import get_package_json
17 | from orca.lib.path import get_filepaths
18 | from orca.lib.perl import get_perl
19 | from orca.lib.pkgconfig import get_pkgconfig
20 | from orca.lib.python import extract_python_dependencies
21 | from orca.lib.rpm_packages import get_rpm
22 | from orca.lib.logger import logger
23 | from orca.lib.types import VulnerabilityReport
24 |
25 | unuseful_extensions = [".php",".h",".c",".xml",".png",".csv",".js",".css",".jar"]
26 |
27 |
28 |
29 | def is_executable(file_path):
30 | return os.path.isfile(file_path) and os.access(file_path, os.X_OK)
31 |
32 |
33 | def get_executables(files,directory) -> List[str]:
34 |     no_ext = filter(lambda x: "." not in x.split("/")[-1], files)  # first pass: files without an extension
35 | no_ext_executable = filter(lambda x: is_executable(os.path.join(directory,x)),no_ext)
36 |
37 | no_ext_binary = list(filter(lambda x: is_binary_executable(os.path.join(directory,x)),no_ext_executable))
38 | libs = list(filter(lambda x: x.endswith(".so"),files))
39 | return no_ext_binary + libs
40 |
41 | def split_executable_files(files, directory):
42 | executables = []
43 | non_executables = []
44 | for path in files:
45 | file = path.split("/")[-1]
46 | real_path = os.path.join(directory, path)
47 | if any([file.endswith(ext) for ext in unuseful_extensions]):
48 | non_executables.append(path)
49 | elif file.startswith("lib") and ".so" in file:
50 | continue
51 | elif is_binary_executable(real_path):
52 | executables.append(path)
53 | elif os.path.isdir(real_path):
54 | continue
55 | else:
56 | non_executables.append(path)
57 | return executables, non_executables
58 |
59 |
60 | def is_binary_executable(file_path):
61 | if not os.path.isfile(file_path):
62 | return False
63 | try:
64 | with open(file_path, "rb") as f:
65 | magic = f.read(4)
66 | except Exception:
67 | return False
68 |
69 | return magic == b"\x7fELF" # Check for ELF magic number
70 |
71 | def add_duplicate_links(directory,paths,files):
72 | fcopy = set()
73 | for file in paths.union(files):
74 | if len(file) < 2:
75 | # paths.remove(file)
76 | continue
77 | orig_path = directory + "/" + file
78 | realpath = os.path.realpath(orig_path)
79 | cleanpath = realpath.replace(directory + "/", "")
80 |
81 | if orig_path != realpath and (
82 | (cleanpath in files and file not in files)
83 | or (cleanpath in paths and file in files)
84 | ):
85 | fcopy.add(cleanpath)
86 | fcopy.add(file)
87 | return fcopy
88 |
89 | def remove_links(directory,paths):
90 | real_paths = set()
91 | for file in paths:
92 | if len(file) < 2:
93 | # paths.remove(file)
94 | continue
95 | orig_path = directory + "/" + file
96 | realpath = os.path.realpath(orig_path)
97 | cleanpath = realpath.replace(directory + "/", "")
98 |
99 | if orig_path != realpath:
100 | real_paths.add(cleanpath)
101 | else:
102 | real_paths.add(file)
103 | return real_paths
104 |
105 | def maybe_binary(file: str):
106 | end = file.split("/")[-1]
107 | return ("." not in file or ".so" in file) and end.lower() == end
108 |
109 | def scan_os(paths: List[str], directory: str) -> Optional[dict]:
110 | OS_INFOS = ["etc/os-release","etc-release","usr/lib/os-release","etc/debian_version"]
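    # os-release files are KEY=value lines, e.g.:
    #   NAME="Alpine Linux"
    #   VERSION_ID=3.19.1
    #   PRETTY_NAME="Alpine Linux v3.19"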
111 | os_relevant_paths = [path for path in paths if path in OS_INFOS ]
112 | if len(os_relevant_paths) == 0:
113 | logger.warning("Could not find os information")
114 | else:
115 | osinfo = {}
116 | for path in os_relevant_paths:
117 | content = open(os.path.join(directory,path)).read()
118 | if "debian_version" in path:
119 | osinfo["version"] = content.strip().split("/")[0]
120 | data = {}
121 | for line in content.split("\n"):
122 | # Strip the line of extra whitespace and ignore comments or empty lines
123 | line = line.strip()
124 | if line and not line.startswith('#'):
125 | # Split the line by '=' to separate key and value
126 | try:
127 | key, value = line.split('=', 1)
128 |                     except ValueError:
129 |                         continue  # not a KEY=value line; skip it
130 | # Remove surrounding quotes if the value is quoted
131 | value = value.strip('"')
132 | # Add to the dictionary
133 | data[key] = value
134 | if "NAME" in data:
135 | osinfo["name"] = data.get("NAME")
136 | osinfo["major"] = data.get("VERSION_ID")
137 | osinfo["codename"] = data.get("VERSION_CODENAME")
138 | osinfo["cpe"] = data.get("CPE_NAME")
139 | osinfo["prettyname"] = data.get("PRETTY_NAME")
140 |
141 |
142 | return osinfo
143 | return None
144 |
145 | def scan_filesystem(directory: str,files,analyze_binaries=False,accurate=False) -> VulnerabilityReport:
146 | """
147 | Scans the filesystem to identify and analyze files, extract dependencies, and generate a vulnerability report.
148 | Args:
149 | directory (str): The root directory to scan for files. This directory does not contain links or devices.
150 |         files (list): A list of files to analyze. This list also includes links and devices.
151 | analyze_binaries (bool, optional): Whether to analyze binary files for dependencies. Defaults to False.
152 | accurate (bool, optional): Whether to perform additional steps to remove duplicate files for more accurate results. Defaults to False.
153 | Returns:
154 | VulnerabilityReport: A report containing information about identified vulnerabilities, packages, and remaining files.
155 |
156 | """
157 | paths: Set[str] = get_filepaths(directory)
158 |
159 |
160 | report: VulnerabilityReport = VulnerabilityReport(paths,files)
161 |
162 | osinfo = scan_os(report.remaining_files,directory)
163 | if osinfo is not None:
164 | report.os = osinfo
165 |
166 | # OS-packages
167 | logger.info(f"Initial files {len(paths)}")
168 |
169 | logger.info("Parsing executables")
170 | executable = get_executables(report.remaining_files, directory)
171 | logger.info(f"Found {len(executable)} executables")
172 |
173 |     # Go binaries embed their dependency list; extract it from the executables first
174 | go = extract_go_dependencies(executable, directory)
175 | report.add_package_files(go)
176 |
177 |
178 |     # Optionally collapse files duplicated via symlinks (slower, more accurate)
179 | if accurate:
180 | logger.info("Removing duplicates")
181 | duplicates = add_duplicate_links(directory,paths,report.analyzed_files)
182 | report.analyzed_files.update(duplicates)
183 | report.remaining_files = report.remaining_files.difference(duplicates)
184 |
185 |
186 | logger.info("Parsing language-specific packages")
187 |
188 | report.add_package_files(extract_python_dependencies(paths,directory))
189 | report.add_package_files(get_jar(report.remaining_files,directory))
190 | report.add_package_files(get_package_json(report.remaining_files,directory))
191 | report.add_package_files(get_composer(report.remaining_files, directory))
192 | report.add_package_files(get_perl(report.remaining_files,directory))
193 | report.add_package_files(parse_gemspec(report.remaining_files,directory))
194 | report.add_package_files(get_gomod(report.remaining_files,directory))
195 |
196 |
197 |
198 | logger.info("Parsing OS package managers")
199 | report.add_package_files(get_dpkg(report.remaining_files, directory))
200 | report.add_package_files(get_rpm(report.remaining_files, directory))
201 | report.add_package_files(get_apk(report.remaining_files,directory))
202 | report.add_package_files(get_dnf(report.remaining_files,directory))
203 | report.add_package_files(get_pkgconfig(report.remaining_files,directory))
204 |
205 | if analyze_binaries:
206 | binaries = check_binaries(directory,executable)
207 | report.add_package_files(binaries)
208 |
209 |
210 | logger.info(f"Files not indexed {len(report.remaining_files)}")
211 | logger.info(f"Total Packages {len(report.packages)}")
212 | return report
213 |
214 |
215 | def get_cpes(directory: str,analyze_binaries=False,store_cpes=True,store_cpe_files=True,accurate=False,analyze_cves=False):
216 |
217 |     report: VulnerabilityReport = scan_filesystem(directory, get_filepaths(directory), analyze_binaries, accurate)  # scan_filesystem expects the file list as its second argument
218 | pkgset = list(set(report.packages))
219 |
220 | if store_cpes:
221 | with open("result.csv","w") as fp:
222 | fp.write("product,version,vendor\n")
223 | for pkg in pkgset:
224 | fp.write(pkg.to_csv_entry() + "\n")
225 | fp.close()
226 |
227 | if store_cpe_files:
228 | with open("cpe_files.json","w") as fp:
229 | json.dump(report.to_json(),fp,indent="\t")
230 | fp.close()
231 |
232 | if analyze_cves:
233 | cpeset = set([cpe.to_cpe() for cpe in report.packages])
234 | total_cves = set()
235 | for cpe in cpeset:
236 | cves = cpe2cve(cpe)
237 | total_cves.update(cves)
238 | for cve in cves:
239 | logger.error(cve)
240 | logger.error(f"Found {len(total_cves)} CVEs")
241 |
242 |
243 |
244 | if __name__ == "__main__":
245 | parser = argparse.ArgumentParser(
246 | description="""Scans for CPEs in a given directory. Currently in alpha phase, the program will randomly select N=30 subfolders and scan for cpes therein"""
247 | )
248 | parser.add_argument(
249 | "-d", "--directory", type=str, help="Directory to analyze", required=True
250 | )
251 |     parser.add_argument(
252 |         "--store-cpes", action=argparse.BooleanOptionalAction, help="Store cpes to file (result.csv)", required=False, default=True
253 |     )
254 |     parser.add_argument(
255 |         "--store-cpe-files", action=argparse.BooleanOptionalAction, help="Store cpe-related files to file (cpe_files.json)", required=False, default=True
256 |     )
257 |     parser.add_argument(
258 |         "--analyze-cves", action=argparse.BooleanOptionalAction, help="Scan for CVEs", required=False, default=False
259 |     )
260 | args = parser.parse_args()
261 |
262 | path: str = args.directory
263 | store_cpes = args.store_cpes
264 | store_cpe_files = args.store_cpe_files
265 | analyze_cves = args.analyze_cves
266 | subdirs = [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))]
267 | for subdir in subdirs:
268 | directory = os.path.join(path,subdir)
269 | start = datetime.datetime.now()
270 | get_cpes(directory,analyze_binaries=False,store_cpes=store_cpes,store_cpe_files=store_cpe_files,analyze_cves=analyze_cves)
271 | end = datetime.datetime.now()
272 | logger.info(f"Elapsed time: {(end-start).total_seconds() * 1000} ms")
273 | logger.debug("------END------")
274 |
--------------------------------------------------------------------------------
/orca/lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kube-security/orca/7a6f756bfbf70d7031ff1810c1c5c414747ac63d/orca/lib/__init__.py
--------------------------------------------------------------------------------
/orca/lib/analyzer.py:
--------------------------------------------------------------------------------
1 | from .utils import calculate_sha256
2 | from .logger import logger
3 | import base64
4 | import os
5 | import requests
6 | from .types import PackageRecord, to_record
7 | import re
8 | from packaging.version import Version
9 |
10 |
11 | def search_vulnerabilities(name: str,version: str):
12 | """
13 | Code adapted from : https://github.com/pypa/packaging/blob/main/src/packaging/version.py
14 | """
15 | url = f"https://pypi.org/pypi/{name}/{version}/json"
16 | response : requests.Response = requests.get(url,timeout=10)
17 | # TODO: error handling
18 | response_json = response.json()
19 | results: list = []
20 | vulns = response_json.get("vulnerabilities")
21 |
22 | # No `vulnerabilities` key means that there are no vulnerabilities for any version
23 | if vulns is None:
24 | return results
25 | for v in vulns:
26 | id = v["id"]
27 |
28 | # If the vulnerability has been withdrawn, we skip it entirely.
29 | withdrawn_at = v.get("withdrawn")
30 | if withdrawn_at is not None:
31 | logger.debug(f"PyPI vuln entry '{id}' marked as withdrawn at {withdrawn_at}")
32 | continue
33 |
34 | # Put together the fix versions list
35 |         try:
36 |             fix_versions = [Version(fixed_in) for fixed_in in v["fixed_in"]]
37 |         except Exception:
38 |             logger.error(f'Received malformed version from PyPI: {v["fixed_in"]}')
39 |             continue
40 | # The ranges aren't guaranteed to come in chronological order
41 | fix_versions.sort()
42 |
43 | description = v.get("summary")
44 | if description is None:
45 | description = v.get("details")
46 |
47 | if description is None:
48 | description = "N/A"
49 |
50 | # The "summary" field should be a single line, but "details" might
51 | # be multiple (Markdown-formatted) lines. So, we normalize our
52 | # description into a single line (and potentially break the Markdown
53 | # formatting in the process).
54 | description = description.replace("\n", " ")
55 |
56 | results.append({
57 | 'id':id,
58 | 'description':description,
59 | 'fix_versions':fix_versions,
60 | 'aliases':set(v["aliases"])
61 | })
62 | return results
63 |
64 |
65 |
66 | def analyze_record(directory: str, record: PackageRecord) -> bool:
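    # RECORD entries follow the wheel spec: "path,sha256=<urlsafe-b64-digest>,size";
    # both the recorded size and the digest are verified below.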
67 | #logger.info(f"Analysing record {directory}/{record.path}")
68 |     if not os.path.exists(f"{directory}/{record.path}") and record.nlines is not None:
69 |         logger.error(f"Path does not exist {directory}/{record.path}")
70 |         return False
71 |     if record.nlines is not None:
72 |         with open(f"{directory}/{record.path}", "rb") as fp:
73 |             content = fp.read()
74 |         if len(content) != record.nlines:
75 |             logger.error(f"File {directory}/{record.path} has incorrect number of bytes: expected {record.nlines}, actual {len(content)}")
76 |             return False
77 | assert record.hashtype == "sha256"
78 | hash = calculate_sha256(f"{directory}/{record.path}")
79 | digest = base64.urlsafe_b64encode(hash).decode()
80 | if digest[-1] == "=":
81 | digest = digest[:-1]
82 | if digest != record.hash:
83 | logger.error(f"Hash value does not match for file: {directory}/{record.path}")
84 | return False
85 |
86 |
87 | return True
88 |
89 | def get_package_version(directory: str, package: str) -> tuple:
90 | version_regex = r'\nVersion: (.*)'
91 | assert os.path.exists(f"{directory}/{package}/METADATA")
92 | fp = open(f"{directory}/{package}/METADATA")
93 | content = fp.read()
94 | matches = re.findall(version_regex,content)
95 | assert len(matches) == 1
96 | version = matches[0]
97 | package_name = package.split("-")[0]
98 | return (package_name,version)
99 |
100 |
101 | def analyze_package(directory: str,package: str):
102 | # Read the content of RECORD FILE
103 | assert os.path.exists(f"{directory}/{package}/RECORD")
104 | fp = open(f"{directory}/{package}/RECORD")
105 | content = fp.read()
106 | logger.info(f"Analysing package {package}")
107 | integrity = []
108 | for record_item in content.split("\n"): # Package integrity
109 | if len(record_item) > 1:
110 | record = to_record(record_item)
111 | integrity.append(analyze_record(directory, record))
112 | (name, version) = get_package_version(directory,package)
113 | results = search_vulnerabilities(name,version)
114 | if results != []:
115 | for result in results:
116 | logger.error(f"Vulnerability {result['id']} found on dependency")
117 | return False
118 |     return all(integrity)  # True only if every RECORD entry verified
119 |
--------------------------------------------------------------------------------
/orca/lib/apk.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | from typing import Dict, List
4 |
5 | from . import logger
6 | from .types import PackageInfo,PackageInfoType
7 |
8 |
9 |
10 | def read_apk_db(db_path,path) -> Dict[PackageInfo,List[str]]:
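    # The APK "installed" database is a series of blank-line-separated records
    # made of single-letter fields, e.g.:
    #   P:musl                 (package name)
    #   V:1.2.4-r2             (version)
    #   F:lib                  (current directory)
    #   R:ld-musl-x86_64.so.1  (file inside that directory)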
11 | fs = open(db_path).read()
12 | cpeMap = {}
13 |     for entry in fs.split("\n\n"):
14 |         package = ""
15 |         version = ""
16 |         last_folder = ""
17 |         files = set()
18 |         # iterate line by line: splitting on any whitespace would mangle the fields
19 |         for line in entry.split("\n"):
20 | if line.startswith("P:"):
21 | package = line[2:]
22 | elif line.startswith("V:"):
23 | version = line[2:]
24 | elif line.startswith("F:"):
25 | last_folder = line[2:]
26 | elif line.startswith("R:"):
27 | files.add(last_folder + "/"+ line[2:])
28 | if package == "":
29 | continue
30 | files.add(path)
31 | package = PackageInfo(package,version,None,PackageInfoType.APK)
32 | cpeMap[package] = files
33 | return cpeMap
34 |
35 | def read_world_file(db_path,path) -> Dict[PackageInfo,List[str]]:
36 | lines = open(db_path).readlines()
37 | cpeMap = {}
38 | files = set()
39 | files.add(path)
40 | for entry in lines:
41 | package = PackageInfo(entry.strip(),None,None,PackageInfoType.APK)
42 | cpeMap[package] = files
43 | return cpeMap
44 |
45 | # 1549 65
46 | def get_apk(paths: List[str],directory: str)-> Dict[PackageInfo,List[str]]:
47 |     apks = [p for p in paths if "apk/db/installed" in p or "apk/world" in p]  # or "apk/db/names" in p
48 | total_pkgs = {}
49 | for path in apks:
50 | if "installed" in path:
51 | packages = read_apk_db(os.path.join(directory,path),path)
52 | total_pkgs.update(packages)
53 | elif "world" in path:
54 | packages = read_world_file(os.path.join(directory,path),path)
55 | total_pkgs.update(packages)
56 |
57 |
58 | if len(total_pkgs.keys()):
59 | logger.logger.info(f"APKs: {len(total_pkgs.keys())}")
60 | return total_pkgs
--------------------------------------------------------------------------------
/orca/lib/ascii_checkers.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import os
4 | import re
5 | from typing import Dict, List
6 |
7 | from . import logger
8 | from .types import PackageInfo, PackageInfoType
9 |
10 | GOSUM = re.compile(r'(\S+)\s+(\S+)\s+h1:(\S+)')
11 |
12 | def parse_gemspec(paths: List[str],directory: str) -> Dict[PackageInfo, List[str]]:
13 | files = [f for f in paths if f.endswith(".gemspec")]
14 |
15 | patterns = {
16 | 'name': r'\.name\s*=\s*["\']([^"\']+)["\']',
17 | 'version': r'\.version\s*=\s*["\']([^"\']+)["\']',
18 | }
19 | packages: Dict[PackageInfo, List[str]] = {}
20 | for filename in files:
21 |         try:
22 |             with open(os.path.join(directory, filename), 'r') as file:
23 |                 content = file.read()
24 |         except Exception as e:
25 |             logger.logger.error(f"[GEM] could not open file {filename} - {e}")
26 |             continue
27 |
28 | spec_blocks = re.findall(r'Gem::Specification\.new do (.+?)end', content, re.DOTALL)
29 | for block in spec_blocks:
30 | gemspec_data = {}
31 | for key, pattern in patterns.items():
32 | match = re.search(pattern, block)
33 | if match:
34 | gemspec_data[key] = match.group(1)
35 | if "version" not in gemspec_data:
36 | #gemspec_data['version'] = ""
37 | continue
38 | if "name" not in gemspec_data:
39 | continue
40 | p = PackageInfo(gemspec_data['name'],gemspec_data['version'],None,PackageInfoType.GEM)
41 |
42 | if p in packages:
43 | packages[p].append(filename)
44 | else:
45 | packages[p] = [filename]
46 | if len(packages.keys()):
47 | logger.logger.info(f"Gemspec : {len(packages)}")
48 | return packages
49 |
50 |
51 | def parse_gosum(filepath):
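    # go.sum lines look like:
    #   github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
    # with a second "<module> <version>/go.mod h1:..." entry per module.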
52 | cpes = []
53 | with open(filepath, 'r') as file:
54 | lines = file.readlines()
55 | for line in lines:
56 | match = GOSUM.match(line)
57 | if match:
58 | module_path = match.group(1)
59 | version = match.group(2)[1:]
60 | org = module_path.split("/")[-2]
61 | module = module_path.split("/")[-1]
62 | version = version if "/go.mod" not in version else version.split("/")[0]
63 | cpes.append(f"cpe:2.3:a:{org}:{module}:{version}:*:*:*:*:*:*:*")
64 | return list(set(cpes))
65 |
66 |
--------------------------------------------------------------------------------
/orca/lib/bin_checkers.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import os
4 | import re
5 | from typing import List
6 |
7 | from . import logger
8 | from .types import PackageInfo
9 |
10 | zlib = re.compile(r'inflate\s\(.*\)\s([0-9]+\.[0-9]+\.[0-9]+)')
11 | GCC_re = re.compile(r'GCC:\s\(.*\)\s([0-9]+\.[0-9]+\.[0-9]+)')
12 | GCC2_re = re.compile(r'gcc\s([0-9]+\.[0-9]+\.[0-9]+)')
13 | openssl_re = re.compile(r'.*OpenSSL\s([0-9]+\.[0-9]+\.[0-9]+)')
14 | pg_re = re.compile(r'.*\(PostgreSQL\)\s([0-9]+\.[0-9]+\.[0-9]+)')
15 |
16 | def check_gcc(strings: List[str]):
17 |
18 | for string in strings:
19 | match = GCC_re.search(string)
20 | if match:
21 | version = match.group(1)
22 | return PackageInfo("gcc",version,"gnu",None)
23 | return None
24 |
25 | def check_gcc2(strings: List[str]):
26 | for string in strings:
27 | match = GCC2_re.search(string)
28 | if match:
29 | version = match.group(1)
30 | return PackageInfo("gcc",version,"gnu",None)
31 | return None
32 |
33 | def check_openssl(strings: List[str]):
34 | for string in strings:
35 | match = openssl_re.search(string)
36 | if match:
37 | version = match.group(1)
38 | return PackageInfo("openssl",version,"openssl",None)
39 | return None
40 |
41 | def check_postgres(strings: List[str]):
42 | for string in strings:
43 | match = pg_re.search(string)
44 | if match:
45 | version = match.group(1)
46 | return PackageInfo("postgresql",version,"postgresql",None)
47 | return None
48 |
49 | def check_zlib(strings: List[str]):
50 |
51 | for string in strings:
52 | match = zlib.search(string)
53 | if match:
54 | version = match.group(1)
55 | return PackageInfo("zlib",version,"zlib",None)
56 | return None
57 |
58 | def check_self(strings: List[str], binary_name):
59 |     if len(binary_name) == 1:
60 |         return None
61 |     pattern = r'{binary_name}\s(v?[0-9]+\.[0-9]+\.[0-9]+)'.format(binary_name=re.escape(binary_name))
62 |     try:
63 |         selfbin = re.compile(pattern)
64 |     except Exception as e:
65 |         logger.logger.info(f"Could not compile regex for {binary_name} {e}")
66 |         return None
67 | for string in strings:
68 | match = selfbin.search(string)
69 | if match:
70 | version = match.group(1)
71 | return PackageInfo(binary_name,version,None,None)
72 | return None
73 |
74 |
75 | def extract_strings(filename, min_length=4):
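    # Pure-Python analogue of the strings(1) utility: collect runs of printable
    # ASCII (0x20-0x7E) that are at least min_length bytes long.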
76 | thestrings = []
77 | with open(filename, 'rb') as file:
78 | data = file.read()
79 |
80 | # Use a regex to find sequences of printable characters of at least `min_length`
81 | pattern = re.compile(b'[\x20-\x7E]{' + str(min_length).encode() + b',}')
82 | strings = pattern.findall(data)
83 |
84 | for s in strings:
85 | thestrings.append(s.decode('ascii'))
86 | return thestrings
87 |
88 | def check_binaries(directory,executables):
89 | results = {}
90 | for exec_file in executables:
91 | cpes = static_check_cpes(os.path.join(directory,exec_file))
92 | if len(cpes):
93 | for cpe in cpes:
94 | if cpe in results:
95 | results[cpe].append(exec_file)
96 | else:
97 | results[cpe] = [exec_file]
98 | if len(results):
99 | logger.logger.info(f"Binaries {len(results)}")
100 | return results
101 |
102 | def static_check_cpes(filepath):
103 | """
104 | This function extracts strings from a file and
105 |     applies regexes to find known applications and versions.
106 |
107 | ---
108 | Returns: List of CPEs
109 | """
110 | strings = set(extract_strings(filepath,4))
111 | cpes = []
112 | gcc_ver = check_gcc(strings)
113 | if gcc_ver is not None:
114 | cpes.append(gcc_ver)
115 | gcc_ver2 = check_gcc2(strings)
116 | if gcc_ver2 is not None:
117 | cpes.append(gcc_ver2)
118 | ssl_ver = check_openssl(strings)
119 | if ssl_ver is not None:
120 | cpes.append(ssl_ver)
121 | zlib_ver = check_zlib(strings)
122 | if zlib_ver is not None:
123 | cpes.append(zlib_ver)
124 | pg = check_postgres(strings)
125 | if pg is not None:
126 | cpes.append(pg)
127 | self_ver = check_self(strings,filepath.split("/")[-1].strip())
128 | if self_ver is not None:
129 | cpes.append(self_ver)
130 | return cpes
131 |
--------------------------------------------------------------------------------
/orca/lib/bin_checkers_cpe.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import re
4 | from typing import List
5 |
6 | zlib = re.compile(r'inflate\s\(.*\)\s([0-9]+\.[0-9]+\.[0-9]+)')
7 | GCC_re = re.compile(r'GCC:\s\(.*\)\s([0-9]+\.[0-9]+\.[0-9]+)')
8 | GCC2_re = re.compile(r'gcc\s([0-9]+\.[0-9]+\.[0-9]+)')
9 | openssl_re = re.compile(r'.*OpenSSL\s([0-9]+\.[0-9]+\.[0-9]+)')
10 | pg_re = re.compile(r'.*\(PostgreSQL\)\s([0-9]+\.[0-9]+\.[0-9]+)')
11 |
12 | def check_gcc(strings: List[str]):
13 |
14 | for string in strings:
15 | match = GCC_re.search(string)
16 | if match:
17 | version = match.group(1)
18 | return f"cpe:2.3:a:gnu:gcc:{version}:*:*:*:*:*:*:*",string
19 | return None,None
20 |
21 | def check_gcc2(strings: List[str]):
22 | for string in strings:
23 | match = GCC2_re.search(string)
24 | if match:
25 | version = match.group(1)
26 | return f"cpe:2.3:a:gnu:gcc:{version}:*:*:*:*:*:*:*",string
27 | return None,None
28 |
29 | def check_openssl(strings: List[str]):
30 | for string in strings:
31 | match = openssl_re.search(string)
32 | if match:
33 | version = match.group(1)
34 | return f"cpe:2.3:a:openssl:openssl:{version}:*:*:*:*:*:*:*",string
35 | return None,None
36 |
37 | def check_postgres(strings: List[str]):
38 | for string in strings:
39 | match = pg_re.search(string)
40 | if match:
41 | version = match.group(1)
42 | return f"cpe:2.3:a:postgresql:postgresql:{version}:*:*:*:*:*:*:*",string
43 | return None,None
44 |
45 | def check_zlib(strings: List[str]):
46 |
47 | for string in strings:
48 | match = zlib.search(string)
49 | if match:
50 | version = match.group(1)
51 | return f"cpe:2.3:a:zlib:zlib:{version}:*:*:*:*:*:*:*",string
52 | return None,None
53 |
54 | def check_self(strings: List[str], binary_name):
55 |     pattern = r'{binary_name}\s(v?[0-9]+\.[0-9]+\.[0-9]+)'.format(binary_name=re.escape(binary_name))
56 |     selfbin = re.compile(pattern)
57 | for string in strings:
58 | match = selfbin.search(string)
59 | if match:
60 | version = match.group(1)
61 | return f"cpe:2.3:a:*:{binary_name}:{version}:*:*:*:*:*:*:*",string
62 | return None,None
63 |
64 |
65 | def extract_strings(filename, min_length=4):
66 | thestrings = []
67 | with open(filename, 'rb') as file:
68 | data = file.read()
69 |
70 | # Use a regex to find sequences of printable characters of at least `min_length`
71 | pattern = re.compile(b'[\x20-\x7E]{' + str(min_length).encode() + b',}')
72 | strings = pattern.findall(data)
73 |
74 | for s in strings:
75 | thestrings.append(s.decode('ascii'))
76 | return thestrings
77 |
78 | def static_check_cpes(filepath):
79 | """
80 | This function extracts strings from a file and
81 |     applies regexes to find known applications and versions.
82 |
83 | ---
84 | Returns: List of CPEs
85 | """
86 | strings = set(extract_strings(filepath,4))
87 | cpes = []
88 | gcc_ver,gcc_str = check_gcc(strings)
89 | if gcc_ver is not None:
90 | strings.remove(gcc_str)
91 | cpes.append(gcc_ver)
92 |
93 | gcc_ver2,gcc_str2 = check_gcc2(strings)
94 | if gcc_ver2 is not None:
95 | strings.remove(gcc_str2)
96 | cpes.append(gcc_ver2)
97 |
98 | ssl_ver,ssl_str = check_openssl(strings)
99 | if ssl_ver is not None:
100 | strings.remove(ssl_str)
101 | cpes.append(ssl_ver)
102 |
103 | zlib_ver,zlib_str = check_zlib(strings)
104 | if zlib_ver is not None:
105 | strings.remove(zlib_str)
106 | cpes.append(zlib_ver)
107 |
108 | pg,pg_str = check_postgres(strings)
109 | if pg is not None:
110 | strings.remove(pg_str)
111 | cpes.append(pg)
112 |
113 | self_ver,self_str = check_self(strings,filepath.split("/")[-1].strip())
114 | if self_ver is not None:
115 | strings.remove(self_str)
116 | cpes.append(self_ver)
117 |
118 | return list(set(cpes))
119 |
--------------------------------------------------------------------------------
/orca/lib/composer.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | from typing import List
4 | import json
5 |
6 | from . import logger
7 | from .types import PackageInfo, PackageInfoType
8 |
9 |
10 | def parse_composer_lock(paths,directory,filename):
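    # composer.lock carries a "packages" array of {"name": "vendor/pkg", "version": ...}
    # entries, plus optional "autoload"/"autoload-dev" PSR mappings that point at
    # the package's source directories.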
11 | composer_lock = json.load(open(directory +"/" + filename))
12 | packages = []
13 | accessed_paths = []
14 | files = [filename]
15 | for package in composer_lock["packages"]:
16 | name = package["name"]
17 | version = package["version"]
18 | pkg = PackageInfo(name.split("/")[1],version,name.split("/")[0],PackageInfoType.COMPOSER)
19 |
20 | basepath = os.path.dirname(filename)
21 | packages.append(pkg)
22 |
23 | if "autoload" in composer_lock:
24 | for key,value in composer_lock["autoload"].items():
25 | if "psr" in key:
26 | v = list(value.values())
27 | if type(v[0]) is list:
28 | accessed_paths.extend(v[0])
29 | else:
30 | accessed_paths.extend(v)
31 | if "autoload-dev" in composer_lock:
32 | for key,value in composer_lock["autoload-dev"].items():
33 | if "psr" in key:
34 | if type(v[0]) is list:
35 | accessed_paths.extend(v[0])
36 | else:
37 | accessed_paths.extend(v)
38 | for path in accessed_paths:
39 | baseinfo = os.path.join(basepath,path)
40 | for imagepath in paths:
41 | if baseinfo in imagepath:
42 | files.append(baseinfo)
43 |
44 | return {pkg: files for pkg in packages}
45 |
46 | def parse_composer(paths,directory,filename):
47 | try:
48 | composer = json.load(open(directory +"/" + filename))
49 | except Exception as e:
50 | logger.logger.error(f"[COMPOSER] Could not open file {filename} -- {e}")
51 | return {}
52 |
53 | if "name" in composer:
54 | name = composer["name"]
55 | version = composer["version"] if "version" in composer else None
56 | pkg = PackageInfo(name.split("/")[1],version,name.split("/")[0],PackageInfoType.COMPOSER)
57 | accessed_paths = []
58 | files = [filename]
59 | basepath = os.path.dirname(filename)
60 | if "autoload" in composer:
61 | for key,value in composer["autoload"].items():
62 | if "psr" in key:
63 | values = value.values()
64 | for v in values:
65 | if type(v) is list:
66 | accessed_paths.extend(v)
67 | else:
68 | accessed_paths.append(v)
69 | if "autoload-dev" in composer:
70 | for key,value in composer["autoload-dev"].items():
71 | if "psr" in key:
72 | values = value.values()
73 | for v in values:
74 | if type(v) is list:
75 | accessed_paths.extend(v)
76 | else:
77 | accessed_paths.append(v)
78 | for path in accessed_paths:
79 | baseinfo = os.path.join(basepath,path)
80 | for imagepath in paths:
81 | if baseinfo in imagepath:
82 | files.append(baseinfo)
83 |
84 | return {pkg: files}
85 | return {}
86 |
87 |
88 | def get_composer(paths: List[str],directory: str): # Assuming only one composer per container
89 | packages = {}
90 | files = set()
91 | composer_lock = sorted([path for path in paths if "composer.lock" in path ],key=len)
92 | composer_json = sorted([path for path in paths if "composer.json" in path ],key=len)
93 | if len(composer_lock) == 0:
94 | return {}
95 | files.update(composer_json)
96 | files.update(composer_lock)
97 | raw_packages = []
98 |     # Start with the root composer.lock
99 | root_composer_lock = json.load(open(directory +"/" + composer_lock[0]))
100 | #root_composer_json = json.load(open(directory +"/" + composer_json[0]))
101 | for package in root_composer_lock["packages"]:
102 | name = package["name"]
103 | version = package["version"]
104 | pkg = PackageInfo(name.split("/")[1],version,name.split("/")[0],PackageInfoType.COMPOSER)
105 | packages[pkg] = []
106 | raw_packages.append(name)
107 |
108 | basepath = os.path.dirname(composer_json[0])
109 | for package in raw_packages:
110 | composer_lock = [x for x in composer_lock if package not in x]
111 | composer_json = [x for x in composer_json if package not in x]
112 |
113 | accessed_paths = []
114 | if "autoload" in root_composer_lock:
115 | for key,value in root_composer_lock["autoload"].items():
116 | if "psr" in key:
117 | v = list(value.values())
118 | if type(v[0]) is list:
119 | accessed_paths.extend(v[0])
120 | else:
121 | accessed_paths.extend(v)
122 | if "autoload-dev" in root_composer_lock:
123 | for key,value in root_composer_lock["autoload-dev"].items():
124 | if "psr" in key:
125 | if type(v[0]) is list:
126 | accessed_paths.extend(v[0])
127 | else:
128 | accessed_paths.extend(v)
129 | for path in paths:
130 | for accessed_path in accessed_paths:
131 | if f"{basepath}/{accessed_path}" in path:
132 | files.add(path)
133 | if "vendor" in path:
134 | upath = path.replace(f"{basepath}/vendor/","")
135 | try:
136 | upathsplit = upath.split("/")
137 | final_package = f"{upathsplit[0]}/{upathsplit[1]}"
138 | if final_package in raw_packages:
139 | files.add(path)
140 | else:
141 | pass
142 | #print(path)
143 | except Exception:
144 | pass # probably a folder
145 |
146 |
147 |
148 | for package in packages:
149 | packages[package] = files
150 |
151 | for composer in composer_json:
152 | packages.update(parse_composer(paths,directory,composer))
153 | if len(packages):
154 | logger.logger.info(f"PHP composer: {len(packages)}")
155 | return packages
--------------------------------------------------------------------------------
/orca/lib/cpe2cve.py:
--------------------------------------------------------------------------------
1 | # Original Author: Matteo (xonoxitron) Pisani
2 | # Description: Given a CPE, this script returns all related CVE, ordered by severity (desc)
3 | # Usage: python3 cpe2cve.py -c cpe:2.3:a:apache:http_server:2.4.54
4 |
5 | # Import necessary modules
6 | import requests
7 |
8 | CPES_URL = "https://services.nvd.nist.gov/rest/json/cpes/2.0"
9 | # Function to retrieve CVE data for a given CPE
10 | def get_cve_data(session:requests.Session,cpe:str):
11 | base_url = "https://services.nvd.nist.gov/rest/json/cves/2.0?cpeName="
12 | response = session.get(base_url + cpe)
13 | # Check if the request was successful (status code 200)
14 | if response.status_code == 200:
15 | try:
16 | cve_data = response.json()
17 |
18 | return cve_data.get("vulnerabilities", [])
19 | except Exception:
20 | print(response.text)
21 | exit()
22 | else:
23 | print(f"Error in HTTP request: {response.status_code}")
24 | print(response.text)
25 | return []
26 |
27 |
28 | # Function to retrieve the CVE ID from a CVE object
29 | def get_cve_id(cve):
30 | try:
31 | return cve["cve"]["CVE_data_meta"]["ID"]
32 |     except (KeyError, TypeError, ValueError):
33 |         # In case of missing data, return a placeholder ID
34 |         return "N/A"
35 |
36 |
37 | # Function to retrieve metric version
38 | def get_cve_metric_version(cve):
39 | if "baseMetricV4" in cve["impact"]:
40 | return "4"
41 | if "baseMetricV3" in cve["impact"]:
42 | return "3"
43 | if "baseMetricV2" in cve["impact"]:
44 | return "2"
45 | if "baseMetricV1" in cve["impact"]:
46 | return "1"
47 | return "N/A"
48 |
49 |
50 | # Function to retrieve the score from a CVE object
51 | def get_cve_score(cve):
52 | try:
53 | v = get_cve_metric_version(cve)
54 | return float(cve["impact"]["baseMetricV" + v]["cvssV" + v]["baseScore"])
55 | except (KeyError, TypeError, ValueError):
56 | # In case of missing or non-numeric data, assign a high value for non-evaluability
57 | return float("inf")
58 |
59 |
60 | # Function to retrieve the severity from a CVE object
61 | def get_cve_severity(cve):
62 | v = get_cve_metric_version(cve)
63 | cvss = cve["impact"]["baseMetricV" + v]
64 | if "severity" in cvss:
65 | return cvss["severity"]
66 | if "baseSeverity" in cvss["cvssV" + v]:
67 | return cvss["cvssV" + v]["baseSeverity"]
68 | return "N/A"
69 |
70 |
71 | def create_session():
72 | s = requests.Session()
73 | s.headers.update({"apiKey":"1d424904-314b-4ebe-9740-23b427694cf4"})
74 | return s
75 |
76 | def search_cpe(session: requests.Session,cpe:str):
77 | response = session.get(f"{CPES_URL}?cpeMatchString={cpe}")
78 | cpes = []
79 | if response.status_code != 200:
80 | return []
81 | json_response = response.json()
82 | products = json_response.get("products",[])
83 | for product in products:
84 | cpeName = product["cpe"]["cpeName"]
85 | cpes.append(cpeName)
86 | return cpes
87 |
88 |
89 |
90 |
91 | # Main function for parsing command-line arguments and performing the sorting and printing
92 | def cpe2cve(cpe:str):
93 |     # Resolve the CPE match string to concrete cpeNames, then collect their CVEs
94 | session = create_session()
95 | cpeNames = search_cpe(session,cpe)
96 | print(cpeNames)
97 | cves = []
98 | for cpeName in cpeNames:
99 | # Retrieve CVE data for the given CPE
100 | cve_data = get_cve_data(session,cpeName)
101 | for item in cve_data:
102 | print(item["cve"]["id"])
103 | cves.append(item["cve"]["id"])
104 |
105 |
106 | return cves
122 |
--------------------------------------------------------------------------------
/orca/lib/dnf.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | from typing import Dict, List
4 |
5 | from . import logger
6 | from .types import PackageInfo, PackageInfoType
7 | import sqlite3
8 |
9 |
10 | def read_dnf_db(db_path,path) -> Dict[PackageInfo,List[str]]:
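    # dnf's history.sqlite keeps an "rpm" table with one row per installed
    # package; name and version are enough to build a PackageInfo here.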
11 |     c = sqlite3.connect(db_path)
12 |     cur = c.cursor()
13 |     res = cur.execute("SELECT name, version FROM rpm")
14 |     packagesMap = {}
15 |     for entry in res.fetchall():
16 |         packagesMap[PackageInfo(entry[0], entry[1], None, PackageInfoType.RPM)] = [path]
17 |     c.close()
18 |     return packagesMap
19 |
20 |
21 | def get_dnf(paths: List[str],directory: str)-> Dict[PackageInfo,List[str]]:
22 | if "var/lib/dnf/history.sqlite" in paths:
23 | packages = read_dnf_db(os.path.join(directory,"var/lib/dnf/history.sqlite"),"var/lib/dnf/history.sqlite")
24 |
25 | if len(packages.keys()):
26 | logger.logger.info(f"DNFs: {len(packages.keys())}")
27 | return packages
28 |
29 | return {}
--------------------------------------------------------------------------------
/orca/lib/dockerfile.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import List, Tuple
3 | import validators
4 |
5 | from orca.lib.types import PackageInfo, PackageInfoType, VulnerabilityReport
6 |
7 | def extract_urls(text):
8 | """
9 | This function extracts all the http and https urls from the list of commands
10 | """
11 | url_pattern = re.compile(r'https?://[^\s";|()\']+(?:\([^)]*\))?')
12 | return url_pattern.findall(text)
13 |
14 |
15 | def replace_curly_variables(url, line,env_variables=""):
16 | """
17 | This function searches for user-defined variables in the Dockerfile
18 | TODO: needs to be updated with variables of the Dockerfile. RN is only checking for variables in the same line.
19 | """
20 | variables = re.findall(r'\$\{[^}]+\}', url)
21 | env_var_map = {}
22 | for var in env_variables.split("\n"):
23 | if "=" in var:
24 | key,value = var.split("=",1)
25 | env_var_map[key] = value
26 | # Refactor line
27 | for k, v in env_var_map.items():
28 | line = line.replace(f"${{{k}}}",v)
29 | if variables:
30 | for variable in variables:
31 | var_name = variable.strip("${}()")
32 | var_pattern = re.compile(rf'{var_name}=(\S+)')
33 | match_line = var_pattern.search(line)
34 | match_env = var_pattern.search(env_variables)
35 | if match_line:
36 | url = url.replace(variable, match_line.group(1))
37 | elif match_env:
38 | url = url.replace(variable, match_env.group(1))
39 |
40 | array_pattern = re.compile(r'for\s+(\w+)\s+in\s+"\${(\w+)\[@\]}"')
41 | array_match = array_pattern.search(line)
42 | if array_match:
43 | urls = []
44 | component = array_match.group(1)
45 | array_name = array_match.group(2)
46 | array_pattern = re.compile(rf'{array_name}\s*=\s*\(([^)]+)\)')
47 | array_match = array_pattern.search(line)
48 | if array_match:
49 | array_values = array_match.group(1).split()
50 | for value in array_values:
51 | urls.append(url.replace(f"${{{component}}}", value).replace("\"",""))
52 | return urls
53 |
54 | return [url]
55 |
56 | def replace_dollar_variables(url, line, env_variables=""):
57 | """
58 | This function searches for user-defined variables in the Dockerfile
59 |     and replaces them with their values. It checks for variables on the same line and in the environment variables.
60 | """
61 | variables = re.findall(r'\$[a-zA-Z_][a-zA-Z0-9_]*', url)
62 | if variables:
63 | for variable in variables:
64 | var_name = variable.strip("$")
65 | var_pattern = re.compile(rf'{var_name}=(\S+)')
66 | match_line = var_pattern.search(line)
67 | match_env = var_pattern.search(env_variables)
68 |
69 | if match_line:
70 | url = url.replace(variable, match_line.group(1))
71 | elif match_env:
72 | url = url.replace(variable, match_env.group(1))
73 | return url
74 |
75 | def interpolate_variables(dockerfile_config):
76 | extracted_urls = []
77 | urls = []
78 |     configurations = ""
79 | 
80 |     if dockerfile_config['config'].get('Env'):
81 |         # Env is a list of "KEY=value" strings; join them so the regex-based
82 |         # variable resolution can search them like Dockerfile lines.
83 |         configurations = '\n'.join(dockerfile_config['config']['Env'])
84 |
85 | if len(dockerfile_config['history']) == 1 and 'created_by' in dockerfile_config['history'][0] and "crane" in dockerfile_config['history'][0]['created_by']:
86 | item = dockerfile_config['history'][0]
87 | comments = item["comment"]
88 | for comment in comments:
89 | if 'created_by' not in comment:
90 | continue
91 | line = comment['created_by']
92 | if "LABEL" in line or "http" not in line:
93 | continue
94 | else:
95 | ex_u = extract_urls(line)
96 | extracted_urls.extend(ex_u)
97 | for url in ex_u:
98 | replaced_url = replace_curly_variables(url, line,configurations)
99 | urls.append(replaced_url)
100 |
101 | else:
102 | for history_line in dockerfile_config['history']:
103 | if 'created_by' not in history_line:
104 | #print("Empty history entry:",history_line)
105 | continue
106 | line = history_line['created_by']
107 | if "LABEL" in line or "http" not in line:
108 | continue
109 | else:
110 | ex_u = extract_urls(line)
111 | extracted_urls.extend(ex_u)
112 | for url in ex_u:
113 | replaced_url = replace_curly_variables(url, line,configurations)
114 | urls.append(replaced_url)
115 | return urls
116 |
117 |
118 | def github_to_cpe(urls)->List[Tuple[PackageInfo,str]]:
119 | # Now find github stuff
120 | found_cpes = []
121 | github_pattern = re.compile(r'https://github\.com/([^/]+)/([^/]+)/releases/download/(v?\d+(\.\d+)*(\+\d+)?)/[^/]+')
122 | github_urls = [url for url in urls if github_pattern.match(url)]
123 | for github_url in github_urls:
124 | match = github_pattern.match(github_url)
125 | if match:
126 | author = match.group(1)
127 | name = match.group(2)
128 | version = match.group(3)
129 | found_cpes.append((PackageInfo(name,version,author,PackageInfoType.GITHUB),github_url))
130 | return found_cpes
131 |
132 |
133 | def selected_websites_to_cpe(urls)->List[Tuple[PackageInfo,str]]:
134 | rust_pattern = re.compile(r'https://static\.rust-lang\.org/rustup/archive/(\d+\.\d+\.\d+)/')
135 |
136 | github_content_pattern = re.compile(r'https://raw\.githubusercontent\.com/([^\/]+)\/([^\/]+)\/([^\/]*\d+[^\/]*)')
137 |
138 | github_archive_pattern = re.compile(r'https://github\.com/([^\/]+)\/([^\/]+)\/archive\/([^\/]+)\.tar\.gz')
139 |
140 | gradle_pattern = re.compile(r'https://services\.gradle\.org/distributions/gradle-(.*)-bin\.zip')
141 |
142 | postgresql_pattern = re.compile(r'https://ftp\.postgresql\.org/pub/source/(v[\d\.]+)')
143 |
144 | bitnami_pattern = re.compile(r'https://downloads\.bitnami\.com/files/stacksmith/([^\/]+)\.tar\.gz')
145 |
146 | generic_compressed_app_pattern = re.compile(r'.*\/(\w+)-([\d\.]+)\.tar\.[a-z]z')
147 | cpes = []
148 |
149 | for kurl in urls:
150 | url = kurl.rstrip()
151 | match_rust = rust_pattern.match(url)
152 | github_content_pattern_match = github_content_pattern.match(url)
153 | github_archive_pattern_match = github_archive_pattern.match(url)
154 | gradle_pattern_match = gradle_pattern.match(url)
155 | postgresql_pattern_match = postgresql_pattern.match(url)
156 | bitnami_pattern_match = bitnami_pattern.match(url)
157 | generic_compressed_app_pattern_match = generic_compressed_app_pattern.match(url)
158 | if match_rust:
159 | cpes.append((PackageInfo("rust",match_rust.group(1),"rust",type=PackageInfoType.RUST),url))
160 | elif github_content_pattern_match:
161 | cpes.append((PackageInfo(github_content_pattern_match.group(1),github_content_pattern_match.group(3),
162 | github_content_pattern_match.group(2),type=PackageInfoType.GITHUB),url))
163 | elif github_archive_pattern_match:
164 | #print(github_archive_pattern_match.groups())
165 | cpes.append((PackageInfo(github_archive_pattern_match.group(1),github_archive_pattern_match.group(3),
166 | github_archive_pattern_match.group(2),type=PackageInfoType.GITHUB),url))
167 | elif gradle_pattern_match:
168 | cpes.append((PackageInfo("gradle",gradle_pattern_match.group(1),"gradle",PackageInfoType.GRADLE),url))
169 | elif postgresql_pattern_match:
170 | cpes.append((PackageInfo("postgresql",postgresql_pattern_match.group(1),"postgresql"),url))
171 | elif bitnami_pattern_match:
172 | regex = r"^([a-zA-Z0-9-]+)-([\d.]+-\d+)-linux-(amd64)-debian-(\d+)"
173 | match = re.match(regex, bitnami_pattern_match.group(1))
174 | if match:
175 | name, version, arch, distro = match.groups()
176 | pkg = PackageInfo(name,version,"bitnami",arch=arch,type=PackageInfoType.BITNAMI)
177 | #purl = f"pkg:bitnami/{name}@{version}?arch={arch}&distro=debian-{distro}"
178 | cpes.append((pkg,url))
179 | elif generic_compressed_app_pattern_match: # TODO: this should probably be separated into a different function
180 | pkg = PackageInfo(generic_compressed_app_pattern_match.group(1),generic_compressed_app_pattern_match.group(2),generic_compressed_app_pattern_match.group(1))
181 | cpes.append((pkg,url))
182 |
183 | return cpes
184 |
185 | def extract_cpes_from_dockerfile(dockerfile_config):
186 |     # All extracted URLs, with environment and same-line variables interpolated where possible.
187 |     urls = [u.rstrip() for theurls in interpolate_variables(dockerfile_config) for u in theurls]
188 |
189 |     useful_urls = [u for u in urls if "$(" not in u]  # TODO: resolve command substitutions instead of skipping them
190 | found_cpes = github_to_cpe(useful_urls)
191 | found_cpes.extend(selected_websites_to_cpe(useful_urls))
192 |
193 | files_with_cpe = [cpe[1] for cpe in found_cpes]
194 | non_cpes = list(set(urls).difference(set(files_with_cpe)))
195 |
196 |
197 | return found_cpes,non_cpes
198 |
199 | def extract_cpes_from_dockerfile_with_validation(dockerfile_config) -> VulnerabilityReport:
200 |     report = VulnerabilityReport(set(["Dockerfile"]))
201 | cpes,non_cpes = extract_cpes_from_dockerfile(dockerfile_config)
202 | new_cpes = []
203 | packagefiles = {}
204 | for cpe in cpes:
205 | if not validators.url(cpe[1]):
206 | non_cpes.append(cpe[1])
207 | else:
208 | new_cpes.append(cpe[0])
209 | packagefiles[cpe[0]] = ["Dockerfile"]
210 |
211 | report.add_package_files(packagefiles)
212 | return report
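213 | 
214 | # Hedged doctest-style sketch of URL extraction + interpolation (values
215 | # illustrative, not from a real image):
216 | #   line = 'RUN VER=1.2.3 curl -L https://example.com/pkg-${VER}.tar.gz'
217 | #   extract_urls(line)
218 | #   -> ['https://example.com/pkg-${VER}.tar.gz']
219 | #   replace_curly_variables('https://example.com/pkg-${VER}.tar.gz', line)
220 | #   -> ['https://example.com/pkg-1.2.3.tar.gz']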
--------------------------------------------------------------------------------
/orca/lib/dpkg.py:
--------------------------------------------------------------------------------
1 |
2 | from typing import Dict, List
3 | import debian.deb822
4 | from .logger import logger
5 | import os
6 | from .types import PackageInfo, PackageInfoType
7 |
8 | def parse_dpkg_status(file_path):
9 | with open(file_path, "r", encoding="utf-8") as file:
10 | status_file = debian.deb822.Deb822.iter_paragraphs(file)
11 | packages = [dict(pkg) for pkg in status_file]
12 | pp = []
13 | for package in packages:
14 | version = package["Version"]
15 | epoch = None
16 |
17 | if len(version.split(":")) > 1:
18 | epoch = version.split(":")[0]
19 | version = version.split(":")[1]
20 | pp.append(PackageInfo(package["Package"],version,None,PackageInfoType.DEBIAN,package["Architecture"],epoch))
21 | if "python-" in package["Package"]:
22 | pp.append(PackageInfo(package["Package"].replace("python-",""),version,None,PackageInfoType.PYPI,package["Architecture"],epoch))
23 | elif "python3-" in package["Package"]:
24 | pp.append(PackageInfo(package["Package"].replace("python3-",""),version,None,PackageInfoType.PYPI,package["Architecture"],epoch))
25 | if "Source" in package:
26 | pp.append(PackageInfo(package["Source"].split(" ")[0],version,None,PackageInfoType.DEBIAN,package["Architecture"],epoch))
27 | return pp
28 |
29 | installed_bins = {"coreutils":
30 | ["arch","base64","basename","cat","chcon","chgrp","chmod","chown","chroot","cksum","comm","cp","csplit","cut","date","dd","df","dir","dircolors","dirname","du","echo","env","expand","expr","factor","false","flock","fmt","fold","groups","head","hostid","id","install","join","link","ln","logname","ls","md5sum","mkdir","mkfifo","mknod","mktemp","mv","nice","nl","nohup","nproc","numfmt","od","paste","pathchk","pinky","pr","printenv","printf","ptx","pwd","readlink","realpath","rm","rmdir","runcon","sha1sum","shasum","sha256sum","sha384sum","sha224sum","sha512sum","seq","shred","sleep","sort","split","stat","stty","sum","sync","tac","tail","tee","test","timeout","touch","tr","true","truncate","tsort","tty","uname","unexpand","uniq","unlink","users","vdir","wc","who","whoami","yes"],
31 |
32 | "findutils": ["find","xargs"],
33 |
34 | "procps": ["ee","kill","pkill","pgrep","pmap","ps","pwdx","skill","slabtop","snice","sysctl","tload","top","uptime","vmstat","w","watch"],
35 | "bsdutils": ["logger", "renice", "script", "scriptlive", "scriptreplay","wall"],
36 | "debianutils": ["add-shell", "installkernel", "ischroot", "remove-shell", "run-parts", "savelog","update-shells", "which"],
37 | "libc-bin": ["getconf","getent","iconv","ldd","lddconfig","locale","localedef","tzselect","zdump","zic"]
38 | }
39 |
40 | additional_files = [".preinst",".prerm",".postrm",".postinst",".list",".md5sums",".shlibs",".symbols",".triggers",".conffiles",".templates",".config"]
41 |
42 |
43 | def find_individual_packages(paths: List[str],directory: str)-> Dict[PackageInfo,List[str]]:
44 |     packagesMap = {}
45 |     packages = []  # packages parsed from the most recent status.d entry
46 |     for path in paths:
47 |         if "var/lib/dpkg/status.d/" in path and "." not in path.split("/")[-1]:
48 |             packages = parse_dpkg_status(directory + "/" + path)
49 |             for package in packages:
50 |                 packagesMap[package] = [path]
51 |         elif "var/lib/dpkg/status.d/" in path and os.path.isfile(os.path.join(directory, path)):
52 |             for package in packages:
53 |                 packagesMap.setdefault(package, []).append(path)
54 |     return packagesMap
55 | def parse_dpkg_from_status(paths,directory,status) -> Dict[PackageInfo,List[str]]:
56 | package_dict = dict()
57 | os_pkgs = parse_dpkg_status(directory + "/" + status)
58 | for package in os_pkgs:
59 | files_checked = []
60 | target_file = "var/lib/dpkg/info/" + package.name + ".list"
61 | if target_file in paths:
62 | content = open(directory + "/" + target_file).readlines()
63 | content = [ c.replace("\n","")[1:] if c[0] == "/" else c.replace("\n","") for c in content]
64 | files_checked.extend(content)
65 | for f in additional_files:
66 | fname = "var/lib/dpkg/info/" + package.name + f
67 | if fname in paths:
68 | files_checked.append(fname)
69 | else:
70 | target_file = "var/lib/dpkg/info/" + package.name + ":amd64.list"
71 | try:
72 | content = open(directory + "/" + target_file).readlines()
73 | content = [ c.replace("\n","")[1:] if c[0] == "/" else c.replace("\n","") for c in content]
74 | files_checked.extend(content)
75 | for f in additional_files:
76 | fname = "var/lib/dpkg/info/" + package.name + ":amd64" + f
77 | if fname in paths:
78 | files_checked.append(fname)
79 | except Exception:
80 | #logger.debug(f"DPKG indexed file not found: {target_file}")
81 | pass
82 | # Check binaries
83 | if package.name in installed_bins:
84 | for f in installed_bins[package.name]:
85 | files_checked.append("bin/"+f)
86 |
87 | files_checked.append("var/lib/dpkg/status")
88 | if package in package_dict:
89 | package_dict[package] = list(set([*package_dict[package],*files_checked]))
90 | else:
91 | package_dict[package] = files_checked
92 | return package_dict
93 |
94 | def get_dpkg(paths: List[str],directory: str)-> Dict[PackageInfo,List[str]]:
95 | status = [path for path in paths if path.endswith("dpkg/status")]
96 | others = [path for path in paths if "var/lib/dpkg" in path]
97 |
98 | assert len(status) < 2
99 | packages = {}
100 |
101 |     if len(status) == 1:
102 |         packages.update(parse_dpkg_from_status(paths, directory, status[0]))
103 | 
104 |     packages.update(find_individual_packages(paths, directory))
105 | 
106 |     # attribute shared dpkg metadata files (var/lib/dpkg/**) to every package once
107 |     if len(packages.keys()):
108 |         logger.info(f"DPKGS: {len(packages.keys())}")
109 |         for package in packages.keys():
110 |             packages[package].extend(others)
111 | 
112 |     return packages
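113 | 
114 | # Hedged example: a minimal dpkg status paragraph such as
115 | #   Package: libssl3
116 | #   Version: 3.0.11-1~deb12u2
117 | #   Architecture: amd64
118 | # is parsed by parse_dpkg_status into PackageInfo("libssl3",
119 | # "3.0.11-1~deb12u2", None, PackageInfoType.DEBIAN, "amd64", None); its file
120 | # list is then recovered from var/lib/dpkg/info/libssl3.list when present.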
--------------------------------------------------------------------------------
/orca/lib/golang.py:
--------------------------------------------------------------------------------
1 | import re
2 | import subprocess
3 | import os
4 | from typing import Dict, List
5 | from .types import PackageInfo, PackageInfoType
6 | from .logger import logger
7 | def extract_go_dependencies(go_binary_path,directory: str):
8 | results = {}
9 | for path in go_binary_path:
10 | result = extract_dependency(os.path.join(directory,path))
11 | for res in result:
12 | results[res] = [path]
13 | if len(results):
14 | logger.info(f"GO executables {len(results)}")
15 | return results
16 |
17 | def extract_dependency(go_binary_path):
18 | packages = []
19 |     # Use `go version -m` to read the module metadata embedded in the binary
20 | deps_process = subprocess.Popen(['go', 'version',"-m" ,go_binary_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
21 | deps_output, deps_error = deps_process.communicate()
22 |
23 | if deps_process.returncode != 0 or len(deps_error) > 1:
24 | return []
25 | lines = deps_output.decode('utf-8').splitlines()
26 | try:
27 | version = lines[0].split(" ")[1]
28 | except Exception as e:
29 | logger.warning(f"Go binary {go_binary_path} is too old to be analyzed {e}")
30 | return packages
31 | pkg = PackageInfo("stdlib", version[2:],None,PackageInfoType.GOLANG)
32 | packages.append(pkg)
33 |
34 | dep_lines = [line for line in lines if "dep" in line or "=>" in line or "mod" in line]
35 | for line in dep_lines:
36 | dep_split = line.split("\t")
37 | if len(dep_split) < 4:
38 | logger.error(f"[GO] Could not parse: {line}")
39 | continue
40 |
41 | packages.append(PackageInfo(dep_split[2], dep_split[3],None,PackageInfoType.GOLANG))
42 | for build_line in [line for line in lines if "build" in line]:
43 | last_item = build_line.split("\t")[-1]
44 |
45 |         if "-X " in last_item:
46 | #print(last_item)
47 | flags = last_item.split("-X ")[1:]
48 | found = False
49 | for flag in flags:
50 | f = flag.split(" ")[0]
51 | if "version.Version" in f:
52 | found = True
53 | split = flag.split("/version.Version=")
54 | if len(split) < 2 :
55 | continue
56 | p = PackageInfo(split[0], split[1],None,PackageInfoType.GOLANG)
57 | packages.append(p)
58 | if not found:
59 | f = flags[0].split(" ")[0]
60 | name = "/".join(f.split("/")[:-1])
61 | if name is not None and "/" in name:
62 | p = PackageInfo(name, "unknown",None,PackageInfoType.GOLANG)
63 | packages.append(p)
64 |
65 |
66 | #imported_symbols = [line.strip() for line in objdump_output.decode('utf-8').splitlines() if 'imported symbol' in line]
67 | return packages#lines#, imported_symbols
68 |
69 |
70 | go_version_pattern = r'^go\s+(\d+\.\d+)'
71 | require_pattern = r'require\s+\(\s*([^)]*)\s*\)' # for multiline `require`
72 | single_require_pattern = r'require\s+([^\s]+)\s+([^\s]+)' # for single line `require`
73 |
74 |
75 | def get_gomod(paths: List[str],directory: str) -> Dict[PackageInfo,List[str]]:
76 | gomods = [path for path in paths if path.endswith("/go.mod")]
77 |
78 | packages = {}
79 | for gomod in gomods:
80 | file_content = open(os.path.join(directory,gomod)).read()
81 |
82 | match = re.search(go_version_pattern, file_content, re.MULTILINE)
83 | if match:
84 | go_version = match.group(1)
85 | p = PackageInfo("go",go_version,None)
86 | if p in packages:
87 | packages[p].append(gomod)
88 | else:
89 | packages[p] = [gomod]
90 |
91 |
92 | require_block = re.search(require_pattern, file_content, re.DOTALL)
93 | if require_block:
94 | # Extract dependencies from a multiline require block
95 | modules = require_block.group(1).strip().splitlines()
96 | for module in modules:
97 | module_info = module.strip().split()
98 | if len(module_info) == 2:
99 | name, version = module_info
100 | p = PackageInfo(name,version,None)
101 | if p in packages:
102 | packages[p].append(gomod)
103 | else:
104 | packages[p] = [gomod]
105 | # TODO: Add also gofiles here
106 |
107 | if len(packages):
108 | logger.info(f"GoMOD : {len(packages)}")
109 | return packages
110 |
111 |
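112 | # Hedged sketch of the `go version -m` output parsed by extract_dependency
113 | # (tab-separated; values illustrative):
114 | #   /usr/local/bin/app: go1.21.3
115 | #           path    github.com/acme/app
116 | #           mod     github.com/acme/app     v1.0.0  h1:...
117 | #           dep     github.com/spf13/cobra  v1.8.0  h1:...
118 | #           build   -ldflags="-X github.com/acme/app/version.Version=1.0.0"
119 | # "dep"/"mod" rows become GOLANG PackageInfo entries; "-X ...version.Version="
120 | # build flags serve as a fallback version source for older binaries.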
--------------------------------------------------------------------------------
/orca/lib/jar.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | import re
4 | from typing import List
5 | import zipfile
6 | from . import logger
7 | from .types import PackageInfo, PackageInfoType
8 |
9 | # TODO: fix this
10 | TMP_DIR = f"{os.getcwd()}/tmpdir"
11 |
12 |
13 | def parse_pom_properties(jar: zipfile.ZipFile,content: str):
14 | packages = []
15 | package_info = {}
16 | data = jar.open(content).readlines()
17 | for line in data:
18 | sline = line.decode()
19 | if "=" in sline:
20 | kv = sline.replace("\n","").replace("\r","").split("=")
21 | package_info[kv[0]] = kv[1]
22 | try:
23 | packages.append(PackageInfo(package_info["artifactId"],package_info["version"],package_info["groupId"],PackageInfoType.MAVEN))
24 | except Exception as e :
25 |         logger.logger.warning(f"{jar.filename} - {package_info.keys()} - {data} - {e}")
26 | pass
27 | return packages
28 |
29 |
30 | def list_jar_props(jar_path,directory):
31 | packages = []
32 | try:
33 | with zipfile.ZipFile(os.path.join(directory,jar_path), 'r') as jar:
34 | contents = jar.namelist()
35 | real_contents = [content for content in contents if content.endswith("pom.properties") ]
36 | nested_jars = [content for content in contents if content.endswith(".jar")]
37 |
38 |             for nested_jar in nested_jars:
39 |                 jar.extract(nested_jar, TMP_DIR)  # extracted to TMP_DIR/<nested_jar path>
40 |                 packages.extend(list_jar_props(nested_jar, TMP_DIR))
41 |
42 |
43 | for content in real_contents:
44 | packages.extend(parse_pom_properties(jar,content))
45 | return packages
46 | except Exception as _:
47 | return packages
48 |
49 | def extract_jar(input_string: str):
50 | dots = input_string.split(".")
51 |
52 | for idx, dot in enumerate(dots):
53 | if len(dot) > 2 and dot[-2] == "-" and dot[-1].isdigit():
54 | author = ".".join(dots[:idx])
55 | name = dot[:-2]
56 |             version = dot[-1] + "." + ".".join(dots[idx+1:])
57 | return {"author": author,"name": name, "version": version}
58 | elif len(dot) > 2 and dot[-3] == "-" and dot[-2].isdigit() and dot[-1].isdigit():
59 | author = ".".join(dots[:idx])
60 | name = dot[:-3]
61 |             version = dot[-2] + dot[-1] + "." + ".".join(dots[idx+1:])
62 | return {"author": author,"name": name, "version": version}
63 | return None
64 |
65 | def get_jar(paths: List[str],directory: str):
66 | jars = [path for path in paths if path.endswith(".jar") ]
67 | packages = {}
68 | for jar in jars:
69 | basename = os.path.basename(jar).split(".jar")[0]
70 | tokens = basename.split("-")
71 | dots = basename.split(".")
72 | #print(basename)
73 | # AwsJavaSdk-CognitoIdentityProvider-2.0.jar
74 | if len(tokens) > 2 and len(dots) < 4:
75 | #print("first")
76 | version = tokens[-1]
77 | pattern = re.compile(r"^([a-zA-Z0-9\-_]+?)-(\d+\.\d+(?:\.\d+)?)(?:[-_][a-zA-Z0-9\-._]+)?$")
78 | match = pattern.match(basename)
79 | if match:
80 | name, version = match.groups()
81 | package = PackageInfo(name,version,name,PackageInfoType.MAVEN)
82 | packages[package] = [basename]
83 | else:
84 | result = extract_jar(basename)
85 | if result is None:
86 | continue
87 | name = result["name"]
88 | version = result["version"]
89 | author = result["author"]
90 | package = PackageInfo(name,version,author,PackageInfoType.MAVEN)
91 | packages[package] = [basename]
92 |
93 |
94 |
95 | for jar in jars:
96 | pkgs = list_jar_props(jar,directory)
97 | basepath = os.path.dirname(jar)
98 | files = list(filter(lambda x: basepath in x, paths))
99 | for pkg in pkgs:
100 | packages[pkg] = files
101 | if len(packages):
102 | logger.logger.info(f"JARs: {len(packages)}")
103 |
104 | return packages
105 |
106 |
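107 | # Hedged doctest-style example for extract_jar (illustrative input):
108 | #   extract_jar("org.apache.commons.commons-lang3-3.12.0")
109 | #   -> {"author": "org.apache.commons", "name": "commons-lang3", "version": "3.12.0"}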
--------------------------------------------------------------------------------
/orca/lib/logger.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | LOG_LEVEL = os.getenv("LOG_LEVEL",logging.WARNING)
4 |
5 | class CustomFormatter(logging.Formatter):
6 |
7 | grey = "\x1b[38;20m"
8 | yellow = "\x1b[33;20m"
9 | red = "\x1b[31;20m"
10 | bold_red = "\x1b[31;1m"
11 | reset = "\x1b[0m"
12 | blue = "\x1b[34m"
13 | debug_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s (%(filename)s:%(lineno)d)"
14 | format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
15 |
16 | FORMATS = {
17 | logging.DEBUG: grey + format + reset,
18 | logging.INFO: blue + format + reset,
19 | logging.WARNING: yellow + debug_format + reset,
20 | logging.ERROR: red + debug_format + reset,
21 | logging.CRITICAL: bold_red + debug_format + reset
22 | }
23 |
24 | def format(self, record):
25 | log_fmt = self.FORMATS.get(record.levelno)
26 | formatter = logging.Formatter(log_fmt)
27 | return formatter.format(record)
28 |
29 | def setup_logger():
30 | logger = logging.getLogger('ORCA')
31 |     logger.setLevel(LOG_LEVEL)  # honor LOG_LEVEL from the environment (default WARNING)
32 |
33 | ch = logging.StreamHandler()
34 | ch.setLevel(logging.DEBUG)
35 |
36 | ch.setFormatter(CustomFormatter())
37 |
38 | logger.addHandler(ch)
39 |
40 |
41 | return logger
42 |
43 | # Setup logger for the entire project
44 | logger: logging.Logger = setup_logger()
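45 | 
46 | # Usage (hedged example): modules import the shared instance and log through it:
47 | #   from .logger import logger
48 | #   logger.info("JARs: 3")
49 | # Verbosity can be raised via the environment, e.g. LOG_LEVEL=DEBUG orca busybox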
--------------------------------------------------------------------------------
/orca/lib/package_json.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | from typing import Dict, List
4 | import json
5 |
6 | from .logger import logger
7 | from .types import PackageInfo, PackageInfoType
8 | #import rpm
9 |
10 |
11 | def parse_package_json(paths,enclosing_dir,file: str):
12 | try:
13 | content = json.load(open(file))
14 | except Exception:
15 | logger.error(f"[JS] Could not parse {file}")
16 | return {}
17 | if "name" not in content:
18 | return {}
19 | name = content["name"]
20 | files = set([path for path in paths if enclosing_dir in path])
21 | main_package = PackageInfo(name,"","npm")
22 | packages = {}
23 | if "version" in content:
24 | main_package = PackageInfo(name,content["version"],None,PackageInfoType.NPM)
25 | else:
26 | logger.info(f"Could not parse version from package.json at {file}")
27 |
28 | if "dependencies" not in content:
29 | return {main_package: list(files)}
30 | else:
31 | # TODO: Maybe we should also add dev-packages
32 | for dependency,version in content["dependencies"].items():
33 | if type(version) is dict:
34 | version = version['version']
35 | package = PackageInfo(dependency,version.split(" ")[0].replace("<","").replace(">","").replace("=",""),None,PackageInfoType.NPM)
36 |             files_to_add = set([path for path in paths if os.path.join(enclosing_dir,"node_modules",dependency) in path])
37 | packages[package] = list(files_to_add)
38 | files.difference_update(files_to_add)
39 | packages[main_package] = list(files)
40 |
41 | return packages
42 |
43 |
44 | def parse_package_lock(paths,enclosing_dir,file: str):
45 | packages = {}
46 | content = json.load(open(file))
47 | name_author = content["name"].split("/")
48 | author = "npm"
49 | name = ""
50 | if len(name_author) > 1:
51 | author = name_author[0].replace("@","")
52 | name = name_author[1]
53 | else:
54 | name = name_author[0]
55 | if "version" in content:
56 | packages[PackageInfo(name,content["version"],author)] = [path for path in paths if enclosing_dir in path]
57 | key = "packages" if "packages" in content else "dependencies"
58 | for pkgname,package in content[key].items():
59 | if pkgname == "":
60 | continue
61 | if "node_modules" in pkgname:
62 | pkg = pkgname.split("node_modules/")[-1]
63 | if "version" not in package:
64 | continue
65 | if "/" in pkg:
66 | pkg_split = pkg.replace("@","").split("/")
67 |                 packages[PackageInfo(pkg_split[1],package["version"],pkg_split[0],PackageInfoType.NPM)] = [enclosing_dir + "/package-lock.json",enclosing_dir + "/package.json"]
68 | else:
69 | packages[PackageInfo(pkg,package["version"],"npm",PackageInfoType.NPM)] = [path for path in paths if enclosing_dir in path]
70 | else:
71 | if "/" in pkgname:
72 | pkg_split = pkgname.replace("@","").split("/")
73 |                 packages[PackageInfo(pkg_split[1],package["version"],pkg_split[0])] = [enclosing_dir + "/package-lock.json",enclosing_dir + "/package.json"]
74 | else:
75 |                 packages[PackageInfo(pkgname,package["version"],"npm",PackageInfoType.NPM)] = [enclosing_dir + "/package-lock.json",enclosing_dir + "/package.json"]
76 |
77 | return packages
78 |
79 | def parse_library_packages(directory,paths,package_jsons)-> Dict[PackageInfo,List[str]]:
80 | packageMap = {}
81 | for file in package_jsons:
82 | pmap = parse_package_json(paths,os.path.dirname(file),os.path.join(directory,file))
83 | packageMap.update(pmap)
84 | return packageMap
85 |
86 |
87 | def get_package_json(paths: List[str],directory: str):
88 | total_packages = {}
89 |
90 | package_json_node_modules = [path for path in paths if path.endswith("package.json") or path.endswith("package-lock.json")]
91 | package_lock = sorted([path for path in package_json_node_modules if "node_modules" not in path ],key=len)
92 |
93 | package_json = sorted([path for path in paths if path.endswith("package.json") and "node_modules" not in path ],key=len)
94 |
95 |     if len(package_json_node_modules) > 200:  # heuristic threshold; can be tuned
96 | logger.warning(f"Discovered {len(package_json_node_modules)} package modules. Analyzing all of these files will take time")
97 |
98 | total_packages = parse_library_packages(directory,paths,package_json_node_modules)
99 |
100 | if len(package_lock) == 0 and len(package_json) == 0:
101 | if len(package_json_node_modules) == 0:
102 | return {}
103 | else:
104 | if len(total_packages.keys()):
105 | logger.info(f"JS packages: {len(total_packages.keys())}")
106 | return total_packages
107 | else:
108 | biggest = max(package_json,package_lock,key=len)
109 | for item in biggest:
110 | basepath = os.path.dirname(item)
111 | if basepath + "/package.json" in package_json and basepath + "/package-lock.json" in package_lock:
112 | total_packages.update(parse_package_lock(paths,basepath,os.path.join(directory,basepath,"package-lock.json")))
113 |
114 | elif basepath + "/package.json" in package_json and basepath + "/package-lock.json" not in package_lock:
115 | pmap = parse_package_json(paths,basepath,os.path.join(directory,basepath,"package.json"))
116 | total_packages.update(pmap)
117 |
118 | else:
119 | continue
120 | #files.update(package_json)
121 | #files.update(package_lock)
122 |
123 |
124 | if len(total_packages.keys()):
125 | logger.info(f"JS packages: {len(total_packages.keys())}")
126 | return total_packages
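127 | 
128 | # Hedged example (illustrative values): a package.json of the form
129 | #   {"name": "myapp", "version": "1.0.0", "dependencies": {"lodash": ">=4.17.21"}}
130 | # is parsed into PackageInfo("myapp", "1.0.0", None, PackageInfoType.NPM) plus
131 | # PackageInfo("lodash", "4.17.21", None, PackageInfoType.NPM); range operators
132 | # (<, >, =) are stripped from the dependency version before it is recorded.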
--------------------------------------------------------------------------------
/orca/lib/path.py:
--------------------------------------------------------------------------------
1 | import glob
2 | from typing import Set
3 |
4 | def remove_folders(paths):
5 | dir_set = set()
6 | result = []
7 |
8 | for path in paths:
9 | parts = path.split("/")
10 | for i in range(1, len(parts)):
11 | dir_set.add("/".join(parts[:i]))
12 |
13 | for path in paths:
14 | if path not in dir_set and len(path) > 2:
15 | result.append(path)
16 |
17 | return result
18 |
19 | def get_filepaths(directory:str) -> Set[str]:
20 | paths = filter(lambda path: len(path) > 2
21 | and "etc/ssl/certs/" not in path
22 | and "usr/share/zoneinfo" not in path
23 | and "etc/nginx/" not in path,
24 | glob.glob(directory + "/**", recursive=True,include_hidden=True))
25 | mapped_paths = map(lambda path: path.replace(directory + "/",""),paths)
26 |     return set(mapped_paths)  # previously: set(remove_folders(paths))
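27 | 
28 | # Hedged doctest-style example for remove_folders (currently unused by
29 | # get_filepaths, kept for reference):
30 | #   remove_folders(["app", "app/bin", "app/bin/run.sh"])
31 | #   -> ['app/bin/run.sh']   # intermediate directories and paths of length <= 2 are dropped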
--------------------------------------------------------------------------------
/orca/lib/path_checkers.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import os
4 | import re
5 | from typing import List
6 | from .types import PackageInfo
7 |
8 | python_dist_regex = re.compile(r'.*python(\d\.\d+)\/site-packages\/(([a-zA-Z0-9_\-]+)\/)?([a-zA-Z0-9]+)-(\d+\.\d+\.?\d*)\.dist-info')
9 | def check_python_from_path_once(filename: str,directory: str):
10 | result = re.match(python_dist_regex,filename)
11 | files = [filename]
12 | if result:
13 | pkg = [PackageInfo("python", result.group(1),None)]
14 | if result.group(3) is not None:
15 | package = f"{result.group(3)}-{result.group(4)}"
16 | version = result.group(5)
17 | pkg.append(PackageInfo(package,version,None))
18 | else:
19 | package = result.group(4)
20 | version = result.group(5)
21 | pkg.append(PackageInfo(package,version,None))
22 | if filename.endswith("RECORD"):
23 | record = open(os.path.join(directory,filename)).readlines()
24 | basepath = "/".join(filename.split("/")[:-1])
25 | files = [basepath + "/" + line.split(",")[0] for line in record]
26 | files.append(filename)
27 |
28 | return pkg,files
29 | return None,files
30 |
31 |
32 | def check_python_from_path(paths: List[str],directory: str):
33 | files = set()
34 | cpes = []
35 | for path in [p for p in paths if ".dist-info" in p]:
36 | res,fn = check_python_from_path_once(path,directory)
37 | if res:
38 | cpes.extend(res)
39 | files.update(fn)
40 | return cpes,files
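41 | 
42 | # Hedged example: a path such as
43 | #   usr/lib/python3.11/site-packages/requests-2.31.0.dist-info/RECORD
44 | # matches python_dist_regex, yielding PackageInfo("python", "3.11", None) and
45 | # PackageInfo("requests", "2.31.0", None); the RECORD file is then read to
46 | # enumerate the package's files (values illustrative).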
--------------------------------------------------------------------------------
/orca/lib/perl.py:
--------------------------------------------------------------------------------
1 |
2 | from typing import List
3 | import re
4 | import os
5 |
6 | from . import logger
7 | from .types import PackageInfo, PackageInfoType
8 |
9 | package_regex = r'package\s+([^\s;]+)'
10 | # Regex for extracting the version
11 | version_regex = r'\$VERSION\s*=\s*\'([^\']+)\''
12 |
13 |
14 |
15 |
16 | def parse_module(filepath):
17 | try:
18 | content = open(filepath).read()
19 | except Exception as _:
20 | return "",""
21 | # Extract package name
22 | package_match = re.search(package_regex, content)
23 | if package_match:
24 | package_name = package_match.group(1)
25 | version_match = re.search(version_regex, content)
26 | if version_match:
27 | version = version_match.group(1)
28 | return package_name,version
29 | return "",""
30 |
31 | def get_perl(paths: List[str],directory: str):
32 | packages = {}
33 | perl_modules = [path for path in paths if path.endswith(".pm") and "perl" in path]
34 | for module in perl_modules:
35 | package,version = parse_module(os.path.join(directory,module))
36 | if len(package) > 0 and len(package.split("::")) < 3:
37 | packages[PackageInfo(package,version,None,PackageInfoType.PERL)] = [module]
38 | if len(packages):
39 | logger.logger.info(f"Perl: {len(packages)}")
40 | return packages
41 |
42 |
43 |
44 |
45 |
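46 | # Hedged example: parse_module on a module file containing
47 | #   package Foo::Bar;
48 | #   our $VERSION = '0.42';
49 | # returns ("Foo::Bar", "0.42"); get_perl skips modules nested more than two
50 | # levels deep (e.g. Foo::Bar::Baz), presumably to keep the report to
51 | # top-level distributions.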
--------------------------------------------------------------------------------
/orca/lib/pkgconfig.py:
--------------------------------------------------------------------------------
1 |
2 | from typing import Dict, List
3 | import pykg_config.pcfile
4 | 
5 |
6 | from . import logger
7 | from .types import PackageInfo
8 |
9 | def get_pkgconfig(paths: List[str],directory: str) -> Dict[PackageInfo,List[str]]:
10 | pkgs = filter(lambda path: "pkgconfig" in path and path.endswith(".pc"), paths)
11 | pkgmap = {}
12 | for pkg in pkgs:
13 | name = pkg.split("/")[-1]
14 | if name not in pkgmap:
15 | pkgmap[name] = pkg
16 | pkg_dir = {}
17 | for pkg in pkgmap.values():
18 | directories = []
19 | pc_file_path = directory + "/" + pkg
20 | vars = {}
21 | props = {}
22 | try:
23 | _, vars, props = pykg_config.pcfile.read_pc_file(pc_file_path,{})
24 | except Exception as _:
25 | logger.logger.warning(f"Could not parse pkgconfig file {pc_file_path}")
26 | continue
27 |         version = props.get("version")
28 |         if version is None or "." not in version:
29 |             version = vars.get("abiver")
30 |
31 | package = PackageInfo(props.get("name"),version,None,None)
32 |
33 | directories.append(pkg)
34 | if vars.get("exec_prefix") is not None:
35 | directories.append(vars.get("exec_prefix")[1:])
36 | if props.get("libdir") is not None:
37 | directories.append(props.get("libdir")[1:])
38 |
39 | if package in pkg_dir:
40 | pkg_dir[package] = [*pkg_dir[package],*directories]
41 | else:
42 | pkg_dir[package] = directories
43 |
44 | package_files = {}
45 |     for package,dirs in pkg_dir.items():
46 |         files_found = []
47 |         for prefix in list(set(dirs)):  # renamed from `directory`, which shadowed the parameter
48 |             for path in paths:
49 |                 if prefix in path:
50 |                     files_found.append(path)
51 |         package_files[package] = files_found
52 | return package_files
53 |
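54 | # Hedged example: a pkgconfig file such as (fields illustrative)
55 | #   prefix=/usr
56 | #   exec_prefix=${prefix}
57 | #   Name: zlib
58 | #   Version: 1.2.13
59 | # yields PackageInfo("zlib", "1.2.13", None, None); files under the resolved
60 | # exec_prefix/libdir prefixes are then attributed to the package.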
--------------------------------------------------------------------------------
/orca/lib/python.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import List
3 | from .types import PackageInfo, PackageInfoType
4 | from .logger import logger
5 | from email.parser import Parser
6 | from packaging.requirements import Requirement
7 |
8 | import re
9 | python_dist_regex = re.compile(
10 | r'.*python(\d\.\d+)\/(?:site|dist)-packages\/(([a-zA-Z0-9_\-]+)\/)?([a-zA-Z0-9]+)-(\d+\.\d+\.?\d*)\.dist-info'
11 | )
12 |
13 | def check_python_from_path_once(paths,filename: str,directory: str):
14 | filenamenopath = [split for split in filename.split("/") if "-info" in split]
15 | if len(filenamenopath) == 0:
16 | return {}
17 | filenamenopath = filenamenopath[0]
18 | basename = os.path.dirname(filename)
19 | files = list(filter(lambda x: basename in x,paths))
20 | if filenamenopath.endswith(".dist-info") or filenamenopath.endswith(".egg-info"):
21 | file = filenamenopath.replace(".dist-info","").replace(".egg-info","")
22 | splits = file.split("-")
23 | package = "-".join(splits[:-1]).replace(".wh.","")
24 | version = splits[-1].replace(".dist","")
25 | if package is None or package== "":
26 | return {}
27 | pkg = PackageInfo(package,version,None,PackageInfoType.PYPI)
28 | if filename.endswith("RECORD"):
29 | record = open(os.path.join(directory,filename)).readlines()
30 | basepath = "/".join(filename.split("/")[:-2])
31 | files.extend([basepath + "/" + line.split(",")[0] for line in record])
32 |
33 | return {pkg: files}
34 | return {}
35 |
36 |
37 | def check_python_from_path(paths: List[str],directory: str):
38 | packages = {}
39 | all_dist_info_records = [p for p in paths if ".dist-info" in p or "egg" in p]
40 |
41 | for path in all_dist_info_records:
42 | for k,v in check_python_from_path_once(paths,path,directory).items():
43 | if k in packages:
44 | packages[k] = list(set([*packages[k],*v]))
45 | else:
46 | packages[k] = v
47 | return packages
48 |
49 | def extract_egg_dependencies(depfile):
50 | packages = []
51 | pkg_info_content = open(depfile, 'r').read()
52 |
53 | pkg_info = Parser().parsestr(pkg_info_content)
54 |
55 | # Access general metadata fields
56 | package_name = pkg_info.get('Name')
57 |     package_version = (pkg_info.get('Version') or "").replace(".dist","")
58 | author = pkg_info.get('Author')
59 | packages.append(PackageInfo(package_name,package_version,author,PackageInfoType.PYPI))
60 | requires_dist = pkg_info.get_all('Requires-Dist')
61 | if requires_dist:
62 | for requirement in requires_dist:
63 | req = Requirement(requirement)
64 | name = req.name.replace(".wh.","")
65 | version = req.specifier.__str__().replace("!","").replace(">","").replace("<","").replace("=","").split(",")[0].replace(".dist","")
66 | if req.marker and "runtime" not in req.marker.__str__():
67 | continue
68 | packages.append(PackageInfo(name,version,None,PackageInfoType.PYPI))
69 | return packages
70 |
71 | def get_egg_files(file:str,sources: str):
72 | basepath = "/".join(file.split("/")[:-2])
73 | if not os.path.exists(sources):
74 | return []
75 | lines = open(sources).readlines()
76 | return [basepath + "/"+line.replace("\n","") for line in lines]
77 |
78 | def get_record_files(file:str,sources: str):
79 | basepath = "/".join(file.split("/")[:-2])
80 | lines = open(sources).readlines()
81 | return [basepath + "/"+line.replace("\n","").split(",")[0] for line in lines]
82 |
83 | def parse_egg_info(paths,file,dirpath: str):
84 | packagesMap = {}
85 | packages = extract_egg_dependencies(os.path.join(dirpath,"PKG-INFO"))
86 | basename = os.path.dirname(file)
87 | for package in packages:
88 | packagesMap[package] = [*get_egg_files(file,dirpath + "SOURCES.txt"),*list(filter(lambda x: basename in x, paths))]
89 | return packagesMap
90 |
91 | def parse_metadata(paths,file,dirpath: str):
92 | packagesMap = {}
93 | packages = extract_egg_dependencies(dirpath + "METADATA")
94 | basename = os.path.dirname(file)
95 | for package in packages:
96 | packagesMap[package] = [*get_record_files(file,dirpath + "RECORD"),*list(filter(lambda x: basename in x, paths))]
97 | return packagesMap
98 |
99 | def extract_python_dependencies(paths,directory: str):
100 | interesting_paths = [p for p in paths if "dist-info" in p or "site-packages" in p or "dist-packages" in p]
101 | total_packages = {}
102 | total_packages.update(check_python_from_path(interesting_paths,directory))
103 |
104 |     for path in interesting_paths:
105 |         if path.endswith(".egg-info") or path.endswith(".dist-info"):
106 |             # e.g. pygpgme-0.3-py2.7.egg-info
107 |             stuff = path.split("/")[-1].replace(".egg-info","").replace(".dist-info","")
108 |             tokens = stuff.split("-")
109 |             if len(tokens) < 2:
110 |                 continue
111 |             version = tokens[1].replace(".egg","").replace(".dist","")
112 |             pkg = PackageInfo(tokens[0].replace(".wh.",""),version,None,PackageInfoType.PYPI)
113 |             if pkg in total_packages:
114 |                 total_packages[pkg] = [*total_packages[pkg],path]
115 |             else:
116 |                 total_packages[pkg] = [path]
117 | pkginfo = [path for path in interesting_paths if ".egg-info/PKG-INFO" in path]
118 | records = [path for path in interesting_paths if ".dist-info/RECORD" in path]
119 |
120 |
121 | for eggpkg in pkginfo:
122 | pakagesegg = parse_egg_info(interesting_paths,eggpkg,os.path.join(directory,eggpkg).replace("PKG-INFO",""))
123 | for k,v in pakagesegg.items():
124 | if k in total_packages:
125 | total_packages[k].extend(v)
126 | else:
127 | total_packages[k] = v
128 |         # note: no blanket update() here; it would overwrite the merged file lists built above
129 | for record in records:
130 | pakagesegg = parse_metadata(interesting_paths,record,os.path.join(directory,record).replace("RECORD",""))
131 | for k,v in pakagesegg.items():
132 | if k in total_packages:
133 | total_packages[k].extend(v)
134 | else:
135 | total_packages[k] = v
136 | if len(total_packages):
137 | logger.info(f"Python: {len(total_packages)}")
138 | return total_packages
139 |
140 |
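141 | # Hedged example of the path-based fallback: a layer file such as
142 | #   usr/lib/python3.11/site-packages/requests-2.31.0.dist-info/METADATA
143 | # yields PackageInfo("requests", "2.31.0", None, PackageInfoType.PYPI), and
144 | # the sibling RECORD file, when present, enumerates the files owned by the
145 | # package (values illustrative).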
--------------------------------------------------------------------------------
/orca/lib/rpm_packages.py:
--------------------------------------------------------------------------------
1 |
2 | import json
3 | import os
4 | import re
5 | import subprocess
6 | from typing import Dict, List
7 |
8 | from . import logger
9 | from .types import PackageInfo, PackageInfoType
10 |
11 | installed_bins = {"coreutils":
12 | ["arch","base64","basename","cat","chcon","chgrp","chmod","chown","chroot","cksum","comm","cp","csplit","cut","date","dd","df","dir","dircolors","dirname","du","echo","env","expand","expr","factor","false","flock","fmt","fold","groups","head","hostid","id","install","join","link","ln","logname","ls","md5sum","mkdir","mkfifo","mknod","mktemp","mv","nice","nl","nohup","nproc","numfmt","od","paste","pathchk","pinky","pr","printenv","printf","ptx","pwd","readlink","realpath","rm","rmdir","runcon","sha1sum","shasum","sha256sum","sha384sum","sha224sum","sha512sum","seq","shred","sleep","sort","split","stat","stty","sum","sync","tac","tail","tee","test","timeout","touch","tr","true","truncate","tsort","tty","uname","unexpand","uniq","unlink","users","vdir","wc","who","whoami","yes"],
13 |
14 | "findutils": ["find","xargs"],
15 |
16 | "procps": ["ee","kill","pkill","pgrep","pmap","ps","pwdx","skill","slabtop","snice","sysctl","tload","top","uptime","vmstat","w","watch"],
17 | "bsdutils": ["logger", "renice", "script", "scriptlive", "scriptreplay","wall"],
18 | "debianutils": ["add-shell", "installkernel", "ischroot", "remove-shell", "run-parts", "savelog","update-shells", "which"],
19 | "libc-bin": ["getconf","getent","iconv","ldd","lddconfig","locale","localedef","tzselect","zdump","zic"]
20 | }
21 |
22 | additional_files = [".preinst",".prerm",".postrm",".postinst",".list",".md5sums",".shlibs",".symbols",".triggers",".conffiles",".templates",".config"]
23 |
24 | def get_author(author):
25 | if "Red" in author:
26 | return "redhat"
27 | elif "Amazon" in author:
28 | return "amazonlinux"
29 | elif "suse" in author.lower():
30 | return "suse"
31 | else:
32 | return author.lower()
33 |
34 |
35 | def read_rpm_db(directory,path)->Dict[PackageInfo,List[str]]:
36 | packages_dict = {}
37 | try:
38 | # Run the rpm command with --dbpath to list installed packages from the specified database
39 | result = subprocess.run(['rpm_checker', '--dbpath', os.path.join(directory,path),], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
40 |
41 | # Check for errors
42 |         if result.returncode != 0:
43 |             print(f"Error reading RPM database: {result.stderr} - {path}")
44 |             return {}
45 |
46 | # Print the list of installed packages
47 | packages_raw = result.stdout.splitlines()[0]
48 | json_data = json.loads(packages_raw)
49 |
50 | for item in json_data:
51 | author = get_author(item['author'])
52 |
53 |
54 |
55 | package = PackageInfo(item["package"],item["version"],author,PackageInfoType.RPM)
56 | packages_dict[package] = [*item["files"],path]
57 | if author == "amazonlinux":
58 | pattern = re.compile(r"^([a-zA-Z0-9\-_]+)-(\d+\.\d+(?:\.\d+)?)-")
59 | # Process each package
60 | match = pattern.match(item['rpm'])
61 | if match:
62 | name, version = match.groups()
63 | package = PackageInfo(name,version,author,PackageInfoType.RPM)
64 |                     if name.startswith("python-") or name.startswith("python3-"):
65 |                         split = name.split("-", 1)  # derive the PyPI name from the RPM name
66 |                         if len(split) <= 1:
67 |                             continue
68 |                         pythonp = split[1]
69 |                         ppkg = PackageInfo(pythonp,version,None,PackageInfoType.PYPI)
70 |                         packages_dict[ppkg] = [*item["files"],path]
71 | packages_dict[package] = [*item["files"],path]
72 |
73 | return packages_dict
74 |
75 |     except Exception as e:
76 |         print(f"An error occurred: {e}")
77 |         return {}
78 |
79 | def get_rpm(paths: List[str],directory: str)-> Dict[PackageInfo,List[str]]:
80 | additional_files = [file for file in paths if "var/lib/yum" in file or "var/cache/yum/" in file or "etc/yum.repos.d/" in file or "var/log/yum" in file]
81 | total_packages = {}
82 | for path in paths:
83 | if "rpm/Packages" in path or path.endswith( "rpmdb.sqlite"):
84 | packages = read_rpm_db(directory,path)
85 | if packages and len(packages.keys()):
86 | logger.logger.info(f"RPMs: {len(packages.keys())}")
87 | if len(additional_files):
88 | for package in packages.keys():
89 | packages[package].extend(additional_files)
90 | total_packages.update(packages)
91 |
92 |
93 |
94 |
95 | return total_packages
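96 | 
97 | # Hedged sketch of the JSON emitted by the bundled rpm_checker helper, as
98 | # consumed above (field names per this parser; values illustrative):
99 | #   [{"package": "openssl", "version": "3.0.7", "author": "Red Hat, Inc.",
100 | #     "rpm": "openssl-3.0.7-1.el9.x86_64", "files": ["usr/lib64/libssl.so.3"]}]
101 | # get_author() normalizes the vendor string (e.g. "Red Hat, Inc." -> "redhat").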
--------------------------------------------------------------------------------
/orca/lib/spdx.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | from datetime import datetime
3 | from typing import Dict, List
4 | from spdx_tools.spdx.model import (Document,CreationInfo,Package,SpdxNone,Actor,ActorType,ExternalPackageRef,ExternalPackageRefCategory,PackagePurpose,Relationship,RelationshipType,File,Checksum,ChecksumAlgorithm)
5 | from spdx_tools.spdx.writer.write_anything import write_file
6 | import base64
7 | from.types import PackageInfo, PackageInfoType, VulnerabilityReport
8 | from packageurl import PackageURL
9 |
10 |
11 |
12 |
13 | def getpurl(packageInfo: PackageInfo) -> str:
14 | if packageInfo.type == "debian":
15 | return f"pkg:deb/debian/{packageInfo.name.lower()}@{packageInfo.version}"
16 | elif packageInfo.type == "pypi":
17 | return f"pkg:pypi/{packageInfo.name.lower()}@{packageInfo.version}"
18 | else:
19 | return f"pkg:generic/{packageInfo.name.lower()}@{packageInfo.version}"
20 |
21 | def create_anchore_purl(osinfo,packageInfo: PackageInfo):
22 |
23 | if packageInfo.type is None:
24 | purl = PackageURL(type="generic",name=packageInfo.name.lower(),version=packageInfo.version)
25 | elif packageInfo.type == PackageInfoType.DEBIAN:
26 |         qualifiers = {
27 |             "arch": packageInfo.arch,
28 |             "distro": ("debian-" + osinfo["version"]) if osinfo and "version" in osinfo else "debian",
29 |         }
30 | if packageInfo.epoch is not None:
31 | qualifiers["epoch"] = packageInfo.epoch
32 |
33 |         if osinfo and "name" in osinfo and osinfo["name"].lower().rstrip() == "ubuntu":
34 | purl = PackageURL(type="deb",namespace="ubuntu",name=packageInfo.name.lower(),version=packageInfo.version,qualifiers=qualifiers)
35 | else:
36 | purl = PackageURL(type="deb",namespace="debian",name=packageInfo.name.lower(),version=packageInfo.version,qualifiers=qualifiers)
37 |
38 | elif packageInfo.type == PackageInfoType.RPM:
39 | purl = PackageURL(type="rpm",name=packageInfo.name.lower(),version=packageInfo.version,namespace=packageInfo.author)
40 | elif packageInfo.type == PackageInfoType.APK:
41 | purl = PackageURL(type="apk",name=packageInfo.name.lower(),version=packageInfo.version,namespace="alpine")
42 | elif packageInfo.type == PackageInfoType.NPM:
43 | purl = PackageURL(type="npm",name=packageInfo.name.lower(),version=packageInfo.version)
44 | elif packageInfo.type == PackageInfoType.PYPI:
45 | purl = PackageURL(type="pypi",name=packageInfo.name.lower(),version=packageInfo.version)
46 | elif packageInfo.type == PackageInfoType.PERL:
47 | purl = PackageURL(type="perl",name=packageInfo.name.lower(),version=packageInfo.version)
48 | elif packageInfo.type == PackageInfoType.MAVEN:
49 | purl = PackageURL(type="maven",name=packageInfo.name.lower(),version=packageInfo.version,namespace=packageInfo.author)
50 | elif packageInfo.type == PackageInfoType.GOLANG: # This should probably be edited
51 | path = packageInfo.name.lower()
52 | path_split = path.split("/")
53 | name = path
54 | other = None
55 | if len(path_split) > 3:
56 | name = "/".join(path_split[:3])
57 | other = "/".join(path_split[3:])
58 | purl = PackageURL(type="golang",name=name,version=packageInfo.version,namespace=packageInfo.author,subpath=other)
59 | elif packageInfo.type == PackageInfoType.COMPOSER:
60 | purl = PackageURL(type="composer",name=packageInfo.name.lower(),version=packageInfo.version,namespace=packageInfo.author)
61 | elif packageInfo.type == PackageInfoType.GEM:
62 | purl = PackageURL(type="gem",name=packageInfo.name.lower(),version=packageInfo.version)
63 | elif packageInfo.type == PackageInfoType.GITHUB:
64 | purl = PackageURL(type="github",name=packageInfo.name.lower(),version=packageInfo.version,namespace=packageInfo.author)
65 | elif packageInfo.type == PackageInfoType.BITNAMI:
66 | purl = PackageURL(type="bitnami",name=packageInfo.name.lower(),version=packageInfo.version,qualifiers={"arch":packageInfo.arch})
67 | elif packageInfo.type == PackageInfoType.RUST:
68 | purl = PackageURL(type="cargo",name=packageInfo.name.lower(),version=packageInfo.version)
69 | elif packageInfo.type == PackageInfoType.GRADLE:
70 | purl = PackageURL(type="gradle",name=packageInfo.name.lower(),version=packageInfo.version)
71 | else:
72 | purl = PackageURL(type="generic",name=packageInfo.name.lower(),version=packageInfo.version)
73 | return purl
74 |
75 |
76 | def map_package(osinfo,index: int,packageInfo: PackageInfo) -> Package:
77 | def getid():
78 |         return f"SPDXRef-PACKAGE-{base64.b64encode(bytes(f"{packageInfo.name} {packageInfo.version} {packageInfo.author} {packageInfo.arch}",'utf-8')).decode("utf-8",errors="ignore").replace("=","").replace("+","").replace("/","")}"  # strip base64 chars that are invalid in SPDX IDs
79 | external_refs = []
80 | anchore_purl = create_anchore_purl(osinfo,packageInfo)
81 | external_refs = [
82 | #ExternalPackageRef(ExternalPackageRefCategory.PACKAGE_MANAGER,reference_type="purl",locator=generic_purl+f"?arch=allu0026distro=debian-{osinfo['version']}"),
83 | ExternalPackageRef(ExternalPackageRefCategory.PACKAGE_MANAGER,reference_type="purl",locator=anchore_purl.to_string())
84 | #ExternalPackageRef(ExternalPackageRefCategory.PACKAGE_MANAGER,reference_type="purl",locator=generic_purl+f"?os_distro={osinfo['codename']}&os_name=debian&os_version={osinfo['version']}")
85 | ]
86 |
87 | package: Package = Package(
88 | name=packageInfo.name,
89 | version=packageInfo.version,
90 | download_location=SpdxNone(),
91 | license_concluded=SpdxNone(),
92 | license_declared=SpdxNone(),
93 | primary_package_purpose=PackagePurpose.LIBRARY,
94 | spdx_id=getid(),
95 | copyright_text=SpdxNone(),
96 | external_references=external_refs)
97 | return package
98 |
99 |
100 | def generateSPDXFromCPE(containerImage: str,inputPackages: List[PackageInfo],output_filename: str):
101 |
102 | containerPackage: Package = Package(name=containerImage, download_location=SpdxNone(),license_concluded=SpdxNone(),license_declared=SpdxNone(),spdx_id="SPDXRef-ContainerImage",copyright_text=SpdxNone(),primary_package_purpose=PackagePurpose.CONTAINER)
103 |
104 |
105 |
106 | creation_info = CreationInfo(spdx_version="SPDX-2.3",spdx_id="SPDXRef-DOCUMENT",name="CPE Finder",created=datetime.now(),creators=[Actor(ActorType.ORGANIZATION,"CNAM"),Actor(ActorType.TOOL,"CPE finder")],document_namespace="http://example.com")
107 |
108 |     packages = [map_package(None,idx,p) for idx,p in enumerate(inputPackages)]  # no OS info in this path
109 |
110 | relationships = []
111 | relationships.append(Relationship("SPDXRef-DOCUMENT",RelationshipType.DESCRIBES,"SPDXRef-ContainerImage"))
112 |
113 | packages.append(containerPackage)
114 | doc = Document(creation_info,packages=packages,relationships=relationships)
115 | write_file(doc, output_filename,validate=True)
116 |
117 |
118 | def generateFileMappingReport(reportMap: Dict[str,VulnerabilityReport])-> Dict[str,File]:
119 | filemap: Dict[str,File] = dict()
120 | for layer,report in reportMap.items():
121 | layer_id = layer.split("/")[-1]
122 |
123 | for file in report.initial_files:
124 | fid = file.replace("/","-").replace("_","").replace(" ","")
125 | sid = f"SPDXRef-File-{layer_id}-{fid}"
126 | if sid in filemap:
127 | filemap[sid].comment += f"\n Layer: {layer_id}"
128 | else:
129 |                 checksum = Checksum(ChecksumAlgorithm.SHA1,hashlib.sha1("testme".encode()).hexdigest())  # placeholder digest; file contents are not hashed here
130 |
131 | filemap[sid] = File(name=file,spdx_id=sid,comment=f"Layer: {layer}",checksums=[checksum])
132 | return filemap
133 |
134 |
135 | def getOsInfo(reportMap: Dict[str,VulnerabilityReport]) -> Dict[str,str]:
136 | osinfo = None
137 | for layer,report in reportMap.items():
138 | if report.os is not None:
139 | if osinfo is not None:
140 |                 print(f"Received multiple OS entries. Latest: {report.os}; previous: {osinfo}. Merging them.")
141 | for k,v in report.os.items():
142 | osinfo[k] = v
143 | else:
144 | osinfo = report.os
145 | return osinfo
146 |
147 | def getTotalCPE(reportMap: Dict[str,VulnerabilityReport]) -> List[PackageInfo]:
148 | total_cpe = set()
149 | for layer,report in reportMap.items():
150 | if len(report.packages) == 1 and report.packages[0] == (None,None):
151 | continue
152 | total_cpe.update(report.packages)
153 | return list(total_cpe)
154 |
155 | def generateRelationships(reportMap: Dict[str,VulnerabilityReport],filemap: Dict[str,File],packagesmap: Dict[str,Package],osinfo: Dict[str,str]) -> Dict[str,Relationship]:
156 | relmap = {}
157 |
158 | for layer,report in reportMap.items():
159 | layer_id = layer.split("/")[-1]
160 |
161 | for package,files in report.package_files.items():
162 | for file in files:
163 | fid = file.replace("/","-").replace("_","").replace(" ","")
164 | sid = f"SPDXRef-File-{layer_id}-{fid}"
165 | filemapid = filemap.get(sid).spdx_id if sid in filemap else None
166 | if filemapid is None:
167 | # TODO: this is the case where a file is recorded by ORCA but does not exist in the layer (e.g., updates to a dpkg/status).
168 | continue
169 | customid = f"{packagesmap.get(package).spdx_id}{filemapid}"
170 | if customid in relmap:
171 | continue
172 | relmap[customid] = Relationship(packagesmap.get(package).spdx_id,RelationshipType.CONTAINS,filemap.get(sid).spdx_id)
173 | relmap["root"] = Relationship("SPDXRef-DOCUMENT",RelationshipType.DESCRIBES,"SPDXRef-ContainerImage")
174 | return relmap
175 |
176 |
177 |
178 | def generateSPDXFromReportMap(containerImage: str,reportMap: Dict[str,VulnerabilityReport],output_filename: str,complete_report: bool):
179 |
180 | osinfo = getOsInfo(reportMap)
181 | if osinfo is not None and "version" not in osinfo:
182 | osinfo = None
183 |
184 | total_cpe = getTotalCPE(reportMap)
185 | filemap = generateFileMappingReport(reportMap)
186 | packagesmap = {p:map_package(osinfo,idx,p) for idx,p in enumerate(list(total_cpe))}
187 |
188 | containerPackage: Package = Package(name=containerImage, download_location=SpdxNone(),license_concluded=SpdxNone(),license_declared=SpdxNone(),spdx_id="SPDXRef-ContainerImage",copyright_text=SpdxNone(),primary_package_purpose=PackagePurpose.CONTAINER)
189 | creation_info = CreationInfo(spdx_version="SPDX-2.3",spdx_id="SPDXRef-DOCUMENT",name="CPE Finder",created=datetime.now(),creators=[Actor(ActorType.ORGANIZATION,"CNAM"),Actor(ActorType.TOOL,"CPE finder")],document_namespace="http://example.com")
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 | packages = list(packagesmap.values())
198 |
199 | packages.append(containerPackage)
200 | if osinfo is not None:
201 | osPackage: Package = Package(name=osinfo["name"].split(" ")[0].lower(),
202 | version=osinfo["version"],
203 | download_location=SpdxNone(),license_concluded=SpdxNone(),license_declared=SpdxNone(),spdx_id="SPDXRef-OperatingSystem",copyright_text=SpdxNone(),primary_package_purpose=PackagePurpose.OPERATING_SYSTEM)
204 | packages.append(osPackage)
205 |
206 | files = list(filemap.values())
207 |
208 | doc = None
209 | if complete_report:
210 | relmap = generateRelationships(reportMap,filemap,packagesmap,osinfo)
211 | relationships = list(relmap.values())
212 | doc = Document(creation_info,packages=packages,relationships=relationships,files=files)
213 | else:
214 | doc = Document(creation_info,packages=packages,files=files)
215 |
216 | write_file(doc, output_filename,validate=False)
217 | #write_file(doc, output_filename,validate=True)
218 |
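219 | # Hedged usage sketch (image name and filename illustrative): given the
220 | # per-layer report map produced by the analyzer,
221 | #   generateSPDXFromReportMap("alpine:3.19", reports, "sbom.spdx.json", complete_report=True)
222 | # writes an SPDX 2.3 document; spdx_tools' write_file picks the serialization
223 | # format from the output filename extension.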
--------------------------------------------------------------------------------
/orca/lib/test_apk.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pytest
3 | from unittest.mock import patch
4 | from orca.lib.apk import get_apk, read_apk_db, read_world_file
5 | from orca.lib.types import PackageInfo, PackageInfoType
6 |
7 |
8 | class TestApk:
9 | @patch("orca.lib.apk.open", create=True)
10 | def test_read_apk_db(self, mock_open):
11 | # Mock the file content
12 | file_content = """P:test_package
13 | V:1.2.3
14 | F:lib
15 | R:test.so
16 |
17 | P:another_package
18 | V:4.5.6
19 | F:usr/bin
20 | R:executable
21 | """
22 | mock_open.return_value.read.return_value = file_content
23 | mock_open.return_value.__enter__.return_value = mock_open.return_value
24 |
25 | # Call the function
26 | db_path = "fake_path"
27 | path = "actual_path"
28 | result = read_apk_db(db_path, path)
29 |
30 | # Assert the result
31 | expected_package1 = PackageInfo(
32 | "test_package", "1.2.3", None, PackageInfoType.APK
33 | )
34 | expected_package2 = PackageInfo(
35 | "another_package", "4.5.6", None, PackageInfoType.APK
36 | )
37 | assert expected_package1 in result
38 | assert expected_package2 in result
39 | assert result[expected_package1] == {"lib/test.so", "actual_path"}
40 | assert result[expected_package2] == {"usr/bin/executable", "actual_path"}
41 |
42 | @patch("orca.lib.apk.open", create=True)
43 | def test_read_world_file(self, mock_open):
44 | # Mock the file content
45 | file_content = "package1\npackage2\n"
46 | mock_open.return_value.readlines.return_value = file_content.splitlines()
47 | mock_open.return_value.__enter__.return_value = mock_open.return_value
48 |
49 | # Call the function
50 | db_path = "fake_path"
51 | path = "actual_path"
52 | result = read_world_file(db_path, path)
53 |
54 | # Assert the result
55 | expected_package1 = PackageInfo("package1", None, None, PackageInfoType.APK)
56 | expected_package2 = PackageInfo("package2", None, None, PackageInfoType.APK)
57 | assert expected_package1 in result
58 | assert expected_package2 in result
59 | assert result[expected_package1] == {"actual_path"}
60 | assert result[expected_package2] == {"actual_path"}
61 |
62 | @patch("orca.lib.apk.read_apk_db")
63 | @patch("orca.lib.apk.read_world_file")
64 | def test_get_apk(self, mock_read_world_file, mock_read_apk_db):
65 | # Mock the paths
66 | paths = ["path/to/apk/db/installed", "path/to/apk/world"]
67 | directory = "test_dir"
68 |
69 | # Mock the return values of read_apk_db and read_world_file
70 | mock_read_apk_db.return_value = {
71 | PackageInfo("package1", "1.0", None, PackageInfoType.APK): {"file1"}
72 | }
73 | mock_read_world_file.return_value = {
74 | PackageInfo("package2", None, None, PackageInfoType.APK): {"file2"}
75 | }
76 |
77 | # Call the function
78 | result = get_apk(paths, directory)
79 |
80 | # Assert the calls and the result
81 | mock_read_apk_db.assert_called_once_with(
82 | os.path.join(directory, paths[0]), paths[0]
83 | )
84 | mock_read_world_file.assert_called_once_with(
85 | os.path.join(directory, paths[1]), paths[1]
86 | )
87 | expected_result = {
88 | PackageInfo("package1", "1.0", None, PackageInfoType.APK): {"file1"},
89 | PackageInfo("package2", None, None, PackageInfoType.APK): {"file2"},
90 | }
91 | assert result == expected_result
92 |
93 | def test_get_apk_no_apks(self):
94 | # Test when there are no apk files in the paths
95 | paths = ["path/to/some/other/file"]
96 | directory = "test_dir"
97 | result = get_apk(paths, directory)
98 | assert result == {}
99 |
100 | @patch("orca.lib.apk.logger")
101 | @patch("orca.lib.apk.read_apk_db")
102 | def test_get_apk_logging(self, mock_read_apk_db, mock_logger):
103 | # Mock the paths
104 | paths = ["path/to/apk/db/installed"]
105 | directory = "test_dir"
106 |
107 | # Mock the return values of read_apk_db
108 | mock_read_apk_db.return_value = {
109 | PackageInfo("package1", "1.0", None, PackageInfoType.APK): {"file1"}
110 | }
111 |
112 | # Call the function
113 | get_apk(paths, directory)
114 |
115 | # Assert that the logger was called
116 | mock_logger.logger.info.assert_called_with("APKs: 1")
117 |
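The P:/V:/F:/R: stanzas mocked above mirror Alpine's /lib/apk/db/installed database: `P` is the package name, `V` its version, `F` the directory currently in scope, and `R` a file inside that directory. A minimal sketch of that parsing rule (illustrative only; the real logic lives in orca/lib/apk.py and, as the assertions show, also records the db path itself in each file set):

    # Illustrative parser for APK "installed" stanzas, matching the
    # P:/V:/F:/R: fields exercised by the fixture above.
    def parse_installed_stanzas(text: str):
        packages = {}
        for stanza in text.strip().split("\n\n"):
            name = version = folder = None
            files = set()
            for line in stanza.splitlines():
                key, _, value = line.partition(":")
                if key == "P":
                    name = value
                elif key == "V":
                    version = value
                elif key == "F":
                    folder = value
                elif key == "R" and folder is not None:
                    files.add(f"{folder}/{value}")
            if name:
                packages[(name, version)] = files
        return packages

    # parse_installed_stanzas("P:test_package\nV:1.2.3\nF:lib\nR:test.so")
    # -> {("test_package", "1.2.3"): {"lib/test.so"}}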
--------------------------------------------------------------------------------
/orca/lib/test_ascii_checkers.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from unittest.mock import patch
3 | import os
4 | from orca.lib import ascii_checkers
5 | from orca.lib.types import PackageInfo, PackageInfoType
6 |
7 | class TestAsciiCheckers:
8 |
9 | def test_parse_gemspec_empty(self):
10 | paths = []
11 | directory = ""
12 | result = ascii_checkers.parse_gemspec(paths, directory)
13 | assert result == {}
14 |
15 | def test_parse_gemspec_no_gemspec(self):
16 | paths = ["test.txt"]
17 | directory = ""
18 | result = ascii_checkers.parse_gemspec(paths, directory)
19 | assert result == {}
20 |
21 | @patch("orca.lib.ascii_checkers.logger.logger")
22 | def test_parse_gemspec_file_not_found(self, mock_logger):
23 | paths = ["test.gemspec"]
24 | directory = ""
25 | result = ascii_checkers.parse_gemspec(paths, directory)
26 | mock_logger.error.assert_called()
27 | assert result == {}
28 |
29 | def test_parse_gemspec_ok(self):
30 | # Create a dummy gemspec file
31 | gemspec_content = """
32 | Gem::Specification.new do |s|
33 | s.name = 'test_gem'
34 | s.version = '1.2.3'
35 | end
36 | """
37 | with open("test.gemspec", "w") as f:
38 | f.write(gemspec_content)
39 |
40 | paths = ["test.gemspec"]
41 | directory = ""
42 | result = ascii_checkers.parse_gemspec(paths, directory)
43 | expected_package_info = PackageInfo("test_gem", "1.2.3", None, PackageInfoType.GEM)
44 | assert list(result.keys())[0] == expected_package_info
45 | assert result[expected_package_info] == ["test.gemspec"]
46 |
47 | # Clean up the dummy file
48 | os.remove("test.gemspec")
49 |
50 | def test_parse_gemspec_no_version(self):
51 | # Create a dummy gemspec file
52 | gemspec_content = """
53 | Gem::Specification.new do |s|
54 | s.name = 'test_gem'
55 | end
56 | """
57 | with open("test.gemspec", "w") as f:
58 | f.write(gemspec_content)
59 |
60 | paths = ["test.gemspec"]
61 | directory = ""
62 | result = ascii_checkers.parse_gemspec(paths, directory)
63 | assert result == {}
64 |
65 | # Clean up the dummy file
66 | os.remove("test.gemspec")
67 |
68 | def test_parse_gemspec_no_name(self):
69 | # Create a dummy gemspec file
70 | gemspec_content = """
71 | Gem::Specification.new do |s|
72 | s.version = '1.2.3'
73 | end
74 | """
75 | with open("test.gemspec", "w") as f:
76 | f.write(gemspec_content)
77 |
78 | paths = ["test.gemspec"]
79 | directory = ""
80 | result = ascii_checkers.parse_gemspec(paths, directory)
81 | assert result == {}
82 |
83 | # Clean up the dummy file
84 | os.remove("test.gemspec")
85 |
86 | def test_parse_gosum_empty(self):
87 | # Create a dummy go.sum file
88 | gosum_content = ""
89 | with open("go.sum", "w") as f:
90 | f.write(gosum_content)
91 |
92 | result = ascii_checkers.parse_gosum("go.sum")
93 | assert result == []
94 |
95 | # Clean up the dummy file
96 | os.remove("go.sum")
97 |
98 | def test_parse_gosum_ok(self):
99 | # Create a dummy go.sum file
100 | gosum_content = """
101 | github.com/test/module v1.2.3 h1:abcdefg
102 | """
103 | with open("go.sum", "w") as f:
104 | f.write(gosum_content)
105 |
106 | result = ascii_checkers.parse_gosum("go.sum")
107 | assert result == ['cpe:2.3:a:test:module:1.2.3:*:*:*:*:*:*:*']
108 |
109 | # Clean up the dummy file
110 | os.remove("go.sum")
111 |
112 | def test_parse_gosum_multiple(self):
113 | # Create a dummy go.sum file
114 | gosum_content = """
115 | github.com/test/module v1.2.3 h1:abcdefg
116 | github.com/test/module v1.2.4 h1:abcdefg
117 | """
118 | with open("go.sum", "w") as f:
119 | f.write(gosum_content)
120 |
121 | result = ascii_checkers.parse_gosum("go.sum")
122 | assert set(result) == {'cpe:2.3:a:test:module:1.2.3:*:*:*:*:*:*:*', 'cpe:2.3:a:test:module:1.2.4:*:*:*:*:*:*:*'}
123 |
124 | # Clean up the dummy file
125 | os.remove("go.sum")
126 |
127 | def test_parse_gosum_file_not_found(self):
128 | with pytest.raises(FileNotFoundError):
129 | ascii_checkers.parse_gosum("nonexistent_file.sum")
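
The go.sum cases above pin down the expected mapping from a `module version hash` line to a CPE 2.3 string. A minimal sketch of that transformation for github.com modules (assumed shape; parse_gosum in ascii_checkers.py is the authoritative implementation):

    # Sketch: "github.com/test/module v1.2.3 h1:abcdefg"
    # -> "cpe:2.3:a:test:module:1.2.3:*:*:*:*:*:*:*"
    def gosum_line_to_cpe(line: str):
        parts = line.split()
        if len(parts) < 2 or not parts[0].startswith("github.com/"):
            return None
        _, vendor, product = parts[0].split("/", 2)
        version = parts[1].lstrip("v").split("/")[0]  # drop the "v" prefix and any "/go.mod" suffix
        return f"cpe:2.3:a:{vendor}:{product}:{version}:*:*:*:*:*:*:*"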
--------------------------------------------------------------------------------
/orca/lib/test_bin_checkers.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pytest
3 | from unittest.mock import patch
4 | from orca.lib import bin_checkers
5 | from orca.lib.types import PackageInfo
6 |
7 | def test_check_gcc():
8 | strings = ["GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0"]
9 | expected = PackageInfo("gcc", "9.4.0", "gnu", None)
10 | assert bin_checkers.check_gcc(strings) == expected
11 |
12 | strings = ["Some other string", "GCC: (GNU) 7.5.0"]
13 | expected = PackageInfo("gcc", "7.5.0", "gnu", None)
14 | assert bin_checkers.check_gcc(strings) == expected
15 |
16 | strings = ["No match here"]
17 | assert bin_checkers.check_gcc(strings) is None
18 |
19 | def test_check_gcc2():
20 | strings = ["gcc 4.8.5"]
21 | expected = PackageInfo("gcc", "4.8.5", "gnu", None)
22 | assert bin_checkers.check_gcc2(strings) == expected
23 |
24 | strings = ["Some other string", "gcc 5.4.0"]
25 | expected = PackageInfo("gcc", "5.4.0", "gnu", None)
26 | assert bin_checkers.check_gcc2(strings) == expected
27 |
28 | strings = ["No match here"]
29 | assert bin_checkers.check_gcc2(strings) is None
30 |
31 | def test_check_openssl():
32 | strings = ["OpenSSL 1.1.1f 31 Mar 2020"]
33 | expected = PackageInfo("openssl", "1.1.1", "openssl", None)
34 | assert bin_checkers.check_openssl(strings) == expected
35 |
36 | strings = ["Some other string", "* OpenSSL 1.0.2g 1 Mar 2016"]
37 | expected = PackageInfo("openssl", "1.0.2", "openssl", None)
38 | assert bin_checkers.check_openssl(strings) == expected
39 |
40 | strings = ["No match here"]
41 | assert bin_checkers.check_openssl(strings) is None
42 |
43 | def test_check_postgres():
44 | expected = PackageInfo("postgresql", "12.3.4", "postgresql", None)
45 | assert bin_checkers.check_postgres(["(PostgreSQL) 12.3.4"]) == expected
46 |
47 | expected = PackageInfo("postgresql", "9.6.17", "postgresql", None)
48 | assert bin_checkers.check_postgres(["Some other string", "(PostgreSQL) 9.6.17"]) == expected
49 |
50 | assert bin_checkers.check_postgres(["No match here"]) is None
51 |
52 | def test_check_zlib():
53 | strings = ["inflate (zlib v1.2.11) 1.2.11"]
54 | expected = PackageInfo("zlib", "1.2.11", "zlib", None)
55 | assert bin_checkers.check_zlib(strings) == expected
56 |
57 | strings = ["Some other string", "inflate (zlib v1.2.8) 1.2.8"]
58 | expected = PackageInfo("zlib", "1.2.8", "zlib", None)
59 | assert bin_checkers.check_zlib(strings) == expected
60 |
61 | strings = ["No match here"]
62 | assert bin_checkers.check_zlib(strings) is None
63 |
64 | def test_check_self():
65 | strings = ["mybinary v1.2.3"]
66 | expected = PackageInfo("mybinary", "v1.2.3", None, None)
67 | assert bin_checkers.check_self(strings, "mybinary") == expected
68 |
69 | strings = ["Some other string", "anotherbin 2.0.0"]
70 | expected = PackageInfo("anotherbin", "2.0.0", None, None)
71 | assert bin_checkers.check_self(strings, "anotherbin") == expected
72 |
73 | strings = ["No match here"]
74 | assert bin_checkers.check_self(strings, "testbin") is None
75 |
76 | # Test with binary name of length 1
77 | assert bin_checkers.check_self(strings, "a") is None
78 |
79 | @patch('orca.lib.bin_checkers.logger.logger.info')
80 | def test_check_self_regex_error(mock_info):
81 | strings = ["test v1.2.3"]
82 | # Force a regex error by using an invalid binary name
83 | result = bin_checkers.check_self(strings, "*invalid*")
84 | assert result == (None,None)
85 | mock_info.assert_called()
86 |
87 | def test_extract_strings():
88 | # Create a dummy file for testing
89 | with open("test_file.txt", "wb") as f:
90 | f.write(b"This is a test\n")
91 | f.write(b"with some strings\n")
92 | f.write(b"and some non-ascii: \x80\x81\x82\n") # Include some non-ASCII bytes
93 | f.write(b"short\n")
94 | f.write(b"toolongstringggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
ggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg")
95 |
96 | # Test code for the "static_check_cpes" function
97 | # Test with an empty file
98 | with open("test_file.txt", "w" ) as f:
99 | f.write("")
100 |
101 | assert bin_checkers.static_check_cpes("test_file.txt") == []
102 |
103 | # Clean up the dummy file
104 | os.remove("test_file.txt")
105 |
106 | # Test for check_binaries
107 | @pytest.mark.skip(reason="no way to test this functionality yet")
108 | def test_check_binaries():
109 | # Create a dummy directory and files for testing
110 | os.makedirs("test_dir", exist_ok=True)
111 | with open("test_dir/file1.txt", "w") as f:
112 | f.write("gcc 1.2.3")
113 | with open("test_dir/file2.txt", "w") as f:
114 | f.write("zlib v1.2.8")
115 |
116 | executables = ["file1.txt", "file2.txt"]
117 | # Call check_binaries
118 | results = bin_checkers.check_binaries("test_dir", executables)
119 |
120 | # Assert the expected results
121 | assert len(results) == 2
122 | assert "gcc" in results
123 | assert "zlib" in results
124 |
125 | # Clean up the dummy directory and files
126 | os.remove("test_dir/file1.txt")
127 | os.remove("test_dir/file2.txt")
128 | os.rmdir("test_dir")
--------------------------------------------------------------------------------
/orca/lib/test_bin_checkers_cpe.py:
--------------------------------------------------------------------------------
1 | import os
2 | from orca.lib import bin_checkers_cpe as bcc
6 |
7 | def test_check_gcc():
8 | strings = ["GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0"]
9 | cpe, string = bcc.check_gcc(strings)
10 | assert cpe == "cpe:2.3:a:gnu:gcc:9.4.0:*:*:*:*:*:*:*"
11 | assert string == "GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0"
12 |
13 | strings = ["Some other string", "GCC: (GNU) 7.5.0"]
14 | cpe, string = bcc.check_gcc(strings)
15 | assert cpe == "cpe:2.3:a:gnu:gcc:7.5.0:*:*:*:*:*:*:*"
16 | assert string == "GCC: (GNU) 7.5.0"
17 |
18 | strings = ["No match here"]
19 | cpe, string = bcc.check_gcc(strings)
20 | assert cpe is None
21 | assert string is None
22 |
23 | def test_check_gcc2():
24 | strings = ["gcc 5.4.0"]
25 | cpe, string = bcc.check_gcc2(strings)
26 | assert cpe == "cpe:2.3:a:gnu:gcc:5.4.0:*:*:*:*:*:*:*"
27 | assert string == "gcc 5.4.0"
28 |
29 | strings = ["Another string", "gcc 4.9.3"]
30 | cpe, string = bcc.check_gcc2(strings)
31 | assert cpe == "cpe:2.3:a:gnu:gcc:4.9.3:*:*:*:*:*:*:*"
32 | assert string == "gcc 4.9.3"
33 |
34 | strings = ["No match"]
35 | cpe, string = bcc.check_gcc2(strings)
36 | assert cpe is None
37 | assert string is None
38 |
39 | def test_check_openssl():
40 | strings = ["OpenSSL 1.1.1f 31 Mar 2020"]
41 | cpe, string = bcc.check_openssl(strings)
42 | assert cpe == "cpe:2.3:a:openssl:openssl:1.1.1:*:*:*:*:*:*:*"
43 | assert string == "OpenSSL 1.1.1f 31 Mar 2020"
44 |
45 | strings = ["Some text", "OpenSSL 1.0.2k-fips 26 Jan 2017"]
46 | cpe, string = bcc.check_openssl(strings)
47 | assert cpe == "cpe:2.3:a:openssl:openssl:1.0.2:*:*:*:*:*:*:*"
48 | assert string == "OpenSSL 1.0.2k-fips 26 Jan 2017"
49 |
50 | strings = ["No OpenSSL here"]
51 | cpe, string = bcc.check_openssl(strings)
52 | assert cpe is None
53 | assert string is None
54 |
55 | def test_check_postgres():
56 | strings = ["(PostgreSQL) 12.3.2"]
57 | cpe, string = bcc.check_postgres(strings)
58 | assert cpe == "cpe:2.3:a:postgresql:postgresql:12.3.2:*:*:*:*:*:*:*"
59 | assert string == "(PostgreSQL) 12.3.2"
60 |
61 | strings = ["Other stuff", "(PostgreSQL) 9.6.17"]
62 | cpe, string = bcc.check_postgres(strings)
63 | assert cpe == "cpe:2.3:a:postgresql:postgresql:9.6.17:*:*:*:*:*:*:*"
64 | assert string == "(PostgreSQL) 9.6.17"
65 |
66 | strings = ["No PostgreSQL"]
67 | cpe, string = bcc.check_postgres(strings)
68 | assert cpe is None
69 | assert string is None
70 |
71 | def test_check_zlib():
72 | strings = ["inflate (zlib) 1.2.11"]
73 | cpe, string = bcc.check_zlib(strings)
74 | assert cpe == "cpe:2.3:a:zlib:zlib:1.2.11:*:*:*:*:*:*:*"
75 | assert string == "inflate (zlib) 1.2.11"
76 |
77 | strings = ["Another string", "inflate (zlib) 1.2.8"]
78 | cpe, string = bcc.check_zlib(strings)
79 | assert cpe == "cpe:2.3:a:zlib:zlib:1.2.8:*:*:*:*:*:*:*"
80 | assert string == "inflate (zlib) 1.2.8"
81 |
82 | strings = ["No zlib here"]
83 | cpe, string = bcc.check_zlib(strings)
84 | assert cpe is None
85 | assert string is None
86 |
87 | def test_check_self():
88 | strings = ["mybinary v1.0.0"]
89 | cpe, string = bcc.check_self(strings, "mybinary")
90 | assert cpe == "cpe:2.3:a:*:mybinary:v1.0.0:*:*:*:*:*:*:*"
91 | assert string == "mybinary v1.0.0"
92 |
93 | strings = ["Some other string", "anotherbin 2.5.1"]
94 | cpe, string = bcc.check_self(strings, "anotherbin")
95 | assert cpe == "cpe:2.3:a:*:anotherbin:2.5.1:*:*:*:*:*:*:*"
96 | assert string == "anotherbin 2.5.1"
97 |
98 | strings = ["No match for this binary"]
99 | cpe, string = bcc.check_self(strings, "nonexistent")
100 | assert cpe is None
101 | assert string is None
102 |
103 | def test_extract_strings():
104 | # Create a dummy file for testing
105 | with open("test_file.txt", "wb") as f:
106 | f.write(b"This is a test file.\n")
107 | f.write(b"It contains some strings.\n")
108 | f.write(b"Short: abc\n")
109 | f.write(b"Longer: abcdefg\n")
110 | f.write(b"\x01\x02\x03BinaryData\x04\x05\x06\n")
111 |
112 | strings = bcc.extract_strings("test_file.txt", min_length=4)
113 | assert "This is a test file." in strings
114 | assert "abc" not in strings # Shorter than min_length
115 |
116 | # Clean up the dummy file
117 | os.remove("test_file.txt")
118 |
119 | def test_static_check_cpes():
120 | # Create a dummy file with strings that match known CPE patterns
121 | with open("test_binary", "wb") as f:
122 | f.write(b"GCC: (GNU) 7.5.0\n")
123 | f.write(b"OpenSSL 1.1.1f 31 Mar 2020\n")
124 | f.write(b"test_binary v2.0.1\n")
125 |
126 | cpes = bcc.static_check_cpes("test_binary")
127 | assert "cpe:2.3:a:gnu:gcc:7.5.0:*:*:*:*:*:*:*" in cpes
128 | assert "cpe:2.3:a:openssl:openssl:1.1.1:*:*:*:*:*:*:*" in cpes
129 | assert "cpe:2.3:a:*:test_binary:v2.0.1:*:*:*:*:*:*:*" in cpes
130 |
131 | # Clean up the dummy file
132 | os.remove("test_binary")
133 |
134 | def test_static_check_cpes_empty():
135 | # Create an empty dummy file
136 | with open("empty_binary", "wb") as f:
137 | pass
138 |
139 | cpes = bcc.static_check_cpes("empty_binary")
140 | assert cpes == []
141 |
142 | # Clean up the dummy file
143 | os.remove("empty_binary")
--------------------------------------------------------------------------------
/orca/lib/test_composer.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | from orca.lib import composer
4 | from orca.lib.types import PackageInfo, PackageInfoType
5 |
6 | def test_parse_composer_lock_empty(tmp_path):
7 | d = tmp_path / "sub"
8 | d.mkdir()
9 | p = d / "composer.lock"
10 | p.write_text(json.dumps({"packages": []}))
11 |
12 | result = composer.parse_composer_lock([], str(d), "composer.lock")
13 | assert result == {}
14 |
15 | def test_parse_composer_lock_basic(tmp_path):
16 | d = tmp_path / "sub"
17 | d.mkdir()
18 | p = d / "composer.lock"
19 | p.write_text(json.dumps({
20 | "packages": [
21 | {"name": "vendor/package1", "version": "1.0.0"},
22 | {"name": "vendor2/package2", "version": "2.0.0"}
23 | ]
24 | }))
25 |
26 | result = composer.parse_composer_lock([], str(d), "composer.lock")
27 |
28 | assert len(result) == 2
29 |
30 | expected_package1 = PackageInfo("package1", "1.0.0", "vendor", PackageInfoType.COMPOSER)
31 | expected_package2 = PackageInfo("package2", "2.0.0", "vendor2", PackageInfoType.COMPOSER)
32 |
33 | assert expected_package1 in result
34 | assert expected_package2 in result
35 |
36 | assert result[expected_package1] == ["composer.lock"]
37 | assert result[expected_package2] == ["composer.lock"]
38 |
39 | def test_parse_composer_empty(tmp_path):
40 | d = tmp_path / "sub"
41 | d.mkdir()
42 | p = d / "composer.json"
43 | p.write_text(json.dumps({}))
44 |
45 | result = composer.parse_composer([], str(d), "composer.json")
46 | assert result == {}
47 |
48 | def test_parse_composer_basic(tmp_path):
49 | d = tmp_path / "sub"
50 | d.mkdir()
51 | p = d / "composer.json"
52 | p.write_text(json.dumps({
53 | "name": "vendor/package1",
54 | "version": "1.0.0"
55 | }))
56 |
57 | result = composer.parse_composer([], str(d), "composer.json")
58 |
59 | assert len(result) == 1
60 |
61 | expected_package = PackageInfo("package1", "1.0.0", "vendor", PackageInfoType.COMPOSER)
62 |
63 | assert expected_package in result
64 | assert result[expected_package] == ["composer.json"]
65 |
66 | def test_parse_composer_no_version(tmp_path):
67 | d = tmp_path / "sub"
68 | d.mkdir()
69 | p = d / "composer.json"
70 | p.write_text(json.dumps({
71 | "name": "vendor/package1",
72 | }))
73 |
74 | result = composer.parse_composer([], str(d), "composer.json")
75 |
76 | assert len(result) == 1
77 |
78 | expected_package = PackageInfo("package1", None, "vendor", PackageInfoType.COMPOSER)
79 |
80 | assert expected_package in result
81 | assert result[expected_package] == ["composer.json"]
82 |
83 | def test_get_composer_no_composer_files(tmp_path):
84 | d = tmp_path / "sub"
85 | d.mkdir()
86 |
87 | result = composer.get_composer([], str(d))
88 | assert result == {}
89 |
90 | def test_get_composer_only_composer_json(tmp_path):
91 | d = tmp_path / "sub"
92 | d.mkdir()
93 | p = d / "composer.json"
94 | p.write_text(json.dumps({"name": "vendor/package1", "version": "1.0.0"}))
95 |
96 | result = composer.get_composer([str(p)], str(d))
97 | assert result == {} # Because it requires composer.lock
98 |
99 | def test_parse_composer_exception(tmp_path, caplog):
100 | d = tmp_path / "sub"
101 | d.mkdir()
102 | p = d / "composer.json"
103 | p.write_text("Invalid JSON")
104 |
105 | result = composer.parse_composer([], str(d), "composer.json")
106 | assert result == {}
107 | assert "Could not open file composer.json" in caplog.text
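
The cases above fix the vendor/name convention: composer's `vendor/package` string splits on the slash, with the left part stored as the PackageInfo author. A one-line sketch of that split (illustrative; orca/lib/composer.py is authoritative):

    name_field = "vendor/package1"
    vendor, _, product = name_field.partition("/")
    # -> PackageInfo(product, version, vendor, PackageInfoType.COMPOSER)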
--------------------------------------------------------------------------------
/orca/lib/test_dockerfile.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from orca.lib.types import PackageInfo, PackageInfoType
3 | from orca.lib.dockerfile import (
4 | extract_urls,
5 | replace_curly_variables,
6 | replace_dollar_variables,
7 | interpolate_variables,
8 | github_to_cpe,
9 | selected_websites_to_cpe,
10 | extract_cpes_from_dockerfile
11 | )
12 |
13 | def test_extract_urls():
14 | text = "RUN curl https://example.com/file.tar.gz && wget http://test.org/pkg.zip"
15 | urls = extract_urls(text)
16 | assert urls == ["https://example.com/file.tar.gz", "http://test.org/pkg.zip"]
17 |
18 | def test_replace_curly_variables():
19 | url = "https://example.com/${VERSION}/file.tar.gz"
20 | line = "VERSION=1.2.3 curl ${VERSION}"
21 | result = replace_curly_variables(url, line)
22 | assert result == ["https://example.com/1.2.3/file.tar.gz"]
23 |
24 | def test_replace_dollar_variables():
25 | url = "https://example.com/$VERSION/file.tar.gz"
26 | line = "VERSION=1.2.3"
27 | result = replace_dollar_variables(url, line)
28 | assert result == "https://example.com/1.2.3/file.tar.gz"
29 |
30 | def test_github_to_cpe():
31 | urls = ["https://github.com/user/repo/releases/download/v1.2.3/file.tar.gz"]
32 | result = github_to_cpe(urls)
33 | expected = [(PackageInfo("repo", "v1.2.3", "user", PackageInfoType.GITHUB), urls[0])]
34 | assert result == expected
35 |
36 | def test_selected_websites_to_cpe():
37 | urls = [
38 | "https://static.rust-lang.org/rustup/archive/1.2.3/",
39 | "https://services.gradle.org/distributions/gradle-7.0-bin.zip",
40 | "https://ftp.postgresql.org/pub/source/v12.0"
41 | ]
42 | result = selected_websites_to_cpe(urls)
43 | expected = [
44 | (PackageInfo("rust", "1.2.3", "rust", type=PackageInfoType.RUST), urls[0]),
45 | (PackageInfo("gradle", "7.0", "gradle", PackageInfoType.GRADLE), urls[1]),
46 | (PackageInfo("postgresql", "v12.0", "postgresql"), urls[2])
47 | ]
48 | assert result == expected
49 |
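The interpolation tests encode a simple substitution model: `NAME=value` bindings found on the same Dockerfile line are expanded into `${NAME}` and `$NAME` placeholders inside extracted URLs. A regex sketch of the curly-brace case (assumed shape; replace_curly_variables may track several candidate values, hence the list result asserted above):

    import re

    def substitute_curly(url: str, line: str) -> str:
        # collect NAME=value bindings from the line, then expand ${NAME} in the URL
        bindings = dict(re.findall(r"(\w+)=([\w.\-]+)", line))
        return re.sub(r"\$\{(\w+)\}", lambda m: bindings.get(m.group(1), m.group(0)), url)

    # substitute_curly("https://example.com/${VERSION}/file.tar.gz", "VERSION=1.2.3 curl ${VERSION}")
    # -> "https://example.com/1.2.3/file.tar.gz"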
--------------------------------------------------------------------------------
/orca/lib/test_jar.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import os
3 | from .jar import extract_jar, list_jar_props, parse_pom_properties, get_jar
4 | from .types import PackageInfo, PackageInfoType
5 | import zipfile
6 |
7 | class TestJar(unittest.TestCase):
8 | def test_extract_jar_simple(self):
9 | # Test simple version format
10 | result = extract_jar("org.apache.commons.text-1.9")
11 | self.assertEqual(result["author"], "org.apache.commons")
12 | self.assertEqual(result["name"], "text")
13 | self.assertEqual(result["version"], "1.9")
14 |
15 | def test_extract_jar_complex(self):
16 | # Test complex version format
17 | result = extract_jar("com.google.guava-31.jar")
18 | self.assertEqual(result["author"], "com.google")
19 | self.assertEqual(result["name"], "guava")
20 | self.assertEqual(result["version"], "31.jar")
21 |
22 | def test_extract_jar_invalid(self):
23 | # Test invalid format
24 | result = extract_jar("invalid_format")
25 | self.assertIsNone(result)
26 |
27 | def test_get_jar(self):
28 | test_paths = [
29 | "test.jar",
30 | "commons-text-1.9.jar",
31 | "guava-31.0.jar"
32 | ]
33 | packages = get_jar(test_paths, ".")
34 | self.assertIsInstance(packages, dict)
35 |
36 | def test_list_jar_props_empty(self):
37 | # Test with non-existent jar
38 | packages = list_jar_props("nonexistent.jar", ".")
39 | self.assertEqual(packages, [])
40 |
41 | if __name__ == '__main__':
42 | unittest.main()
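
The extract_jar cases pin a rightmost-split convention: everything after the last `-` is the version, and the coordinate before it splits on its last `.` into author and name. A sketch of that rule (illustrative; jar.py is authoritative, and as the second case shows, the rule applies even when the trailing piece is `31.jar`):

    def split_jar_coordinate(stem: str):
        if "-" not in stem or "." not in stem.rsplit("-", 1)[0]:
            return None
        coordinate, version = stem.rsplit("-", 1)
        author, name = coordinate.rsplit(".", 1)
        return {"author": author, "name": name, "version": version}

    # split_jar_coordinate("org.apache.commons.text-1.9")
    # -> {"author": "org.apache.commons", "name": "text", "version": "1.9"}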
--------------------------------------------------------------------------------
/orca/lib/test_path.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from orca.lib.path import remove_folders, get_filepaths
3 | import os
4 | import tempfile
5 |
6 | def test_remove_folders_empty():
7 | assert remove_folders([]) == []
8 |
9 | def test_remove_folders_single_file():
10 | assert remove_folders(["file.txt"]) == ["file.txt"]
11 |
12 | def test_get_filepaths():
13 | with tempfile.TemporaryDirectory() as tmp_dir:
14 | # Create test directory structure
15 | os.makedirs(os.path.join(tmp_dir, "a/b"))
16 | os.makedirs(os.path.join(tmp_dir, "etc/ssl/certs"))
17 | os.makedirs(os.path.join(tmp_dir, "usr/share/zoneinfo"))
18 | os.makedirs(os.path.join(tmp_dir, "etc/nginx"))
19 |
20 | # Create some test files
21 | open(os.path.join(tmp_dir, "a/b/test.txt"), "w").close()
22 | open(os.path.join(tmp_dir, "file.txt"), "w").close()
23 | open(os.path.join(tmp_dir, "etc/ssl/certs/cert.pem"), "w").close()
24 |
25 | paths = get_filepaths(tmp_dir)
26 |
27 | assert "a/b/test.txt" in paths
28 | assert "file.txt" in paths
29 | assert "etc/ssl/certs/cert.pem" not in paths
30 |
--------------------------------------------------------------------------------
/orca/lib/test_path_checkers.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from .path_checkers import check_python_from_path_once, check_python_from_path
3 | from .types import PackageInfo
4 | import os
5 |
6 | class TestPathCheckers(unittest.TestCase):
7 | def test_check_python_from_path_once_basic(self):
8 | filename = "python3.8/site-packages/requests-2.25.1.dist-info"
9 | result, files = check_python_from_path_once(filename, "")
10 | self.assertEqual(len(result), 2)
11 | self.assertEqual(result[0], PackageInfo("python", "3.8", None))
12 | self.assertEqual(result[1], PackageInfo("requests", "2.25.1", None))
13 | self.assertEqual(files, [filename])
14 |
15 | def test_check_python_from_path_once_nested(self):
16 | filename = "python3.9/site-packages/urllib3/urllib3-1.26.4.dist-info"
17 | result, files = check_python_from_path_once(filename, "")
18 | self.assertEqual(len(result), 2)
19 | self.assertEqual(result[0], PackageInfo("python", "3.9", None))
20 | self.assertEqual(result[1], PackageInfo("urllib3-urllib3", "1.26.4", None))
21 | self.assertEqual(files, [filename])
22 |
23 | def test_check_python_from_path_once_invalid(self):
24 | filename = "invalid/path/format"
25 | result, files = check_python_from_path_once(filename, "")
26 | self.assertIsNone(result)
27 | self.assertEqual(files, [filename])
28 |
29 | def test_check_python_from_path_multiple(self):
30 | paths = [
31 | "python3.8/site-packages/requests-2.25.1.dist-info",
32 | "python3.8/site-packages/urllib3/urllib3-1.26.4.dist-info",
33 | "not/a/valid/path"
34 | ]
35 | result, files = check_python_from_path(paths, "")
36 | self.assertEqual(len(result), 4)
37 | self.assertEqual(len(files), 2)
38 |
39 | def test_check_python_from_path_empty(self):
40 | paths = ["not/a/valid/path"]
41 | result, files = check_python_from_path(paths, "")
42 | self.assertEqual(len(result), 0)
43 | self.assertEqual(len(files), 0)
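
The path checker cases above imply two extractions per hit: the interpreter version from the `pythonX.Y` segment and the package name/version from the `*.dist-info` segment. A regex sketch of the simple case (assumed pattern; path_checkers.py is authoritative, and as the nested test shows, intermediate directories fold into the package name there):

    import re

    PATTERN = re.compile(r"python(\d+\.\d+)/site-packages/.*?([\w\-]+)-(\d[\w.]*)\.dist-info")

    m = PATTERN.search("python3.8/site-packages/requests-2.25.1.dist-info")
    # m.groups() -> ("3.8", "requests", "2.25.1")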
--------------------------------------------------------------------------------
/orca/lib/test_perl.py:
--------------------------------------------------------------------------------
1 | import os
2 | from .perl import parse_module, get_perl
3 | from .types import PackageInfo, PackageInfoType
4 | import pytest
5 |
6 | class TestPerl:
7 | @pytest.fixture(autouse=True)
8 | def setup(self):
9 | self.test_dir = os.path.dirname(os.path.abspath(__file__))
10 |
11 | def test_parse_module_empty(self):
12 | result = parse_module("nonexistent_file.pm")
13 | assert result == ("", "")
14 |
15 | def test_parse_module_valid(self, tmp_path):
16 | test_content = """
17 | package Test::Module;
18 | $VERSION = '1.2.3';
19 | """
20 | test_file = tmp_path / "test.pm"
21 | test_file.write_text(test_content)
22 |
23 | package, version = parse_module(str(test_file))
24 | assert package == "Test::Module"
25 | assert version == "1.2.3"
26 |
27 | def test_parse_module_no_version(self, tmp_path):
28 | test_content = """
29 | package Test::Module;
30 | """
31 | test_file = tmp_path / "test.pm"
32 | test_file.write_text(test_content)
33 |
34 | package, version = parse_module(str(test_file))
35 | assert package == ""
36 | assert version == ""
37 |
38 | def test_get_perl_empty(self):
39 | result = get_perl([], "")
40 | assert result == {}
41 |
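The expectations above suggest parse_module reads two patterns out of a .pm file, returning ("", "") when either is missing. A hedged regex sketch of that extraction over the file's text (the real implementation in perl.py may scan line by line):

    import re

    def parse_perl_module(text: str):
        pkg = re.search(r"^\s*package\s+([\w:]+);", text, re.M)
        ver = re.search(r"\$VERSION\s*=\s*'([^']+)'", text)
        return (pkg.group(1), ver.group(1)) if pkg and ver else ("", "")

    # parse_perl_module("package Test::Module;\n$VERSION = '1.2.3';")
    # -> ("Test::Module", "1.2.3")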
--------------------------------------------------------------------------------
/orca/lib/test_pkgconfig.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from .pkgconfig import get_pkgconfig
3 | from .types import PackageInfo
4 |
5 | def test_get_pkgconfig_empty():
6 | paths = []
7 | directory = "/test"
8 | result = get_pkgconfig(paths, directory)
9 | assert result == {}
10 |
11 | def test_get_pkgconfig_no_pc_files():
12 | paths = ["/test/lib/file.so", "/test/include/header.h"]
13 | directory = "/test"
14 | result = get_pkgconfig(paths, directory)
15 | assert result == {}
16 |
17 | def test_get_pkgconfig_invalid_pc(monkeypatch):
18 | paths = ["/test/usr/lib/pkgconfig/invalid.pc"]
19 | directory = "/test"
20 |
21 | def mock_read_pc_file(path, vars):
22 | raise Exception("Invalid PC file")
23 |
24 | import pykg_config.pcfile
25 |     monkeypatch.setattr(pykg_config.pcfile, "read_pc_file", mock_read_pc_file)
26 |
27 | result = get_pkgconfig(paths, directory)
28 | assert result == {}
--------------------------------------------------------------------------------
/orca/lib/test_python.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from orca.lib.types import PackageInfo, PackageInfoType
3 | import os
4 |
5 | from orca.lib.python import (
6 | check_python_from_path_once,
7 | check_python_from_path,
8 | extract_egg_dependencies,
9 | get_egg_files,
10 | get_record_files,
11 | parse_egg_info,
12 | parse_metadata,
13 | extract_python_dependencies,
14 | )
15 |
16 | def test_check_python_from_path_once_dist_info():
17 | paths = ["/path/to/package-1.0.dist-info/METADATA", "/path/to/package-1.0.dist-info/RECORD"]
18 | filename = "/path/to/package-1.0.dist-info"
19 | directory = "/path/to"
20 | result = check_python_from_path_once(paths, filename, directory)
21 | expected_package = PackageInfo("package",'1.0',None,PackageInfoType.PYPI)
22 |
23 | assert len(result) == 1
24 | assert expected_package == list(result.keys())[0]
25 | assert len(result[expected_package]) == 2
26 |
27 | def test_check_python_from_path_once_egg_info():
28 | paths = ["/path/to/package-1.0.egg-info/PKG-INFO", "/path/to/package-1.0.egg-info/SOURCES.txt"]
29 | filename = "/path/to/package-1.0.egg-info"
30 | directory = "/path/to"
31 | result = check_python_from_path_once(paths, filename, directory)
32 | expected_package = PackageInfo("package",'1.0',None,PackageInfoType.PYPI)
33 |
34 | assert len(result) == 1
35 | assert expected_package == list(result.keys())[0]
36 | assert len(result[expected_package]) == 2
37 |
38 | def skip_test_check_python_from_path_once_record():
39 | paths = ["/path/to/package-1.0.dist-info/METADATA", "/path/to/package-1.0.dist-info/RECORD", "/path/to/package/file1.py", "/path/to/package/file2.py"]
40 | filename = "/path/to/package-1.0.dist-info/RECORD"
41 | directory = "/path/to"
42 | result = check_python_from_path_once(paths, filename, directory)
43 | assert len(result) == 1
44 | package_info = list(result.keys())[0]
45 | assert package_info.name == "package"
46 | assert package_info.version == "1.0"
47 | assert result[package_info] == ['/path/to/package/file1.py', '/path/to/package/file2.py']
48 |
49 | def test_check_python_from_path():
50 | paths = ["/path/to/package1-1.0.dist-info/METADATA",
51 | "/path/to/package2-2.0.egg-info/PKG-INFO", "/path/to/package2-2.0.egg-info/SOURCES.txt"]
52 | directory = "/path/to"
53 | result = check_python_from_path(paths, directory)
54 | assert isinstance(result, dict)
55 |
56 | def test_extract_egg_dependencies(tmpdir):
57 | depfile_content = """Name: test_package
58 | Version: 1.2.3
59 | Author: Test Author
60 | Requires-Dist: requests
61 | Requires-Dist: flask"""
62 | depfile = tmpdir.join("PKG-INFO")
63 | depfile.write(depfile_content)
64 | packages = extract_egg_dependencies(str(depfile))
65 | assert len(packages) == 3
66 | assert packages[0].name == "test_package"
67 | assert packages[0].version == "1.2.3"
68 |
69 | def skip_test_get_egg_files(tmpdir):
70 | sources_content = """file1.py
71 | file2.py
72 | """
73 | sources = tmpdir.join("SOURCES.txt")
74 | sources.write(sources_content)
75 | file = "/path/to/package-1.0.egg-info"
76 | result = get_egg_files(file, str(sources))
77 | assert result == ['/path/to/file1.py', '/path/to/file2.py']
78 |
79 | def skip_test_get_record_files(tmpdir):
80 | record_content = """file1.py,sha256=abc,100
81 | file2.py,sha256=def,200
82 | """
83 | record = tmpdir.join("RECORD")
84 | record.write(record_content)
85 | file = "/path/to/package-1.0.dist-info"
86 | result = get_record_files(file, str(record))
87 | assert result == ['/path/to/file1.py', '/path/to/file2.py']
88 |
89 | def skip_test_parse_egg_info(tmpdir):
90 | pkg_info_content = """Metadata-Version: 2.1
91 | Name: test_package
92 | Version: 1.2.3
93 | Author: Test Author
94 | Requires-Dist: requests"""
95 | sources_content = "file1.py\nfile2.py\n"
96 | pkg_info = tmpdir.join("PKG-INFO")
97 | sources = tmpdir.join("SOURCES.txt")
98 | pkg_info.write(pkg_info_content)
99 | sources.write(sources_content)
100 | paths = ["/path/to/file1.py", "/path/to/file2.py", "/path/to/package-1.0.egg-info/PKG-INFO"]
101 | file = "/path/to/package-1.0.egg-info"
102 | dirpath = str(tmpdir) + "/"
103 | result = parse_egg_info(paths, file, dirpath)
104 | assert len(result) == 2
105 | package_info = list(result.keys())[0]
106 | assert package_info.name == "test_package"
107 | assert package_info.version == "1.2.3"
108 | assert len(result[package_info]) == 3
109 |
110 | def skip_test_parse_metadata(tmpdir):
111 | metadata_content = """Metadata-Version: 2.1
112 | Name: test_package
113 | Version: 1.2.3
114 | Author: Test Author
115 | Requires-Dist: requests"""
116 | record_content = "file1.py,sha256=abc,100\nfile2.py,sha256=def,200\n"
117 | metadata = tmpdir.join("METADATA")
118 | record = tmpdir.join("RECORD")
119 | metadata.write(metadata_content)
120 | record.write(record_content)
121 | paths = ["/path/to/file1.py", "/path/to/file2.py", "/path/to/package-1.0.dist-info/METADATA"]
122 | file = "/path/to/package-1.0.dist-info"
123 | dirpath = str(tmpdir) + "/"
124 | result = parse_metadata(paths, file, dirpath)
125 | assert len(result) == 2
126 | package_info = list(result.keys())[0]
127 | assert package_info.name == "test_package"
128 | assert package_info.version == "1.2.3"
129 | assert len(result[package_info]) == 3
130 |
131 | def skip_test_extract_python_dependencies(tmpdir):
132 | # Create dummy files and directories
133 | dist_info_dir = tmpdir.mkdir("test_package-1.0.dist-info")
134 | dist_info_dir.join("METADATA").write("Metadata-Version: 2.1\nName: test_package\nVersion: 1.0")
135 | dist_info_dir.join("RECORD").write("file1.py,,\nfile2.py,,\n")
136 |
137 | egg_info_dir = tmpdir.mkdir("another_package-2.0.egg-info")
138 | egg_info_dir.join("PKG-INFO").write("Metadata-Version: 2.1\nName: another_package\nVersion: 2.0")
139 | egg_info_dir.join("SOURCES.txt").write("file3.py\nfile4.py\n")
140 |
141 | # Define paths
142 | paths = [str(dist_info_dir.join("METADATA")), str(dist_info_dir.join("RECORD")),
143 | str(egg_info_dir.join("PKG-INFO")), str(egg_info_dir.join("SOURCES.txt"))]
144 | directory = str(tmpdir)
145 |
146 | # Call the function
147 | dependencies = extract_python_dependencies(paths, directory)
148 |
149 | # Assertions
150 | assert len(dependencies) == 2
151 | assert PackageInfo("test_package", "1.0", None, PackageInfoType.PYPI) in dependencies
152 | assert PackageInfo("another_package", "2.0", None, PackageInfoType.PYPI) in dependencies
--------------------------------------------------------------------------------
/orca/lib/types.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import Dict, List, Optional, Set
3 | from enum import Enum
4 |
5 | class LayerAction(Enum):
6 | ADDED = "added"
7 | REPLACED = "replaced"
8 | DELETED = "deleted"
9 |
10 | @dataclass
11 | class PackageRecord:
12 | path: str
13 | hashtype: Optional[str]
14 | hash: Optional[str]
15 | nlines: Optional[int]
16 |
17 |
18 | def to_record(record_item: str) -> PackageRecord:
19 | split = record_item.split(',')
20 | if len(split) < 2:
21 | return PackageRecord(split[0],None,None,None)
22 | if len(split[1]) < 5:
23 | htype = None
24 | hash = None
25 | nlines = None
26 | else:
27 | htype = split[1].split("=")[0]
28 | hash = split[1].split("=")[1]
29 | nlines = int(split[2])
30 | return PackageRecord(split[0],htype,hash,nlines)
31 |
32 | @dataclass
33 | class LayerChangeRecord:
34 | action: LayerAction
35 | layer: str
36 |
37 | class PackageInfoType(Enum):
38 | DEBIAN = "debian",
39 | PYPI = "pypi",
40 | NPM = "npm",
41 | MAVEN = "maven",
42 | GOLANG = "golang",
43 | APK = "apk",
44 | COMPOSER = "composer",
45 | RPM = "rpm",
46 | GEM = "gem",
47 | PERL = "perl",
48 | GITHUB = "github",
49 | BITNAMI = "bitnami",
50 | RUST="rust",
51 | GRADLE="gradle",
52 |
53 |
54 |
55 | @dataclass(frozen=True)
56 | class PackageInfo:
57 | name: str
58 | version: str
59 | author: Optional[str]
60 | type: Optional[PackageInfoType] = None
61 | arch: Optional[str] = None
62 | epoch: Optional[str] = None
63 |
64 | def to_cpe(self):
65 | return f"cpe:2.3:a:{self.author if self.author is not None and "Amazon" not in self.author else "*"}:{self.name}:{self.version}:*:*:*:*:*:*:*"
66 |
67 | def to_csv_entry(self):
68 | author = "unknown" if self.author is None else self.author
69 | author = author if "Amazon" not in author else "unknown"
70 |
71 | return f"{self.name},{self.version},{author}"
72 |
73 | class VulnerabilityReport:
74 | def __init__(self,paths: Set[str],files=None):
75 | if files is not None:
76 | self.original_files = files
77 | else:
78 | self.original_files = paths
79 | self.initial_files = paths
80 | self.remaining_files = paths
81 | assert isinstance(self.remaining_files, set), "remaining_files must be a set"
82 | self.packages: List[PackageInfo] = []
83 | self.package_files: Dict[PackageInfo,List[str]] = {}
84 | self.analyzed_files: Set[str] = set()
85 | self.os = None
86 |
87 | def add_package_files(self,package_files: Dict[PackageInfo,List[str]]):
88 | self.packages.extend(package_files.keys())
89 | self.package_files.update({pkg: files for pkg, files in package_files.items() if any(f in self.initial_files for f in files)}) # TODO: probably add the other files to another dict
90 | fs = [file for file_list in package_files.values() for file in file_list ]
91 | fs_in_initial = [f for f in fs if f in self.initial_files]
92 | self.analyzed_files.update(fs_in_initial)
93 | self.remaining_files = self.remaining_files.difference(fs_in_initial)
94 |
95 | def to_json(self):
96 | json_dict = {}
97 | for k,v in self.package_files.items():
98 | json_dict[f"{k.name}_{k.version}_{k.author}"] = list(v)
99 | return json_dict
100 |
101 | def to_json_all(self):
102 | json_dict = {'package_files': {}, 'analyzed_files': [], 'remaining_files': []}
103 | for k, v in self.package_files.items():
104 | if isinstance(k, PackageInfo):
105 | json_dict['package_files'][f"{k.name}_{k.version}_{k.author}"] = {
106 | "type": self.package_types[k],
107 | "list_files": list(v)
108 | }
109 | json_dict['analyzed_files'] = list(set(self.analyzed_files))
110 | json_dict['remaining_files'] = list(set(self.remaining_files))
111 | return json_dict
112 |
113 | def summary(self) -> str:
114 | return f"Found {len(self.packages)} packages. Indexed {len(self.analyzed_files)} files over a total of {len(self.original_files)} - Remaining files {len(self.original_files) - len(self.analyzed_files)}"
115 |
116 |
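A short usage sketch of the serializers above; the outputs follow directly from to_cpe, to_csv_entry, and to_record as written (an authorless package falls back to `*` and `unknown` respectively):

    from orca.lib.types import PackageInfo, PackageInfoType, to_record

    pkg = PackageInfo("requests", "2.25.1", None, PackageInfoType.PYPI)
    print(pkg.to_cpe())        # cpe:2.3:a:*:requests:2.25.1:*:*:*:*:*:*:*
    print(pkg.to_csv_entry())  # requests,2.25.1,unknown

    rec = to_record("requests/__init__.py,sha256=abc123,100")
    print(rec.hashtype, rec.nlines)  # sha256 100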
--------------------------------------------------------------------------------
/orca/lib/utils.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | def calculate_sha256(file_path):
3 | try:
4 | # Open the file in binary mode
5 | with open(file_path, 'rb') as file:
6 | # Initialize the SHA-256 hash object
7 | sha256_hash = hashlib.sha256()
8 | # Read the file in chunks to efficiently handle large files
9 | for chunk in iter(lambda: file.read(4096), b''):
10 | # Update the hash object with the current chunk
11 | sha256_hash.update(chunk)
12 |             # Return the raw digest bytes (call .hex() on the result for a printable form)
13 | hash_value = sha256_hash.digest()
14 | return hash_value
15 | except FileNotFoundError:
16 | return None
17 |
18 | def map_container_id(container_id: str):
19 | return container_id.replace(":", "twodots").replace("/", "slash")
20 |
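A usage sketch for these helpers; note that calculate_sha256 returns raw digest bytes (or None for a missing file), so hex-encode the result yourself when you need a printable hash:

    from orca.lib.utils import calculate_sha256, map_container_id

    digest = calculate_sha256("/etc/hostname")  # bytes, or None if the file does not exist
    if digest is not None:
        print(digest.hex())

    print(map_container_id("nginx:1.25"))  # "nginxtwodots1.25"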
--------------------------------------------------------------------------------
/orca/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import datetime
3 | import json
4 | import shutil
5 | from typing import Dict, List
6 | import docker
7 | import docker.errors
8 | from orca.find_cpes import scan_filesystem
9 | from orca.lib.dockerfile import extract_cpes_from_dockerfile_with_validation
10 | from orca.lib.logger import logger
11 | import tarfile
12 | import os
13 |
14 | from orca.lib.spdx import generateSPDXFromReportMap
15 | from orca.lib.types import VulnerabilityReport
16 | from orca.lib.utils import map_container_id
17 |
18 | TMP_DIR = f"{os.getcwd()}/tmpdir"
19 |
20 |
21 |
22 | def tar_remove_links(file: tarfile.TarInfo,path):
23 | if not file.islnk() and not file.issym() and not file.isdev() and not file.isdir():
24 | return file
25 | return None
26 |
27 |
28 | def save_image(client:docker.DockerClient,container:str,filepath:str):
29 | try:
30 | image = client.images.get(container)
31 | except docker.errors.ImageNotFound as _:
32 | logger.info(f"Image {container} not found")
33 | logger.info(f"Pulling image {container}")
34 | image = client.images.pull(container)
35 |     except Exception as e:
36 |         logger.error(e)
37 |         raise  # without an image there is nothing to save
38 | 
39 | shutil.rmtree(TMP_DIR,ignore_errors=True)
40 |
41 | os.mkdir(TMP_DIR,mode=0o755)
42 |
43 |
44 | logger.info(f"Saving image {container} to {filepath}")
45 |     with open(filepath, 'wb') as f:
46 |         for chunk in image.save(named=False):
47 |             f.write(chunk)
48 |     return
50 |
51 |
52 | def extract_config(config_path: str):
53 | config_file = json.load(open(config_path))
54 | data = config_file['history']
55 | if len(data) > 1:
56 | return config_file
57 | # Compressed images with crane
58 | for item in config_file['history']:
59 | if "comment" in item:
60 | try:
61 | x = json.loads(item["comment"])
62 | item["comment"] = x
63 | except json.JSONDecodeError:
64 | break
65 | #print(f"Error parsing nested JSON - {item}")
66 | #exit()
67 | if 'comment' not in data[0]:
68 | return config_file
69 | config_file['history'] = data[0]['comment']
70 | return config_file
71 |
72 |
73 |
74 | def extract_with_config_and_layers(image_location:str):
75 | tarf = tarfile.open(image_location)
76 | manifests = [x for x in tarf.getmembers() if x.name == "manifest.json"]
77 | assert len(manifests) == 1
78 | manifest = manifests[0]
79 | tarf.extract(manifest,path=f"{TMP_DIR}",set_attrs=False,filter=tar_remove_links)
80 | manifestFile = json.load(open(f"{TMP_DIR}/manifest.json"))
81 | layers = manifestFile[0]['Layers']
82 | config_path = manifestFile[0]['Config']
83 | tarf.extract(config_path,path=f"{TMP_DIR}",set_attrs=False,filter=tar_remove_links)
84 | config = extract_config(f"{TMP_DIR}/{config_path}")
85 | return tarf,config,layers
86 |
87 | def scan_tar(image_tar:str,client:docker.DockerClient,binary_analysis:bool):
88 | layers_archive,config,layers = extract_with_config_and_layers(image_tar)
89 |
90 | report_by_layer: Dict[str,VulnerabilityReport] = {}
91 | for layer in layers:
92 | logger.info(f"Analyzing layer {layer}")
93 | layers_archive.extract(layer,f"{TMP_DIR}",set_attrs=False,filter=tar_remove_links)
94 | if not os.path.exists(f"{TMP_DIR}/{layer}"):
95 | logger.error(f"Layer {layer} does not exist on container {image_tar}")
96 | continue
97 | image_layer = tarfile.open(f"{TMP_DIR}/{layer}")
98 | image_layer.extractall(f"{TMP_DIR}/{layer}_layer",filter=tar_remove_links,numeric_owner=True)
99 | image_files = image_layer.getnames()
100 | report = scan_filesystem(f"{TMP_DIR}/{layer}_layer",image_files,binary_analysis,False)
101 | report_by_layer[layer] = report
102 | # Add dockerfile:
103 | logger.info(report.summary())
104 |
105 | cpes = extract_cpes_from_dockerfile_with_validation(config)
106 |     # FIXME: this is a hack to make the report work with the dockerfile. Obviously Dockerfile commands are not files.
107 | cpes.remaining_files = set()
108 | cpes.initial_files = set()
109 | cpes.original_files = set()
110 | report_by_layer["Dockerfile"] = cpes
112 |
113 | # Cleanup: TODO: probably should be done in a separate function
114 | shutil.rmtree(TMP_DIR,ignore_errors=True)
115 | return report_by_layer
116 |
117 | def scan_image(container:str,client:docker.DockerClient,binary_analysis:bool):
118 | image_tar = f'{TMP_DIR}/container.tar'
119 | save_image(client,container,image_tar)
120 | layers_archive,config,layers = extract_with_config_and_layers(image_tar)
121 |
122 | report_by_layer: Dict[str,VulnerabilityReport] = {}
123 | for layer in layers:
124 | logger.info(f"Analyzing layer {layer}")
125 | layers_archive.extract(layer,f"{TMP_DIR}",set_attrs=False,filter=tar_remove_links)
126 | if not os.path.exists(f"{TMP_DIR}/{layer}"):
127 | logger.error(f"Layer {layer} does not exist on container {container}")
128 | continue
129 | image_layer = tarfile.open(f"{TMP_DIR}/{layer}")
130 | image_layer.extractall(f"{TMP_DIR}/{layer}_layer",filter=tar_remove_links)
131 | image_files = image_layer.getnames()
132 | report = scan_filesystem(f"{TMP_DIR}/{layer}_layer",image_files, binary_analysis,False)
133 | report_by_layer[layer] = report
134 |
135 | logger.info(report.summary())
136 |
137 | cpes = extract_cpes_from_dockerfile_with_validation(config)
138 |     # FIXME: this is a hack to make the report work with the dockerfile. Obviously Dockerfile commands are not files.
140 | cpes.remaining_files = set()
141 | cpes.initial_files = set()
142 | cpes.original_files = set()
143 | report_by_layer["Dockerfile"] = cpes
144 | # Cleanup: TODO: probably should be done in a separate function
145 | shutil.rmtree(TMP_DIR,ignore_errors=True)
146 | return report_by_layer
147 |
148 | def write_logfile(report_by_layer: dict[str, VulnerabilityReport],container:str,container_name:str,elapsed:int)->None:
149 | total_files = set()
150 | total_files_duplicates = []
151 | analyzed_files = set()
152 | analyzed_files_duplicates = []
153 | for _layer,report in report_by_layer.items():
154 | total_files.update(report.initial_files)
155 | total_files_duplicates.extend(report.initial_files)
156 | analyzed_files.update(report.analyzed_files)
157 | analyzed_files_duplicates.extend(report.analyzed_files)
158 |
159 | loginfo = {
160 | "analyzed_files":len(analyzed_files),
161 | "analyzed_files_duplicates":len(analyzed_files_duplicates),
162 | "container": container,
163 | "container_usable_name": container_name,
164 | "total_files": len(total_files),
165 | "total_files_duplicates": len(total_files_duplicates),
166 | "elapsed_time":elapsed
167 | }
168 | with open(f"logs/orca-{container_name}_logs.json","w") as fp:
169 | json.dump(loginfo,fp)
170 |
171 |
172 | def orca(client: docker.DockerClient,output_folder: str,csv:bool,binary_analysis:bool,with_complete_report:bool,containers: List[str]):
173 |
174 | if not os.path.exists("logs/"):
175 | os.mkdir("logs",mode=0o755)
176 | if output_folder == "results" and not os.path.exists("results"):
177 | os.mkdir("results",mode=0o755)
178 |
179 | for container in containers:
180 | start = datetime.datetime.now()
181 | container_usable_name = map_container_id(container)
182 |
183 | if not container.endswith(".tar"):
184 | report_by_layer = scan_image(container,client,binary_analysis)
185 | else:
186 | report_by_layer = scan_tar(container,client,binary_analysis)
187 |
188 | end = datetime.datetime.now()
189 |
190 | elapsed = (end-start).total_seconds() * 1000
191 | total_cpe = set()
192 | for layer,report in report_by_layer.items():
193 | logger.info(f"{layer} - {report.summary()}")
194 | if len(report.packages) == 1 and report.packages[0] == (None,None):
195 | continue
196 | total_cpe.update(report.packages)
197 |
198 | print(f"[{container}] Total packages identified {len(total_cpe)}")
199 | logger.info(f"Elapsed time: {elapsed} ms")
200 | write_logfile(report_by_layer,container,container_usable_name,elapsed)
201 |
202 | if len(total_cpe) == 0:
203 | continue
204 | if csv:
205 | with open(f"{output_folder}/{container_usable_name}_packages.csv","w") as fp:
206 | fp.write("product,version,vendor\n")
207 | for pkg in total_cpe:
208 | fp.write(pkg.to_csv_entry() + "\n")
209 | fp.close()
210 |
211 | generateSPDXFromReportMap(container,report_by_layer,f"{output_folder}/orca-{container_usable_name}.json",with_complete_report)
212 |
213 |
214 | def main():
215 |
216 | parser = argparse.ArgumentParser(
217 | prog="orca",
218 | description="""Software composition analysis for containers"""
219 | )
220 |
221 | parser.add_argument(
222 | "-d","--dir", type=str, help="Folder where to store results *without ending /*",default="results")
223 |
224 | parser.add_argument(
225 | "--csv", action='store_true', help="Store also a csv file with package information",default=False)
226 |
227 | parser.add_argument(
228 | "-b","--with-binaries", action='store_true', help="Analyze every binary file (slower). Go binaries are always analyzed",default=False)
229 |
230 | parser.add_argument(
231 | "-c","--complete", action='store_true', help="Generate complete SPDX report with relationships (>200MB file is generated)", default=False)
232 |
233 | parser.add_argument(
234 | "containers", type=str, help="Comma separated list of containers to analyze")
235 |
236 | args = parser.parse_args()
237 | client = docker.from_env(timeout=900) # TODO: if scanning a tar there is no reason to access the docker engine
238 | output = args.dir
239 | csv = args.csv
240 | with_bin = args.with_binaries
241 | with_complete_report = args.complete
242 | containers = args.containers.split(",")
243 | orca(client,output,csv,with_bin,with_complete_report,containers)
244 |
245 | if __name__ == "__main__":
246 | main()
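
Besides the CLI entry point, orca() can be driven programmatically. A minimal sketch using the signature defined above (assumes a reachable Docker daemon; orca() creates the logs/ and results/ folders itself):

    import docker
    from orca.main import orca

    client = docker.from_env(timeout=900)
    # arguments: client, output_folder, csv, binary_analysis, with_complete_report, containers
    orca(client, "results", True, False, False, ["busybox"])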
--------------------------------------------------------------------------------
/orca/rpm_checker/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kube-security/orca/7a6f756bfbf70d7031ff1810c1c5c414747ac63d/orca/rpm_checker/__init__.py
--------------------------------------------------------------------------------
/orca/rpm_checker/go.mod:
--------------------------------------------------------------------------------
1 | module test.me/rpm
2 |
3 | go 1.20
4 |
5 | require github.com/knqyf263/go-rpmdb v0.1.1
6 |
7 | require (
8 | github.com/dustin/go-humanize v1.0.1 // indirect
9 | github.com/google/uuid v1.6.0 // indirect
10 | github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
11 | github.com/mattn/go-isatty v0.0.20 // indirect
12 | github.com/mattn/go-sqlite3 v1.14.22 // indirect
13 | github.com/ncruces/go-strftime v0.1.9 // indirect
14 | github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
15 | golang.org/x/sys v0.22.0 // indirect
16 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
17 | modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 // indirect
18 | modernc.org/libc v1.55.3 // indirect
19 | modernc.org/mathutil v1.6.0 // indirect
20 | modernc.org/memory v1.8.0 // indirect
21 | modernc.org/sqlite v1.31.1 // indirect
22 | modernc.org/strutil v1.2.0 // indirect
23 | modernc.org/token v1.1.0 // indirect
24 | )
25 |
--------------------------------------------------------------------------------
/orca/rpm_checker/go.sum:
--------------------------------------------------------------------------------
1 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
2 | github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
3 | github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
4 | github.com/glebarez/go-sqlite v1.20.3 h1:89BkqGOXR9oRmG58ZrzgoY/Fhy5x0M+/WV48U5zVrZ4=
5 | github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
6 | github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
7 | github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
8 | github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
9 | github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
10 | github.com/knqyf263/go-rpmdb v0.1.1 h1:oh68mTCvp1XzxdU7EfafcWzzfstUZAEa3MW0IJye584=
11 | github.com/knqyf263/go-rpmdb v0.1.1/go.mod h1:9LQcoMCMQ9vrF7HcDtXfvqGO4+ddxFQ8+YF/0CVGDww=
12 | github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng=
13 | github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
14 | github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
15 | github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
16 | github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
17 | github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4=
18 | github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
19 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
20 | github.com/remyoudompheng/bigfft v0.0.0-20230126093431-47fa9a501578 h1:VstopitMQi3hZP0fzvnsLmzXZdQGc4bEcgu24cp+d4M=
21 | github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
22 | github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
23 | github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
24 | golang.org/x/sys v0.4.0 h1:Zr2JFtRQNX3BCZ8YtxRE9hNJYC8J6I1MVbMg6owUp18=
25 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
26 | golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI=
27 | golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
28 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
29 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
30 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
31 | modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 h1:5D53IMaUuA5InSeMu9eJtlQXS2NxAhyWQvkKEgXZhHI=
32 | modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6/go.mod h1:Qz0X07sNOR1jWYCrJMEnbW/X55x206Q7Vt4mz6/wHp4=
33 | modernc.org/libc v1.22.2 h1:4U7v51GyhlWqQmwCHj28Rdq2Yzwk55ovjFrdPjs8Hb0=
34 | modernc.org/libc v1.55.3 h1:AzcW1mhlPNrRtjS5sS+eW2ISCgSOLLNyFzRh/V3Qj/U=
35 | modernc.org/libc v1.55.3/go.mod h1:qFXepLhz+JjFThQ4kzwzOjA/y/artDeg+pcYnY+Q83w=
36 | modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ=
37 | modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4=
38 | modernc.org/mathutil v1.6.0/go.mod h1:Ui5Q9q1TR2gFm0AQRqQUaBWFLAhQpCwNcuhBOSedWPo=
39 | modernc.org/memory v1.5.0 h1:N+/8c5rE6EqugZwHii4IFsaJ7MUhoWX07J5tC/iI5Ds=
40 | modernc.org/memory v1.8.0 h1:IqGTL6eFMaDZZhEWwcREgeMXYwmW83LYW8cROZYkg+E=
41 | modernc.org/memory v1.8.0/go.mod h1:XPZ936zp5OMKGWPqbD3JShgd/ZoQ7899TUuQqxY+peU=
42 | modernc.org/sqlite v1.20.3 h1:SqGJMMxjj1PHusLxdYxeQSodg7Jxn9WWkaAQjKrntZs=
43 | modernc.org/sqlite v1.31.1 h1:XVU0VyzxrYHlBhIs1DiEgSl0ZtdnPtbLVy8hSkzxGrs=
44 | modernc.org/sqlite v1.31.1/go.mod h1:UqoylwmTb9F+IqXERT8bW9zzOWN8qwAIcLdzeBZs4hA=
45 | modernc.org/strutil v1.2.0 h1:agBi9dp1I+eOnxXeiZawM8F4LawKv4NzGWSaLfyeNZA=
46 | modernc.org/strutil v1.2.0/go.mod h1:/mdcBmfOibveCTBxUl5B5l6W+TTH1FXPLHZE6bTosX0=
47 | modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
48 | modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
49 |
--------------------------------------------------------------------------------
/orca/rpm_checker/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 | "flag"
6 | "fmt"
7 | "log"
8 |
9 | rpmdb "github.com/knqyf263/go-rpmdb/pkg"
10 | _ "github.com/mattn/go-sqlite3"
11 | _ "modernc.org/sqlite"
12 | )
13 |
14 | type PackageInfo struct {
15 | Package string `json:"package"`
16 | Version string `json:"version"`
17 | Author string `json:"author"`
18 | Files []string `json:"files"`
19 | SourceRpm string `json:"rpm"`
20 | }
21 |
22 | func main() {
23 |     // Parse flags first: flag.NFlag() and flag.Args() are empty until flag.Parse() runs,
24 |     // so the usage check must come after parsing.
25 |     dbPath := flag.String("dbpath", "./Packages", "Path to the RPM database")
26 |     flag.Parse()
27 | 
28 |     if flag.NFlag() == 0 && len(flag.Args()) == 0 {
29 |         fmt.Println("Usage: rpm_checker -dbpath=<path-to-rpm-db>")
30 |         fmt.Println("Example: rpm_checker -dbpath=./Packages")
31 |         return
32 |     }
33 | 
34 |     if err := run(*dbPath); err != nil {
35 |         log.Fatal(err)
36 |     }
37 | }
37 |
38 | func run(dbPath string) error {
39 | db, err := rpmdb.Open(dbPath)
40 | if err != nil {
41 | return err
42 | }
43 | defer db.Close()
44 |
45 | packages := []PackageInfo{}
46 | pkgList, err := db.ListPackages()
47 | if err != nil {
48 | return err
49 | }
50 |
51 | for _, pkg := range pkgList {
52 | files := []string{}
53 |         fileinfo, _ := pkg.InstalledFiles() // best-effort: ignore per-package metadata errors and keep going
54 |
55 | for _, f := range fileinfo {
56 |             files = append(files, f.Path[1:]) // strip the leading "/" so paths are relative, matching layer tar entries
57 | }
58 |
59 | packages = append(packages, PackageInfo{pkg.Name, pkg.Version, pkg.Vendor, files, pkg.SourceRpm})
60 | }
61 |     res, _ := json.Marshal(packages) // marshaling a slice of plain structs cannot fail
62 | fmt.Println(string(res))
63 | return nil
64 | }
65 |
--------------------------------------------------------------------------------
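On success, the helper prints a single JSON array of package objects on stdout, one per installed RPM, with the fields package, version, author, files, and rpm. A minimal sketch of consuming that output from Python; the binary path and database path are placeholders, and this is not necessarily how orca/lib/rpm_packages.py invokes the helper:

    import json
    import subprocess

    # Run the Go helper against an extracted RPM database and decode its JSON output.
    proc = subprocess.run(
        ["./rpm_checker", "-dbpath=./Packages"],
        capture_output=True, text=True, check=True,
    )
    for pkg in json.loads(proc.stdout):
        print(pkg["package"], pkg["version"], pkg["rpm"])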
/requirements.txt:
--------------------------------------------------------------------------------
1 | astroid==2.11.7
2 | attrs==25.3.0
3 | bandit==1.8.3
4 | beartype==0.20.2
5 | blinker==1.9.0
6 | boolean.py==4.0
7 | bump2version==1.0.1
8 | CacheControl==0.14.2
9 | certifi==2025.1.31
10 | chardet==5.2.0
11 | charset-normalizer==3.4.1
12 | click==8.1.8
13 | coverage==7.7.1
14 | cyclonedx-python-lib==8.9.0
15 | defusedxml==0.7.1
16 | dill==0.3.9
17 | docker==7.1.0
18 | execnet==2.1.1
19 | filelock==3.18.0
20 | Flask==3.1.0
21 | Flask-Admin==1.6.1
22 | flask-cors==5.0.1
23 | Flask-Login==0.6.3
24 | Flask-SQLAlchemy==3.1.1
25 | gevent==24.11.1
26 | greenlet==3.1.1
27 | gunicorn==23.0.0
28 | idna==3.10
29 | iniconfig==2.1.0
30 | isort==5.13.2
31 | itsdangerous==2.2.0
32 | Jinja2==3.1.6
33 | lazy-object-proxy==1.10.0
34 | license-expression==30.4.1
35 | markdown-it-py==3.0.0
36 | MarkupSafe==3.0.2
37 | mccabe==0.7.0
38 | mdurl==0.1.2
39 | msgpack==1.1.0
40 | natsort==8.4.0
41 | packageurl-python==0.16.0
42 | packaging==24.2
43 | pbr==6.1.1
44 | pip-api==0.0.34
45 | pip-requirements-parser==32.0.1
46 | pip_audit==2.8.0
47 | platformdirs==4.3.7
48 | pluggy==1.5.0
49 | ply==3.11
50 | py==1.11.0
51 | py-serializable==1.1.2
52 | Pygments==2.19.1
53 | pykg-config==1.3.0
54 | pylint==2.13.9
55 | pyparsing==3.2.3
56 | pytest==8.3.5
57 | pytest-cov==6.0.0
58 | pytest-xdist==3.6.1
59 | python-debian==1.0.1
60 | python-dotenv==0.20.0
61 | PyYAML==6.0.2
62 | rdflib==7.1.3
63 | requests==2.32.3
64 | rich==13.9.4
65 | rpm==0.3.1
66 | rpmfile==2.1.0
67 | semantic-version==2.10.0
68 | setuptools==78.1.0
69 | sortedcontainers==2.4.0
70 | spdx-tools==0.8.3
71 | SQLAlchemy==2.0.39
72 | SQLAlchemy-Utils==0.41.2
73 | stevedore==5.4.1
74 | toml==0.10.2
75 | tomli==2.2.1
76 | tomlkit==0.13.2
77 | typing_extensions==4.12.2
78 | uritools==4.0.3
79 | urllib3==2.3.0
80 | validators==0.34.0
81 | Werkzeug==3.1.3
82 | wrapt==1.17.2
83 | WTForms==3.2.1
84 | xmltodict==0.14.2
85 | zope.event==5.0
86 | zope.interface==7.2
87 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | 
3 | 
4 | def read_requirements(filename="requirements.txt"):
5 |     # Keep only non-blank, non-comment lines as install_requires entries.
6 |     with open(filename, "r") as f:
7 |         return [line.strip() for line in f
8 |                 if line.strip() and not line.startswith("#")]
9 |
10 | setup(
11 | name='orca',
12 | version='0.1.20',
13 | packages=find_packages(),
14 | install_requires=read_requirements(),
15 | entry_points={
16 | 'console_scripts': [
17 | 'orca=orca.main:main',
18 | ],
19 | },
20 | include_package_data=True,
21 | )
22 |
--------------------------------------------------------------------------------
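read_requirements() above is what populates install_requires; a standalone copy of its filter, run against an in-memory sample to show that comment lines and blank lines drop out (the sample entries are illustrative only):

    # Same comprehension as read_requirements(), applied to a synthetic file body.
    lines = ["# pinned deps\n", "docker==7.1.0\n", "\n", "requests==2.32.3\n"]
    reqs = [ln.strip() for ln in lines if ln.strip() and not ln.startswith("#")]
    print(reqs)  # ['docker==7.1.0', 'requests==2.32.3']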