├── .github ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yaml └── workflows │ ├── docker.yml │ ├── lint.yml │ ├── release.yml │ ├── test.yml │ └── update-kernels.yml ├── .gitignore ├── .yamllint ├── LICENSE ├── OWNERS ├── README.md ├── action.yml ├── docker └── Dockerfile ├── kernel_crawler ├── .gitignore ├── __init__.py ├── aliyunlinux.py ├── almalinux.py ├── amazonlinux.py ├── archlinux.py ├── bottlerocket.py ├── centos.py ├── container.py ├── crawler.py ├── deb.py ├── debian.py ├── fedora.py ├── flatcar.py ├── git.py ├── main.py ├── minikube.py ├── opensuse.py ├── oracle.py ├── photon.py ├── redhat.py ├── repo.py ├── rockylinux.py ├── rpm.py ├── talos.py ├── ubuntu.py └── utils │ ├── .gitignore │ ├── __init__.py │ ├── download.py │ └── py23.py ├── release.md ├── requirements.txt ├── setup.py └── site └── index.html /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 8 | 9 | **What type of PR is this?** 10 | 11 | > Uncomment one (or more) `/kind <>` lines: 12 | 13 | > /kind bug 14 | 15 | > /kind cleanup 16 | 17 | > /kind documentation 18 | 19 | > /kind feature 20 | 21 | 24 | 25 | **Any specific area of the project related to this PR?** 26 | 27 | > Uncomment one (or more) `/area <>` lines: 28 | 29 | > /area crawler 30 | 31 | > /area ci 32 | 33 | > /area utils 34 | 35 | 38 | 39 | **What this PR does / why we need it**: 40 | 41 | **Which issue(s) this PR fixes**: 42 | 43 | 48 | 49 | Fixes # 50 | 51 | **Special notes for your reviewer**: 52 | 53 | -------------------------------------------------------------------------------- /.github/dependabot.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | open-pull-requests-limit: 10 8 | groups: 9 | actions: 10 | update-types: 11 | - "minor" 12 | - "patch" 13 | 
-------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Docker 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | paths: 8 | - 'docker/*' 9 | 10 | jobs: 11 | build: 12 | name: Build Dockerfile 13 | runs-on: ubuntu-latest 14 | outputs: 15 | dockerfile_changed: ${{ steps.filter.outputs.docker }} 16 | steps: 17 | - name: Checkout repo ⤵️ 18 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 19 | 20 | # Only test that the Dockerfile builds at all; 21 | # pushing images is handled by the Release workflow (release.yml). 22 | - name: Build Image 23 | uses: docker/build-push-action@1dc73863535b631f98b2378be8619f83b136f4a0 # v6.17.0 24 | with: 25 | file: docker/Dockerfile 26 | context: . 27 | push: false 28 | tags: falcosecurity/kernel-crawler:test 29 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Lint 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | lint: 10 | name: Lints and Scans 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout repo ⤵️ 14 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 15 | 16 | - name: Lint Dockerfile 17 | uses: hadolint/hadolint-action@54c9adbab1582c2ef04b2016b760714a4bfde3cf # v3.1.0 18 | with: 19 | dockerfile: docker/Dockerfile 20 | ignore: DL3008 21 | 22 | - name: Lint kernel-crawler 23 | uses: cclauss/GitHub-Action-for-pylint@45e14ae074fb20b6b176a5d803ec05eb4e25b3f0 # 06.0 24 | with: 25 | args: "pip install -r requirements.txt ; pylint kernel_crawler" 26 | continue-on-error: true # allow failure for now 27 | 28 | - name: Lint YAML 29 | uses: karancode/yamllint-github-action@4052d365f09b8d34eb552c363d1141fd60e2aeb2 # v3.0.0 30 | with: 31 | yamllint_file_or_dir: '.github/workflows/' 32 |
yamllint_strict: false 33 | yamllint_comment: false 34 | yamllint_config_filepath: .yamllint 35 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Release 3 | 4 | on: 5 | push: 6 | tags: 7 | - '[0-9]+.[0-9]+.[0-9]+' 8 | 9 | jobs: 10 | build-images: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 15 | with: 16 | fetch-depth: 0 17 | 18 | - name: Login to Docker Hub 19 | uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0 20 | with: 21 | username: ${{ secrets.DOCKERHUB_USER }} 22 | password: ${{ secrets.DOCKERHUB_SECRET }} 23 | 24 | - name: Set up QEMU 25 | uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0 26 | with: 27 | platforms: 'amd64,arm64' 28 | 29 | - name: Set up Docker Buildx 30 | uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0 31 | 32 | - name: Build and push container images 33 | uses: docker/build-push-action@1dc73863535b631f98b2378be8619f83b136f4a0 # v6.17.0 34 | with: 35 | platforms: linux/amd64,linux/arm64 36 | file: docker/Dockerfile 37 | context: . 
38 | tags: falcosecurity/kernel-crawler:${{ github.ref_name }},falcosecurity/kernel-crawler:latest 39 | push: true 40 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Test 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | push: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | paths-filter: 13 | runs-on: ubuntu-latest 14 | outputs: 15 | crawler_changed: ${{ steps.filter.outputs.crawler }} 16 | steps: 17 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 18 | - uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2 19 | id: filter 20 | with: 21 | filters: | 22 | crawler: 23 | - 'kernel_crawler/**' 24 | 25 | test: 26 | name: test kernel-crawler 27 | runs-on: ubuntu-latest 28 | needs: paths-filter 29 | if: needs.paths-filter.outputs.crawler_changed == 'true' 30 | steps: 31 | - name: Checkout repo ⤵️ 32 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 33 | 34 | - name: Run crawler 35 | id: crawler 36 | uses: ./ 37 | 38 | - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 39 | with: 40 | path: ${{ steps.crawler.outputs.json }} 41 | -------------------------------------------------------------------------------- /.github/workflows/update-kernels.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Update Kernels 3 | 4 | on: 5 | workflow_dispatch: 6 | schedule: 7 | - cron: '30 6 * * *' 8 | 9 | # Allow a single kernels CI run at a time: a newly started run cancels any run still in progress.
10 | concurrency: 11 | group: kernels-ci 12 | cancel-in-progress: true 13 | 14 | jobs: 15 | update-kernels: 16 | runs-on: ubuntu-latest 17 | permissions: 18 | contents: read 19 | pages: write 20 | id-token: write 21 | steps: 22 | - name: Checkout crawler 23 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 24 | 25 | - name: Run crawler for x86_64 26 | id: crawler_x86_64 27 | uses: ./ 28 | with: 29 | arch: 'x86_64' 30 | 31 | - name: Run crawler for aarch64 32 | id: crawler_aarch64 33 | uses: ./ 34 | with: 35 | arch: 'aarch64' 36 | 37 | - name: Move generated files to site folder 38 | run: | 39 | mkdir site/x86_64 40 | mv ${{ steps.crawler_x86_64.outputs.json }} site/x86_64/list.json 41 | mkdir site/aarch64 42 | mv ${{ steps.crawler_aarch64.outputs.json }} site/aarch64/list.json 43 | 44 | - uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3.0.1 45 | with: 46 | path: 'site' 47 | 48 | - id: deployment 49 | uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4.0.5 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | kernel_crawler.egg-info/ 2 | __pycache__/ 3 | .idea/ 4 | -------------------------------------------------------------------------------- /.yamllint: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | yaml-files: 4 | - '*.yaml' 5 | - '*.yml' 6 | - '.yamllint' 7 | 8 | rules: 9 | braces: enable 10 | brackets: enable 11 | colons: enable 12 | commas: enable 13 | comments: 14 | level: warning 15 | comments-indentation: 16 | level: warning 17 | document-end: disable 18 | document-start: 19 | level: warning 20 | empty-lines: enable 21 | empty-values: disable 22 | float-values: disable 23 | hyphens: enable 24 | indentation: enable 25 | key-duplicates: enable 26 | key-ordering: disable 27 | line-length: 28 | max: 120 29 
| new-line-at-end-of-file: enable 30 | new-lines: enable 31 | octal-values: disable 32 | quoted-strings: disable 33 | trailing-spaces: enable 34 | truthy: 35 | level: warning -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2019 The Falco Authors 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /OWNERS: -------------------------------------------------------------------------------- 1 | approvers: 2 | - fededp 3 | - maxgio92 4 | - leogr 5 | - zuc 6 | - EXONER4TED 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Falcosecurity kernel-crawler 2 | 3 | [![Falco Infra Repository](https://github.com/falcosecurity/evolution/blob/main/repos/badges/falco-infra-blue.svg)](https://github.com/falcosecurity/evolution/blob/main/REPOSITORIES.md#infra-scope) [![Incubating](https://img.shields.io/badge/status-incubating-orange?style=for-the-badge)](https://github.com/falcosecurity/evolution/blob/main/REPOSITORIES.md#incubating) [![License](https://img.shields.io/github/license/falcosecurity/kernel-crawler?style=for-the-badge)](./LICENSE) 4 | 5 | [![Latest](https://img.shields.io/github/v/release/falcosecurity/kernel-crawler?style=for-the-badge)](https://github.com/falcosecurity/kernel-crawler/releases/latest) 6 | 
![Architectures](https://img.shields.io/badge/ARCHS-x86__64%7Caarch64-blueviolet?style=for-the-badge) 7 | 8 | It is a tool used to crawl the kernels supported by multiple distros, and to generate a [driverkit](https://github.com/falcosecurity/driverkit)-like config json. 9 | Output json can be found, for each supported architecture, on GitHub Pages: https://falcosecurity.github.io/kernel-crawler/: 10 | * [aarch64](https://falcosecurity.github.io/kernel-crawler/aarch64/list.json) 11 | * [x86_64](https://falcosecurity.github.io/kernel-crawler/x86_64/list.json) 12 | 13 | A daily [GitHub Actions workflow](https://github.com/falcosecurity/kernel-crawler/actions/workflows/update-kernels.yml) regenerates the json and publishes it to GitHub Pages. 14 | As soon as the json is updated, a [prow job](https://github.com/falcosecurity/test-infra/blob/master/config/jobs/update-dbg/update-dbg.yaml) will create a PR on [test-infra](https://github.com/falcosecurity/test-infra) to generate the new Driverkit configs from the updated json. 15 | 16 | ## Usage 17 | 18 | Helper text and options: 19 | 20 | Main: 21 | ```commandline 22 | Usage: kernel-crawler [OPTIONS] COMMAND [ARGS]... 23 | 24 | Options: 25 | --debug / --no-debug 26 | --help Show this message and exit. 27 | 28 | Commands: 29 | crawl 30 | ``` 31 | 32 | Crawl command: 33 | ```commandline 34 | Usage: kernel-crawler crawl [OPTIONS] 35 | 36 | Options: 37 | --distro [alinux|almalinux|amazonlinux|amazonlinux2|amazonlinux2022|amazonlinux2023|arch|bottlerocket|centos|debian|fedora|flatcar|minikube|ol|opensuse|photon|redhat|rocky|talos|ubuntu|*] 38 | --version TEXT 39 | --arch [x86_64|aarch64] 40 | --image TEXT Option is required when distro is Redhat. 41 | --help Show this message and exit. 42 | ``` 43 | 44 | ## CI Usage 45 | 46 | To better suit the CI usage, a [GitHub composite action](https://docs.github.com/en/actions/creating-actions/creating-a-composite-action) has been developed.
47 | Therefore, running kernel-crawler in your Github workflow is as easy as adding this step: 48 | ``` 49 | - name: Crawl kernels 50 | uses: falcosecurity/kernel-crawler@main 51 | with: 52 | # Desired architecture. Either x86_64 or aarch64. 53 | # Default: 'x86_64'. 54 | arch: 'aarch64' 55 | 56 | # Desired distro. 57 | # Refer to crawl command helper message (above) to check supported distros. 58 | # Default: '*'. 59 | distro: 'ubuntu' 60 | ``` 61 | 62 | > __NOTE:__ Since we don't use annotated tags, one cannot use eg: falcosecurity/kernel-crawler@v0, but only either exact tag name, branch name or commit hash. 63 | 64 | ## Docker image 65 | 66 | A docker image is provided for releases, by a GitHub Actions workflow: `falcosecurity/kernel-crawler:latest`. 67 | You can also build it yourself, by issuing: 68 | ```commandline 69 | docker build -t falcosecurity/kernel_crawler -f docker/Dockerfile . 70 | ``` 71 | from project root. 72 | 73 | ## Install 74 | 75 | To install the project, a simple `pip3 install .` from project root is enough. 
76 | 77 | ## Examples 78 | 79 | * Crawl amazonlinux2 kernels: 80 | ```commandline 81 | kernel-crawler crawl --distro=AmazonLinux2 82 | ``` 83 | 84 | * Crawl all supported distros kernels: 85 | ```commandline 86 | kernel-crawler crawl --distro=* 87 | ``` 88 | | :exclamation: **Note**: Passing ```--image``` argument is supported with ```--distro=*``` | 89 | |-------------------------------------------------------------------------------------------| 90 | 91 | * Crawl Redhat kernels (specific to the container supplied), with no-formatted output: 92 | ```commandline 93 | kernel-crawler crawl --distro=Redhat --image=redhat/ubi8:registered 94 | ``` 95 | -------------------------------------------------------------------------------- /action.yml: -------------------------------------------------------------------------------- 1 | name: 'kernel-crawler' 2 | description: 'A tool to crawl existing Linux kernel versions from multiple distros' 3 | 4 | inputs: 5 | arch: 6 | description: 'Architecture to run against. x86_64 or aarch64.' 7 | required: false 8 | default: 'x86_64' 9 | distro: 10 | description: 'Distro to run against. Defaults to all.' 11 | required: false 12 | default: '*' 13 | 14 | outputs: 15 | json: 16 | description: "Generated json" 17 | value: ${{ steps.store-outputs.outputs.json }} 18 | 19 | runs: 20 | using: "composite" 21 | steps: 22 | - name: Install deps 23 | shell: bash 24 | run: | 25 | sudo apt update -y 26 | sudo apt install -y --no-install-recommends python3 python3-pip python3-pygit2 jq 27 | 28 | - name: Install crawler 29 | shell: bash 30 | working-directory: ${{ github.action_path }} 31 | run: | 32 | pip3 install . 
33 | 34 | - name: Run crawler 35 | shell: bash 36 | working-directory: ${{ github.action_path }} 37 | run: | 38 | kernel-crawler crawl --distro=${{ inputs.distro }} --arch=${{ inputs.arch }} > ${{ runner.temp }}/kernels_${{ inputs.arch }}.json 39 | 40 | - name: Validate json 41 | shell: bash 42 | working-directory: ${{ runner.temp }} 43 | run: | 44 | cat kernels_${{ inputs.arch }}.json | jq empty 45 | 46 | - name: Set output 47 | id: store-outputs 48 | shell: bash 49 | run: | 50 | echo "json=${{ runner.temp }}/kernels_${{ inputs.arch }}.json" >> $GITHUB_OUTPUT 51 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-slim 2 | 3 | RUN apt-get update && apt-get install -y --no-install-recommends git && apt-get clean && rm -rf /var/lib/apt/lists/* 4 | 5 | WORKDIR /app 6 | 7 | COPY requirements.txt . 8 | COPY setup.py . 9 | COPY kernel_crawler ./kernel_crawler 10 | 11 | RUN pip3 install --no-cache-dir . && \ 12 | useradd --create-home appuser 13 | 14 | USER appuser 15 | 16 | ENV PATH=/home/appuser/.local/bin:$PATH 17 | 18 | ENTRYPOINT ["kernel-crawler"] 19 | -------------------------------------------------------------------------------- /kernel_crawler/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | -------------------------------------------------------------------------------- /kernel_crawler/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # Copyright (C) 2023 The Falco Authors. 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | -------------------------------------------------------------------------------- /kernel_crawler/aliyunlinux.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # Copyright (C) 2023 The Falco Authors. 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | from . import repo 15 | from . 
import rpm 16 | 17 | def v2_only(ver): 18 | return ver.startswith('2') 19 | 20 | def v3_only(ver): 21 | return ver.startswith('3') 22 | 23 | class AliyunLinuxMirror(repo.Distro): 24 | def __init__(self, arch): 25 | mirrors = [ 26 | # AliyunLinux 2 27 | # Mirror list on cloud-init config example: 28 | # https://www.alibabacloud.com/help/en/elastic-compute-service/latest/use-alibaba-cloud-linux-2-images-in-an-on-premises-environment 29 | rpm.RpmMirror('http://mirrors.aliyun.com/alinux/', 'os/' + arch + '/', v2_only), 30 | rpm.RpmMirror('http://mirrors.aliyun.com/alinux/', 'updates/' + arch + '/', v2_only), 31 | rpm.RpmMirror('http://mirrors.aliyun.com/alinux/', 'plus/' + arch + '/', v2_only), 32 | 33 | # AliyunLinux 3 34 | # Mirror list on cloud-init config example: 35 | # https://www.alibabacloud.com/help/en/elastic-compute-service/latest/use-alibaba-cloud-linux-3-images-in-an-on-premises-environment 36 | rpm.RpmMirror('http://mirrors.aliyun.com/alinux/', 'os/' + arch + '/', v3_only), 37 | rpm.RpmMirror('http://mirrors.aliyun.com/alinux/', 'updates/' + arch + '/', v3_only), 38 | rpm.RpmMirror('http://mirrors.aliyun.com/alinux/', 'plus/' + arch + '/', v3_only), 39 | 40 | ] 41 | super(AliyunLinuxMirror, self).__init__(mirrors, arch) 42 | 43 | def to_driverkit_config(self, release, deps): 44 | for dep in deps: 45 | if dep.find("devel") != -1: 46 | return repo.DriverKitConfig(release, "alinux", dep) 47 | -------------------------------------------------------------------------------- /kernel_crawler/almalinux.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # Copyright (C) 2023 The Falco Authors. 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | from . import repo 15 | from . import rpm 16 | 17 | def v8_only(ver): 18 | return ver.startswith('8') 19 | 20 | def v9_only(ver): 21 | return ver.startswith('9') 22 | 23 | class AlmaLinuxMirror(repo.Distro): 24 | def __init__(self, arch): 25 | mirrors = [ 26 | # AlmaLinux 8 27 | rpm.RpmMirror('http://repo.almalinux.org/almalinux/', 'BaseOS/' + arch + '/os/', v8_only), 28 | rpm.RpmMirror('http://repo.almalinux.org/almalinux/', 'AppStream/' + arch + '/os/', v8_only), 29 | # AlmaLinux 9 30 | rpm.RpmMirror('http://repo.almalinux.org/almalinux/', 'BaseOS/' + arch + '/os/', v9_only), 31 | rpm.RpmMirror('http://repo.almalinux.org/almalinux/', 'AppStream/' + arch + '/os/', v9_only), 32 | ] 33 | super(AlmaLinuxMirror, self).__init__(mirrors, arch) 34 | 35 | def to_driverkit_config(self, release, deps): 36 | for dep in deps: 37 | if dep.find("devel") != -1: 38 | return repo.DriverKitConfig(release, "almalinux", dep) 39 | -------------------------------------------------------------------------------- /kernel_crawler/amazonlinux.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # Copyright (C) 2023 The Falco Authors. 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | #!/usr/bin/env python 15 | import sys 16 | 17 | import click 18 | 19 | from . import repo 20 | from . import rpm 21 | from kernel_crawler.utils.download import get_url 22 | from kernel_crawler.utils.py23 import make_string 23 | 24 | 25 | def get_al_repo(repo_root, repo_release, repo_arch = ''): 26 | repo_pointer = repo_root + repo_release + "/mirror.list" 27 | resp = get_url(repo_pointer) 28 | # Some distributions have a trailing slash (like AmazonLinux2022), some don't. 29 | return make_string(resp.splitlines()[0]).replace('$basearch', repo_arch).rstrip('/') + '/' 30 | 31 | 32 | class AmazonLinux1Mirror(repo.Distro): 33 | AL1_REPOS = [ 34 | 'latest/updates', 35 | 'latest/main', 36 | '2017.03/updates', 37 | '2017.03/main', 38 | '2017.09/updates', 39 | '2017.09/main', 40 | '2018.03/updates', 41 | '2018.03/main', 42 | ] 43 | 44 | def __init__(self, arch): 45 | super(AmazonLinux1Mirror, self).__init__([], arch) 46 | 47 | def list_repos(self): 48 | repo_urls = set() 49 | with click.progressbar( 50 | self.AL1_REPOS, label='Checking repositories', file=sys.stderr, item_show_func=repo.to_s) as repos: 51 | for r in repos: 52 | repo_urls.add(get_al_repo("http://repo.us-east-1.amazonaws.com/", r, self.arch)) 53 | return [rpm.RpmRepository(url) for url in sorted(repo_urls)] 54 | 55 | def to_driverkit_config(self, release, deps): 56 | for dep in deps: 57 | if dep.find("devel") != -1: 58 | return repo.DriverKitConfig(release, "amazonlinux", dep) 59 | 60 | 61 | class AmazonLinux2Mirror(repo.Distro): 62 | AL2_REPOS = [ 63 | 'core/2.0', 64 | 
'core/latest', 65 | 'extras/kernel-ng/latest', 66 | 'extras/kernel-5.4/latest', 67 | 'extras/kernel-5.10/latest', 68 | 'extras/kernel-5.15/latest', 69 | ] 70 | 71 | def __init__(self, arch): 72 | super(AmazonLinux2Mirror, self).__init__([], arch) 73 | 74 | def list_repos(self): 75 | repo_urls = set() 76 | with click.progressbar( 77 | self.AL2_REPOS, label='Checking repositories', file=sys.stderr, item_show_func=repo.to_s) as repos: 78 | for r in repos: 79 | repo_urls.add(get_al_repo("http://amazonlinux.us-east-1.amazonaws.com/2/", r + '/' + self.arch)) 80 | return [rpm.RpmRepository(url) for url in sorted(repo_urls)] 81 | 82 | def to_driverkit_config(self, release, deps): 83 | for dep in deps: 84 | if dep.find("devel") != -1: 85 | return repo.DriverKitConfig(release, "amazonlinux2", dep) 86 | 87 | class AmazonLinux2022Mirror(repo.Distro): 88 | # This was obtained by running 89 | # docker run -it --rm amazonlinux:2022 python3 -c 'import dnf, json; db = dnf.dnf.Base(); print(json.dumps(db.conf.substitutions, indent=2))' 90 | AL2022_REPOS = [ 91 | 'latest', 92 | '2022.0.20220202', 93 | '2022.0.20220315', 94 | ] 95 | 96 | def __init__(self, arch): 97 | super(AmazonLinux2022Mirror, self).__init__([], arch) 98 | 99 | def list_repos(self): 100 | repo_urls = set() 101 | with click.progressbar( 102 | self.AL2022_REPOS, label='Checking repositories', file=sys.stderr, item_show_func=repo.to_s) as repos: 103 | # This was obtained by running: 104 | # cat /etc/yum.repos.d/amazonlinux.repo 105 | # https://al2022-repos-$awsregion-9761ab97.s3.dualstack.$awsregion.$awsdomain/core/mirrors/$releasever/$basearch/mirror.list 106 | for r in repos: 107 | repo_urls.add(get_al_repo("https://al2022-repos-us-east-1-9761ab97.s3.dualstack.us-east-1.amazonaws.com/core/mirrors/", r + '/' + self.arch)) 108 | return [rpm.RpmRepository(url) for url in sorted(repo_urls)] 109 | 110 | def to_driverkit_config(self, release, deps): 111 | for dep in deps: 112 | if dep.find("devel") != -1: 113 | return 
repo.DriverKitConfig(release, "amazonlinux2022", dep) 114 | 115 | class AmazonLinux2023Mirror(repo.Distro): 116 | AL2023_REPOS = [ 117 | 'latest', 118 | ] 119 | 120 | def __init__(self, arch): 121 | super(AmazonLinux2023Mirror, self).__init__([], arch) 122 | 123 | def list_repos(self): 124 | repo_urls = set() 125 | with click.progressbar( 126 | self.AL2023_REPOS, label='Checking repositories', file=sys.stderr, item_show_func=repo.to_s) as repos: 127 | for r in repos: 128 | repo_urls.add(get_al_repo("https://cdn.amazonlinux.com/al2023/core/mirrors/", r + '/' + self.arch)) 129 | return [rpm.RpmRepository(url) for url in sorted(repo_urls)] 130 | 131 | def to_driverkit_config(self, release, deps): 132 | for dep in deps: 133 | if dep.find("devel") != -1: 134 | return repo.DriverKitConfig(release, "amazonlinux2023", dep) 135 | -------------------------------------------------------------------------------- /kernel_crawler/archlinux.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # Copyright (C) 2023 The Falco Authors. 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | import requests 14 | from bs4 import BeautifulSoup 15 | import re 16 | 17 | from kernel_crawler.utils.download import get_url 18 | from . 
import repo 19 | 20 | class ArchLinuxRepository(repo.Repository): 21 | 22 | _linux_headers_pattern = 'linux.*headers-' 23 | _package_suffix_pattern = '.pkg.tar.*' 24 | 25 | def __init__(self, base_url, arch): 26 | self.base_url = base_url 27 | self.arch = arch 28 | 29 | def __str__(self): 30 | return self.base_url 31 | 32 | def parse_kernel_release(self, kernel_package): 33 | 34 | # trim off 'linux*headers' 35 | trimmed = re.sub(self._linux_headers_pattern, '', kernel_package) 36 | # trim off the '.pkg.tar.*' 37 | version_with_arch = re.sub(self._package_suffix_pattern, '', trimmed) 38 | 39 | # trim off the architecture 40 | version = re.sub(f'-{self.arch}', '', version_with_arch) 41 | 42 | return version 43 | 44 | def get_package_tree(self, filter=''): 45 | packages = {} 46 | 47 | try: 48 | soup = BeautifulSoup(get_url(self.base_url), features='lxml') 49 | for a in soup.find_all('a', href=True): 50 | package = a['href'] 51 | # skip .sig and .. links 52 | if not package.endswith('.sig') and package != '../': 53 | parsed_kernel_release = self.parse_kernel_release(package) 54 | 55 | packages.setdefault(parsed_kernel_release, set()).add(self.base_url + package) 56 | except requests.HTTPError: 57 | pass 58 | 59 | return packages 60 | 61 | 62 | class ArchLinuxMirror(repo.Distro): 63 | 64 | _base_urls = [] 65 | 66 | def __init__(self, arch): 67 | 68 | if arch == 'x86_64': 69 | self._base_urls.append('https://archive.archlinux.org/packages/l/linux-headers/') # stable 70 | self._base_urls.append('https://archive.archlinux.org/packages/l/linux-hardened-headers/') # hardened 71 | self._base_urls.append('https://archive.archlinux.org/packages/l/linux-lts-headers/') # lts 72 | self._base_urls.append('https://archive.archlinux.org/packages/l/linux-zen-headers/') # zen 73 | elif arch == 'aarch64': 74 | self._base_urls.append('https://alaa.ad24.cz/packages/l/linux-aarch64-headers/') # arm 64-bit 75 | else: # can be implemented later 76 | 
self._base_urls.append('https://alaa.ad24.cz/packages/l/linux-armv5-headers/') # arm v5 77 | self._base_urls.append('https://alaa.ad24.cz/packages/l/linux-armv7-headers/') # arm v7 78 | self._base_urls.append('https://alaa.ad24.cz/packages/l/linux-raspberrypi4-headers/') # rpi4 79 | self._base_urls.append('https://alaa.ad24.cz/packages/l/linux-raspberrypi-headers/') # other rpi 80 | 81 | super(ArchLinuxMirror, self).__init__(self._base_urls, arch) 82 | 83 | 84 | def list_repos(self): 85 | mirrors = [] 86 | 87 | for mirror in self._base_urls: 88 | mirrors.append(ArchLinuxRepository(mirror, self.arch)) 89 | 90 | return mirrors 91 | 92 | 93 | def to_driverkit_config(self, release, deps): 94 | for dep in deps: 95 | return repo.DriverKitConfig(release, "arch", dep) 96 | -------------------------------------------------------------------------------- /kernel_crawler/bottlerocket.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # Copyright (C) 2023 The Falco Authors. 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 
13 | 14 | import base64 15 | import os 16 | import re 17 | import sys 18 | 19 | import requests 20 | import rpmfile 21 | from click import progressbar as ProgressBar 22 | 23 | from .git import GitMirror 24 | 25 | 26 | class BottleRocketMirror(GitMirror): 27 | def __init__(self, arch): 28 | super(BottleRocketMirror, self).__init__("bottlerocket-os", "bottlerocket", arch) 29 | 30 | def fetch_base_config(self, kverspec): 31 | source = self.extract_value(kverspec, "Source0", ":") 32 | if source is None: 33 | return None 34 | 35 | alkernel = requests.get(source) 36 | alkernel.raise_for_status() 37 | with open('/tmp/alkernel.rpm', 'wb') as f: 38 | f.write(alkernel.content) 39 | 40 | with rpmfile.open('/tmp/alkernel.rpm') as rpm: 41 | # Extract a fileobject from the archive 42 | fd = rpm.extractfile('config-' + self.arch) 43 | baseconfig = [line for line in fd.readlines()] 44 | 45 | os.remove('/tmp/alkernel.rpm') 46 | return baseconfig 47 | 48 | def extract_flavor(self, flavorconfig_path): 49 | flavorconfig_file = os.path.basename(flavorconfig_path) 50 | return re.match(r"^config-bottlerocket-(.*)", flavorconfig_file).group(1) 51 | 52 | def extract_kver(self, kverspec_file): 53 | return re.match(r"^kernel-(.*).spec$", kverspec_file).group(1) 54 | 55 | def set_kernel_config(self, baseconfig, key, value): 56 | for i, line in enumerate(baseconfig): 57 | if key in str(line): 58 | baseconfig[i] = key.encode() + b'=' + value.encode() 59 | break 60 | 61 | def unset_kernel_config(self, baseconfig, key): 62 | for i, line in enumerate(baseconfig): 63 | if line.startswith(key): 64 | baseconfig[i] = b'# ' + key.encode() + b' is not set\n' 65 | break 66 | 67 | def patch_config(self, baseconfig, patch): 68 | for line in patch: 69 | if line.startswith("#"): 70 | continue 71 | vals = line.split("=", 1) 72 | if len(vals) != 2: 73 | continue 74 | key = vals[0] 75 | value = vals[1] 76 | if value == "n": 77 | self.unset_kernel_config(baseconfig, key) 78 | else: 79 | 
self.set_kernel_config(baseconfig, key, value) 80 | return baseconfig 81 | 82 | def get_package_tree(self, version=''): 83 | self.list_repo() 84 | sys.stdout.flush() 85 | kernel_configs = {} 86 | bottlerocket_versions = self.getVersions(3) 87 | 88 | for v in bottlerocket_versions: 89 | bar = ProgressBar(label="Building config for bottlerocket v{}".format(v), length=1, file=sys.stderr) 90 | self.checkout_version(v) 91 | 92 | # Find supported kernels dynamically 93 | supported_kernel_specs = self.match_file("kernel-.*.spec", True) 94 | for kverspec_file in supported_kernel_specs: 95 | name = os.path.basename(kverspec_file) 96 | wd = os.path.dirname(kverspec_file) 97 | kver = self.extract_kver(name) 98 | 99 | # same meaning as the output of "uname -r" 100 | kernel_release = self.extract_value(kverspec_file, "Version", ":") 101 | if kernel_release is None: 102 | continue 103 | 104 | # Load base config 105 | vanillaconfig = self.fetch_base_config(kverspec_file) 106 | if vanillaconfig is None: 107 | continue 108 | 109 | # Load common config 110 | specific_config_file = self.search_file("config-bottlerocket", wd) 111 | if specific_config_file is None: 112 | continue 113 | 114 | with open(specific_config_file, 'r') as fd: 115 | specific_config = fd.readlines() 116 | 117 | # Find supported flavors dynamically 118 | supported_flavors = self.match_file("config-bottlerocket-.*", True, wd) 119 | if supported_flavors: 120 | for flavorconfig_file in supported_flavors: 121 | flavor = self.extract_flavor(flavorconfig_file) 122 | 123 | # Load flavor specific config 124 | with open(flavorconfig_file, 'r') as fd: 125 | flavorconfig = fd.readlines() 126 | 127 | # Merge flavor and common config 128 | flavorconfig += specific_config 129 | 130 | # Finally, patch baseconfig with flavor config 131 | finalconfig = self.patch_config(vanillaconfig, flavorconfig) 132 | defconfig_base64 = base64.b64encode(b''.join(finalconfig)).decode() 133 | 134 | kernel_version = "1_" + v + "-" + flavor 135 | 
136 | # Unique key 137 | kernel_configs[v + "_" + kver + "-" + flavor] = { 138 | self.KERNEL_VERSION: kernel_version, 139 | self.KERNEL_RELEASE: kernel_release, 140 | self.DISTRO_TARGET: "bottlerocket", 141 | self.BASE_64_CONFIG_DATA: defconfig_base64, 142 | } 143 | else: 144 | # NOTE: to keep backward compatibility with existing drivers 145 | # and driver loader logic, push these kernels for each flavor 146 | # even if the config is the same among all of them. 147 | # We will build 3x the drivers but we will be backward compatible. 148 | for flavor in ['aws','metal','vmware']: 149 | finalconfig = self.patch_config(vanillaconfig, specific_config) 150 | defconfig_base64 = base64.b64encode(b''.join(finalconfig)).decode() 151 | 152 | kernel_version = "1_" + v + "-" + flavor 153 | 154 | # Unique key 155 | kernel_configs[v + "_" + kver + "-" + flavor] = { 156 | self.KERNEL_VERSION: kernel_version, 157 | self.KERNEL_RELEASE: kernel_release, 158 | self.DISTRO_TARGET: "bottlerocket", 159 | self.BASE_64_CONFIG_DATA: defconfig_base64, 160 | } 161 | 162 | bar.update(1) 163 | bar.render_finish() 164 | 165 | self.cleanup_repo() 166 | return kernel_configs 167 | -------------------------------------------------------------------------------- /kernel_crawler/centos.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # Copyright (C) 2023 The Falco Authors. 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | from . import repo 15 | from . import rpm 16 | 17 | def v7_only(ver): 18 | return ver.startswith('7') 19 | 20 | def v8_only(ver): 21 | return ver.startswith('8') 22 | 23 | def v9_only(ver): 24 | return ver.startswith('9') 25 | 26 | def v6_or_v7(ver): 27 | return ver.startswith('6') or ver.startswith('7') 28 | 29 | class CentosMirror(repo.Distro): 30 | def __init__(self, arch): 31 | mirrors = [ 32 | # CentOS 6 + 7 33 | rpm.RpmMirror('http://vault.centos.org/centos/', 'os/' + arch + '/', v6_or_v7), 34 | rpm.RpmMirror('http://vault.centos.org/centos/', 'updates/' + arch + '/', v6_or_v7), 35 | rpm.RpmMirror('http://archive.kernel.org/centos/', 'os/' + arch + '/', v6_or_v7), 36 | rpm.RpmMirror('http://archive.kernel.org/centos/', 'updates/' + arch + '/', v6_or_v7), 37 | # CentOS 7 38 | rpm.RpmMirror('http://mirror.centos.org/centos/', 'os/' + arch + '/', v7_only), 39 | rpm.RpmMirror('http://mirror.centos.org/centos/', 'updates/' + arch + '/', v7_only), 40 | # CentOS 8 41 | rpm.RpmMirror('http://mirror.centos.org/centos/', 'BaseOS/' + arch + '/os/', v8_only), 42 | rpm.RpmMirror('http://vault.centos.org/centos/', 'BaseOS/' + arch + '/os/', v8_only), 43 | rpm.RpmMirror('http://archive.kernel.org/centos/', 'BaseOS/' + arch + '/os/', v8_only), 44 | # CentOS 9 45 | rpm.RpmMirror('http://mirror.stream.centos.org/', 'BaseOS/' + arch + '/os/', v9_only), 46 | 47 | # It seems like stream variants uses /AppStream as well 48 | rpm.RpmMirror('http://archive.kernel.org/centos/', 'AppStream/' + arch + '/os/', v8_only), 49 | rpm.RpmMirror('http://mirror.stream.centos.org/', 'AppStream/' + arch + '/os/', v9_only), 50 | 51 | # These are some advanced mirrors for CentOS that enable newer kernels for ML 52 | rpm.RpmMirror('http://elrepo.org/linux/kernel/', f'{arch}/'), 53 | rpm.RpmMirror('http://mirrors.coreix.net/elrepo/kernel/', f'{arch}/'), 54 | 
rpm.RpmMirror('http://mirror.rackspace.com/elrepo/kernel/', f'{arch}/'), 55 | rpm.RpmMirror('http://linux-mirrors.fnal.gov/linux/elrepo/kernel/', f'{arch}/'), 56 | ] 57 | super(CentosMirror, self).__init__(mirrors, arch) 58 | 59 | def to_driverkit_config(self, release, deps): 60 | for dep in deps: 61 | if dep.find("devel") != -1: 62 | return repo.DriverKitConfig(release, "centos", dep) 63 | -------------------------------------------------------------------------------- /kernel_crawler/container.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # Copyright (C) 2023 The Falco Authors. 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | import docker 15 | import click 16 | 17 | def decoded_str(s): 18 | if s is None: 19 | return '' 20 | return s.partition(b'\n')[0].decode("utf-8") 21 | 22 | class Container(): 23 | def __init__(self, image): 24 | self.image = image 25 | 26 | def run_cmd(self, cmd, encoding ="utf-8"): 27 | client = docker.from_env() 28 | container = client.containers.run(self.image, cmd, detach=True) 29 | logs = container.attach(stdout=True, stderr=True, stream=True, logs=True) 30 | # Depending on the command, the output could be buffered so first amalgamate 31 | # into one byte stream so that the outut can be processed correctly. 
32 | with click.progressbar(logs, label='[' + self.image + '] Running command \'' + cmd + '\'', item_show_func=decoded_str) as logs: 33 | output = b'' 34 | for line in logs: 35 | output += line 36 | decoded_line = output.decode(encoding) 37 | cmd_output = list(filter(None, decoded_line.split("\n"))) 38 | return cmd_output 39 | -------------------------------------------------------------------------------- /kernel_crawler/crawler.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # Copyright (C) 2023 The Falco Authors. 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | from . 
import repo 15 | from .minikube import MinikubeMirror 16 | from .aliyunlinux import AliyunLinuxMirror 17 | from .almalinux import AlmaLinuxMirror 18 | from .amazonlinux import AmazonLinux1Mirror, AmazonLinux2Mirror, AmazonLinux2022Mirror, AmazonLinux2023Mirror 19 | from .centos import CentosMirror 20 | from .fedora import FedoraMirror 21 | from .oracle import OracleMirror 22 | from .photon import PhotonOsMirror 23 | from .rockylinux import RockyLinuxMirror 24 | 25 | from .opensuse import OpenSUSEMirror 26 | 27 | from .debian import DebianMirror 28 | from .ubuntu import UbuntuMirror 29 | 30 | from .flatcar import FlatcarMirror 31 | 32 | from .redhat import RedhatContainer 33 | 34 | from .archlinux import ArchLinuxMirror 35 | 36 | from .bottlerocket import BottleRocketMirror 37 | 38 | from .talos import TalosMirror 39 | 40 | # Keys are taken from /etc/os-release where available. 41 | # Must be the same used by driverkit builders (https://github.com/falcosecurity/driverkit). 42 | DISTROS = { 43 | 'alinux': AliyunLinuxMirror, 44 | 'almalinux': AlmaLinuxMirror, 45 | 'amazonlinux': AmazonLinux1Mirror, 46 | 'amazonlinux2': AmazonLinux2Mirror, 47 | 'amazonlinux2022': AmazonLinux2022Mirror, 48 | 'amazonlinux2023': AmazonLinux2023Mirror, 49 | 'centos': CentosMirror, 50 | 'fedora': FedoraMirror, 51 | 'ol': OracleMirror, 52 | 'photon': PhotonOsMirror, 53 | 'rocky': RockyLinuxMirror, 54 | 'opensuse': OpenSUSEMirror, 55 | 'debian': DebianMirror, 56 | 'ubuntu': UbuntuMirror, 57 | 'flatcar': FlatcarMirror, 58 | 'minikube': MinikubeMirror, 59 | 'redhat': RedhatContainer, 60 | 'arch': ArchLinuxMirror, 61 | 'bottlerocket': BottleRocketMirror, 62 | 'talos': TalosMirror, 63 | } 64 | 65 | def to_driverkit_config(d, res): 66 | dk_configs = [] 67 | # Note, this is not good performance-wise because we are post-processing the list 68 | # while we could do the same at generation time. 69 | # But this is much simpler and involved touching less code. 
70 | # Moreover, we do not really care about performance here. 71 | for ver, deps in res.items(): 72 | dk_conf = d.to_driverkit_config(ver, deps) 73 | if dk_conf is not None: 74 | try: 75 | # Ubuntu returns multiple for each 76 | dk_configs.extend(dk_conf) 77 | except TypeError: 78 | # Others return just a single dk config 79 | dk_configs.append(dk_conf) 80 | 81 | return dk_configs 82 | 83 | def crawl_kernels(distro, version, arch, images): 84 | ret = {} 85 | 86 | for distname, dist in DISTROS.items(): 87 | if distname == distro or distro == "*": 88 | # If the distro requires an image (Redhat only so far), we need to amalgamate 89 | # the kernel versions from the supplied images before choosing the output. 90 | if issubclass(dist, repo.ContainerDistro): 91 | if images: 92 | kv = {} 93 | for image in images: 94 | d = dist(image) 95 | if len(kv) == 0: 96 | kv = d.get_kernel_versions() 97 | else: 98 | kv.update(d.get_kernel_versions()) 99 | # We should now have a list of all kernel versions for the supplied images 100 | res = kv 101 | else: 102 | d = None 103 | else: 104 | d = dist(arch) 105 | res = d.get_package_tree(version) 106 | 107 | if d and res: 108 | ret[distname] = to_driverkit_config(d, res) 109 | return ret 110 | -------------------------------------------------------------------------------- /kernel_crawler/deb.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # Copyright (C) 2023 The Falco Authors. 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | #!/usr/bin/env python 15 | 16 | from __future__ import print_function 17 | 18 | import re 19 | import sys 20 | 21 | import click 22 | import logging 23 | import requests 24 | 25 | from lxml import html 26 | 27 | from . import repo 28 | from kernel_crawler.utils.download import get_first_of, get_url 29 | from kernel_crawler.utils.py23 import make_bytes, make_string 30 | import pprint 31 | 32 | logger = logging.getLogger(__name__) 33 | pp = pprint.PrettyPrinter(depth=4) 34 | 35 | 36 | class IncompletePackageListException(Exception): 37 | pass 38 | 39 | 40 | class DebRepository(repo.Repository): 41 | 42 | def __init__(self, repo_base, repo_name): 43 | self.repo_base = repo_base 44 | self.repo_name = repo_name 45 | 46 | def __str__(self): 47 | return self.repo_base + self.repo_name 48 | 49 | @classmethod 50 | def scan_packages(cls, stream): 51 | """ 52 | Parse a Packages file into individual packages metadata. 
53 | """ 54 | current_package = {} 55 | packages = {} 56 | for line in stream: 57 | line = make_string(line) 58 | line = line.rstrip() 59 | if line == '': 60 | name = current_package['Package'] 61 | depends = current_package.get('Depends', []) 62 | packages[name] = { 63 | 'Depends': set(depends), 64 | 'Version': current_package['Version'], 65 | 'Filename': current_package['Filename'], 66 | } 67 | current_package = {} 68 | continue 69 | # ignore multiline values 70 | if line.startswith(' '): 71 | continue 72 | try: 73 | key, value = line.split(': ', 1) 74 | if key in ('Provides', 'Depends'): 75 | value = value.split(', ') 76 | except ValueError: 77 | # Just skip the line if it is malformed 78 | continue 79 | current_package[key] = value 80 | 81 | if current_package: 82 | name = current_package['Package'] 83 | depends = current_package.get('Depends', []) 84 | packages[name] = { 85 | 'Depends': set(depends), 86 | 'Version': current_package['Version'], 87 | 'Filename': current_package['Filename'], 88 | } 89 | 90 | return packages 91 | 92 | KERNEL_PACKAGE_PATTERN = re.compile(r'^linux-.*?-[0-9]\.[0-9]+\.[0-9]+') 93 | KERNEL_RELEASE_UPDATE = re.compile(r'^([0-9]+\.[0-9]+\.[0-9]+-[0-9]+)\.(.+)') 94 | 95 | @classmethod 96 | def is_kernel_package(cls, dep): 97 | return (cls.KERNEL_PACKAGE_PATTERN.search(dep) and 98 | not dep.endswith('-dbg') and 99 | 'modules-extra' not in dep and 100 | 'linux-source' not in dep and 101 | 'tools' not in dep) or 'linux-kbuild' in dep 102 | 103 | @classmethod 104 | def filter_kernel_packages(cls, deps): 105 | return [dep for dep in deps if (cls.is_kernel_package(dep))] 106 | 107 | @classmethod 108 | def transitive_dependencies(cls, packages, pkg_name, dependencies=None, level=0, cache=None): 109 | if cache is None: 110 | cache = {} 111 | if dependencies is None: 112 | dependencies = {pkg_name} 113 | pkg_deps = cls.filter_kernel_packages(packages[pkg_name]['Depends']) 114 | for dep in pkg_deps: 115 | dep = dep.split(None, 1)[0] 116 | # Note: 
this always takes the first branch of alternative 117 | # dependencies like 'foo|bar'. In the kernel crawler, we don't care 118 | # 119 | # also, apparently libc6 and libgcc1 depend on each other 120 | # so we only filter for kernel packages 121 | if dep in packages: 122 | if dep not in dependencies: 123 | if dep not in cache: 124 | dependencies |= {dep} 125 | deps = {dep} 126 | deps |= cls.transitive_dependencies(packages, dep, dependencies, level + 1, cache) 127 | cache[dep] = deps 128 | dependencies |= cache[dep] 129 | else: 130 | raise (IncompletePackageListException("{} not in package list".format(dep))) 131 | return dependencies 132 | 133 | @classmethod 134 | def get_package_deps(cls, packages, pkg): 135 | all_deps = set() 136 | if not cls.is_kernel_package(pkg): 137 | return set() 138 | for dep in cls.filter_kernel_packages(cls.transitive_dependencies(packages, pkg)): 139 | all_deps.add(packages[dep]['URL']) 140 | return all_deps 141 | 142 | 143 | # this method returns a list of available kernel-looking package _names_ 144 | # (i.e., without version) available from within an individual .deb repository 145 | def get_package_list(self, packages, package_filter): 146 | kernel_packages = [] 147 | for p in packages.keys(): 148 | if not p.startswith('linux-headers-'): 149 | continue 150 | release = p.replace('linux-headers-', '') 151 | if 'linux-modules-{}'.format(release) in packages: 152 | kernel_packages.append(p) 153 | kernel_packages.append('linux-modules-{}'.format(release)) 154 | elif 'linux-image-{}'.format(release) in packages: 155 | kernel_packages.append(p) 156 | kernel_packages.append('linux-image-{}'.format(release)) 157 | 158 | if not package_filter: 159 | logger.debug("kernel_packages[{}]=\n{}".format(str(self), pp.pformat(kernel_packages))) 160 | return kernel_packages 161 | # return [dep for dep in kernel_packages if self.is_kernel_package(dep) and not dep.endswith('-dbg')] 162 | 163 | kernel_packages = set(kernel_packages) 164 | linux_modules = 
def get_raw_package_db(self):
    """Fetch and parse this repository's ``Packages`` index.

    The ``Packages.xz`` and ``Packages.gz`` URLs are handed to
    ``get_first_of`` (presumably returning the first index that can be
    downloaded -- confirm against the helper). The parsed entries get an
    extra ``'URL'`` key built from ``self.repo_base`` and their
    ``'Filename'``.

    :return: dict of package name to metadata, or ``{}`` when the download
        raises ``requests.HTTPError`` or yields nothing
    """
    index_urls = [
        self.repo_base + self.repo_name + '/Packages.xz',
        self.repo_base + self.repo_name + '/Packages.gz',
    ]
    try:
        raw_index = get_first_of(index_urls)
    except requests.HTTPError:
        return {}

    if not raw_index:
        return {}

    # keep line endings: scan_packages strips them itself
    packages = self.scan_packages(raw_index.splitlines(True))
    for details in packages.values():
        details['URL'] = self.repo_base + details['Filename']
    return packages
def get_package_tree(self, filter=''):
    """Build the release -> package URL tree for this repository.

    :param filter: passed unchanged to ``get_package_list`` to narrow the
        kernel packages considered; the empty string selects everything
    :return: whatever ``build_package_tree`` returns for the selected packages
    """
    package_db = self.get_raw_package_db()
    selected = self.get_package_list(package_db, filter)
    return self.build_package_tree(package_db, selected)
def scan_repo(self, dist):
    """Discover the binary package repositories of one distribution directory.

    The ``Release`` file under ``dist`` is fetched with ``get_url`` and its
    first ``Components:`` line is inspected. Only the ``main``, ``updates``
    and ``updates/main`` components are kept.

    :param dist: path of the distribution relative to ``self.base_url``
    :return: dict mapping ``<dist><component>/binary-<arch>/`` to a
        ``DebRepository``; empty when no ``Release`` content was returned
    """
    release = get_url(self.base_url + dist + 'Release')
    if not release:
        return {}

    components_prefix = make_bytes('Components: ')
    accepted = ('main', 'updates', 'updates/main')
    components = set()
    for line in release.splitlines(False):
        if not line.startswith(components_prefix):
            continue
        # first token is the 'Components:' label itself
        for token in line.split(None)[1:]:
            component = make_string(token)
            if component not in accepted:
                continue
            # inside an '.../updates/' dist the 'updates/' prefix is already part of the path
            if dist.endswith('updates/') and component.startswith('updates/'):
                component = component.replace('updates/', '')
            components.add(component)
        # only the first Components line is considered
        break

    repos = {}
    for component in components:
        url = dist + component + '/binary-' + self.arch + '/'
        repos[url] = DebRepository(self.base_url, url)
    return repos
def fixup_deb_arch(arch):
    """Translate a kernel-style architecture name into the Debian one.

    ``x86_64`` becomes ``amd64`` and ``aarch64`` becomes ``arm64``. Any other
    value is returned unchanged instead of silently turning into ``None``,
    so names that are already in Debian form keep working and unknown
    ones stay visible in the URLs built from them.

    :param arch: architecture name as given on the command line
    :return: the architecture name to use in Debian-style repository paths
    """
    deb_arch_names = {
        'x86_64': 'amd64',
        'aarch64': 'arm64',
    }
    return deb_arch_names.get(arch, arch)
def to_driverkit_config(self, release, deps):
    """Split a release's package URLs into one driverkit config per flavor.

    URLs are sorted into four flavors: normal, ``rt``, ``cloud`` and ``rpi``.
    A typical input looks like:
      .../linux-headers-4.19.0-23-common_4.19.269-1_all.deb
      .../linux-headers-4.19.0-23-common-rt_4.19.269-1_all.deb
      .../linux-headers-4.19.0-23-rt-amd64_4.19.269-1_amd64.deb
      .../linux-headers-4.19.0-23-cloud-amd64_4.19.269-1_amd64.deb
      .../linux-headers-4.19.0-23-amd64_4.19.269-1_amd64.deb
      .../linux-kbuild-4.19_4.19.282-1_amd64.deb

    Rules:
      * ``common`` headers go to ``rt`` or ``rpi`` when so marked, otherwise
        to both the normal and the ``cloud`` flavor
      * other headers go to exactly one flavor
      * ``kbuild`` is shared by every flavor

    :param release: kernel release string used as the config name prefix
    :param deps: iterable of package URLs
    :return: list of ``repo.DriverKitConfig``, one per flavor that collected
        at least three URLs
    """
    # '' is the normal flavor; insertion order is also the output order
    by_flavor = {'': [], 'rt': [], 'cloud': [], 'rpi': []}

    for dep in deps:
        if 'headers' in dep:
            if '-rt' in dep:
                flavors = ('rt',)
            elif 'common' in dep:
                flavors = ('rpi',) if '-rpi' in dep else ('', 'cloud')
            elif '-cloud' in dep:
                flavors = ('cloud',)
            elif '-rpi' in dep:
                flavors = ('rpi',)
            else:
                flavors = ('',)
            for flavor in flavors:
                by_flavor[flavor].append(dep)
        if 'kbuild' in dep:
            for urls in by_flavor.values():
                urls.append(dep)

    final = []
    for flavor in ('', 'rt', 'cloud', 'rpi'):
        urls = by_flavor[flavor]
        if len(urls) < 3:
            continue
        infix = '-' + flavor + '-' if flavor else '-'
        final.append(repo.DriverKitConfig(release + infix + self.arch, "debian", urls))
    return final
def repo_filter(version):
    """Keep only numeric Fedora release directories numbered 32 or later.

    :param version: directory name from the mirror listing, with or without
        trailing slashes (e.g. ``'38/'``)
    :return: ``True`` for releases >= 32, ``False`` for older releases and
        for names that are not integers
    """
    try:
        release_number = int(version.rstrip('/'))
    except ValueError:
        # non-numeric entries such as 'test/' are not releases
        return False
    return release_number >= 32
def get_package_tree(self, version=''):
    """Return the base64-encoded kernel config published for this release.

    The release name is the last path component of ``self.base_url``.

    :param version: substring the release name must contain; the empty
        string matches every release
    :return: ``{release: [base64_config]}``, or ``{}`` when the release does
        not match ``version`` or the config file cannot be downloaded
    """
    release = os.path.basename(self.base_url.rstrip('/'))
    if version not in release:
        return {}
    # URLs always use '/', so build the address by hand rather than with os.path.join
    defconfig = self.base_url.rstrip('/') + '/flatcar_production_image_kernel_config.txt'
    response = requests.get(defconfig)
    if not response.ok:
        # a 4xx/5xx body is an error page, not a kernel config: skip the release
        return {}
    defconfig_base64 = base64.b64encode(response.content).decode()
    return {release: [defconfig_base64]}
class ProgressCallback(pygit2.RemoteCallbacks):
    """Remote callbacks that draw a click progress bar on stderr while cloning."""

    def __init__(self, name):
        # the bar is created lazily: its length is only known once the
        # first transfer statistics arrive
        self.progress_bar_initialized = False
        self.bar = None
        # repository name shown in the bar label
        self.name = name
        super().__init__()

    def transfer_progress(self, stats):
        """Create the bar on the first call, then advance it on every call.

        :param stats: transfer statistics; ``total_objects`` and
            ``indexed_objects`` are read
        """
        if not self.progress_bar_initialized:
            self.bar = ProgressBar(label='Cloning ' + self.name + ' repository', length=stats.total_objects, file=sys.stderr)
            self.bar.update(1)
            self.progress_bar_initialized = True
        if not self.bar.is_hidden:
            self.bar.update(1, stats.indexed_objects)
        # everything indexed: draw the final state of the bar
        if stats.indexed_objects == stats.total_objects:
            self.bar.render_finish()
def getVersions(self, last_n=0):
    """List the release versions found among the repository's tags.

    Only tags of the exact form ``refs/tags/vMAJOR.MINOR.PATCH`` are
    considered. The ``x.y.0`` releases are used as anchors: with
    ``last_n > 0`` only the newest ``last_n`` anchors are kept, and every
    version (patch releases included) that is equal to or newer than the
    oldest kept anchor is returned.

    :param last_n: number of most recent ``x.y.0`` releases to keep;
        ``0`` keeps them all
    :return: version strings without the leading ``v``, sorted ascending;
        an empty list when the repository has no ``x.y.0`` tag
    """
    re_tags = re.compile(r'^refs/tags/v(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)$')

    all_versions = sorted(
        (os.path.basename(ref).strip('v') for ref in self.repo.references if re_tags.match(ref)),
        key=SemVersion,
    )

    # filtering a sorted list keeps it sorted, no second sort needed
    no_patch_versions = [v for v in all_versions if SemVersion(v).patch == 0]
    if not no_patch_versions:
        # nothing to anchor the selection on
        return []

    # We only get the last_n releases without considering the patch releases if requested
    if last_n > 0:
        no_patch_versions = no_patch_versions[-last_n:]

    # Take every release (x.y.z) that is equal to or greater than the oldest
    # anchor release we are considering.
    oldest = SemVersion(no_patch_versions[0])
    return [v for v in all_versions if SemVersion(v) >= oldest]
def extract_value(self, file_name, key, sep):
    """Read the value assigned to ``key`` in a ``key<sep>value`` style file.

    :param file_name: absolute path, or a bare file name that is looked up
        with ``self.search_file``
    :param key: key to look for at the start of a line (leading whitespace
        is ignored); it is used as a regular expression fragment, as before
    :param sep: separator between key and value; also used as a regular
        expression fragment when matching
    :return: the value with surrounding double quotes and whitespace
        removed, or ``None`` when the file or the key cannot be found
    """
    if os.path.isabs(file_name):
        full_path = file_name
    else:
        full_path = self.search_file(file_name)
    if full_path is None:
        # search_file found nothing: report "no value" rather than crash in open()
        return None

    key_at_line_start = re.compile(r'^' + key + sep)
    # context manager so the handle is closed even on the early return
    with open(full_path) as config_file:
        for line in config_file:
            stripped_line = line.lstrip()
            if key_at_line_start.search(stripped_line):
                tokens = stripped_line.strip().split(sep, 1)
                return tokens[1].strip('"').strip()
    return None
class DistroImageValidation(click.Option):
    """Option that becomes mandatory when ``--distro`` names certain distros.

    The extra keyword argument ``required_if_distro`` lists the distro names
    (compared case-insensitively) for which the option must be supplied.
    """

    def __init__(self, *args, **kwargs):
        # default to None so a missing argument reaches the assertion below
        # instead of failing with a bare KeyError
        self.required_if_distro:list = kwargs.pop("required_if_distro", None)

        assert self.required_if_distro, "'required_if_distro' parameter required"
        # tolerate help=None and keep the appended sentence apart from existing help text
        existing_help = kwargs.get("help") or ""
        kwargs["help"] = (existing_help + " Option is required when distro is " + ", ".join(self.required_if_distro) + ".").strip()
        super(DistroImageValidation, self).__init__(*args, **kwargs)

    def handle_parse_result(self, ctx, opts, args):
        """Reject the command line when a listed distro lacks this option.

        :raises click.UsageError: when the selected distro is listed in
            ``required_if_distro`` and this option was not given
        """
        current_opt:bool = self.name in opts
        # --distro is optional, so it may be absent from the parsed options
        tgt_distro = opts.get("distro")
        if tgt_distro is not None:
            for distro_opt in self.required_if_distro:
                if distro_opt.casefold() != tgt_distro.casefold():
                    continue
                if not current_opt:
                    raise click.UsageError("Missing argument: '" + str(self.name) + "' is required with " + str(distro_opt) + " distro.")
                self.prompt = None
        return super(DistroImageValidation, self).handle_parse_result(ctx, opts, args)
class MinikubeMirror(GitMirror):
    """Builds kernel configurations from the kubernetes/minikube git repository."""

    # First minikube release that ships per-architecture defconfig files
    # (and therefore the first one usable for aarch64).
    PER_ARCH_CONFIG_SINCE = "1.26.0"

    def __init__(self, arch):
        super(MinikubeMirror, self).__init__("kubernetes", "minikube", arch)

    def _has_per_arch_config(self, minikube_version):
        # True when the release names its defconfig files after the architecture
        return SemVersion(minikube_version) >= SemVersion(self.PER_ARCH_CONFIG_SINCE)

    def get_kernel_config_file_name(self, minikube_version):
        """Name of the kernel defconfig file used by ``minikube_version``."""
        if self._has_per_arch_config(minikube_version):
            return "linux_" + self.arch + "_defconfig"
        return "linux_defconfig"

    def get_minikube_config_file_name(self, minikube_version):
        """Name of the minikube defconfig file used by ``minikube_version``."""
        if self._has_per_arch_config(minikube_version):
            return "minikube_" + self.arch + "_defconfig"
        return "minikube_defconfig"

    def get_package_tree(self, version=''):
        """Clone minikube and build one kernel configuration per release.

        :param version: accepted for interface compatibility; not used
        :return: dict mapping minikube version to the configuration dict
            consumed by ``to_driverkit_config``
        """
        self.list_repo()
        sys.stdout.flush()
        kernel_configs = {}
        try:
            for v in self.getVersions(3):
                # minikube has support for aarch64 starting from version 1.26.0.
                # versions older than 1.26.0 are just skipped if building for aarch64.
                if self.arch == "aarch64" and not self._has_per_arch_config(v):
                    continue
                bar = ProgressBar(label="Building config for minikube v{}".format(v), length=1, file=sys.stderr)
                self.checkout_version(v)
                # same meaning as the output of "uname -r"
                kernel_release = self.extract_value(self.get_minikube_config_file_name(v),
                                                    "BR2_LINUX_KERNEL_CUSTOM_VERSION_VALUE", "=")
                # kernelversion is computed as "1_" + minikube version.
                # The reason is how minikube distributes the iso images: two
                # different minikube versions may use the same kernel release
                # built with a different defconfig file. Having the minikube
                # version in the kernelversion makes it easier to get the right
                # falco drivers from within a minikube instance.
                # same meaning as "uname -v"
                kernel_version = "1_" + v
                defconfig_base64 = self.encode_base64_defconfig(self.get_kernel_config_file_name(v))
                kernel_configs[v] = {
                    self.KERNEL_VERSION: kernel_version,
                    self.KERNEL_RELEASE: kernel_release,
                    self.DISTRO_TARGET: "minikube",
                    self.BASE_64_CONFIG_DATA: defconfig_base64,
                }
                bar.update(1)
                bar.render_finish()
        finally:
            # always remove the temporary clone, even when a release fails to parse
            self.cleanup_repo()
        return kernel_configs
def tumbleweed_filter(dist):
    """Accept only directory entries that start with ``tumbleweed``.

    :param dist: entry from a mirror listing, optionally prefixed with ``./``
    :return: ``True`` when the entry names a tumbleweed directory
    """
    return dist.startswith(('tumbleweed', './tumbleweed'))
def to_driverkit_config(self, release, deps):
    """Gather all package URLs of a release into one ``opensuse`` config.

    :param release: kernel release the packages belong to
    :param deps: iterable of package URLs
    :return: the values view of a dict holding at most one
        ``repo.DriverKitConfig`` (none when ``deps`` is empty)
    """
    # matches driverkit target cli option
    target = 'opensuse'

    # configs keyed by target; only one target is ever used here
    dk_configs = {}

    for dep in deps:
        existing = dk_configs.get(target)
        if existing:
            existing.headers.append(dep)
        else:
            # first URL of the release creates the config
            dk_configs[target] = repo.DriverKitConfig(release, target, [dep])

    return dk_configs.values()
class OracleRepository(rpm.RpmRepository):
    """RPM repository whose kernel query also selects Oracle UEK packages."""

    @classmethod
    def kernel_package_query(cls):
        """Return the SQL predicate matching the stock and UEK kernel packages."""
        return '''(name IN ('kernel', 'kernel-devel', 'kernel-uek', 'kernel-uek-devel'))'''


class OracleMirror(repo.Distro):
    """Distro whose repositories are generated from known Oracle Linux URL layouts."""

    def __init__(self, arch):
        # no mirrors are registered: list_repos() is overridden below
        super(OracleMirror, self).__init__([], arch)

    def repos(self):
        """Return every candidate repository URL, UEK ones first."""
        # one root per Oracle Linux major version (6 through 9)
        roots = [
            'http://yum.oracle.com/repo/OracleLinux/OL6',
            'http://yum.oracle.com/repo/OracleLinux/OL7',
            'http://yum.oracle.com/repo/OracleLinux/OL8',
            'http://yum.oracle.com/repo/OracleLinux/OL9',
        ]

        # Oracle seems to stick to 0 thru 9 for UEK versions, wrapping back to 0 after 9.
        # Each version is probed with and without the /latest subpath (Oracle 6 uses it).
        uek_urls = [
            f'{root}/UEKR{number}/{subdir}{self.arch}/'
            for root in roots
            for number in range(0, 10)
            for subdir in ('', 'latest/')
        ]

        # other known subpaths, in the order they are probed
        other_layouts = (
            'latest',         # Oracle 6 & 7
            'MODRHCK',        # Oracle 6 & 7
            'UEK/latest',     # Oracle 6 has this non-versioned UEK subpath
            'baseos/latest',  # Oracle 8 & 9
            'appstream',      # Oracle 8 & 9
        )
        other_urls = [
            f'{root}/{layout}/{self.arch}/'
            for root in roots
            for layout in other_layouts
        ]

        return uek_urls + other_urls

    def list_repos(self):
        """Wrap every candidate URL in an ``OracleRepository``."""
        return list(map(OracleRepository, self.repos()))

    def to_driverkit_config(self, release, deps):
        """Build a config from the first dep containing ``devel``; None if there is none."""
        devel = next((dep for dep in deps if dep.find("devel") != -1), None)
        if devel is None:
            return None

        if 'uek' in release:
            # uek kernels have kernel versions of "2", example:
            #   # uname -a
            #   Linux vm-ol8 5.15.0-100.96.32.el8uek.x86_64 #2 SMP Tue ...
            return repo.DriverKitConfig(release, "ol", devel, kernelversion="2")

        # everything else keeps the default kernelversion
        return repo.DriverKitConfig(release, "ol", devel)
class PhotonOsMirror(repo.Distro):
    """Distro covering the Photon OS 3.0, 4.0 and 5.0 package repositories."""

    # (version, repository tag) pairs: every version has a plain, a "_release"
    # and an "_updates" repository
    PHOTON_OS_VERSIONS = [
        (release_version, tag)
        for release_version in ('3.0', '4.0', '5.0')
        for tag in ('', '_release', '_updates')
    ]

    def __init__(self, arch):
        # no mirrors are registered: list_repos() is overridden below
        super(PhotonOsMirror, self).__init__([], arch)

    def list_repos(self):
        """Return one ``PhotonOsRepository`` per (version, tag) pair for this arch."""
        url_template = 'https://packages.vmware.com/photon/{v}/photon{r}_{v}_{a}/'
        found = []
        for version, repo_tag in self.PHOTON_OS_VERSIONS:
            found.append(PhotonOsRepository(url_template.format(v=version, r=repo_tag, a=self.arch)))
        return found

    def to_driverkit_config(self, release, deps):
        """Build a config from the first ``-devel`` dep, or return None if there is none."""
        # PhotonOS kernel packages have a ".$arch" suffix, so the crawled
        # release differs from `uname -r` output; drop the suffix by hand.
        arch_suffix = "." + self.arch
        if release.endswith(arch_suffix):
            release = release[:-len(arch_suffix)]

        devel = next((dep for dep in deps if dep.find("-devel") != -1), None)
        if devel is None:
            return None
        return repo.DriverKitConfig(release, "photon", devel)
class RedhatContainer(repo.ContainerDistro):
    """Container-backed distro that lists kernel-devel versions with repoquery."""

    def __init__(self, image):
        super(RedhatContainer, self).__init__(image)

    def get_kernel_versions(self):
        """Run repoquery in the image and return ``{kernel version: []}``.

        Only output lines containing ``kernel-devel-0:`` contribute; the key is
        whatever follows that prefix on the line.
        """
        output = Container(self.image).run_cmd("repoquery --show-duplicates kernel-devel")
        matches = (re.search("(?<=kernel-devel-0:).*", line) for line in output)
        return {found.group(0): [] for found in matches if found}

    def to_driverkit_config(self, release, deps):
        """Return a ``redhat`` config carrying all ``deps`` as headers."""
        headers = list(deps)
        return repo.DriverKitConfig(release, "redhat", headers)
class Repository(object):
    """Interface of a single package repository."""

    def get_package_tree(self, version=''):
        """Return the repository's packages; must be implemented by subclasses."""
        raise NotImplementedError

    def __str__(self):
        raise NotImplementedError


class DriverKitConfig(object):
    """Value object describing one driverkit build configuration.

    ``headers`` and ``kernelconfigdata`` are only set as attributes when a
    value is supplied, so an omitted value leaves the attribute missing
    rather than set to None.
    """

    def __init__(self, kernelrelease, target, headers=None, kernelversion="1", kernelconfigdata=None):
        """Store the configuration fields.

        :param kernelrelease: kernel release string.
        :param target: driverkit target name.
        :param headers: a list of header URLs, or a single value that gets
            wrapped in a one-element list.
        :param kernelversion: kernel version; must be a ``str``.
        :param kernelconfigdata: optional kernel config payload.
        :raises TypeError: if ``kernelversion`` is not a string.
        """
        if not isinstance(kernelversion, str):
            raise TypeError('kernelversion should be a string')
        self.kernelversion = kernelversion
        self.kernelrelease = kernelrelease
        self.target = target
        if kernelconfigdata is not None:
            self.kernelconfigdata = kernelconfigdata

        if isinstance(headers, list):
            self.headers = headers
        elif headers is not None:
            # Fake single-list
            self.headers = [headers]


def to_s(s):
    """Return ``str(s)``, or an empty string for None (progress-bar item label)."""
    if s is None:
        return ''
    return str(s)


class Mirror(object):
    """Base class of anything that can list repositories for one architecture."""

    def __init__(self, arch):
        self.arch = arch

    def list_repos(self):
        """Return the repositories of this mirror; must be implemented by subclasses."""
        raise NotImplementedError

    def get_package_tree(self, version=''):
        """Merge the package trees of all repositories.

        :return: dict mapping each release to the set of its dependencies,
            unioned over every repository returned by ``list_repos()``.
        """
        packages = {}
        repos = self.list_repos()
        with click.progressbar(repos, label='Listing packages', file=sys.stderr, item_show_func=to_s) as repos:
            for repo in repos:
                for release, dependencies in repo.get_package_tree(version).items():
                    packages.setdefault(release, set()).update(dependencies)
        return packages


class Distro(Mirror):
    """A mirror that aggregates the repositories of several mirrors."""

    def __init__(self, mirrors, arch):
        self.mirrors = mirrors
        super().__init__(arch)

    def list_repos(self):
        """Return the concatenated ``list_repos()`` results of every mirror."""
        repos = []
        with click.progressbar(
                self.mirrors, label='Checking repositories', file=sys.stderr, item_show_func=to_s) as mirrors:
            for mirror in mirrors:
                repos.extend(mirror.list_repos())
        return repos


class ContainerDistro(ABC):
    """Abstract base of distros whose kernels are discovered through a container image."""

    def __init__(self, image):
        self.image = image

    @classmethod
    def __subclasshook__(cls, other):
        # any class exposing a callable get_kernel_versions counts as a subclass
        hook = getattr(other, 'get_kernel_versions', None)
        return callable(hook)

    @abstractmethod
    def get_kernel_versions(self):
        """Return the kernel versions available in the image."""
        pass
def v8_only(ver):
    """Repo filter: keep directory entries whose name starts with ``8``."""
    return ver.startswith('8')


def v9_only(ver):
    """Repo filter: keep directory entries whose name starts with ``9``."""
    return ver.startswith('9')


class RockyLinuxMirror(repo.Distro):
    """Distro covering the Rocky Linux 8 and 9 BaseOS/AppStream repositories."""

    def __init__(self, arch):
        pub = 'http://dl.rockylinux.org/pub/rocky/'
        vault = 'http://dl.rockylinux.org/vault/rocky/'
        base_os = 'BaseOS/' + arch + '/os/'
        app_stream = 'AppStream/' + arch + '/os/'

        mirrors = [
            # Rocky Linux 8
            rpm.RpmMirror(pub, base_os, v8_only),
            rpm.RpmMirror(pub, app_stream, v8_only),
            rpm.RpmMirror(vault, base_os, v8_only),
            # Rocky Linux 9
            rpm.RpmMirror(pub, base_os, v9_only),
            rpm.RpmMirror(pub, app_stream, v9_only),
            # The vault repo is not crawled for Rocky Linux 9 (it was not yet
            # available when this list was written).
        ]
        super(RockyLinuxMirror, self).__init__(mirrors, arch)

    def to_driverkit_config(self, release, deps):
        """Build a config from the first dep containing ``devel``; None if there is none."""
        devel = next((dep for dep in deps if dep.find("devel") != -1), None)
        if devel is None:
            return None
        return repo.DriverKitConfig(release, "rocky", devel)
class RpmRepository(repo.Repository):
    """An RPM repository whose package list is published as a sqlite ``primary_db``."""

    def __init__(self, base_url):
        self.base_url = base_url

    def __str__(self):
        return self.base_url

    @classmethod
    def get_loc_by_xpath(cls, text, expr):
        """Evaluate ``expr`` against the XML in ``text``.

        :return: the first match, or None when the expression matches nothing.
        """
        e = etree.fromstring(text)
        loc = e.xpath(expr, namespaces={
            'common': 'http://linux.duke.edu/metadata/common',
            'repo': 'http://linux.duke.edu/metadata/repo',
            'rpm': 'http://linux.duke.edu/metadata/rpm'
        })

        # if unable to find the expression in the XML, return None
        if not loc:
            return None

        # else return the first match
        return loc[0]

    @classmethod
    def kernel_package_query(cls):
        """Return the SQL predicate selecting kernel packages; subclasses override it."""
        return '''name IN ('kernel', 'kernel-devel', 'kernel-ml', 'kernel-ml-devel')'''

    @classmethod
    def build_base_query(cls, filter=''):
        """Return ``(sql, args)`` selecting kernel packages, optionally for one version."""
        base_query = '''SELECT version || '-' || release || '.' || arch, pkgkey FROM packages WHERE {}'''.format(
            cls.kernel_package_query())
        if not filter:
            return base_query, ()
        else:
            # if filtering, match anything like 5.6.6 (version) or 5.6.6-300.fc32 (version || '-' || release)
            return base_query + ''' AND (version = ? OR version || '-' || "release" = ?)''', (filter, filter)

    @classmethod
    def parse_repo_db(cls, repo_db, filter=''):
        """Query the sqlite file at ``repo_db`` for kernel packages and their dependencies.

        :return: list of ``(version, location_href)`` rows.
        """
        base_query, args = cls.build_base_query(filter)
        query = '''WITH RECURSIVE transitive_deps(version, pkgkey) AS (
                {}
                UNION
                SELECT transitive_deps.version, provides.pkgkey
                    FROM provides
                    INNER JOIN requires USING (name, flags, epoch, version, "release")
                    INNER JOIN transitive_deps ON requires.pkgkey = transitive_deps.pkgkey
            ) SELECT transitive_deps.version, location_href FROM packages INNER JOIN transitive_deps using(pkgkey);
        '''.format(base_query)

        db = sqlite3.connect(repo_db)
        try:
            cursor = db.cursor()
            cursor.execute(query, args)
            return cursor.fetchall()
        finally:
            # always release the handle on the (temporary) database file
            db.close()

    def get_repodb_url(self):
        """Return the URL of the ``primary_db`` listed in repomd.xml, or None."""
        repomd = get_url(self.base_url + 'repodata/repomd.xml')
        if not repomd:
            return None
        pkglist_url = self.get_loc_by_xpath(repomd, '//repo:repomd/repo:data[@type="primary_db"]/repo:location/@href')
        if not pkglist_url:
            return None
        return self.base_url + pkglist_url

    def get_package_tree(self, filter=''):
        """Return ``{kernel version: set of package URLs}`` for this repository.

        An empty dict is returned when the metadata is missing or a request fails.
        """
        packages = {}
        try:
            repodb_url = self.get_repodb_url()
            if not repodb_url:
                return {}
            repodb = get_url(repodb_url)
            if not repodb:
                return {}
        except requests.exceptions.RequestException:
            traceback.print_exc()
            return {}
        with tempfile.NamedTemporaryFile() as tf:
            tf.write(repodb)
            tf.flush()
            for version, url in self.parse_repo_db(tf.name, filter):
                packages.setdefault(version, set()).add(self.base_url + url)
        return packages


class RpmMirror(repo.Mirror):
    """A directory listing whose sub-directories each hold an RPM repository."""

    def __init__(self, base_url, variant, repo_filter=None):
        """
        :param base_url: URL of the HTML directory listing to crawl.
        :param variant: path appended after each distribution directory.
        :param repo_filter: optional predicate on the directory link; all
            links are accepted when omitted.
        """
        self.base_url = base_url
        self.variant = variant
        if repo_filter is None:
            repo_filter = lambda _: True
        self.repo_filter = repo_filter
        self.url = base_url

    def __str__(self):
        return self.base_url

    def dist_url(self, dist):
        """Return the repository URL for the directory link ``dist``."""
        return '{}{}{}'.format(self.base_url, dist, self.variant)

    def dist_exists(self, dist):
        """Return True when the repository URL for ``dist`` answers without an HTTP error."""
        try:
            r = requests.get(
                self.dist_url(dist),
                headers={  # some URLs require a user-agent, otherwise they return HTTP 406 - this one is fabricated
                    'user-agent': 'dummy'
                }
            )
            r.raise_for_status()
        except requests.exceptions.RequestException:
            return False
        return True

    def _make_repository(self, dist):
        """Create the repository object for ``dist``; subclasses override this hook."""
        return RpmRepository(self.dist_url(dist))

    def list_repos(self):
        """Crawl the directory listing and return one repository per usable sub-directory.

        Only relative directory links accepted by ``repo_filter`` and whose
        repository URL exists are kept.
        """
        dists = requests.get(
            self.base_url,
            headers={  # some URLs require a user-agent, otherwise they return HTTP 406 - this one is fabricated
                'user-agent': 'dummy'
            }
        )
        dists.raise_for_status()
        doc = html.fromstring(dists.content, self.base_url)
        links = doc.xpath('/html/body//a[not(@href="../")]/@href')
        return [self._make_repository(dist) for dist in links
                if dist.endswith('/')
                and not dist.startswith('/')
                and not dist.startswith('?')
                and not dist.startswith('http')
                and self.repo_filter(dist)
                and self.dist_exists(dist)
                ]


class SUSERpmMirror(RpmMirror):
    '''
    SUSERpmMirror looks like a regular RpmMirror, except that it requires
    the arch in the constructor. The arch is passed through to SUSERpmRepository,
    which uses it to find the correct kernel-default-devel in the package listing.
    '''

    def __init__(self, base_url, variant, arch, repo_filter=None):
        super(SUSERpmMirror, self).__init__(base_url, variant, repo_filter)
        self.arch = arch

    def _make_repository(self, dist):
        '''
        Overridden from RpmMirror, exchanging RpmRepository for SUSERpmRepository.
        '''
        return SUSERpmRepository(self.dist_url(dist), self.arch)


class SUSERpmRepository(RpmRepository):
    '''
    RPM repository whose primary package listing is XML rather than sqlite.
    '''

    # the kernel headers package name pattern to search for in the package listing XML
    _kernel_devel_pattern = 'kernel-default-devel-'

    def __init__(self, base_url, arch):
        '''
        Constructor, which sets the base URL and the arch.
        The arch is used for finding the correct package in the repomd.
        '''
        super(SUSERpmRepository, self).__init__(base_url)
        self.arch = arch

    def get_repodb_url(self):
        '''
        SUSE stores their primary package listing under a different path in the XML from a normal RPM repomd.
        Returns None when repomd.xml or the "primary" entry is missing.
        '''
        repomd = get_url(self.base_url + 'repodata/repomd.xml')
        if not repomd:
            return None
        pkglist_url = self.get_loc_by_xpath(repomd, '//repo:repomd/repo:data[@type="primary"]/repo:location/@href')

        # if no pkglist was found, return None
        if not pkglist_url:
            return None

        # else add the pkglist_url to the base_url
        return self.base_url + pkglist_url

    def parse_kernel_release(self, kernel_devel_pkg):
        '''
        Given the kernel devel package string, parse it for the kernel release
        by trimming off the front bits of the string and the extension.

        Example:
            x86_64/kernel-default-devel-5.14.21-150400.22.1.x86_64.rpm -> 5.14.21-150400.22.1.x86_64
        '''
        trimmed = kernel_devel_pkg.replace(f'{self.arch}/{self._kernel_devel_pattern}', '')
        version = trimmed.replace('.rpm', '')

        return version

    def build_kernel_devel_noarch_url(self, kernel_release):
        '''
        A simple method for building the noarch kernel-devel package using the kernel release.
        The kernel release will contain the package arch, but kernel-devel will be a noarch package.
        '''
        # swap the arch only inside the release, so a base URL that happens to
        # contain the arch string is left untouched
        noarch_release = kernel_release.replace(self.arch, 'noarch')
        return f'{self.base_url}noarch/kernel-devel-{noarch_release}.rpm'

    def open_repo(self, repo_path, isZstd):
        '''
        Search the package listing at repo_path for the kernel-default-devel package of this arch.
        Returns the matched href, or None. With isZstd the file is zstd-decompressed first;
        otherwise it is read as UTF-8 text and UnicodeDecodeError is raised if it is not.
        '''
        package_match = f'{self.arch}/{self._kernel_devel_pattern}'
        # regex searching through a file is more memory efficient
        # than parsing the xml into an object structure with lxml etree
        if isZstd:
            with open(repo_path, mode='rb') as f:
                dctx = zstd.ZstdDecompressor(max_window_size=2147483648)
                stream_reader = dctx.stream_reader(f)
                text = io.TextIOWrapper(stream_reader, encoding='utf-8').read()
        else:
            # pin the encoding: callers use UnicodeDecodeError to detect
            # compressed data, which must not depend on the locale
            with open(repo_path, mode='r', encoding='utf-8') as f:
                text = f.read()

        search = re.search(f'.*href="({package_match}.*rpm)', text)
        if search:
            return search.group(1)
        return None

    def get_package_tree(self, filter=''):
        '''
        Build the package tree for SUSE, which finds the repomd, parses it for the primary package listing,
        and queries for the kernel-default-devel package url. SUSE stores the primary package listing in XML.
        Once parsed, use the package URL to parse the kernel release and determine the kernel-devel*noarch package URL.
        Returns an empty dict when the listing is missing, a request fails, or no package is found.
        '''
        # attempt to query for the repomd - bail out if it is missing
        try:
            repodb_url = self.get_repodb_url()
            if not repodb_url:
                return {}
            repodb = get_url(repodb_url)
            if not repodb:
                return {}
        except requests.exceptions.RequestException:
            # traceback.print_exc()  # extremely verbose, uncomment if debugging
            return {}

        # write the repodb xml to a tempfile for parsing; leaving the
        # with-block closes and deletes the file
        with tempfile.NamedTemporaryFile() as tf:
            tf.write(repodb)
            tf.flush()
            try:
                kernel_default_devel_pkg_url = self.open_repo(tf.name, False)
            except UnicodeDecodeError:
                # not text: retry as a zstd-compressed listing
                kernel_default_devel_pkg_url = self.open_repo(tf.name, True)

        # check to ensure a kernel_devel_pkg was found
        if not kernel_default_devel_pkg_url:
            return {}

        # parse out the kernel release from the url, faster than re-parsing the xml
        parsed_kernel_release = self.parse_kernel_release(kernel_default_devel_pkg_url)

        # SUSE combines the kernel-default-devel package and the
        # kernel-devel*.noarch package for compilation, so return both
        return {
            parsed_kernel_release: {
                self.base_url + kernel_default_devel_pkg_url,
                self.build_kernel_devel_noarch_url(parsed_kernel_release),
            }
        }
class TalosMirror(GitMirror):
    """Git-based mirror deriving kernel configs from the talos and pkgs repositories."""

    def __init__(self, arch):
        self.backup_repo = None
        self.pkgs_repo = None
        super(TalosMirror, self).__init__("siderolabs", "talos", fixup_deb_arch(arch))

    def get_package_tree(self, version=''):
        """Return ``{talos version: kernel config dict}`` for the crawled talos versions.

        For every talos version the referenced "pkgs" revision is checked out
        to read the kernel release and the defconfig for this arch. Versions
        for which either lookup returns None are skipped.
        """
        self.list_repo()
        sys.stdout.flush()
        kernel_configs = {}
        talos_versions = self.getVersions(3)

        # Clone pkgs repo
        work_dir = tempfile.mkdtemp(prefix="pkgs-")
        self.pkgs_repo = pygit2.clone_repository("https://github.com/siderolabs/pkgs.git", work_dir, callbacks=ProgressCallback("pkgs"))

        # Store "talos" repo as we switch to use "pkgs" repo
        self.backup_repo = self.repo

        for v in talos_versions:
            # Use correct repo
            self.repo = self.backup_repo
            bar = ProgressBar(label="Building config for talos v{}".format(v), length=1, file=sys.stderr)

            self.checkout_version(v)

            # Fetch "pkgs" repo hash
            pkgs_ver = self.extract_line("pkg/machinery/gendata/data/pkgs")
            if pkgs_ver is None:
                continue

            sempkgs_ver = SemVersion(pkgs_ver[1:])

            # Extract the commit hash if needed, else just use the tag name (eg: v1.4.0)
            # Note: full tag is like: v1.5.0-alpha.0-15-g813b3c3 or v1.5.0.
            # The parsed prerelease is a tuple of dot-separated identifiers,
            # e.g. ("alpha", "0-15-g813b3c3"), so re-join it before looking
            # for the "-g<hash>" part.
            if sempkgs_ver.prerelease:
                prerelease = ".".join(sempkgs_ver.prerelease)
                if "-g" in prerelease:
                    pkgs_ver = prerelease.rsplit("-g", 1)[1]

            # Use "pkgs" repo
            self.repo = self.pkgs_repo

            # Checkout required hash
            self.checkout_hash(pkgs_ver)

            # same meaning as the output of "uname -r"
            kernel_release = self.extract_value("Pkgfile", "linux_version", ":")
            # Skip when we cannot load a kernel_release
            if kernel_release is None:
                continue

            # kernelversion is computed as "1_" + talos version.
            # The reason behind that is due to how talos distributes the iso images.
            # It could happen that two different talos versions use the same kernel release but
            # built with a different defconfig file. So having the talos version in the kernelversion
            # makes it easier to get the right falco drivers from within a talos instance.
            # same meaning as "uname -v"
            kernel_version = "1_v" + v
            defconfig_base64 = self.encode_base64_defconfig("config-" + self.arch)
            kernel_configs[v] = {
                self.KERNEL_VERSION: kernel_version,
                self.KERNEL_RELEASE: kernel_release + "-talos",
                self.DISTRO_TARGET: "talos",
                self.BASE_64_CONFIG_DATA: defconfig_base64,
            }
            bar.update(1)
            bar.render_finish()

        # Clean up both clones. The loop may leave self.repo pointing at
        # either of them, so select each one explicitly.
        # NOTE(review): assumes cleanup_repo() acts on self.repo - confirm in GitMirror.
        self.repo = self.backup_repo
        self.cleanup_repo()
        self.repo = self.pkgs_repo
        self.cleanup_repo()
        return kernel_configs
class UbuntuMirror(repo.Distro):
    """Distro covering the Ubuntu archive, security, ports and kernel.org mirrors."""

    def __init__(self, arch):
        deb_arch = fixup_deb_arch(arch)
        mirror_urls = (
            'http://mirrors.edge.kernel.org/ubuntu/',
            'http://security.ubuntu.com/ubuntu/',
            'http://ports.ubuntu.com/ubuntu-ports/',
            'http://archive.ubuntu.com/ubuntu/',
        )
        mirrors = [deb.DebMirror(url, deb_arch) for url in mirror_urls]
        super(UbuntuMirror, self).__init__(mirrors, deb_arch)

    def to_driverkit_config(self, release, deps):
        """Group the header packages of ``release`` by Ubuntu flavor.

        ``release`` has the form ``<kernelrelease>/<kernelversion>``. Returns a
        dict-values view with one config per ``ubuntu-<flavor>`` target.
        """
        by_target = {}
        krel, kver = release.split("/")

        for dep in deps:
            if 'headers' not in dep:
                continue

            # The flavor is the text after 'linux-' in the url subdir, e.g.
            # http://security.ubuntu.com/ubuntu/pool/main/l/linux-oracle/linux-headers-4.15.0-1087-oracle_4.15.0-1087.95_amd64.deb
            # A version suffix is dropped (aws-5.19 -> aws); without a match
            # the flavor defaults to 'generic'.
            captured = re.search(r"^.*l/linux-(.+)/.*$", dep)
            flavor = 'generic' if captured is None else captured.group(1).partition('-')[0]

            target = f'ubuntu-{flavor}'  # expose the correct ubuntu flavor
            known = by_target.get(target)
            if known is not None:
                # target already present: just add the new url to its headers
                known.headers.append(dep)
                continue

            # add flavor to release
            by_target[target] = repo.DriverKitConfig(f'{krel}-{flavor}', target, [dep], kver)

        return by_target.values()
def get_first_of(urls):
    """Return the content of the first URL in ``urls`` that yields any.

    A URL whose download returns None is skipped unless it is the last one,
    in which case None is returned. If the loop ends without returning, the
    most recent exception is raised (a generic one for an empty list).
    """
    failure = Exception('Empty url list')
    for position, candidate in enumerate(urls):
        try:
            payload = get_url(candidate)
            is_last = position == len(urls) - 1
            # a None payload only counts when there is nothing left to try
            if is_last or payload is not None:
                return payload
        except Exception as err:
            failure = err
    raise failure


def make_bytes(s):
    """Encode ``s`` as UTF-8, returning it unchanged if it has no ``encode``."""
    try:
        encoded = s.encode('utf-8')
    except AttributeError:
        return s
    return encoded


def make_string(s):
    """Decode ``s`` from UTF-8, returning it unchanged if it has no ``decode``."""
    try:
        decoded = s.decode('utf-8')
    except AttributeError:
        return s
    return decoded
e.g.: `0.2.0` 15 | 16 | ```bash 17 | git pull 18 | git checkout main 19 | git tag vx.y.z 20 | git push origin vx.y.z 21 | ``` 22 | > N.B.: do NOT use an annotated tag 23 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | click 3 | requests 4 | docker 5 | semantic-version 6 | pygit2 7 | beautifulsoup4 8 | rpmfile 9 | zstandard -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # 3 | # Copyright (C) 2023 The Falco Authors. 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License.
13 | 14 | #!/usr/bin/env python 15 | 16 | from setuptools import setup, find_packages 17 | 18 | 19 | setup(name='kernel-crawler', 20 | version='1.0.0', 21 | description='Falcosecurity kernel crawler', 22 | author='Grzegorz Nosek', 23 | author_email='grzegorz.nosek@sysdig.com', 24 | url='https://falco.org/', 25 | entry_points = { 26 | 'console_scripts': [ 27 | 'kernel-crawler = kernel_crawler.main:cli', 28 | ], 29 | }, 30 | packages=find_packages(), 31 | install_requires=[ 32 | 'click', 33 | 'requests', 34 | 'lxml', 35 | 'docker', 36 | 'semantic-version', 37 | 'pygit2', 38 | 'beautifulsoup4', 39 | 'rpmfile', 40 | 'zstandard' 41 | ], 42 | ) 43 | -------------------------------------------------------------------------------- /site/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Kernel Crawler 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 23 | 24 | 25 |
26 | falco logo 27 |
28 |
29 | Architecture: 30 |
31 |
32 | Target: 33 |
34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 |
Target | Kernel Release | Kernel Version | Headers | Config | Link
46 | 47 | 151 | 152 | --------------------------------------------------------------------------------