├── .github ├── dependabot.yml ├── pull_request_template.md └── workflows │ ├── daily_security.yml │ ├── publish_dev_artifacts.yml │ ├── publish_release_artifacts.yml │ └── rust.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.adoc ├── build.rs ├── deny.toml ├── deploy └── crd │ └── repository.crd.yaml ├── docs ├── antora.yml └── modules │ └── ROOT │ ├── nav.adoc │ └── pages │ ├── commandline_args.adoc │ ├── configuration.adoc │ ├── index.adoc │ ├── installation │ ├── binaries.adoc │ └── building.adoc │ ├── jobs.adoc │ ├── limitations.adoc │ ├── monitoring │ ├── logs.adoc │ └── restarts.adoc │ ├── services.adoc │ └── stages │ ├── cleanup.adoc │ └── overview.adoc ├── packaging ├── buildrpm.sh ├── config │ └── agent.conf ├── debian │ ├── postinst │ └── service └── rpm │ ├── SOURCES │ └── stackable-agent-VERSION │ │ └── usr │ │ └── lib │ │ └── systemd │ │ └── system │ │ └── stackable-agent.service │ └── SPECS │ └── stackable-agent.spec └── src ├── bin ├── generate_doc.rs └── stackable-agent.rs ├── config ├── config_documentation │ ├── bootstrap_file.adoc │ ├── config_directory.adoc │ ├── data_directory.adoc │ ├── hostname.adoc │ ├── log_directory.adoc │ ├── no_config.adoc │ ├── package_directory.adoc │ ├── plugin_directory.adoc │ ├── pod_cidr.adoc │ ├── server_cert_file.adoc │ ├── server_ip_address.adoc │ ├── server_key_file.adoc │ ├── server_port.adoc │ ├── session.adoc │ └── tags.adoc └── mod.rs ├── fsext.rs ├── lib.rs └── provider ├── cleanup.rs ├── error.rs ├── kubernetes ├── accessor.rs ├── mod.rs └── status.rs ├── mod.rs ├── repository ├── mod.rs ├── package.rs ├── repository_spec.rs └── stackablerepository.rs ├── states.rs ├── states ├── pod.rs └── pod │ ├── creating_config.rs │ ├── creating_service.rs │ ├── downloading.rs │ ├── downloading_backoff.rs │ ├── initializing.rs │ ├── installing.rs │ ├── running.rs │ ├── setup_failed.rs │ ├── starting.rs │ ├── terminated.rs │ └── waiting_config_map.rs └── systemdmanager ├── 
journal_reader.rs ├── manager.rs ├── mod.rs ├── service.rs ├── systemd1_api.rs └── systemdunit.rs /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | labels: 8 | - "type/dependencies" 9 | reviewers: 10 | - "stackabletech/developers" 11 | 12 | - package-ecosystem: "cargo" 13 | directory: "/" 14 | schedule: 15 | interval: "weekly" 16 | labels: 17 | - "type/dependencies" 18 | reviewers: 19 | - "stackabletech/rust-developers" 20 | ignore: 21 | # We never want to be notified about a kube-rs update. 22 | # It often contains breaking changes, so it has to be updated manually anyway, 23 | # and it needs to be updated together with kube-runtime, kube-derive etc. 24 | # Also: We need to use the version from Krustlet to avoid conflicts 25 | - dependency-name: "kube*" 26 | - dependency-name: "k8s-openapi" 27 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | 3 | ## Review Checklist 4 | - [ ] Code contains useful comments 5 | - [ ] (Integration-)Test cases added (or not applicable) 6 | - [ ] Documentation added (or not applicable) 7 | - [ ] Changelog updated (or not applicable) 8 | -------------------------------------------------------------------------------- /.github/workflows/daily_security.yml: -------------------------------------------------------------------------------- 1 | name: Security audit 2 | on: 3 | schedule: 4 | - cron: '0 0 * * *' 5 | workflow_dispatch: 6 | 7 | jobs: 8 | audit: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2.3.5 12 | - uses: actions-rs/audit-check@v1.2.0 13 | with: 14 | token: ${{ secrets.GITHUB_TOKEN }} 
-------------------------------------------------------------------------------- /.github/workflows/publish_dev_artifacts.yml: -------------------------------------------------------------------------------- 1 | name: Publish-Dev-Artifacts 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | CARGO_INCREMENTAL: '0' 12 | CARGO_PROFILE_DEV_DEBUG: '0' 13 | RUSTFLAGS: "-D warnings -W rust-2021-compatibility" 14 | REPO_APT_DEV_URL: https://repo.stackable.tech/repository/deb-dev 15 | REPO_RPM_DEV_URL: https://repo.stackable.tech/repository/rpm-dev 16 | 17 | jobs: 18 | debian10: 19 | runs-on: debian10 20 | steps: 21 | - uses: actions/checkout@v2.3.5 22 | - name: Change version if is PR 23 | if: ${{ github.event_name == 'pull_request' }} 24 | # We use "mr" instead of "pr" to denote pull request builds, as this prefix comes before "nightly" when lexically 25 | # sorting packages by version. This means that when installing the package without specifying a version the 26 | # nightly version is considered more current than mr versions and installed by default 27 | run: sed -i -e 's/^version = "\([0-9]\+\.[0-9]\+\.[0-9]\+\).*/version = "\1-mr${{ github.event.number }}"/' Cargo.toml 28 | - name: Build 29 | run: ~/.cargo/bin/cargo build --verbose --release 30 | - name: Build apt package 31 | run: ~/.cargo/bin/cargo deb --manifest-path Cargo.toml --no-build 32 | - name: Publish apt package 33 | env: 34 | NEXUS_PASSWORD: ${{ secrets.NEXUS_PASSWORD }} 35 | if: env.NEXUS_PASSWORD != null 36 | run: >- 37 | /usr/bin/curl 38 | --fail 39 | -u 'github:${{ secrets.NEXUS_PASSWORD }}' 40 | -H "Content-Type: multipart/form-data" 41 | --data-binary "@./$(find target/debian/ -name *.deb)" 42 | "${{ env.REPO_APT_DEV_URL }}/" 43 | - name: Clean 44 | run: ~/.cargo/bin/cargo clean 45 | 46 | centos: 47 | runs-on: centos${{ matrix.node }} 48 | strategy: 49 | matrix: 50 | node: [ 7, 8 ] 51 | steps: 52 | - uses: actions/checkout@v2.3.5 53 | - 
name: Change version if is PR 54 | if: ${{ github.event_name == 'pull_request' }} 55 | # We use "mr" instead of "pr" to denote pull request builds, as this prefix comes before "nightly" when lexically 56 | # sorting packages by version. This means that when installing the package without specifying a version the 57 | # nightly version is considered more current than mr versions and installed by default 58 | run: sed -i -e 's/^version = "\([0-9]\+\.[0-9]\+\.[0-9]\+\).*/version = "\1-mr${{ github.event.number }}"/' Cargo.toml 59 | - name: Build 60 | run: ~/.cargo/bin/cargo build --verbose --release 61 | - name: Build RPM package 62 | run: packaging/buildrpm.sh stackable-agent 63 | - name: Publish RPM package 64 | env: 65 | NEXUS_PASSWORD: ${{ secrets.NEXUS_PASSWORD }} 66 | if: env.NEXUS_PASSWORD != null 67 | run: >- 68 | /usr/bin/curl 69 | --fail 70 | -u 'github:${{ secrets.NEXUS_PASSWORD }}' 71 | --upload-file "./$(find target/rpm/RPMS/x86_64/ -name *.rpm)" 72 | "${{ env.REPO_RPM_DEV_URL }}/el${{ matrix.node }}/" 73 | - name: Clean 74 | run: ~/.cargo/bin/cargo clean 75 | -------------------------------------------------------------------------------- /.github/workflows/publish_release_artifacts.yml: -------------------------------------------------------------------------------- 1 | # ============= 2 | # This file is automatically generated from the templates in stackabletech/operator-templating 3 | # DON'T MANUALLY EDIT THIS FILE 4 | # ============= 5 | name: Publish-Release-Artifacts 6 | 7 | on: 8 | push: 9 | tags: 10 | - "*" 11 | 12 | env: 13 | CARGO_TERM_COLOR: always 14 | CARGO_INCREMENTAL: '0' 15 | CARGO_PROFILE_DEV_DEBUG: '0' 16 | RUSTFLAGS: "-D warnings -W rust-2021-compatibility" 17 | REPO_APT_RELEASE_URL: https://repo.stackable.tech/repository/deb-release 18 | REPO_RPM_RELEASE_URL: https://repo.stackable.tech/repository/rpm-release 19 | 20 | jobs: 21 | debian10: 22 | runs-on: debian10 23 | steps: 24 | - uses: actions/checkout@v2.3.5 25 | - name: Change 
version if is PR 26 | if: ${{ github.event_name == 'pull_request' }} 27 | # We use "mr" instead of "pr" to denote pull request builds, as this prefix comes before "nightly" when lexically 28 | # sorting packages by version. This means that when installing the package without specifying a version the 29 | # nightly version is considered more current than mr versions and installed by default 30 | run: sed -i -e 's/^version = "\([0-9]\+\.[0-9]\+\.[0-9]\+\).*/version = "\1-mr${{ github.event.number }}"/' Cargo.toml 31 | - name: Build 32 | run: ~/.cargo/bin/cargo build --verbose --release 33 | - name: Build apt package 34 | run: ~/.cargo/bin/cargo deb --manifest-path Cargo.toml --no-build 35 | - name: Publish apt package 36 | env: 37 | NEXUS_PASSWORD: ${{ secrets.NEXUS_PASSWORD }} 38 | if: env.NEXUS_PASSWORD != null 39 | run: >- 40 | /usr/bin/curl 41 | --fail 42 | -u 'github:${{ secrets.NEXUS_PASSWORD }}' 43 | -H "Content-Type: multipart/form-data" 44 | --data-binary "@./$(find target/debian/ -name *.deb)" 45 | "${{ env.REPO_APT_RELEASE_URL }}/" 46 | - name: Clean 47 | run: ~/.cargo/bin/cargo clean 48 | 49 | centos: 50 | runs-on: centos${{ matrix.node }} 51 | strategy: 52 | matrix: 53 | node: [ 7, 8 ] 54 | steps: 55 | - uses: actions/checkout@v2.3.5 56 | - name: Change version if is PR 57 | if: ${{ github.event_name == 'pull_request' }} 58 | # We use "mr" instead of "pr" to denote pull request builds, as this prefix comes before "nightly" when lexically 59 | # sorting packages by version. 
This means that when installing the package without specifying a version the 60 | # nightly version is considered more current than mr versions and installed by default 61 | run: sed -i -e 's/^version = "\([0-9]\+\.[0-9]\+\.[0-9]\+\).*/version = "\1-mr${{ github.event.number }}"/' Cargo.toml 62 | - name: Build 63 | run: ~/.cargo/bin/cargo build --verbose --release 64 | - name: Build RPM package 65 | run: packaging/buildrpm.sh stackable-agent 66 | - name: Publish RPM package 67 | env: 68 | NEXUS_PASSWORD: ${{ secrets.NEXUS_PASSWORD }} 69 | if: env.NEXUS_PASSWORD != null 70 | run: >- 71 | /usr/bin/curl 72 | --fail 73 | -u 'github:${{ secrets.NEXUS_PASSWORD }}' 74 | --upload-file "./$(find target/rpm/RPMS/x86_64/ -name *.rpm)" 75 | "${{ env.REPO_RPM_RELEASE_URL }}/el${{ matrix.node }}/" 76 | - name: Clean 77 | run: ~/.cargo/bin/cargo clean 78 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | 14 | test: 15 | name: Run tests 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v2.3.5 19 | - name: Update package index 20 | run: sudo apt-get update 21 | - name: Install dependencies 22 | run: sudo apt-get install 
libsystemd-dev pkg-config 45 | - uses: actions-rs/toolchain@v1.0.7 46 | with: 47 | profile: minimal 48 | toolchain: stable 49 | components: rustfmt 50 | override: true 51 | - uses: actions-rs/cargo@v1.0.3 52 | with: 53 | command: fmt 54 | args: --all -- --check 55 | 56 | doc: 57 | name: Run rustdoc 58 | runs-on: ubuntu-latest 59 | steps: 60 | - uses: actions/checkout@v2.3.5 61 | - name: Update package index 62 | run: sudo apt-get update 63 | - name: Install dependencies 64 | run: sudo apt-get install libsystemd-dev pkg-config 65 | - uses: actions-rs/toolchain@v1.0.7 66 | with: 67 | profile: minimal 68 | toolchain: stable 69 | components: rustfmt 70 | override: true 71 | - uses: Swatinem/rust-cache@v1.3.0 72 | - uses: actions-rs/cargo@v1.0.3 73 | with: 74 | command: doc 75 | args: --document-private-items 76 | 77 | clippy: 78 | name: Run clippy 79 | runs-on: ubuntu-latest 80 | steps: 81 | - uses: actions/checkout@v2.3.5 82 | - name: Update package index 83 | run: sudo apt-get update 84 | - name: Install dependencies 85 | run: sudo apt-get install libsystemd-dev pkg-config 86 | - uses: actions-rs/toolchain@v1.0.7 87 | with: 88 | profile: minimal 89 | toolchain: stable 90 | components: clippy 91 | override: true 92 | # We need this due to: https://github.com/actions-rs/clippy-check/issues/2 93 | - name: Check workflow permissions 94 | id: check_permissions 95 | uses: scherermichael-oss/action-has-permission@1.0.6 96 | with: 97 | required-permission: write 98 | env: 99 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 100 | - name: Run clippy action to produce annotations 101 | uses: actions-rs/clippy-check@v1 102 | if: steps.check_permissions.outputs.has-permission 103 | with: 104 | args: --all-targets -- -D warnings 105 | token: ${{ secrets.GITHUB_TOKEN }} 106 | - name: Run clippy manually without annotations 107 | if: ${{ !steps.check_permissions.outputs.has-permission }} 108 | run: cargo clippy --all-targets -- -D warnings 109 | 110 | cargo-deny: 111 | name: Run cargo 
deny 112 | runs-on: ubuntu-latest 113 | strategy: 114 | matrix: 115 | checks: 116 | - advisories 117 | - bans 118 | - licenses 119 | - sources 120 | steps: 121 | - uses: actions/checkout@v2.3.5 122 | - uses: EmbarkStudios/cargo-deny-action@v1.2.6 123 | with: 124 | command: check ${{ matrix.checks }} 125 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [Unreleased] 4 | ### Added 5 | - Cleanup stage added where systemd units without corresponding pods are 6 | removed on startup ([#312]). 7 | 8 | ### Changed 9 | - Changed the version reported by the Stackable Agent in `nodeInfo.kubeletVersion` of the `Node` object in Kubernetes 10 | from the version of the Krustlet library to the Stackable Agent version ([#315]). 11 | - Restart agent on all crashes ([#318]). 
12 | - Agent will now request content type "application/gzip" in package downloads and reject responses with content type 13 | that is not one of either "application/gzip", "application/tgz" or "application/x-gzip" ([#326]) 14 | - Agent now also accepts "application/x-tgz" as content_type when downloading packages ([#337]) 15 | 16 | ### Fixed 17 | - Agent deletes directories from failed install attempts ([#326]) 18 | 19 | [#312]: https://github.com/stackabletech/agent/pull/312 20 | [#315]: https://github.com/stackabletech/agent/pull/315 21 | [#318]: https://github.com/stackabletech/agent/pull/318 22 | [#326]: https://github.com/stackabletech/agent/pull/326 23 | [#337]: https://github.com/stackabletech/agent/pull/337 24 | 25 | ## [0.6.1] - 2021-09-14 26 | 27 | ### Changed 28 | - Changed the binary location for APT packages from 29 | `/opt/stackable-agent/stackable-agent` to 30 | `/opt/stackable/stackable-agent/stackable-agent` ([#304]). 31 | 32 | [#304]: https://github.com/stackabletech/agent/pull/304 33 | 34 | ## [0.6.0] - 2021-09-08 35 | 36 | ### Added 37 | - Prints self-diagnostic information on startup ([#270]). 38 | - Check added on startup if the configured directories exist and are 39 | writable by the Stackable agent ([#273]). 40 | - Missing directories are created ([#274]). 41 | - Annotation `featureRestartCount` added to the pods to indicate if the 42 | restart count is set properly ([#289]). 43 | 44 | ### Changed 45 | - Lazy validation of repository URLs changed to eager validation 46 | ([#262]). 47 | - `certificates.k8s.io/v1` used instead of `certificates.k8s.io/v1beta1` 48 | so that the Stackable Agent is now compatible with Kubernetes v1.22 49 | but not any longer with versions prior to v1.19 ([#267]). 50 | - Error message improved which is logged if a systemd unit file cannot 51 | be created ([#276]). 52 | - Handling of service restarts moved from the Stackable agent to 53 | systemd ([#263]). 
54 | 55 | ### Removed 56 | - Check removed if a service starts up correctly within 10 seconds. 57 | systemd manages restarts now and the Stackable agent cannot detect if 58 | a service is in a restart loop ([#263]). 59 | 60 | ### Fixed 61 | - Systemd services in session mode are restarted after a reboot 62 | ([#263]). 63 | 64 | [#262]: https://github.com/stackabletech/agent/pull/262 65 | [#263]: https://github.com/stackabletech/agent/pull/263 66 | [#267]: https://github.com/stackabletech/agent/pull/267 67 | [#270]: https://github.com/stackabletech/agent/pull/270 68 | [#273]: https://github.com/stackabletech/agent/pull/273 69 | [#274]: https://github.com/stackabletech/agent/pull/274 70 | [#276]: https://github.com/stackabletech/agent/pull/276 71 | [#289]: https://github.com/stackabletech/agent/pull/289 72 | 73 | ## [0.5.0] - 2021-07-26 74 | 75 | ### Added 76 | - `hostIP` and `podIP` added to the pod status ([#224]). 77 | - Environment variable `KUBECONFIG` set in systemd services ([#234]). 78 | 79 | ### Fixed 80 | - Invalid or unreachable repositories are skipped when searching for 81 | packages ([#229]). 82 | - Access rights of the private key file restricted to the owner 83 | ([#235]). The permissions are set when the file is created. On 84 | existing installations the permissions must be set manually, e.g. with 85 | `chmod 600 /etc/stackable/stackable-agent/secret/agent.key`. 86 | 87 | [#224]: https://github.com/stackabletech/agent/pull/224 88 | [#229]: https://github.com/stackabletech/agent/pull/229 89 | [#234]: https://github.com/stackabletech/agent/pull/234 90 | [#235]: https://github.com/stackabletech/agent/pull/235 91 | 92 | ## [0.4.0] - 2021-06-23 93 | 94 | ### Added 95 | - Annotation `featureLogs` added to the pods to indicate if logs can be 96 | retrieved with `kubectl logs` ([#188]). 
97 | 98 | ### Changed 99 | - Restart setting in systemd units removed because the agent already 100 | monitors the units and restarts them according to the restart policy 101 | in the pod spec ([#205]). 102 | 103 | ### Fixed 104 | - Pods with restart policy `Never` handled correctly ([#205]). 105 | 106 | [#188]: https://github.com/stackabletech/agent/pull/188 107 | [#205]: https://github.com/stackabletech/agent/pull/205 108 | 109 | ## [0.3.0] - 2021-05-27 110 | 111 | ### Added 112 | - Artifacts for merge requests are created ([#169], [#173]). 113 | 114 | ### Changed 115 | - Structure of the documentation changed so that it can be incorporated 116 | into the overall Stackable documentation ([#165]). 117 | 118 | ### Fixed 119 | - Deadlock fixed which occurred when multiple pods were started or 120 | stopped simultaneously ([#176]). 121 | 122 | [#165]: https://github.com/stackabletech/agent/pull/165 123 | [#169]: https://github.com/stackabletech/agent/pull/169 124 | [#173]: https://github.com/stackabletech/agent/pull/173 125 | [#176]: https://github.com/stackabletech/agent/pull/176 126 | 127 | ## [0.2.0] - 2021-05-20 128 | 129 | ### Added 130 | - Templating facility added to the `config-directory` parameter 131 | ([#159]). 132 | 133 | ### Fixed 134 | - Pod state synchronized with systemd service state ([#164]). 135 | 136 | [#159]: https://github.com/stackabletech/agent/pull/159 137 | [#164]: https://github.com/stackabletech/agent/pull/164 138 | 139 | ## [0.1.0] - 2021-05-17 140 | 141 | ### Added 142 | - Apache license v2.0 set ([#23]). 143 | - Krustlet based agent implementation created ([#1], [#18], [#26], 144 | [#35], [#40]). 145 | - Functionality to stop and restart processes added ([#25]). 146 | - Agent restart without impacting running services enabled ([#63]). 147 | - Rendering of template variables to environment variables added 148 | ([#30]). 149 | - Setting of pod condition "ready" for state "running" added ([#32]). 
150 | - Support for command line parameters added ([#36], [#50], [#72], 151 | [#109]). 152 | - Integration with systemd implemented ([#43], [#53], [#100], [#152]). 153 | - Dependabot and security audit enabled ([#56], [#57]). 154 | - Building and publishing of nightly deb and rpm packages added ([#73], 155 | [#78], [#94], [#110], [#144]). 156 | - Bootstrapping of certificates and kubeconfig added ([#77]). 157 | - Support for running of services as application users added ([#79]). 158 | - Retrieval of container logs with kubectl logs implemented ([#135]). 159 | - Configuration of terminationGracePeriodSeconds considered in systemd 160 | units ([#138]). 161 | - Systemd dependency adapted so that it is compatible with systemd 162 | version 241 ([#145]). 163 | 164 | [#1]: https://github.com/stackabletech/agent/pull/1 165 | [#18]: https://github.com/stackabletech/agent/pull/18 166 | [#23]: https://github.com/stackabletech/agent/pull/23 167 | [#25]: https://github.com/stackabletech/agent/pull/25 168 | [#26]: https://github.com/stackabletech/agent/pull/26 169 | [#30]: https://github.com/stackabletech/agent/pull/30 170 | [#32]: https://github.com/stackabletech/agent/pull/32 171 | [#35]: https://github.com/stackabletech/agent/pull/35 172 | [#36]: https://github.com/stackabletech/agent/pull/36 173 | [#40]: https://github.com/stackabletech/agent/pull/40 174 | [#43]: https://github.com/stackabletech/agent/pull/43 175 | [#50]: https://github.com/stackabletech/agent/pull/50 176 | [#53]: https://github.com/stackabletech/agent/pull/53 177 | [#56]: https://github.com/stackabletech/agent/pull/56 178 | [#57]: https://github.com/stackabletech/agent/pull/57 179 | [#63]: https://github.com/stackabletech/agent/pull/63 180 | [#72]: https://github.com/stackabletech/agent/pull/72 181 | [#73]: https://github.com/stackabletech/agent/pull/73 182 | [#77]: https://github.com/stackabletech/agent/pull/77 183 | [#78]: https://github.com/stackabletech/agent/pull/78 184 | [#79]: 
https://github.com/stackabletech/agent/pull/79 185 | [#94]: https://github.com/stackabletech/agent/pull/94 186 | [#100]: https://github.com/stackabletech/agent/pull/100 187 | [#109]: https://github.com/stackabletech/agent/pull/109 188 | [#110]: https://github.com/stackabletech/agent/pull/110 189 | [#135]: https://github.com/stackabletech/agent/pull/135 190 | [#138]: https://github.com/stackabletech/agent/pull/138 191 | [#144]: https://github.com/stackabletech/agent/pull/144 192 | [#145]: https://github.com/stackabletech/agent/pull/145 193 | [#152]: https://github.com/stackabletech/agent/pull/152 194 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Sönke Liebau "] 3 | build = "build.rs" 4 | description = "The component of the Stackable Platform that manages installation of services on the workers" 5 | edition = "2018" 6 | license = "Apache-2.0" 7 | name = "stackable-agent" 8 | repository = "https://github.com/stackabletech/agent" 9 | version = "0.7.0-nightly" 10 | 11 | [dependencies] 12 | # We are currently referencing the Krustlet directly from a Stackable fork of the official repository. 13 | # The fork is needed to remove the node draining behavior of the Krustlet (see https://github.com/deislabs/krustlet/issues/523) 14 | # There is already a PR for cargo to allow patching of dependencies (see https://github.com/rust-lang/cargo/pull/9001). 
15 | anyhow = "1.0" 16 | async-trait = "0.1" 17 | byteorder = "1.4" 18 | dirs = "4.0" 19 | env_logger = "0.9" 20 | flate2 = "1.0" 21 | futures-util = "0.3" 22 | handlebars = "4.1" 23 | hostname = "0.3" 24 | k8s-openapi = { version = "0.11", default-features = false, features = ["api", "v1_20"] } 25 | krator = { git = "https://github.com/stackabletech/krustlet.git", tag = "0.7.0-stackable.5" } # version = "0.2" 26 | kube = { version= "0.48", default-features = false, features = ["derive", "native-tls"] } 27 | kubelet = { git = "https://github.com/stackabletech/krustlet.git", tag = "0.7.0-stackable.5", default-features = true, features= ["derive", "cli"] } # version = "0.7" 28 | Inflector = "0.11" 29 | json-patch = "0.2" 30 | lazy_static = "1.4" 31 | log = "0.4" 32 | multimap = "0.8" 33 | nix = "0.22" 34 | # Pin notify to the latest version compatible with krustlet 0.7.0. 35 | # TODO Remove when upgrading krustlet 36 | notify = "= 5.0.0-pre.10" 37 | oci-distribution = { git = "https://github.com/stackabletech/krustlet.git", tag = "0.7.0-stackable.5" } # version = "0.6" 38 | regex = "1.4" 39 | reqwest = "0.11" 40 | schemars = "0.8" 41 | serde = "1.0" 42 | serde_derive = "1.0" 43 | serde_json = "1.0" 44 | shellexpand = "2.1" 45 | stackable-config = { git = "https://github.com/stackabletech/common.git", tag = "0.1.0" } 46 | strum = { version = "0.22", features = ["derive"] } 47 | strum_macros = "0.22" 48 | systemd = { version = "0.9", default-features = false, features = ["journal"] } 49 | tar = "0.4" 50 | thiserror = "1.0" 51 | tokio = { version = "1.12", features = ["macros", "rt-multi-thread", "time"] } 52 | url = "2.2" 53 | zbus = { git = "https://gitlab.freedesktop.org/dbus/zbus", rev = "ff08cbbbcd3eead16464012b92e3862d4dcb6f16" } # version 2.0.0-beta.6 + merge request !354 (fixes a race condition) + commit 6cdfe48cda5e0bf7b0dd8675be7a84439678afa9 (fixes another race condition) 54 | zvariant = { git = "https://gitlab.freedesktop.org/dbus/zbus", rev = 
"ff08cbbbcd3eead16464012b92e3862d4dcb6f16" } # version 2.8.0 which is compatible with the zbus version 55 | 56 | [dev-dependencies] 57 | indoc = "1.0" 58 | rstest = "0.11" 59 | serde_yaml = "0.8" 60 | 61 | [build-dependencies] 62 | built = { version = "0.5", features = ["chrono", "git2"] } 63 | 64 | [profile.release] 65 | opt-level = "s" 66 | lto = true 67 | codegen-units = 1 68 | 69 | [package.metadata.deb] 70 | maintainer-scripts = "packaging/debian/" 71 | systemd-units = { enable = false } 72 | assets = [ 73 | ["packaging/config/agent.conf", "etc/stackable/stackable-agent/", "644"], 74 | ["target/release/stackable-agent", "opt/stackable/stackable-agent/stackable-agent", "755"], 75 | ] 76 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 
26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.adoc: -------------------------------------------------------------------------------- 1 | = DEPRECATED 2 | 3 | We have changed our efforts to use the upstream Kubernetes Kubelet instead, and as such the Stackable Agent is no longer maintained. 4 | 5 | = Stackable Agent 6 | 7 | The Stackable Agent is an alternative to the Kubernetes Kubelet that executes Pods not in containers but using systemd as its backend. 8 | 9 | It is written by https://www.stackable.de[Stackable] in Rust, and it is supposed to be used with the https://github.com/stackabletech/agent[Stackable Agent] instead of the Kubernetes kubelet. 10 | 11 | The docs can be found in the `docs` subdirectory, and they are published together with docs for all other Stackable products at https://docs.stackable.tech. 
12 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | built::write_built_file().expect("Failed to acquire build-time information"); 3 | } 4 | -------------------------------------------------------------------------------- /deny.toml: -------------------------------------------------------------------------------- 1 | targets = [ 2 | { triple = "x86_64-unknown-linux-gnu" }, 3 | { triple = "aarch64-unknown-linux-gnu" }, 4 | { triple = "x86_64-unknown-linux-musl" }, 5 | { triple = "aarch64-apple-darwin" }, 6 | { triple = "x86_64-apple-darwin" }, 7 | ] 8 | 9 | [advisories] 10 | vulnerability = "warn" 11 | unmaintained = "allow" 12 | unsound = "warn" 13 | yanked = "warn" 14 | notice = "warn" 15 | 16 | [bans] 17 | multiple-versions = "allow" 18 | 19 | # TODO Ban openssl when switched to rustls 20 | # [[bans.deny]] 21 | # name = "openssl" 22 | 23 | [licenses] 24 | unlicensed = "deny" 25 | copyleft = "deny" 26 | allow-osi-fsf-free = "neither" 27 | default = "deny" 28 | confidence-threshold = 1.0 29 | allow = [ 30 | "Apache-2.0", 31 | "BSD-2-Clause", 32 | "BSD-3-Clause", 33 | "CC0-1.0", 34 | "ISC", 35 | "LGPL-2.1 WITH GCC-exception-2.0", 36 | "MIT", 37 | "OpenSSL", 38 | "Unlicense", 39 | "Zlib", 40 | ] 41 | 42 | [[licenses.clarify]] 43 | name = "ring" 44 | version = "*" 45 | expression = "MIT AND ISC AND OpenSSL" 46 | license-files = [ 47 | { path = "LICENSE", hash = 0xbd0eed23 }, 48 | ] 49 | 50 | [[licenses.clarify]] 51 | name = "webpki" 52 | version = "*" 53 | expression = "ISC" 54 | license-files = [ 55 | { path = "LICENSE", hash = 0x001c7e6c }, 56 | ] 57 | 58 | [sources] 59 | unknown-registry = "deny" 60 | unknown-git = "deny" 61 | required-git-spec = "tag" 62 | allow-git = [ 63 | "https://gitlab.freedesktop.org/dbus/zbus", 64 | ] 65 | 66 | [sources.allow-org] 67 | github = ["stackabletech"] 68 | 
-------------------------------------------------------------------------------- /deploy/crd/repository.crd.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiextensions.k8s.io/v1 2 | kind: CustomResourceDefinition 3 | metadata: 4 | name: repositories.stable.stackable.de 5 | spec: 6 | group: stable.stackable.de 7 | versions: 8 | - name: v1 9 | served: true 10 | storage: true 11 | schema: 12 | openAPIV3Schema: 13 | type: object 14 | properties: 15 | spec: 16 | type: object 17 | properties: 18 | repo_type: 19 | type: string 20 | properties: 21 | type: object 22 | additionalProperties: 23 | type: string 24 | scope: Namespaced 25 | names: 26 | plural: repositories 27 | singular: repository 28 | kind: Repository 29 | shortNames: 30 | - repo -------------------------------------------------------------------------------- /docs/antora.yml: -------------------------------------------------------------------------------- 1 | name: agent 2 | version: master 3 | title: Stackable Agent 4 | nav: 5 | - modules/ROOT/nav.adoc 6 | -------------------------------------------------------------------------------- /docs/modules/ROOT/nav.adoc: -------------------------------------------------------------------------------- 1 | * Installation 2 | ** xref:installation/building.adoc[] 3 | ** xref:installation/binaries.adoc[] 4 | * xref:configuration.adoc[] 5 | * xref:limitations.adoc[] 6 | * xref:services.adoc[] 7 | * xref:jobs.adoc[] 8 | * Stages 9 | ** xref:stages/overview.adoc[] 10 | ** xref:stages/cleanup.adoc[] 11 | * Monitoring 12 | ** xref:monitoring/logs.adoc[] 13 | -------------------------------------------------------------------------------- /docs/modules/ROOT/pages/commandline_args.adoc: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | === no-config 5 | 6 | *Default value*: `No default value` 7 | 8 | *Required*: false 9 | 10 | *Multiple values:* false 11 | 12 | 13 | If this 
option is specified, any file referenced in AGENT_CONF environment variable will be ignored. 14 | 15 | 16 | === pod-cidr 17 | 18 | *Default value*: `` 19 | 20 | *Required*: false 21 | 22 | *Multiple values:* false 23 | 24 | 25 | This setting controls the pod address range that the agent reports to Kubernetes. 26 | The effect of this setting is that Kubernetes will reserve address blocks from within this range for every node. 27 | Depending on the setting for maximum pods per node, these will be larger or smaller ranges, and influence the maximum number of nodes for the cluster. 28 | 29 | The agent does not require any pod address ranges, and by default doesn't specify anything for this setting. 30 | 31 | WARNING: There should almost never be a reason to use this setting, this is mostly here for very special circumstances. Do not touch it unless you really know what you're doing. 32 | 33 | 34 | === bootstrap-file 35 | 36 | *Default value*: `/etc/stackable/stackable-agent/bootstrap-kubelet.conf` 37 | 38 | *Required*: false 39 | 40 | *Multiple values:* false 41 | 42 | 43 | The bootstrap file to use in case Kubernetes bootstrapping is used to add the agent. 44 | 45 | 46 | === server-bind-ip 47 | 48 | *Default value*: `No default value` 49 | 50 | *Required*: false 51 | 52 | *Multiple values:* false 53 | 54 | 55 | The local IP to register as the node's ip with the apiserver. Will be automatically set to the first address of the first non-loopback interface if not specified. 56 | 57 | 58 | === server-key-file 59 | 60 | *Default value*: `/etc/stackable/stackable-agent/secret/agent.key` 61 | 62 | *Required*: false 63 | 64 | *Multiple values:* false 65 | 66 | 67 | Private key file (in PKCS8 format) to use for the local webserver the Krustlet starts.
68 | 69 | 70 | === package-directory 71 | 72 | *Default value*: `/opt/stackable/packages` 73 | 74 | *Required*: false 75 | 76 | *Multiple values:* false 77 | 78 | 79 | This directory will serve as starting point for packages that are needed by pods assigned to this node.\n Packages will be downloaded into the "_download" folder at the top level of this folder as archives and remain there for potential future use. 80 | 81 | Archives will then be extracted directly into this folder in subdirectories following the naming 82 | scheme of "productname-productversion". 83 | 84 | The agent will need full access to this directory and tries to create it if it does not exist. 85 | 86 | 87 | === hostname 88 | 89 | *Default value*: `No default value` 90 | 91 | *Required*: false 92 | 93 | *Multiple values:* false 94 | 95 | 96 | The hostname to register the node under in Kubernetes - defaults to system hostname. 97 | 98 | 99 | === data-directory 100 | 101 | *Default value*: `/var/lib/stackable/agent` 102 | 103 | *Required*: false 104 | 105 | *Multiple values:* false 106 | 107 | 108 | The directory where the stackable agent should keep its working data. 109 | 110 | 111 | === server-cert-file 112 | 113 | *Default value*: `/etc/stackable/stackable-agent/secret/agent.crt` 114 | 115 | *Required*: false 116 | 117 | *Multiple values:* false 118 | 119 | 120 | The certificate file for the local webserver which the Krustlet starts. 121 | 122 | 123 | === server-port 124 | 125 | *Default value*: `3000` 126 | 127 | *Required*: false 128 | 129 | *Multiple values:* false 130 | 131 | 132 | Port to listen on for callbacks. 133 | 134 | 135 | === config-directory 136 | 137 | *Default value*: `/etc/stackable/serviceconfig` 138 | 139 | *Required*: false 140 | 141 | *Multiple values:* false 142 | 143 | 144 | This directory will serve as starting point for all configuration files which this service creates.
145 | 146 | Every service will get its own subdirectories created within this directory - for every service start a 147 | new subdirectory will be created to show a full history of configuration that was used for this service. 148 | 149 | ConfigMaps which are specified in the pod that describes this service will be created relative to these run 150 | directories - unless the mounts specify an absolute path, in which case it is allowed to break out of this directory. 151 | 152 | WARNING: This allows anybody who can specify pods more or less full access to the file system on the machine running the agent! 153 | 154 | The agent will need full access to this directory and tries to create it if it does not exist. 155 | 156 | 157 | === log-directory 158 | 159 | *Default value*: `/var/log/stackable/servicelogs` 160 | 161 | *Required*: false 162 | 163 | *Multiple values:* false 164 | 165 | 166 | This directory will serve as starting point for all log files which this service creates. 167 | Every service will get its own subdirectory created within this directory. 168 | Anything that is then specified in the log4j config or similar files will be resolved relatively to this directory. 169 | 170 | The agent will need full access to this directory and tries to create it if it does not exist. 171 | 172 | 173 | === session 174 | 175 | *Default value*: `No default value` 176 | 177 | *Required*: false 178 | 179 | *Multiple values:* false 180 | 181 | 182 | This parameter specifies whether to use a session or the system DBus connection when talking to systemd. 183 | For our purposes the difference between the two can be explained as the session bus being restricted to the current user, whereas the system bus rolls out services that are available for every user. 184 | In reality it is a bit more involved than that, please refer to the https://dbus.freedesktop.org/doc/dbus-specification.html[official docs] for more information.
185 | 186 | When this flag is specified it causes symlinks for loaded services to be created in the currently active users systemd directory `~/.config/systemd/user` instead of one of the globally valid locations: 187 | 188 | - `/lib/systemd/system` 189 | - `/etc/systemd/system` 190 | 191 | The default is to use the system bus, for which it is necessary that the agent either run as root or have passwordless sudo rights. 192 | 193 | Using the session bus will mainly be useful for scenarios without root access and for testing on developer machines. 194 | 195 | 196 | === tag 197 | 198 | *Default value*: `No default value` 199 | 200 | *Required*: false 201 | 202 | *Multiple values:* true 203 | 204 | 205 | A "key=value" pair that should be assigned to this agent as tag. This can be specified multiple times to assign additional tags. 206 | 207 | Tags are the main way of identifying nodes to assign services to later on. -------------------------------------------------------------------------------- /docs/modules/ROOT/pages/configuration.adoc: -------------------------------------------------------------------------------- 1 | = Configuration 2 | 3 | == Command Line Parameters 4 | The agent accepts the following command line parameters: 5 | 6 | include::commandline_args.adoc[] 7 | 8 | == Config File 9 | In addition to directly specifying them on the command line, the agent allows specifying a config file via the environment variable `CONFIG_FILE`. Values specified in the file will have to adhere to the format `--parameter=value`. 10 | 11 | This file can contain all command line parameters and will be parsed before the actual command line. 12 | For parameters that are present in the file and on the command line, the command line will take precedence, unless it is a parameter that can be specified multiple times, in which case parameters from both, file and commandline, will be merged. 
12 | 13 | 14 | .Example config file 15 | --package-directory=/opt/stackable/agent/work/packages 16 | --config-directory=/etc/stackable/agent 17 | --server-cert-file=/etc/stackable/agent/secure/cert.crt 18 | --server-key-file=/etc/stackable/agent/secure/key.key 19 | 20 | == Kubernetes Config 21 | The agent uses the default way of looking for a kube-apiserver, so if your system is already set up to connect to Kubernetes with kubectl you should be good to go right off the bat. 22 | 23 | The default location for the Kubernetes client config is `~/.kube/config`, if you want to change this location you can override this via the `KUBECONFIG` environment variable. 24 | 25 | export KUBECONFIG=/etc/stackable/agent/kubeconfig 26 | 27 | 28 | == Certificates 29 | The agent requires a keypair and signed certificate to start a webserver which can be used to handle callbacks. 30 | If these are not specified on the commandline, the agent will create a keypair, upload a certificate signing request to Kubernetes and wait for the certificate before continuing. 31 | These steps require a certificate manager to be set up and running in your Kubernetes cluster, which may or may not be the case. 32 | 33 | You can also manually create these files and specify them on the command line. 34 | The following example shows how to create these files using https://github.com/OpenVPN/easy-rsa[easy-rsa], but this can be done in any number of different ways as well.
35 | 36 | ./easyrsa init-pki 37 | ./easyrsa build-ca 38 | ./easyrsa gen-req krustlet1 39 | ./easyrsa import-req pki/reqs/krustlet1.req krustlet1-req 40 | ./easyrsa sign-req serverClient krustlet1-req 41 | # Convert key to pkcs8 format 42 | openssl pkcs8 -topk8 -inform PEM -outform PEM -nocrypt -in pki/private/krustlet1.key -out pkcs8.key 43 | -------------------------------------------------------------------------------- /docs/modules/ROOT/pages/index.adoc: -------------------------------------------------------------------------------- 1 | = Stackable Agent 2 | 3 | The Stackable Agent is an alternative to the Kubernetes Kubelet that executes Pods not in containers but using systemd as its backend. 4 | It is implemented in Rust as a https://github.com/deislabs/krustlet[Krustlet] provider. 5 | 6 | The agent registers itself as a node with a kube-apiserver and will be considered by the Kubernetes scheduler for workloads (pods). 7 | To avoid _normal_ Kubernetes pods being scheduled on the Stackable agent (it would not know what to do with these) the agent assigns the following taints to its `Node` object: 8 | 9 | |=== 10 | |Taint |Type|Value 11 | 12 | |kubernetes.io/arch 13 | |NoSchedule 14 | |stackable-linux 15 | 16 | |kubernetes.io/arch 17 | |NoExecute 18 | |stackable-linux 19 | |=== 20 | 21 | These taints _suggest_ to the Kubernetes scheduler that only pods with matching tolerations should be scheduled on this node. 22 | 23 | == Contributing 24 | The agent is developed as an open source tool, and we absolutely welcome any and all contributions! 25 | Don't hesitate to drop us a line at info@stackable.de or reach out directly to any of our committers / contributors.
26 | -------------------------------------------------------------------------------- /docs/modules/ROOT/pages/installation/binaries.adoc: -------------------------------------------------------------------------------- 1 | = Binaries & Packages 2 | 3 | == Download binary 4 | We do not at this time provide pre-compiled binaries, as we are still in the process of setting up the build jobs to create these. 5 | This readme will be updated as soon as binary downloads are available! 6 | 7 | == OS Packages 8 | 9 | We provide OS packages for RHEL/CentOS 7 & 8 as well as Debian 10 (buster). 10 | 11 | * EL7: https://repo.stackable.tech/repository/rpm-release/el7/ 12 | * EL8: https://repo.stackable.tech/repository/rpm-release/el8/ 13 | * Debian 10: https://repo.stackable.tech/repository/deb-release 14 | 15 | Add these repositories to your OS and then install the `stackable-agent` package. 16 | -------------------------------------------------------------------------------- /docs/modules/ROOT/pages/installation/building.adoc: -------------------------------------------------------------------------------- 1 | = Building from source 2 | 3 | Building from source is fairly straightforward if you have the rust toolchain installed, our CI chain tests against the latest stable version at the time the checks are run. 4 | If you need to install this, generally the recommended way is to use https://rustup.rs/[rustup]. 5 | 6 | After rust is installed simply run 7 | 8 | cargo build 9 | 10 | To create the binary file in _target/debug_. 11 | 12 | == Build dependencies 13 | The agent depends on native systemd libraries to communicate with systemd. 14 | For the build process to work these need to be installed on the build system. 
15 | 16 | 17 | 18 | |=== 19 | |Distribution |Package Names 20 | 21 | |Ubuntu/Debian 22 | a|- pkg-config 23 | - libsystemd-dev 24 | 25 | |CentOS/RHEL 26 | a|- pkg-config 27 | - systemd-devel 28 | 29 | |=== 30 | -------------------------------------------------------------------------------- /docs/modules/ROOT/pages/jobs.adoc: -------------------------------------------------------------------------------- 1 | = Jobs 2 | 3 | A job performs a task which terminates after some time. The 4 | `restartPolicy` must be explicitly set to `Never` or `OnFailure`: 5 | 6 | apiVersion: v1 7 | kind: Pod 8 | metadata: 9 | name: 10 | spec: 11 | containers: 12 | - name: 13 | image: 14 | command: 15 | - 16 | restartPolicy: Never 17 | 18 | If the job terminated successfully then the pod phase is set to 19 | `Succeeded` and the exit code of the container state is `0`: 20 | 21 | status: 22 | phase: Succeeded 23 | message: Completed 24 | reason: Completed 25 | containerStatuses: 26 | - state: 27 | terminated: 28 | exitCode: 0 29 | message: Completed 30 | 31 | If the job failed then the pod phase is set to `Failed` and the exit 32 | code of the container state is `1`: 33 | 34 | status: 35 | phase: Failed 36 | message: Error 37 | reason: Error 38 | containerStatuses: 39 | - state: 40 | terminated: 41 | exitCode: 1 42 | message: Error 43 | -------------------------------------------------------------------------------- /docs/modules/ROOT/pages/limitations.adoc: -------------------------------------------------------------------------------- 1 | = Limitations 2 | 3 | The Stackable Agent has limitations, some of which are documented here. 4 | 5 | * Kubernetes v1.19+ is required. 6 | * The maximum number of pods supported by the agent is currently hardcoded to 110. 7 | This restriction can be lifted if the need arises. 
8 | * SSL certificate is not automatically renewed due to upstream bug https://github.com/krustlet/krustlet/issues/553 (manual activity required described below) 9 | 10 | == SSL certificate renewal == 11 | 12 | The agent SSL certificate expiry date can be observed via: 13 | 14 | [source,shell] 15 | ---- 16 | openssl x509 -enddate -noout -in /etc/stackable/stackable-agent/secret/agent.crt 17 | ---- 18 | 19 | See upstream documentation about the certificate system at https://kubernetes.io/docs/reference/access-authn-authz/certificate-signing-requests/, and the following steps. 20 | 21 | To generate a new certificate, first delete any current CSR: 22 | 23 | [source,shell] 24 | ---- 25 | kubectl delete csr/${HOSTNAME}-tls 26 | ---- 27 | 28 | If you have a CSR already, the agent will fail and log in to the journal (`journalctl -u stackable-agent`): 29 | 30 | ---- 31 | Sep 09 13:51:59 server1 stackable-agent[15457]: Caused by: 32 | Sep 09 13:51:59 server1 stackable-agent[15457]: certificatesigningrequests.certificates.k8s.io "server1-tls" already exists: AlreadyExists 33 | ---- 34 | 35 | Remove the current certificate file and restart the agent: 36 | 37 | [source,shell] 38 | ---- 39 | sudo rm /etc/stackable/stackable-agent/secret/agent.crt && \ 40 | sudo systemctl restart stackable-agent 41 | ---- 42 | 43 | And then observe the CSR is pending, with `kubectl get csr` 44 | 45 | ---- 46 | NAME AGE SIGNERNAME REQUESTOR CONDITION 47 | server1-tls 4s kubernetes.io/kubelet-serving kube:admin Pending 48 | ---- 49 | 50 | Note the name of the CSR, and then this can be approved with: 51 | 52 | [source,shell] 53 | ---- 54 | kubectl certificate approve server1-tls 55 | ---- 56 | 57 | If you need a list of pending CSRs for automation purposes, an example method is: 58 | 59 | [source,shell] 60 | ---- 61 | kubectl get csr -o go-template='{{range .items}}{{if not .status}}{{.metadata.name}}{{"\n"}}{{end}}{{end}}' 62 | ---- 
-------------------------------------------------------------------------------- /docs/modules/ROOT/pages/monitoring/logs.adoc: -------------------------------------------------------------------------------- 1 | = Logs 2 | 3 | The logs of a pod can be retrieved with `kubectl logs`. 4 | 5 | $ kubectl logs apache-kafka 6 | [2021-06-01 13:51:03,852] INFO Registered kafka:type=kafka.Log4jController MBean (kafka.utils.Log4jControllerRegistration$) 7 | [2021-06-01 13:51:04,361] INFO Registered signal handlers for TERM, INT, HUP (org.apache.kafka.common.utils.LoggingSignalHandler) 8 | [2021-06-01 13:51:04,362] INFO starting (kafka.server.KafkaServer) 9 | 10 | For this to work systemd version 232 or newer must be installed on the 11 | node. This is the case for Debian 10 and CentOS 8 but not for CentOS 7. 12 | The annotation `featureLogs` with a value of `true` or `false` is added 13 | to all pods to indicate the availability of the logs. 14 | 15 | $ kubectl describe pod apache-kafka 16 | … 17 | Annotations: featureLogs: true 18 | … 19 | 20 | If `featureLogs` is `false` then the output of `kubectl logs` is empty. 21 | 22 | The following options are not yet supported: 23 | 24 | * `--limit-bytes` 25 | * `-p --previous` 26 | * `--since` 27 | * `--since-time` 28 | * `--timestamps` 29 | -------------------------------------------------------------------------------- /docs/modules/ROOT/pages/monitoring/restarts.adoc: -------------------------------------------------------------------------------- 1 | = Restarts 2 | 3 | The restart count is stored in the container status if systemd version 4 | 235 or newer is running on the node which is the case for Debian 10 and 5 | CentOS 8 but not for CentOS 7. The annotation `featureRestartCount` 6 | indicates whether or not the restart count is set properly. 
7 | 8 | $ kubectl get pod 9 | NAME READY STATUS RESTARTS AGE 10 | 1/1 Running 4 10m 11 | 12 | $ kubectl describe pod 13 | Name: 14 | Annotations: featureRestartCount: true 15 | Containers: 16 | : 17 | Restart Count: 4 18 | -------------------------------------------------------------------------------- /docs/modules/ROOT/pages/services.adoc: -------------------------------------------------------------------------------- 1 | = Services 2 | 3 | A pod which provides a service should never terminate on its own, so the 4 | `restartPolicy` must be set to `Always`. As `restartPolicy` defaults to 5 | `Always`, it can also be omitted. 6 | 7 | apiVersion: v1 8 | kind: Pod 9 | metadata: 10 | name: 11 | spec: 12 | containers: 13 | - name: 14 | image: 15 | command: 16 | - 17 | restartPolicy: Always 18 | -------------------------------------------------------------------------------- /docs/modules/ROOT/pages/stages/cleanup.adoc: -------------------------------------------------------------------------------- 1 | = Cleanup stage 2 | 3 | On startup the systemd units in the `system-stackable` slice are 4 | compared to the pods assigned to this node. If a systemd unit is as 5 | expected then it is kept and the Stackable agent will take ownership 6 | again in a later stage. If there is no corresponding pod or the systemd 7 | unit differs from the pod specification then it is removed and the 8 | Stackable agent will create a new systemd unit afterwards. 9 | -------------------------------------------------------------------------------- /docs/modules/ROOT/pages/stages/overview.adoc: -------------------------------------------------------------------------------- 1 | = Overview 2 | 3 | When the Stackable Agent starts, it runs through the following stages: 4 | 5 | * Check configured directories and files. 6 | ** Check if the optional files can be opened if they exist. 7 | ** Create the directories where write access is required and which do 8 | not exist yet. 
9 | ** Check the configured directories if they are writable by the current 10 | process. 11 | * Bootstrap the cluster with TLS certificates but only if no existing 12 | kubeconfig can be found. 13 | * Remove all systemd units from a previous run without a corresponding 14 | pod (see xref:stages/cleanup.adoc[]). 15 | * Start the kubelet. 16 | 17 | After the kubelet was started, assigned pods run through the following 18 | stages: 19 | 20 | * Download the package from a registered Stackable repository. 21 | * Unpack the package and install it. 22 | * Create the configuration files according to the config maps. 23 | * Create, start, and enable the systemd units. 24 | * Monitor the systemd units and patch the pod status accordingly. 25 | * Stop, disable, and remove the systemd units on termination or when the 26 | pod is deleted. 27 | -------------------------------------------------------------------------------- /packaging/buildrpm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # This script creates an RPM package containing the binary created by this Cargo project. 3 | # The script is not universally applicable, since it makes a few assumptions about the project structure: 4 | # 1. The RPM scaffolding needs to be provided in packaging/rpm 5 | # 2. The binary to be packaged needs to be created in target/release 6 | 7 | # The script takes one argument, which is the name of the binary that has been created by the build process. 8 | # This argument will be reused for naming the final RPM file. 9 | 10 | # Check if one parameter was specified - we'll use this as the name parameter for all files 11 | # This allows us to reuse the script across all operators 12 | if [ -z $1 ]; then 13 | echo "This script requires the project name to be specified as the first parameter!" 
14 | exit 1 15 | fi 16 | 17 | export PACKAGE_NAME=$1 18 | BINARY_FILE=target/release/$PACKAGE_NAME 19 | 20 | # The package description is parsed from the output of `cargo metadata` by using jq. 21 | # We need to look up the package with a select statement to match the name from an array of packages 22 | # The name is passed into jq as a jq variable, as no substitution would take place within the single 23 | # quotes of the jq expression. 24 | export PACKAGE_DESCRIPTION=$(~/.cargo/bin/cargo metadata --format-version 1| jq --arg NAME "$PACKAGE_NAME" '.packages[] | select(.name == $NAME) | .description') 25 | if [ -z $PACKAGE_DESCRIPTION ]; then 26 | echo "Unable to parse package description from output of `cargo metadata`, cannot build RPM without this field!" 27 | exit 2 28 | fi 29 | echo 30 | 31 | # Check that we are being called from the main directory and the release build process has been run 32 | if [ ! -f $BINARY_FILE ]; then 33 | echo "Binary file not found at [$BINARY_FILE] - this script should be called from the root directory of the repository and 'cargo build --release' needs to have run before calling this script!" 34 | exit 3 35 | fi 36 | 37 | echo Cleaning up prior build attempts 38 | rm -rf target/rpm 39 | 40 | # Parse the version and release strings from the PKGID reported by Cargo 41 | # This is in the form Path#Projectname:version, which we parse by repeated calls to awk with different separators 42 | # This could most definitely be improved, but works for now 43 | export VERSION_STRING=$(~/.cargo/bin/cargo pkgid | awk -F'#' '{print $2}' | awk -F':' '{print $2}') 44 | echo version: ${VERSION_STRING} 45 | 46 | export PACKAGE_VERSION=$(echo ${VERSION_STRING} | awk -F '-' '{print $1}') 47 | 48 | # Any suffix like '-nightly' is split out into the release here, as - is not an allowed character in rpm versions 49 | # The final release will look like 0.suffix or 0 if no suffix is specified. 
50 | export PACKAGE_RELEASE="0$(echo ${VERSION_STRING} | awk -F '-' '{ if ($2 != "") print "."$2;}')" 51 | 52 | echo Defined package version: [${PACKAGE_VERSION}] 53 | echo Defined package release: [${PACKAGE_RELEASE}] 54 | echo Defined package description: [${PACKAGE_DESCRIPTION}] 55 | 56 | echo Creating directory scaffolding for RPM 57 | cp -r packaging/rpm target/ 58 | # Create empty directory for the binary to be placed into 59 | mkdir -p target/rpm/SOURCES/${PACKAGE_NAME}-VERSION/opt/stackable/${PACKAGE_NAME} 60 | 61 | # Create config directory and copy config file template over 62 | mkdir -p target/rpm/SOURCES/${PACKAGE_NAME}-VERSION/etc/stackable/${PACKAGE_NAME} 63 | cp packaging/config/agent.conf target/rpm/SOURCES/${PACKAGE_NAME}-VERSION/etc/stackable/${PACKAGE_NAME} 64 | mkdir target/rpm/SOURCES/${PACKAGE_NAME}-VERSION/etc/stackable/${PACKAGE_NAME}/secret 65 | mkdir -p target/rpm/SOURCES/${PACKAGE_NAME}-VERSION/var/lib/stackable/${PACKAGE_NAME} 66 | mkdir -p target/rpm/SOURCES/${PACKAGE_NAME}-VERSION/var/log/stackable/servicelogs 67 | mkdir -p target/rpm/SOURCES/${PACKAGE_NAME}-VERSION/opt/stackable/packages 68 | 69 | # The packaging source directory does not contain the version yet, as this will need to be replaced for every 70 | # execution. Instead the directory name contains the marker "VERSION" which we now replace with the actual version. 
71 | rename VERSION ${PACKAGE_VERSION} target/rpm/SOURCES/${PACKAGE_NAME}-VERSION 72 | 73 | cp target/release/${PACKAGE_NAME} target/rpm/SOURCES/${PACKAGE_NAME}-${PACKAGE_VERSION}/opt/stackable/${PACKAGE_NAME}/ 74 | 75 | pushd target/rpm/SOURCES 76 | tar czvf ${PACKAGE_NAME}-${PACKAGE_VERSION}.tar.gz ${PACKAGE_NAME}-${PACKAGE_VERSION} 77 | popd 78 | 79 | rpmbuild --define "_topdir `pwd`/target/rpm" -v -ba target/rpm/SPECS/${PACKAGE_NAME}.spec 80 | -------------------------------------------------------------------------------- /packaging/config/agent.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stackabletech/agent/2eeb42045fc9902be277fa5219130df1548068f7/packaging/config/agent.conf -------------------------------------------------------------------------------- /packaging/debian/postinst: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | mkdir -p /opt/stackable/packages 4 | mkdir -p /var/lib/stackable/stackable-agent 5 | mkdir -p /var/log/stackable/servicelogs 6 | mkdir -p /etc/stackable/stackable-agent 7 | mkdir -m 700 /etc/stackable/stackable-agent/secret 8 | 9 | #DEBHELPER# -------------------------------------------------------------------------------- /packaging/debian/service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Stackable Agent 3 | Before= 4 | After=network.target 5 | [Service] 6 | User=root 7 | ExecStart=/opt/stackable/stackable-agent/stackable-agent 8 | Restart=always 9 | RestartSec=1s 10 | StandardOutput=journal 11 | StandardError=journal 12 | Environment="CONFIG_FILE=/etc/stackable/stackable-agent/agent.conf" 13 | Environment="RUST_LOG=info" 14 | [Install] 15 | WantedBy=multi-user.target 16 | -------------------------------------------------------------------------------- 
/packaging/rpm/SOURCES/stackable-agent-VERSION/usr/lib/systemd/system/stackable-agent.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Stackable Agent 3 | Before= 4 | After=network.target 5 | [Service] 6 | User=root 7 | ExecStart=/opt/stackable/stackable-agent/stackable-agent 8 | Restart=always 9 | RestartSec=1s 10 | StandardOutput=journal 11 | StandardError=journal 12 | Environment="CONFIG_FILE=/etc/stackable/stackable-agent/agent.conf" 13 | Environment="RUST_LOG=info" 14 | [Install] 15 | WantedBy=multi-user.target 16 | -------------------------------------------------------------------------------- /packaging/rpm/SPECS/stackable-agent.spec: -------------------------------------------------------------------------------- 1 | %define __spec_install_post %{nil} 2 | %define __os_install_post %{_dbpath}/brp-compress 3 | %define debug_package %{nil} 4 | %define _servicedir /usr/lib/systemd/system 5 | %define _version %{getenv:PACKAGE_VERSION} 6 | %define _release %{getenv:PACKAGE_RELEASE} 7 | %define _name %{getenv:PACKAGE_NAME} 8 | %define _bindir /opt/stackable/%{_name} 9 | %define _confdir /etc/stackable/%{_name} 10 | %define _vardir /var/lib/stackable/%{_name} 11 | %define _description %{getenv:PACKAGE_DESCRIPTION} 12 | 13 | Name: %{_name} 14 | Summary: %{_description} 15 | Version: %{_version} 16 | Release: %{_release}%{?dist} 17 | License: ASL 2.0 18 | Group: Applications/System 19 | Source0: %{name}-%{version}.tar.gz 20 | 21 | BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root 22 | 23 | %description 24 | %{summary} 25 | 26 | %prep 27 | %setup -q 28 | 29 | %install 30 | rm -rf %{buildroot} 31 | mkdir -p %{buildroot} 32 | cp -a * %{buildroot} 33 | 34 | %post 35 | systemctl daemon-reload 36 | 37 | %preun 38 | if [ $1 == 0 ]; then #uninstall 39 | systemctl unmask %{name}.service 40 | systemctl stop %{name}.service 41 | systemctl disable %{name}.service 42 | fi 43 | 44 | %postun 45 | if [ $1 
== 0 ]; then #uninstall 46 | systemctl daemon-reload 47 | systemctl reset-failed 48 | fi 49 | 50 | %clean 51 | rm -rf %{buildroot} 52 | 53 | %files 54 | %defattr(-,root,root,-) 55 | %{_bindir} 56 | %{_bindir}/* 57 | %{_servicedir}/%{name}.service 58 | %dir %{_confdir} 59 | %config %{_confdir}/agent.conf 60 | %dir %attr(700, root, root) %{_confdir}/secret 61 | %dir %{_vardir} 62 | %dir /var/log/stackable/servicelogs 63 | %dir /opt/stackable/packages -------------------------------------------------------------------------------- /src/bin/generate_doc.rs: -------------------------------------------------------------------------------- 1 | /// This is a helper binary which generates the file `documentation/commandline_args.adoc` which 2 | /// contains documentation of the available command line options for the agent binary. 3 | /// 4 | /// It gets the content by calling [`stackable_agent::config::AgentConfig::get_documentation()`] 5 | /// 6 | /// # Panics 7 | /// This will panic if an error occurs when trying to write the file. 
8 | 9 | fn main() { 10 | use stackable_agent::config::AgentConfig; 11 | use std::fs; 12 | 13 | let target_file = "docs/modules/ROOT/pages/commandline_args.adoc"; 14 | fs::write(target_file, AgentConfig::get_documentation()).unwrap_or_else(|err| { 15 | panic!( 16 | "Could not write documentation to [{}]: {}", 17 | target_file, err 18 | ) 19 | }); 20 | } 21 | -------------------------------------------------------------------------------- /src/bin/stackable-agent.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::env; 3 | use std::ffi::OsString; 4 | use std::io::ErrorKind; 5 | use std::path::PathBuf; 6 | 7 | use kubelet::config::{Config, ServerConfig}; 8 | use kubelet::Kubelet; 9 | use log::{error, info}; 10 | use tokio::fs::{create_dir_all, File}; 11 | 12 | use stackable_agent::config::AgentConfig; 13 | use stackable_agent::fsext::check_dir_is_writable; 14 | use stackable_agent::provider::StackableProvider; 15 | use stackable_config::{ConfigBuilder, ConfigOption}; 16 | 17 | mod built_info { 18 | // The file has been placed there by the build script. 
19 | include!(concat!(env!("OUT_DIR"), "/built.rs")); 20 | } 21 | 22 | pub fn print_startup_string( 23 | pkg_version: &str, 24 | git_version: Option<&str>, 25 | target: &str, 26 | built_time: &str, 27 | rustc_version: &str, 28 | ) { 29 | let git_information = match git_version { 30 | None => "".to_string(), 31 | Some(git) => format!(" (Git information: {})", git), 32 | }; 33 | info!("Starting the Stackable Agent"); 34 | info!( 35 | "This is version {}{}, built for {} by {} at {}", 36 | pkg_version, git_information, target, rustc_version, built_time 37 | ) 38 | } 39 | 40 | #[tokio::main] 41 | async fn main() -> anyhow::Result<()> { 42 | // Initialize the logger 43 | env_logger::init(); 44 | 45 | let agent_config: AgentConfig = 46 | ConfigBuilder::build(env::args_os().collect::>(), "CONFIG_FILE") 47 | .expect("Error initializing Configuration!"); 48 | 49 | // Make sure to only print diagnostic information once we are actually trying to start 50 | print_startup_string( 51 | built_info::PKG_VERSION, 52 | built_info::GIT_VERSION, 53 | built_info::TARGET, 54 | built_info::BUILT_TIME_UTC, 55 | built_info::RUSTC_VERSION, 56 | ); 57 | 58 | check_optional_files(&agent_config).await; 59 | create_missing_directories(&agent_config).await; 60 | check_configured_directories(&agent_config).await; 61 | 62 | // Currently the only way to _properly_ configure the Krustlet is via these environment exports, 63 | // as their config object only offers methods that parse from command line flags (or combinations 64 | // of those flags with other things). 65 | // Since we have our own command line flags that are not compatible with the Krustlet's we 66 | // configure the agent via a file from the environment variable (CONFIG_FILE), extract what 67 | // is needed for the Krustlet and pass it via environment variables. 
68 | // This is an ugly hack for now, until we've had time to take a proper look at Krustlet's config 69 | export_env( 70 | "KRUSTLET_NODE_IP", 71 | &agent_config.server_ip_address.to_string(), 72 | ); 73 | 74 | // Convert node tags to string in the form of key=value,key=value,... 75 | // TODO: check for commas in the key value pairs themselves https://github.com/stackabletech/agent/issues/195 76 | let node_labels = agent_config 77 | .tags 78 | .iter() 79 | .map(|(k, v)| format!("{}={}", String::from(k), String::from(v))) 80 | .collect::>() 81 | .join(","); 82 | 83 | export_env("NODE_LABELS", &node_labels); 84 | 85 | export_env( 86 | "KRUSTLET_CERT_FILE", 87 | agent_config.server_cert_file.to_str().unwrap(), 88 | ); 89 | 90 | export_env( 91 | "KRUSTLET_PRIVATE_KEY_FILE", 92 | agent_config.server_key_file.to_str().unwrap(), 93 | ); 94 | 95 | info!("args: {:?}", env::args()); 96 | 97 | let server_config = ServerConfig { 98 | addr: agent_config.server_ip_address, 99 | port: agent_config.server_port, 100 | cert_file: agent_config.server_cert_file.clone(), 101 | private_key_file: agent_config.server_key_file.clone(), 102 | }; 103 | 104 | let plugins_directory = agent_config.data_directory.join("plugins"); 105 | 106 | let krustlet_config = Config { 107 | node_ip: agent_config.server_ip_address, 108 | hostname: agent_config.hostname.to_owned(), 109 | node_name: agent_config.hostname.to_owned(), 110 | server_config, 111 | data_dir: agent_config.data_directory.to_owned(), 112 | plugins_dir: plugins_directory.to_owned(), 113 | node_labels: agent_config.tags.to_owned(), 114 | max_pods: 110, 115 | bootstrap_file: agent_config.bootstrap_file.to_owned(), 116 | allow_local_modules: false, 117 | insecure_registries: None, 118 | }; 119 | 120 | // Bootstrap a kubernetes config, if no valid config is found 121 | // This also generates certificates for the webserver the krustlet 122 | // runs 123 | let kubeconfig = kubelet::bootstrap( 124 | &krustlet_config, 125 | 
&krustlet_config.bootstrap_file, 126 | notify_bootstrap, 127 | ) 128 | .await?; 129 | 130 | let provider = StackableProvider::new( 131 | kube::Client::new(kubeconfig.clone()), 132 | &agent_config, 133 | krustlet_config.max_pods, 134 | ) 135 | .await 136 | .expect("Error initializing provider."); 137 | 138 | provider.cleanup(&krustlet_config.node_name).await; 139 | 140 | let kubelet = Kubelet::new(provider, kubeconfig, krustlet_config).await?; 141 | kubelet.start().await 142 | } 143 | 144 | fn export_env(var_name: &str, var_value: &str) { 145 | info!("Exporting {}={}", var_name, var_value); 146 | std::env::set_var(var_name, var_value); 147 | } 148 | 149 | fn notify_bootstrap(message: String) { 150 | info!("Successfully bootstrapped TLS certificate: {}", message); 151 | } 152 | 153 | /// Checks if the optional files can be opened if they exist. An error 154 | /// is logged if they cannot be opened. 155 | async fn check_optional_files(config: &AgentConfig) { 156 | for (config_option, file) in [ 157 | (AgentConfig::SERVER_CERT_FILE, &config.server_cert_file), 158 | (AgentConfig::SERVER_KEY_FILE, &config.server_key_file), 159 | ] { 160 | if file.is_file() { 161 | if let Err(error) = File::open(file).await { 162 | error!( 163 | "Could not open file [{}] which is specified in \ 164 | the configuration option [{}]. {}", 165 | file.to_string_lossy(), 166 | config_option.name, 167 | error 168 | ); 169 | } 170 | } 171 | } 172 | } 173 | 174 | /// Creates the directories where write access is required and which do 175 | /// not exist yet. 176 | /// 177 | /// If a directory could not be created then an error is logged. 
178 | async fn create_missing_directories(config: &AgentConfig) { 179 | for (config_option, directory) in directories_where_write_access_is_required(config).await { 180 | if directory.components().count() != 0 && !directory.exists() { 181 | if let Err(error) = create_dir_all(&directory).await { 182 | error!( 183 | "Could not create the directory [{}] which is \ 184 | specified in the configuration option [{}]. {}", 185 | directory.to_string_lossy(), 186 | config_option.name, 187 | error 188 | ); 189 | } else { 190 | info!( 191 | "Directory [{}] created which is specified in the \ 192 | configuration option [{}].", 193 | directory.to_string_lossy(), 194 | config_option.name 195 | ); 196 | } 197 | }; 198 | } 199 | } 200 | 201 | /// Checks the configured directories if they are writable by the 202 | /// current process. If this is not the case then errors are logged. 203 | /// 204 | /// This check is performed for informational purposes only. The process 205 | /// is intentionally not terminated on failure because there can be 206 | /// false positives, e.g. if the underlying file system does not support 207 | /// temporary files which are used for the check. 208 | /// 209 | /// A successful check also does not guarantee that the process can 210 | /// write to the directory at a later time, e.g. if permissions are 211 | /// changed or a quota is hit. 
212 | async fn check_configured_directories(config: &AgentConfig) { 213 | for (config_option, directory) in directories_where_write_access_is_required(config).await { 214 | let directory = if directory.components().count() == 0 { 215 | PathBuf::from(".") 216 | } else { 217 | directory 218 | }; 219 | 220 | if let Err(error) = check_dir_is_writable(&directory).await { 221 | match error.kind() { 222 | ErrorKind::NotFound => error!( 223 | "The directory [{}] specified in the configuration \ 224 | option [{}] does not exist.", 225 | directory.to_string_lossy(), 226 | config_option.name 227 | ), 228 | ErrorKind::PermissionDenied => error!( 229 | "The directory [{}] specified in the configuration \ 230 | option [{}] is not writable by the process.", 231 | directory.to_string_lossy(), 232 | config_option.name 233 | ), 234 | _ => error!( 235 | "An IO error occurred while checking the directory \ 236 | [{}] specified in the configuration option [{}]. \ 237 | {}", 238 | directory.to_string_lossy(), 239 | config_option.name, 240 | error 241 | ), 242 | }; 243 | } 244 | } 245 | } 246 | 247 | /// Returns all directories configured in the given `AgentConfig` where 248 | /// write access is required. 249 | /// 250 | /// The directories of the certificate and key files are only returned 251 | /// if they do not already exist. 
252 | async fn directories_where_write_access_is_required( 253 | config: &AgentConfig, 254 | ) -> HashMap<&ConfigOption, PathBuf> { 255 | let mut dirs = HashMap::new(); 256 | dirs.insert( 257 | &AgentConfig::PACKAGE_DIR, 258 | config.parcel_directory.to_owned(), 259 | ); 260 | dirs.insert(&AgentConfig::CONFIG_DIR, config.config_directory.to_owned()); 261 | dirs.insert(&AgentConfig::LOG_DIR, config.log_directory.to_owned()); 262 | dirs.insert(&AgentConfig::DATA_DIR, config.data_directory.to_owned()); 263 | 264 | if !config.server_cert_file.is_file() { 265 | dirs.insert( 266 | &AgentConfig::SERVER_CERT_FILE, 267 | config.server_cert_file_dir().into(), 268 | ); 269 | } 270 | if !config.server_key_file.is_file() { 271 | dirs.insert( 272 | &AgentConfig::SERVER_KEY_FILE, 273 | config.server_key_file_dir().into(), 274 | ); 275 | } 276 | 277 | dirs 278 | } 279 | -------------------------------------------------------------------------------- /src/config/config_documentation/bootstrap_file.adoc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stackabletech/agent/2eeb42045fc9902be277fa5219130df1548068f7/src/config/config_documentation/bootstrap_file.adoc -------------------------------------------------------------------------------- /src/config/config_documentation/config_directory.adoc: -------------------------------------------------------------------------------- 1 | This directory will serve as starting point for all log files which this service creates. 2 | 3 | Every service will get its own subdirectories created within this directory - for every service start a 4 | new subdirectory will be created to show a full history of configuration that was used for this service. 
5 | 6 | ConfigMaps which are specified in the pod that describes this service will be created relative to these run 7 | directories - unless the mounts specify an absolute path, in which case it is allowed to break out of this directory. 8 | 9 | WARNING: This allows anybody who can specify pods more or less full access to the file system on the machine running the agent! 10 | 11 | The agent will need full access to this directory and tries to create it if it does not exist. -------------------------------------------------------------------------------- /src/config/config_documentation/data_directory.adoc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stackabletech/agent/2eeb42045fc9902be277fa5219130df1548068f7/src/config/config_documentation/data_directory.adoc -------------------------------------------------------------------------------- /src/config/config_documentation/hostname.adoc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stackabletech/agent/2eeb42045fc9902be277fa5219130df1548068f7/src/config/config_documentation/hostname.adoc -------------------------------------------------------------------------------- /src/config/config_documentation/log_directory.adoc: -------------------------------------------------------------------------------- 1 | This directory will serve as starting point for all log files which this service creates. 2 | Every service will get its own subdirectory created within this directory. 3 | Anything that is then specified in the log4j config or similar files will be resolved relative to this directory. 4 | 5 | The agent will need full access to this directory and tries to create it if it does not exist. 
-------------------------------------------------------------------------------- /src/config/config_documentation/no_config.adoc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stackabletech/agent/2eeb42045fc9902be277fa5219130df1548068f7/src/config/config_documentation/no_config.adoc -------------------------------------------------------------------------------- /src/config/config_documentation/package_directory.adoc: -------------------------------------------------------------------------------- 1 | This directory will serve as starting point for packages that are needed by pods assigned to this node. Packages will be downloaded into the "_download" folder at the top level of this folder as archives and remain there for potential future use. 2 | 3 | Archives will then be extracted directly into this folder in subdirectories following the naming 4 | scheme of "productname-productversion". 5 | 6 | The agent will need full access to this directory and tries to create it if it does not exist. -------------------------------------------------------------------------------- /src/config/config_documentation/plugin_directory.adoc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stackabletech/agent/2eeb42045fc9902be277fa5219130df1548068f7/src/config/config_documentation/plugin_directory.adoc -------------------------------------------------------------------------------- /src/config/config_documentation/pod_cidr.adoc: -------------------------------------------------------------------------------- 1 | This setting controls the pod address range that the agent reports to Kubernetes. 2 | The effect of this setting is that Kubernetes will reserve address blocks from within this range for every node. 
3 | Depending on the setting for maximum pods per node, these will be larger or smaller ranges, and influence the maximum number of nodes for the cluster. 4 | 5 | The agent does not require any pod address ranges, and by default doesn't specify anything for this setting. 6 | 7 | WARNING: There should almost never be a reason to use this setting, this is mostly here for very special circumstances. Do not touch it unless you really know what you're doing. -------------------------------------------------------------------------------- /src/config/config_documentation/server_cert_file.adoc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stackabletech/agent/2eeb42045fc9902be277fa5219130df1548068f7/src/config/config_documentation/server_cert_file.adoc -------------------------------------------------------------------------------- /src/config/config_documentation/server_ip_address.adoc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stackabletech/agent/2eeb42045fc9902be277fa5219130df1548068f7/src/config/config_documentation/server_ip_address.adoc -------------------------------------------------------------------------------- /src/config/config_documentation/server_key_file.adoc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stackabletech/agent/2eeb42045fc9902be277fa5219130df1548068f7/src/config/config_documentation/server_key_file.adoc -------------------------------------------------------------------------------- /src/config/config_documentation/server_port.adoc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stackabletech/agent/2eeb42045fc9902be277fa5219130df1548068f7/src/config/config_documentation/server_port.adoc -------------------------------------------------------------------------------- 
/src/config/config_documentation/session.adoc: -------------------------------------------------------------------------------- 1 | This parameter specifies whether to use a session or the system DBus connection when talking to systemd. 2 | For our purposes the difference between the two can be explained as the session bus being restricted to the current user, whereas the system bus rolls out services that are available for every user. 3 | In reality it is a bit more involved than that, please refer to the https://dbus.freedesktop.org/doc/dbus-specification.html[official docs] for more information. 4 | 5 | When this flag is specified it causes symlinks for loaded services to be created in the currently active user's systemd directory `~/.config/systemd/user` instead of one of the globally valid locations: 6 | 7 | - `/lib/systemd/system` 8 | - `/etc/systemd/system` 9 | 10 | The default is to use the system bus, for which it is necessary that the agent either run as root or have passwordless sudo rights. 11 | 12 | Using the session bus will mainly be useful for scenarios without root access and for testing on developer machines. -------------------------------------------------------------------------------- /src/config/config_documentation/tags.adoc: -------------------------------------------------------------------------------- 1 | A "key=value" pair that should be assigned to this agent as tag. This can be specified multiple times to assign additional tags. 2 | 3 | Tags are the main way of identifying nodes to assign services to later on. -------------------------------------------------------------------------------- /src/fsext.rs: -------------------------------------------------------------------------------- 1 | //! Filesystem manipulation operations. 2 | //! 3 | //! This module contains additional operations which are not present in 4 | //! `std::fs` and `std::os::$platform`. 
5 | 6 | use std::io; 7 | use std::path::{Component, Path, PathBuf}; 8 | 9 | use anyhow::{anyhow, Result}; 10 | use nix::{libc::O_TMPFILE, unistd}; 11 | use tokio::fs::OpenOptions; 12 | 13 | /// User identifier 14 | pub struct Uid(unistd::Uid); 15 | 16 | impl Uid { 17 | /// Gets a Uid by user name. 18 | /// 19 | /// If no user with the given `user_name` exists then `Ok(None)` is returned. 20 | /// 21 | /// # Errors 22 | /// 23 | /// If this function encounters any form of I/O or other error, an error 24 | /// variant will be returned. 25 | pub fn from_name(user_name: &str) -> Result> { 26 | match unistd::User::from_name(user_name) { 27 | Ok(maybe_user) => Ok(maybe_user.map(|user| Uid(user.uid))), 28 | Err(err) => Err(anyhow!("Could not retrieve user [{}]. {}", user_name, err)), 29 | } 30 | } 31 | } 32 | 33 | /// Changes the ownership of the file or directory at `path` to be owned by the 34 | /// given `uid`. 35 | /// 36 | /// # Errors 37 | /// 38 | /// If this function encounters any form of I/O or other error, an error 39 | /// variant will be returned. 40 | pub fn change_owner(path: &Path, uid: &Uid) -> Result<()> { 41 | Ok(unistd::chown(path, Some(uid.0), None)?) 42 | } 43 | 44 | /// Changes the ownership of the file or directory at `path` recursively to be 45 | /// owned by the given `uid`. 46 | /// 47 | /// # Errors 48 | /// 49 | /// If this function encounters any form of I/O or other error, an error 50 | /// variant will be returned. 51 | pub fn change_owner_recursively(root_path: &Path, uid: &Uid) -> Result<()> { 52 | visit_recursively(root_path, &|path| change_owner(path, uid)) 53 | } 54 | 55 | /// Calls the function `cb` on the given `path` and its contents recursively. 56 | fn visit_recursively(path: &Path, cb: &F) -> Result<()> 57 | where 58 | F: Fn(&Path) -> Result<()>, 59 | { 60 | cb(path)?; 61 | if path.is_dir() { 62 | for entry in path.read_dir()? 
{ 63 | visit_recursively(entry?.path().as_path(), cb)?; 64 | } 65 | } 66 | Ok(()) 67 | } 68 | 69 | /// Checks if the given directory exists and is writable by the current 70 | /// process. 71 | /// 72 | /// The check is performed by creating an unnamed temporary file in the 73 | /// given directory. The file will be automatically removed by the 74 | /// operating system when the last handle is closed. 75 | pub async fn check_dir_is_writable(directory: &Path) -> io::Result<()> { 76 | OpenOptions::new() 77 | .read(true) 78 | .write(true) 79 | .custom_flags(O_TMPFILE) 80 | .open(directory) 81 | .await 82 | .map(|_| ()) 83 | } 84 | 85 | /// Normalizes a path. 86 | /// 87 | /// In contrast to [`std::fs::canonicalize`] the path does not need to 88 | /// exist. 89 | /// 90 | /// # Examples 91 | /// 92 | /// ```rust 93 | /// # use stackable_agent::fsext::*; 94 | /// use std::path::Path; 95 | /// 96 | /// assert_eq!(Path::new("foo/bar"), normalize_path(Path::new("foo//bar"))); 97 | /// assert_eq!(Path::new("foo/bar"), normalize_path(Path::new("foo/./bar"))); 98 | /// assert_eq!(Path::new("foo/bar"), normalize_path(Path::new("foo/bar/."))); 99 | /// assert_eq!(Path::new("foo/../bar"), normalize_path(Path::new("foo/../bar"))); 100 | /// assert_eq!(Path::new("foo/bar/.."), normalize_path(Path::new("foo/bar/.."))); 101 | /// assert_eq!(Path::new("/foo"), normalize_path(Path::new("/foo"))); 102 | /// assert_eq!(Path::new("./foo"), normalize_path(Path::new("./foo"))); 103 | /// assert_eq!(Path::new("foo"), normalize_path(Path::new("foo/"))); 104 | /// assert_eq!(Path::new("foo"), normalize_path(Path::new("foo"))); 105 | /// assert_eq!(Path::new("/"), normalize_path(Path::new("/"))); 106 | /// assert_eq!(Path::new("."), normalize_path(Path::new("."))); 107 | /// assert_eq!(Path::new(".."), normalize_path(Path::new(".."))); 108 | /// assert_eq!(Path::new(""), normalize_path(Path::new(""))); 109 | /// ``` 110 | pub fn normalize_path(path: &Path) -> PathBuf { 111 | 
path.components().collect() 112 | } 113 | 114 | /// Returns true if the given path could reference a file. 115 | /// 116 | /// In contrast to [`std::path::Path::is_file`] the file does not need 117 | /// to exist. 118 | /// 119 | /// Use normalized paths to avoid confusing results. 120 | /// 121 | /// # Examples 122 | /// 123 | /// ```rust 124 | /// # use stackable_agent::fsext::*; 125 | /// use std::path::Path; 126 | /// 127 | /// assert!(is_valid_file_path(Path::new("foo/bar"))); 128 | /// assert!(is_valid_file_path(Path::new("foo/bar/"))); 129 | /// assert!(is_valid_file_path(Path::new("foo/bar/."))); 130 | /// 131 | /// assert!(!is_valid_file_path(Path::new("foo/bar/.."))); 132 | /// assert!(!is_valid_file_path(Path::new("/"))); 133 | /// assert!(!is_valid_file_path(Path::new("."))); 134 | /// assert!(!is_valid_file_path(Path::new(".."))); 135 | /// assert!(!is_valid_file_path(Path::new(""))); 136 | /// ``` 137 | pub fn is_valid_file_path(path: &Path) -> bool { 138 | matches!(path.components().last(), Some(Component::Normal(_))) 139 | } 140 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod config; 2 | pub mod fsext; 3 | pub mod provider; 4 | -------------------------------------------------------------------------------- /src/provider/cleanup.rs: -------------------------------------------------------------------------------- 1 | //! Initial cleanup 2 | //! 3 | //! On startup the systemd units in the `system-stackable` slice are compared to the pods assigned 4 | //! to this node. If a systemd unit is as expected then it is kept and the Stackable Agent will 5 | //! take ownership again in the `Starting` stage. If there is no corresponding pod or the systemd 6 | //! unit differs from the pod specification then it is removed and the Stackable Agent will create 7 | //! a new systemd unit in the `CreatingService` stage. 8 | //! 
//! The cleanup stage is implemented as part of the [`StackableProvider`] because the expected
//! content of a systemd unit file can only be determined with the directories configured in the
//! provider.
//!
//! The cleanup code resides in a separate module because the amount of code justifies it and the
//! log output is more meaningful. It makes it clearer whether a systemd unit is removed in the
//! cleanup stage or in the normal process.
use std::collections::HashMap;

use anyhow::Context;
use k8s_openapi::api::core::v1::Pod as KubePod;
use kube::api::{ListParams, Meta, ObjectList};
use kube::Api;
use kubelet::pod::Pod;
use kubelet::provider::Provider;
use log::{debug, error, info, warn};
use tokio::fs::{read_to_string, remove_file};

use super::systemdmanager::systemdunit::SystemDUnit;
use super::systemdmanager::systemdunit::STACKABLE_SLICE;
use super::StackableProvider;

impl StackableProvider {
    /// Removes systemd units without corresponding pods.
    ///
    /// The systemd units in the `system-stackable` slice are compared with the pods assigned to
    /// this node and all units without corresponding pods or which differ from the pod
    /// specifications are removed.
    pub async fn cleanup(&self, node_name: &str) {
        let systemd_manager = &self.shared.systemd_manager;

        // A daemon reload is required first so that systemd's view of the unit
        // files matches the file system before units are compared.
        if let Err(error) = systemd_manager.reload().await {
            error!(
                "Skipping the cleanup stage because the systemd daemon reload failed. {}",
                error
            );
            return;
        }

        let units_in_slice = match systemd_manager.slice_content(STACKABLE_SLICE).await {
            Ok(units_in_slice) => units_in_slice,
            Err(error) => {
                // Nothing to clean up; this is the normal case on a fresh node.
                debug!(
                    "Skipping the cleanup stage because no systemd units were found in the slice \
                    [{}]. {}",
                    STACKABLE_SLICE, error
                );
                return;
            }
        };

        let pods = match self.assigned_pods(node_name).await {
            Ok(pods) => pods.items,
            Err(error) => {
                // Deliberate fallback: with an empty pod list every unit in the
                // slice is considered stale and removed below.
                error!(
                    "The assigned pods could not be retrieved. All systemd units in the slice [{}] \
                    will be removed. {}",
                    STACKABLE_SLICE, error
                );
                Vec::new()
            }
        };

        // Build a map from expected unit name to (expected unit file content,
        // whether the owning pod is already terminating).
        let mut units_from_pods = HashMap::new();
        for pod in pods {
            let pod_terminating = pod.metadata.deletion_timestamp.is_some();

            match self.units_from_pod(&pod).await {
                Ok(units) => {
                    for (unit_name, content) in units {
                        units_from_pods.insert(unit_name, (content, pod_terminating));
                    }
                }
                Err(error) => warn!(
                    "Systemd units could not be generated for pod [{}/{}]. {}",
                    pod.namespace().unwrap_or_else(|| String::from("default")),
                    pod.name(),
                    error
                ),
            }
        }

        let mut unit_removed = false;

        // Decide for every unit currently in the slice whether it is kept or removed.
        for unit_name in &units_in_slice {
            let remove_unit = match units_from_pods.get(unit_name) {
                Some((expected_content, pod_terminating)) => {
                    match self.unit_file_content(unit_name).await {
                        // Unit matches its pod and the pod is alive: keep it.
                        Ok(Some(content)) if &content == expected_content && !pod_terminating => {
                            info!(
                                "The systemd unit [{}] will be kept because a corresponding pod \
                                exists.",
                                unit_name
                            );
                            false
                        }
                        Ok(Some(_)) if *pod_terminating => {
                            info!(
                                "The systemd unit [{}] will be removed because the corresponding \
                                pod is terminating.",
                                unit_name
                            );
                            true
                        }
                        Ok(Some(content)) => {
                            info!(
                                "The systemd unit [{}] will be removed because it differs from the \
                                corresponding pod specification.\n\
                                expected content:\n\
                                {}\n\n\
                                actual content:\n\
                                {}",
                                unit_name, expected_content, content
                            );
                            true
                        }
                        Ok(None) => {
                            info!(
                                "The systemd unit [{}] will be removed because its file path could \
                                not be determined.",
                                unit_name
                            );
                            true
                        }
                        Err(error) => {
                            warn!(
                                "The systemd unit [{}] will be removed because the file content \
                                could not be retrieved. {}",
                                unit_name, error
                            );
                            true
                        }
                    }
                }
                None => {
                    info!(
                        "The systemd unit [{}] will be removed because no corresponding pod \
                        exists.",
                        unit_name
                    );
                    true
                }
            };

            if remove_unit {
                self.remove_unit(unit_name).await;
                unit_removed = true;
            }
        }

        // A second daemon reload makes systemd forget the removed unit files;
        // a failure here is non-fatal, hence the ignored result.
        if unit_removed {
            let _ = systemd_manager.reload().await;
        }
    }

    /// Returns a list of all pods assigned to the given node.
    async fn assigned_pods(&self, node_name: &str) -> anyhow::Result<ObjectList<KubePod>> {
        let client = &self.shared.client;

        let api: Api<KubePod> = Api::all(client.to_owned());
        // Field selector restricts the cluster-wide list to this node's pods.
        let lp = ListParams::default().fields(&format!("spec.nodeName={}", node_name));
        api.list(&lp).await.with_context(|| {
            format!(
                "The pods assigned to this node (nodeName = [{}]) could not be retrieved.",
                node_name
            )
        })
    }

    /// Creates the systemd unit files for the given pod in memory.
    ///
    /// A mapping from systemd unit file names to the file content is returned.
181 | async fn units_from_pod(&self, kubepod: &KubePod) -> anyhow::Result> { 182 | let systemd_manager = &self.shared.systemd_manager; 183 | 184 | let mut units = HashMap::new(); 185 | let pod = Pod::from(kubepod.to_owned()); 186 | let pod_state = self.initialize_pod_state(&pod).await?; 187 | 188 | for container in pod.containers() { 189 | let unit = SystemDUnit::new( 190 | systemd_manager.is_user_mode(), 191 | &pod_state, 192 | &self.shared.kubeconfig_path, 193 | &pod, 194 | &container, 195 | )?; 196 | units.insert(unit.get_name(), unit.get_unit_file_content()); 197 | } 198 | 199 | Ok(units) 200 | } 201 | 202 | /// Returns the content of the given systemd unit file. 203 | async fn unit_file_content(&self, unit_name: &str) -> anyhow::Result> { 204 | let systemd_manager = &self.shared.systemd_manager; 205 | 206 | let file_path_result = systemd_manager 207 | .fragment_path(unit_name) 208 | .await 209 | .with_context(|| { 210 | format!( 211 | "The file path of the unit [{}] could not be determined.", 212 | unit_name 213 | ) 214 | }); 215 | 216 | match file_path_result { 217 | Ok(Some(file_path)) => { 218 | let file_content = read_to_string(&file_path) 219 | .await 220 | .with_context(|| format!("The file [{}] could not be read.", file_path))?; 221 | Ok(Some(file_content)) 222 | } 223 | Ok(None) => Ok(None), 224 | Err(error) => Err(error), 225 | } 226 | } 227 | 228 | /// Stops, disables and removes the given systemd unit. 
229 | async fn remove_unit(&self, unit_name: &str) { 230 | let systemd_manager = &self.shared.systemd_manager; 231 | 232 | if let Err(error) = systemd_manager.stop(unit_name).await { 233 | warn!("{}", error); 234 | } 235 | if let Err(error) = systemd_manager.disable(unit_name).await { 236 | warn!("{}", error); 237 | } 238 | if let Ok(Some(file_path)) = systemd_manager.fragment_path(unit_name).await { 239 | debug!("Removing file [{}].", file_path); 240 | if let Err(error) = remove_file(file_path).await { 241 | warn!("{}", error); 242 | } 243 | } 244 | } 245 | } 246 | -------------------------------------------------------------------------------- /src/provider/error.rs: -------------------------------------------------------------------------------- 1 | use handlebars::{RenderError, TemplateError}; 2 | use k8s_openapi::url; 3 | use thiserror::Error; 4 | 5 | use crate::provider::repository::package::Package; 6 | use reqwest::Url; 7 | use std::ffi::OsString; 8 | 9 | #[derive(Error, Debug)] 10 | pub enum StackableError { 11 | #[error(transparent)] 12 | Parse(#[from] url::ParseError), 13 | #[error(transparent)] 14 | Reqwest(#[from] reqwest::Error), 15 | #[error(transparent)] 16 | Io(#[from] std::io::Error), 17 | #[error("unable to create repository from received repo object")] 18 | RepositoryConversionError, 19 | #[error("Invalid content in pod object: {msg}")] 20 | PodValidationError { msg: String }, 21 | #[error("Kubernetes reported error: {source}")] 22 | KubeError { 23 | #[from] 24 | source: kube::Error, 25 | }, 26 | #[error("An error has ocurred when trying to download [{package}] from [{download_link}]: {errormessage}")] 27 | PackageDownloadError { 28 | package: Package, 29 | download_link: Url, 30 | errormessage: String, 31 | }, 32 | #[error(transparent)] 33 | TemplateRenderError(#[from] RenderError), 34 | #[error(transparent)] 35 | TemplateError(#[from] TemplateError), 36 | #[error("A required CRD has not been registered: {missing_crds:?}")] 37 | CrdMissing { 
missing_crds: Vec }, 38 | #[error("Package {package} not found in repository")] 39 | PackageNotFound { package: Package }, 40 | #[error("{msg}")] 41 | RuntimeError { msg: String }, 42 | #[error("Unable to parse data for {target} from non-UTF8 String: {original:?}")] 43 | DirectoryParseError { target: String, original: OsString }, 44 | #[error("An error ocurred trying to write Config Map {config_map} to file {target_file}")] 45 | ConfigFileWriteError { 46 | target_file: String, 47 | config_map: String, 48 | }, 49 | #[error( 50 | "The following config maps were specified in a pod but not found: {missing_config_maps:?}" 51 | )] 52 | MissingConfigMapsError { missing_config_maps: Vec }, 53 | #[error("Object is missing key: {key}")] 54 | MissingObjectKey { key: &'static str }, 55 | } 56 | -------------------------------------------------------------------------------- /src/provider/kubernetes/accessor.rs: -------------------------------------------------------------------------------- 1 | //! Accessor methods for Kubernetes resources 2 | 3 | use std::str::FromStr; 4 | 5 | use kubelet::pod::Pod; 6 | use strum::{Display, EnumString, EnumVariantNames}; 7 | 8 | /// Restart policy for all containers within the pod. 9 | #[derive(Clone, Debug, Display, EnumString, EnumVariantNames, Eq, PartialEq)] 10 | pub enum RestartPolicy { 11 | Always, 12 | OnFailure, 13 | Never, 14 | } 15 | 16 | impl Default for RestartPolicy { 17 | fn default() -> Self { 18 | RestartPolicy::Always 19 | } 20 | } 21 | 22 | /// Returns the restart policy for all containers within the pod. 
23 | pub fn restart_policy(pod: &Pod) -> RestartPolicy { 24 | pod.as_kube_pod() 25 | .spec 26 | .as_ref() 27 | .and_then(|spec| spec.restart_policy.as_ref()) 28 | .and_then(|restart_policy| RestartPolicy::from_str(restart_policy).ok()) 29 | .unwrap_or_default() 30 | } 31 | 32 | #[cfg(test)] 33 | mod test { 34 | use super::*; 35 | use crate::provider::test::TestPod; 36 | use rstest::rstest; 37 | 38 | #[rstest] 39 | #[case::restart_policy_onfailure( 40 | " 41 | apiVersion: v1 42 | kind: Pod 43 | metadata: 44 | name: test 45 | spec: 46 | containers: 47 | - name: test-container 48 | restartPolicy: OnFailure 49 | ", 50 | RestartPolicy::OnFailure 51 | )] 52 | #[case::restart_policy_default( 53 | " 54 | apiVersion: v1 55 | kind: Pod 56 | metadata: 57 | name: test 58 | spec: 59 | containers: 60 | - name: test-container 61 | ", 62 | RestartPolicy::Always 63 | )] 64 | fn should_return_specified_restart_policy_or_default( 65 | #[case] pod: TestPod, 66 | #[case] expected_restart_policy: RestartPolicy, 67 | ) { 68 | assert_eq!(expected_restart_policy, restart_policy(&pod)); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/provider/kubernetes/mod.rs: -------------------------------------------------------------------------------- 1 | //! Utility functions for Kubernetes 2 | 3 | pub mod accessor; 4 | pub mod status; 5 | -------------------------------------------------------------------------------- /src/provider/kubernetes/status.rs: -------------------------------------------------------------------------------- 1 | //! Functions for patching the pod status 2 | 3 | use anyhow::anyhow; 4 | use k8s_openapi::api::core::v1::Pod as KubePod; 5 | use kube::{ 6 | api::{Patch, PatchParams}, 7 | Api, Client, 8 | }; 9 | use kubelet::{ 10 | container::{ContainerKey, Status}, 11 | pod::Pod, 12 | }; 13 | use log::warn; 14 | 15 | /// Patches the pod status with the given container status. 
///
/// If the patching fails then a warning is logged.
pub async fn patch_container_status(
    client: &Client,
    pod: &Pod,
    container_key: &ContainerKey,
    status: &Status,
) {
    let api: Api<KubePod> = Api::namespaced(client.clone(), pod.namespace());

    // Delegate to kubelet's patch helper; failures are logged but not
    // propagated because status patching is best effort here.
    if let Err(error) =
        kubelet::container::patch_container_status(&api, pod, container_key, status).await
    {
        warn!(
            "Status of container [{}] in pod [{}] could not be patched. {}",
            container_key,
            pod.name(),
            error
        );
    }
}

/// Patches the restart count of a container.
///
/// # Errors
///
/// Fails if the container is not present in the pod status or if the API
/// server rejects the JSON patch.
pub async fn patch_restart_count(
    client: &Client,
    pod: &Pod,
    container_key: &ContainerKey,
    restart_count: u32,
) -> anyhow::Result<()> {
    let api: Api<KubePod> = Api::namespaced(client.clone(), pod.namespace());

    let index = pod
        .container_status_index(container_key)
        .ok_or_else(|| anyhow!("Container not found"))?;

    // Init containers live under `initContainerStatuses`, regular ones under
    // `containerStatuses`; the prefix selects the right status array.
    let container_type = if container_key.is_init() {
        "initContainer"
    } else {
        "container"
    };

    // JSON patch replacing exactly the restartCount field of this container.
    let patch = json_patch::Patch(vec![json_patch::PatchOperation::Replace(
        json_patch::ReplaceOperation {
            path: format!("/status/{}Statuses/{}/restartCount", container_type, index),
            value: restart_count.into(),
        },
    )]);

    api.patch_status(
        pod.name(),
        &PatchParams::default(),
        &Patch::<()>::Json(patch),
    )
    .await?;

    Ok(())
}
-------------------------------------------------------------------------------- /src/provider/mod.rs: --------------------------------------------------------------------------------
use std::collections::HashMap;
use std::convert::TryFrom;
use std::env;
use std::net::IpAddr;
use std::path::PathBuf;
use std::sync::Arc;

use anyhow::anyhow;
use dirs::home_dir;
use k8s_openapi::apiextensions_apiserver::pkg::apis::apiextensions::v1::CustomResourceDefinition;
use kube::error::ErrorResponse;
use kube::{Api, Client};
use kubelet::backoff::ExponentialBackoffStrategy;
use kubelet::container::{ContainerKey, ContainerMap};
use kubelet::log::{SendError, Sender};
use kubelet::node::Builder;
use kubelet::pod::state::prelude::*;
use kubelet::pod::{Pod, PodKey};
use kubelet::provider::Provider;
use log::{debug, error};
use tokio::{runtime::Runtime, sync::RwLock, task};

use crate::config::AgentConfig;
use crate::provider::error::StackableError;
use crate::provider::error::StackableError::{
    CrdMissing, KubeError, MissingObjectKey, PodValidationError,
};
use crate::provider::repository::package::Package;
use crate::provider::states::pod::PodState;
use crate::provider::systemdmanager::manager::SystemdManager;

use states::pod::{initializing::Initializing, terminated::Terminated};
use systemdmanager::journal_reader;
use systemdmanager::service::SystemdService;

/// Krustlet provider that runs Stackable packages as systemd services.
pub struct StackableProvider {
    // State shared with all pod state machines.
    shared: ProviderState,
    // Directory where packages (parcels) are installed.
    parcel_directory: PathBuf,
    // Directory where rendered configuration files are written.
    config_directory: PathBuf,
    // Directory where service logs are placed.
    log_directory: PathBuf,
    // CIDR announced for this node's pods.
    pod_cidr: String,
}

pub const CRDS: &[&str] = &["repositories.stable.stackable.de"];

pub mod cleanup;
mod error;
pub mod kubernetes;
mod repository;
mod states;
pub mod systemdmanager;

mod built_info {
    // The file has been placed there by the build script.
    include!(concat!(env!("OUT_DIR"), "/built.rs"));
}

/// Provider-level state shared between all pods
#[derive(Clone)]
pub struct ProviderState {
    handles: Arc<RwLock<PodHandleMap>>,
    client: Client,
    systemd_manager: Arc<SystemdManager>,
    server_ip_address: IpAddr,
    kubeconfig_path: PathBuf,
}

/// Contains handles for running pods.
69 | /// 70 | /// A `PodHandleMap` maps a pod key to a pod handle which in turn 71 | /// contains/is a map from a container key to a container handle. 72 | /// A container handle contains all necessary runtime information like the 73 | /// name of the service unit. 74 | /// 75 | /// The implementation of `PodHandleMap` contains functions to access the 76 | /// parts of this structure while preserving the invariants. 77 | #[derive(Debug, Default)] 78 | struct PodHandleMap { 79 | handles: HashMap, 80 | } 81 | 82 | impl PodHandleMap { 83 | /// Returns the pod handle for the given key or [`None`] if not found. 84 | pub fn get(&self, pod_key: &PodKey) -> Option<&PodHandle> { 85 | self.handles.get(pod_key) 86 | } 87 | 88 | /// Removes the pod handle with the given key and returns it. 89 | pub fn remove(&mut self, pod_key: &PodKey) -> Option { 90 | self.handles.remove(pod_key) 91 | } 92 | 93 | /// Inserts a new [`ContainerHandle`] for the given pod and container key. 94 | /// 95 | /// A pod handle is created if not already existent. 96 | pub fn insert_container_handle( 97 | &mut self, 98 | pod_key: &PodKey, 99 | container_key: &ContainerKey, 100 | container_handle: &ContainerHandle, 101 | ) { 102 | self.handles 103 | .entry(pod_key.to_owned()) 104 | .or_insert_with(ContainerMap::new) 105 | .insert(container_key.to_owned(), container_handle.to_owned()); 106 | } 107 | 108 | /// Returns a reference to the container handle with the given pod and 109 | /// container key or [`None`] if not found. 110 | pub fn container_handle( 111 | &self, 112 | pod_key: &PodKey, 113 | container_key: &ContainerKey, 114 | ) -> Option<&ContainerHandle> { 115 | self.handles 116 | .get(pod_key) 117 | .and_then(|pod_handle| pod_handle.get(container_key)) 118 | } 119 | } 120 | 121 | /// Represents a handle to a running pod. 122 | type PodHandle = ContainerMap; 123 | 124 | /// Represents a handle to a running container. 
#[derive(Clone, Debug)]
pub struct ContainerHandle {
    /// Contains the name of the corresponding service unit.
    /// Can be used as reference in [`crate::provider::systemdmanager::manager`].
    pub service_unit: String,

    /// Proxy for the systemd service
    pub systemd_service: SystemdService,
}

impl StackableProvider {
    /// Creates a new provider instance.
    ///
    /// Connects to systemd, locates the kubeconfig and verifies that all
    /// required CRDs are registered; fails with [`CrdMissing`] otherwise.
    pub async fn new(
        client: Client,
        agent_config: &AgentConfig,
        max_pods: u16,
    ) -> Result<Self, StackableError> {
        let systemd_manager = Arc::new(SystemdManager::new(agent_config.session, max_pods).await?);

        let kubeconfig_path = find_kubeconfig().ok_or_else(|| StackableError::RuntimeError {
            msg: String::from(
                "Kubeconfig file not found. If no kubeconfig is present then the Stackable Agent \
                 should have generated one.",
            ),
        })?;

        let provider_state = ProviderState {
            handles: Default::default(),
            client,
            systemd_manager,
            server_ip_address: agent_config.server_ip_address,
            kubeconfig_path,
        };

        let provider = StackableProvider {
            shared: provider_state,
            parcel_directory: agent_config.parcel_directory.to_owned(),
            config_directory: agent_config.config_directory.to_owned(),
            log_directory: agent_config.log_directory.to_owned(),
            pod_cidr: agent_config.pod_cidr.to_owned(),
        };
        let missing_crds = provider.check_crds().await?;
        return if missing_crds.is_empty() {
            debug!("All required CRDS present!");
            Ok(provider)
        } else {
            // NOTE(review): "CDRS" looks like a typo for "CRDS" in this log message.
            debug!("Missing required CDRS: [{:?}]", &missing_crds);
            Err(CrdMissing { missing_crds })
        };
    }

    /// Extracts the package reference from the single container of the pod.
    ///
    /// Exactly one container with an image of the form `product:version` is
    /// required; anything else yields a [`PodValidationError`].
    fn get_package(pod: &Pod) -> Result<Package, StackableError> {
        // `split_first` with an empty rest pattern enforces exactly one container.
        if let Some((container, [])) = pod.containers().split_first() {
            container
                .image()
                .and_then(|maybe_ref| maybe_ref.ok_or_else(|| anyhow!("Image is required.")))
                .and_then(Package::try_from)
                .map_err(|err| PodValidationError {
                    msg: format!(
                        "Unable to get package reference from pod [{}]: {}",
                        &pod.name(),
                        &err
                    ),
                })
        } else {
            Err(PodValidationError {
                msg: String::from("Only one container is supported in the PodSpec."),
            })
        }
    }

    /// Returns the names of all CRDs from [`CRDS`] that are not registered.
    async fn check_crds(&self) -> Result<Vec<String>, StackableError> {
        let mut missing_crds = vec![];
        let crds: Api<CustomResourceDefinition> = Api::all(self.shared.client.clone());

        // Check all CRDS
        for crd in CRDS.iter() {
            debug!("Checking if CRD [{}] is registered", crd);
            match crds.get(crd).await {
                Err(kube::error::Error::Api(ErrorResponse { reason, .. }))
                    if reason == "NotFound" =>
                {
                    error!("Missing required CRD: [{}]", crd);
                    missing_crds.push(String::from(*crd))
                }
                Err(e) => {
                    // Any other API error is fatal for the check.
                    error!(
                        "An error ocurred when checking if CRD [{}] is registered: \"{}\"",
                        crd, e
                    );
                    return Err(KubeError { source: e });
                }
                _ => debug!("Found registered crd: [{}]", crd),
            }
        }
        Ok(missing_crds)
    }
}

/// Tries to find the kubeconfig file in the environment variable `KUBECONFIG` and on the path
/// `$HOME/.kube/config`
fn find_kubeconfig() -> Option<PathBuf> {
    let env_var = env::var_os("KUBECONFIG").map(PathBuf::from);
    let default_path = || home_dir().map(|home| home.join(".kube").join("config"));

    // The environment variable wins; either candidate must actually exist.
    env_var.or_else(default_path).filter(|path| path.exists())
}

#[async_trait::async_trait]
impl Provider for StackableProvider {
    type ProviderState = ProviderState;
    type PodState = PodState;
    type InitialState = Initializing;
    type TerminatedState = Terminated;

    const ARCH: &'static str = "stackable-linux";

    fn provider_state(&self) -> SharedState<ProviderState> {
        Arc::new(RwLock::new(self.shared.clone()))
    }

    /// Registers node properties and taints so that only Stackable workloads
    /// are scheduled onto this virtual kubelet.
    async fn node(&self, builder: &mut Builder) -> anyhow::Result<()> {
        builder.set_architecture(Self::ARCH);
        builder.set_pod_cidr(&self.pod_cidr);
        builder.set_kubelet_version(built_info::PKG_VERSION);
        builder.add_taint("NoSchedule", "kubernetes.io/arch", Self::ARCH);
        builder.add_taint("NoExecute", "kubernetes.io/arch", Self::ARCH);
        Ok(())
    }

    /// Builds the initial [`PodState`] for the pod's state machine.
    async fn initialize_pod_state(&self, pod: &Pod) -> anyhow::Result<Self::PodState> {
        let service_name = format!("{}-{}", pod.namespace(), pod.name());

        // Extract uid from pod object, if this fails we return an error -
        // this should not happen, as all objects we get from Kubernetes should have
        // a uid set!
        let service_uid = if let Some(uid) = pod.as_kube_pod().metadata.uid.as_ref() {
            uid.to_string()
        } else {
            return Err(anyhow::Error::new(MissingObjectKey {
                key: ".metadata.uid",
            }));
        };
        let parcel_directory = self.parcel_directory.clone();
        // TODO: make this configurable
        let download_directory = parcel_directory.join("_download");
        let log_directory = self.log_directory.clone();

        let package = Self::get_package(pod)?;

        Ok(PodState {
            parcel_directory,
            download_directory,
            log_directory,
            config_directory: self.config_directory.clone(),
            package_download_backoff_strategy: ExponentialBackoffStrategy::default(),
            service_name,
            service_uid,
            package,
        })
    }

    /// Streams journald log messages of the container's service unit to the
    /// given sender.
    async fn logs(
        &self,
        namespace: String,
        pod: String,
        container: String,
        mut sender: Sender,
    ) -> anyhow::Result<()> {
        let pod_key = PodKey::new(&namespace, &pod);
        let container_key = ContainerKey::App(container);

        debug!(
            "Logs for pod [{:?}] and container [{:?}] requested",
            pod_key, container_key
        );

        // Clone the handle inside a short-lived read lock so the lock is not
        // held across the journal reading below.
        let maybe_container_handle = {
            let handles = self.shared.handles.read().await;
            handles
                .container_handle(&pod_key, &container_key)
                .map(ContainerHandle::to_owned)
        };

        let container_handle = maybe_container_handle.ok_or_else(|| {
            anyhow!(
                "Container handle for pod [{:?}] and container [{:?}] not found",
                pod_key,
                container_key
            )
        })?;

        if let Ok(invocation_id) = container_handle.systemd_service.invocation_id().await {
            // Journal reading is blocking, so it runs on the blocking thread
            // pool with its own small runtime for the async sender.
            task::spawn_blocking(move || {
                let result = Runtime::new()
                    .unwrap()
                    .block_on(journal_reader::send_messages(&mut sender, &invocation_id));

                if let Err(error) = result {
                    match error.downcast_ref::<SendError>() {
                        // A closed channel just means the client went away.
                        Some(SendError::ChannelClosed) => (),
                        _ => error!("Log could not be sent. {}", error),
                    }
                }
            });
        } else {
            debug!(
                "Logs for pod [{:?}] and container [{:?}] cannot be sent \
                 because the invocation ID is not available.",
                pod_key, container_key
            );
        }

        Ok(())
    }
}

#[cfg(test)]
pub mod test {
    use super::*;
    use rstest::rstest;
    use std::ops::Deref;
    use std::str::FromStr;

    #[test]
    fn try_to_get_package_from_complete_configuration() {
        let pod = "
            apiVersion: v1
            kind: Pod
            metadata:
              name: test
            spec:
              containers:
                - name: kafka
                  image: kafka:2.7
        "
        .parse::<TestPod>()
        .unwrap();

        let maybe_package = StackableProvider::get_package(&pod);

        if let Ok(package) = maybe_package {
            assert_eq!("kafka", package.product);
            assert_eq!("2.7", package.version);
        } else {
            panic!("Package expected but got {:?}", maybe_package);
        }
    }

    #[rstest]
    #[case(
        "
        apiVersion: v1
        kind: Pod
        metadata:
          name: test
        spec:
          containers:
            - name: kafka
              image: kafka:2.7
            - name: zookeeper
              image: zookeeper:3.6.2
        ",
        "Only one container is supported in the PodSpec."
    )]
    #[case(
        "
        apiVersion: v1
        kind: Pod
        metadata:
          name: test
        spec:
          containers:
            - name: kafka
        ",
        "Unable to get package reference from pod [test]: Image is required."
    )]
    #[case(
        "
        apiVersion: v1
        kind: Pod
        metadata:
          name: test
        spec:
          containers:
            - name: kafka
              image: kafka
        ",
        "Unable to get package reference from pod [test]: Tag is required."
    )]
    fn try_to_get_package_from_insufficient_configuration(
        #[case] pod: TestPod,
        #[case] expected_err: &str,
    ) {
        let maybe_package = StackableProvider::get_package(&pod);

        if let Err(PodValidationError { msg }) = maybe_package {
            assert_eq!(expected_err, msg);
        } else {
            panic!("PodValidationError expected but got {:?}", maybe_package);
        }
    }

    /// Encapsulates a [`Pod`] with implementations for [`FromStr`] to
    /// deserialize from YAML and [`Deref`] to dereference into a [`Pod`].
    ///
    /// This struct can also be used in rstest cases.
431 | /// 432 | /// # Example 433 | /// 434 | /// ```rust 435 | /// #[rstest] 436 | /// #[case(" 437 | /// apiVersion: v1 438 | /// kind: Pod 439 | /// metadata: 440 | /// name: test 441 | /// spec: 442 | /// containers: 443 | /// - name: kafka 444 | /// image: kafka 445 | /// ")] 446 | /// fn test(#[case] pod: TestPod) { 447 | /// do_with_pod(&pod); 448 | /// } 449 | /// ``` 450 | #[derive(Debug)] 451 | pub struct TestPod(Pod); 452 | 453 | impl FromStr for TestPod { 454 | type Err = serde_yaml::Error; 455 | 456 | fn from_str(s: &str) -> Result { 457 | let kube_pod: k8s_openapi::api::core::v1::Pod = serde_yaml::from_str(s)?; 458 | Ok(TestPod(Pod::from(kube_pod))) 459 | } 460 | } 461 | 462 | impl Deref for TestPod { 463 | type Target = Pod; 464 | 465 | fn deref(&self) -> &Self::Target { 466 | &self.0 467 | } 468 | } 469 | } 470 | -------------------------------------------------------------------------------- /src/provider/repository/mod.rs: -------------------------------------------------------------------------------- 1 | //! Functions to deal with Stackable repositories 2 | 3 | use kube::api::{ListParams, ObjectList}; 4 | use kube::{Api, Client}; 5 | use log::{debug, info, warn}; 6 | use std::convert::TryFrom; 7 | 8 | use crate::provider::error::StackableError; 9 | use package::Package; 10 | use repository_spec::Repository; 11 | use stackablerepository::StackableRepoProvider; 12 | 13 | pub mod package; 14 | pub mod repository_spec; 15 | pub mod stackablerepository; 16 | 17 | /// Searches for the given package in all registered repositories. 18 | /// 19 | /// The available repositories are retrieved from the API server and if 20 | /// the given package is provided by one of them then 21 | /// `Ok(Some(repository))` else `Ok(None)` is returned. 22 | /// 23 | /// If the repositories cannot be retrieved then `Err(error)` is 24 | /// returned. 
25 | /// 26 | /// The repositories are sorted by their name to provide a deterministic 27 | /// behavior especially for tests. 28 | pub async fn find_repository( 29 | client: Client, 30 | package: &Package, 31 | ) -> Result, StackableError> { 32 | let repositories = retrieve_repositories(client).await?; 33 | 34 | let mut repo_providers = repositories 35 | .iter() 36 | .filter_map(convert_to_repo_provider) 37 | .collect::>(); 38 | 39 | repo_providers.sort_unstable_by_key(|repo_provider| repo_provider.name.to_owned()); 40 | 41 | let maybe_repo_provider = choose_repo_provider(&mut repo_providers, package).await; 42 | 43 | if let Some(repo_provider) = &maybe_repo_provider { 44 | debug!( 45 | "Package [{}] found in repository [{}]", 46 | &package, &repo_provider 47 | ); 48 | } else { 49 | let repository_names = repo_providers 50 | .iter() 51 | .map(|repo_provider| repo_provider.name.as_str()) 52 | .collect::>(); 53 | info!( 54 | "Package [{}] not found in the following repositories: {:?}", 55 | package, repository_names 56 | ); 57 | } 58 | 59 | Ok(maybe_repo_provider) 60 | } 61 | 62 | /// Retrieves all Stackable repositories in the default namespace from 63 | /// the API server. 64 | async fn retrieve_repositories(client: Client) -> Result, StackableError> { 65 | let api: Api = Api::namespaced(client, "default"); 66 | let repositories = api.list(&ListParams::default()).await?; 67 | Ok(repositories) 68 | } 69 | 70 | /// Converts the given Stackable repository into a repository provider. 71 | /// 72 | /// If this fails then a warning is emitted and `None` is returned. 
73 | fn convert_to_repo_provider(repository: &Repository) -> Option { 74 | let result = StackableRepoProvider::try_from(repository); 75 | 76 | if let Err(error) = &result { 77 | warn!("Invalid repository definition: {}", error); 78 | } 79 | 80 | result.ok() 81 | } 82 | 83 | /// Retrieves the provided packages for the given repository providers 84 | /// and returns the first provider which provides the given package or 85 | /// `None` if none provides it. 86 | async fn choose_repo_provider( 87 | repo_providers: &mut [StackableRepoProvider], 88 | package: &Package, 89 | ) -> Option { 90 | for repo_provider in repo_providers { 91 | if let Ok(true) = repo_provider.provides_package(package.to_owned()).await { 92 | return Some(repo_provider.to_owned()); 93 | } 94 | } 95 | None 96 | } 97 | -------------------------------------------------------------------------------- /src/provider/repository/package.rs: -------------------------------------------------------------------------------- 1 | use std::convert::TryFrom; 2 | use std::fmt; 3 | 4 | use anyhow::{anyhow, Result}; 5 | use oci_distribution::Reference; 6 | use serde::{Deserialize, Serialize}; 7 | 8 | #[derive(Serialize, Deserialize, Debug, Clone)] 9 | pub struct Package { 10 | pub product: String, 11 | pub version: String, 12 | } 13 | 14 | impl Package { 15 | /// Derive a standardized archive name to use when downloading this package into the 16 | /// _download folder. 17 | /// This helps with not downloading the same version of a product twice simply due to 18 | /// different archive names. 19 | /// Currently this assumes all archives to be in .tar.gz format, we might revisit this at 20 | /// a later stage. 21 | pub fn get_file_name(&self) -> String { 22 | format!("{}.tar.gz", self.get_directory_name()) 23 | } 24 | 25 | /// Derive a standardized name for the folder that this package should be installed to. 26 | /// This helps avoiding duplicate binary installations due to different folder names. 
    pub fn get_directory_name(&self) -> String {
        format!("{}-{}", self.product, self.version)
    }
}

impl TryFrom<Reference> for Package {
    type Error = anyhow::Error;

    /// Converts from an OCI reference to a package representation.
    ///
    /// The OCI tag (anything after the ":" in the string) is used as
    /// version by this code and needs to be present.
    fn try_from(value: Reference) -> Result<Self> {
        let repository = value.repository();
        let tag = value.tag().ok_or_else(|| anyhow!("Tag is required."))?;

        Ok(Package {
            product: String::from(repository),
            version: String::from(tag),
        })
    }
}

impl fmt::Display for Package {
    // Renders as "product:version", mirroring the OCI reference it came from.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}:{}", self.product, self.version)
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn try_from_complete_reference() {
        let reference = Reference::try_from("kafka:2.7").expect("Reference cannot be parsed.");

        let maybe_package = Package::try_from(reference);

        if let Ok(package) = maybe_package {
            assert_eq!("kafka", package.product);
            assert_eq!("2.7", package.version);
        } else {
            panic!("Package expected but got {:?}", maybe_package);
        }
    }

    #[test]
    fn try_from_reference_without_tag() {
        let reference = Reference::try_from("kafka").expect("Reference cannot be parsed.");

        let maybe_package = Package::try_from(reference);

        if let Err(error) = maybe_package {
            assert_eq!("Tag is required.", error.to_string());
        } else {
            panic!("Error expected but got {:?}", maybe_package);
        }
    }
}
-------------------------------------------------------------------------------- /src/provider/repository/repository_spec.rs: --------------------------------------------------------------------------------
use std::collections::HashMap;

use kube::CustomResource;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};

/// Spec of the `Repository` custom resource
/// (`repositories.stable.stackable.de`, version `v1`, namespaced).
#[derive(CustomResource, Serialize, Deserialize, Default, Clone, Debug, JsonSchema)]
#[kube(
    kind = "Repository",
    group = "stable.stackable.de",
    version = "v1",
    namespaced
)]
pub struct RepositorySpec {
    // Kind of repository backend; currently only Stackable repositories exist.
    pub repo_type: RepoType,
    // Backend-specific settings, e.g. the base URL of the repository.
    pub properties: HashMap<String, String>,
}

#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)]
pub enum RepoType {
    StackableRepo,
}

impl Default for RepoType {
    fn default() -> Self {
        RepoType::StackableRepo
    }
}
-------------------------------------------------------------------------------- /src/provider/repository/stackablerepository.rs: --------------------------------------------------------------------------------
use std::collections::HashMap;
use std::convert::TryFrom;
use std::fmt;
use std::fs::File;
use std::hash::{Hash, Hasher};
use std::io::{copy, Cursor, Write};
use std::path::PathBuf;

use crate::provider::error::StackableError;
use crate::provider::error::StackableError::{PackageDownloadError, PackageNotFound};
use crate::provider::repository::package::Package;
use crate::provider::repository::repository_spec::Repository;
use kube::api::Meta;
use log::{debug, trace, warn};
use reqwest::header::{ACCEPT, CONTENT_TYPE};
use reqwest::{Client, StatusCode};
use serde::{Deserialize, Serialize};
use url::Url;

// These are the default content_types that we have seen in the wild
// of these only 'application/gzip' is valid according to
// https://www.iana.org/assignments/media-types/media-types.xhtml but our own
// Nexus uses the other two, so we cannot really complain
const DEFAULT_ALLOWED_CONTENT_TYPES: &[&str] = &[
    "application/gzip",
    "application/tgz",
    "application/x-gzip",
    "application/x-tgz",
];
| #[derive(Debug, Clone)] 32 | pub struct StackableRepoProvider { 33 | metadata_url: Url, 34 | pub name: String, 35 | content: Option, 36 | } 37 | 38 | #[derive(Serialize, Deserialize, Debug)] 39 | struct RepoData { 40 | version: String, 41 | packages: HashMap>, 42 | } 43 | 44 | #[derive(Serialize, Deserialize, Debug)] 45 | struct Product { 46 | version: String, 47 | path: String, 48 | hashes: HashMap, 49 | } 50 | 51 | #[derive(Serialize, Deserialize, Debug, Clone)] 52 | struct RepositoryContent { 53 | pub version: String, 54 | pub packages: HashMap>, 55 | } 56 | 57 | #[derive(Serialize, Deserialize, Debug, Clone)] 58 | struct StackablePackage { 59 | pub product: String, 60 | pub version: String, 61 | pub link: String, 62 | pub hashes: HashMap, 63 | } 64 | 65 | impl StackableRepoProvider { 66 | pub fn new(name: &str, base_url: &Url) -> Result { 67 | let mut metadata_url = base_url.to_owned(); 68 | 69 | metadata_url 70 | .path_segments_mut() 71 | .map_err(|_| StackableError::RepositoryConversionError)? 
72 | .pop_if_empty() 73 | .push("metadata.json"); 74 | 75 | Ok(StackableRepoProvider { 76 | metadata_url, 77 | name: String::from(name), 78 | content: None, 79 | }) 80 | } 81 | 82 | pub async fn provides_package>( 83 | &mut self, 84 | package: T, 85 | ) -> Result { 86 | debug!( 87 | "Starting metadata refresh for repository of type {} at location {}", 88 | "StackableRepo", self.metadata_url 89 | ); 90 | let package = package.into(); 91 | let metadata = self.get_repo_metadata().await?; 92 | debug!("Repository provides the following products: {:?}", metadata); 93 | if let Some(product) = metadata.packages.get(&package.product) { 94 | return Ok(product.contains_key(&package.version)); 95 | } 96 | Ok(false) 97 | } 98 | 99 | async fn get_package(&mut self, package: Package) -> Result { 100 | if self.content.is_none() { 101 | self.get_repo_metadata().await?; 102 | } 103 | if let Some(content) = &self.content { 104 | let parcels = &content.packages; 105 | if let Some(product) = parcels.get(&package.product) { 106 | // product exists in repo 107 | if let Some(version) = product.get(&package.version) { 108 | // found our package 109 | return Ok(version.clone()); 110 | } 111 | }; 112 | } 113 | Err(PackageNotFound { package }) 114 | } 115 | 116 | pub async fn download_package( 117 | &mut self, 118 | package: &Package, 119 | target_path: PathBuf, 120 | ) -> Result<(), StackableError> { 121 | if self.content.is_none() { 122 | let _content = self.get_repo_metadata(); 123 | } 124 | 125 | let stackable_package = self.get_package(package.clone()).await?; 126 | let download_link = Url::parse(&stackable_package.link)?; 127 | 128 | let client = Client::builder() 129 | .build() 130 | .map_err(|error| PackageDownloadError { 131 | package: package.clone(), 132 | download_link: download_link.clone(), 133 | errormessage: format!("Unable to create http client: [{}]", error), 134 | })?; 135 | 136 | // We set the ACCEPT header field on our request which states that the only content type 137 | 
// we are willing to accept is 'application/gzip' 138 | // If the webserver is unable to provide this content type to us it _SHOULD_ respond with a 139 | // 406 response code, but it seems we can't rely on that. 140 | // For more details see: https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1 141 | let response = match client 142 | .get(download_link.clone()) 143 | .header(ACCEPT, "application/gzip") 144 | .send() 145 | .await 146 | { 147 | Ok(response) if response.status().is_success() => { 148 | // The request was successful, but just to be safe we'll still check the content_type, 149 | // since the webserver is free to ignore the requested content_type 150 | if let Some(content_type) = response.headers().get(CONTENT_TYPE) { 151 | let content_type = content_type.to_str().map_err(|error| PackageDownloadError { 152 | package: package.clone(), 153 | download_link: download_link.clone(), 154 | errormessage: format!("Got content_type with non-ascii characters from webserver: [{}]", error), 155 | })?; 156 | 157 | if DEFAULT_ALLOWED_CONTENT_TYPES.contains(&content_type) { 158 | Ok(response) 159 | } else { 160 | // If we get a known wrong content type we'll abort 161 | Err(PackageDownloadError { 162 | package: package.clone(), 163 | download_link, 164 | errormessage: format!( 165 | "Got wrong 'content_type' header [{:?}] in response from webserver.", 166 | content_type 167 | ), 168 | }) 169 | } 170 | } else { 171 | // If we get no content_type (not sure if this is even legal) we'll soldier on and hope for the best 172 | debug!("Response had no 'content_type' header set, we'll give the sender the benefit of the doubt and try processing this anyway."); 173 | Ok(response) 174 | } 175 | } 176 | Ok(response) if response.status() == StatusCode::NOT_ACCEPTABLE => 177 | Err(PackageDownloadError { 178 | package: package.clone(), 179 | download_link, 180 | errormessage: "Got response code 406 from webserver - Unable to negotiate content type, this is probably due to 
content encoding settings on the webserver.".to_string(), 181 | }), 182 | Ok(response) => Err(PackageDownloadError { 183 | package: package.clone(), 184 | download_link, 185 | errormessage: format!( 186 | "Got non-success response [{}] from webserver!", 187 | response.status() 188 | ), 189 | }), 190 | Err(error) => Err(PackageDownloadError { 191 | package: package.clone(), 192 | download_link, 193 | errormessage: format!("{}", error), 194 | }), 195 | }?; 196 | 197 | // All error cases return above, so we can safely assume that this is a valid download at 198 | // this point 199 | let mut content = Cursor::new(response.bytes().await?); 200 | 201 | let mut out = File::create(target_path.join(package.get_file_name()))?; 202 | copy(&mut content, &mut out)?; 203 | out.flush()?; 204 | Ok(()) 205 | } 206 | 207 | async fn get_repo_metadata(&mut self) -> Result { 208 | trace!("entering get_repo_metadata"); 209 | 210 | debug!("Retrieving repository metadata from {}", self.metadata_url); 211 | 212 | let repo_data = match reqwest::get(self.metadata_url.clone()).await { 213 | Ok(repo_data) => repo_data, 214 | Err(error) => { 215 | warn!( 216 | "Failed to retrieve metadata from {} due to {:?}", 217 | self.metadata_url, error 218 | ); 219 | return Err(error.into()); 220 | } 221 | }; 222 | let repo_data = match repo_data.json::().await { 223 | Ok(parsed_data) => parsed_data, 224 | Err(error) => { 225 | warn!( 226 | "Error parsing metadata from repository {}: {:?}", 227 | self.name, error 228 | ); 229 | return Err(error.into()); 230 | } 231 | }; 232 | 233 | debug!("Got repository metadata: {:?}", repo_data); 234 | 235 | let mut packages: HashMap> = HashMap::new(); 236 | for (product, versions) in repo_data.packages { 237 | let mut versionlist = HashMap::new(); 238 | for version in versions { 239 | versionlist.insert( 240 | version.version.clone(), 241 | StackablePackage { 242 | product: product.clone(), 243 | version: version.version, 244 | link: 
self.resolve_url(version.path.clone())?, 245 | hashes: version.hashes.clone(), 246 | }, 247 | ); 248 | } 249 | packages.insert(product, versionlist); 250 | } 251 | let repo_content: RepositoryContent = RepositoryContent { 252 | version: repo_data.version, 253 | packages, 254 | }; 255 | self.content = Some(repo_content.clone()); 256 | Ok(repo_content) 257 | } 258 | 259 | /// Resolves relative paths that are defined for elements in this repository against 260 | /// the repo's base URL. 261 | /// Unless the element has an absolute URL defined, in this case the base URL is ignored 262 | /// an the absolute URL returned unchanged. 263 | /// 264 | /// Public for testing 265 | pub fn resolve_url(&self, path: String) -> Result { 266 | if Url::parse(&path).is_ok() { 267 | // The URL defined for this element is an absolute URL, so we won't 268 | // resolve that agains the base url of the repository but simply 269 | // return it unchanged 270 | return Ok(path); 271 | } 272 | let resolved_path = self.metadata_url.join(&path)?; 273 | Ok(resolved_path.as_str().to_string()) 274 | } 275 | } 276 | 277 | impl fmt::Display for StackableRepoProvider { 278 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 279 | write!(f, "{}", self.name) 280 | } 281 | } 282 | 283 | impl TryFrom<&Repository> for StackableRepoProvider { 284 | type Error = StackableError; 285 | 286 | fn try_from(value: &Repository) -> Result { 287 | let name = Meta::name(value); 288 | 289 | let base_url = value 290 | .spec 291 | .properties 292 | .get("url") 293 | .and_then(|url| Url::parse(url).ok()) 294 | .ok_or(StackableError::RepositoryConversionError)?; 295 | 296 | let stackable_repo_provider = StackableRepoProvider::new(&name, &base_url)?; 297 | 298 | Ok(stackable_repo_provider) 299 | } 300 | } 301 | 302 | impl Eq for StackableRepoProvider {} 303 | 304 | impl PartialEq for StackableRepoProvider { 305 | fn eq(&self, other: &Self) -> bool { 306 | self.name.eq(&other.name) 307 | } 308 | } 309 | 310 | impl 
Hash for StackableRepoProvider { 311 | fn hash(&self, state: &mut H) { 312 | self.name.hash(state); 313 | } 314 | } 315 | 316 | #[cfg(test)] 317 | mod tests { 318 | use super::*; 319 | 320 | use crate::provider::repository::repository_spec::RepositorySpec; 321 | 322 | #[test] 323 | fn stackable_repo_provider_should_be_created_from_a_valid_url_with_a_trailing_slash() { 324 | let actual = 325 | StackableRepoProvider::new("test", &Url::parse("http://localhost:8000/repo/").unwrap()) 326 | .unwrap(); 327 | 328 | assert_eq!( 329 | Url::parse("http://localhost:8000/repo/metadata.json").unwrap(), 330 | actual.metadata_url 331 | ); 332 | assert_eq!(String::from("test"), actual.name); 333 | assert!(actual.content.is_none()); 334 | } 335 | 336 | #[test] 337 | fn stackable_repo_provider_should_be_created_from_a_valid_url_without_a_trailing_slash() { 338 | let actual = 339 | StackableRepoProvider::new("test", &Url::parse("http://localhost:8000/repo").unwrap()) 340 | .unwrap(); 341 | 342 | assert_eq!( 343 | Url::parse("http://localhost:8000/repo/metadata.json").unwrap(), 344 | actual.metadata_url 345 | ); 346 | assert_eq!(String::from("test"), actual.name); 347 | assert!(actual.content.is_none()); 348 | } 349 | 350 | #[test] 351 | fn stackable_repo_provider_should_not_be_created_from_an_url_which_cannot_be_a_base() { 352 | assert!(StackableRepoProvider::new( 353 | "test", 354 | &Url::parse("mailto:info@stackable.de").unwrap() 355 | ) 356 | .is_err()); 357 | } 358 | 359 | #[test] 360 | fn test_url_functions() { 361 | let repo = 362 | StackableRepoProvider::new("test", &Url::parse("http://localhost:8000").unwrap()) 363 | .unwrap(); 364 | 365 | // Check that a relative URL is correctly resolved against the repo's baseurl 366 | assert_eq!( 367 | repo.resolve_url(String::from("test")).unwrap(), 368 | "http://localhost:8000/test" 369 | ); 370 | 371 | // Test that an absolute URL is correctly returned without change 372 | assert_eq!( 373 | 
repo.resolve_url(String::from("http://test.com/test")) 374 | .unwrap(), 375 | "http://test.com/test" 376 | ); 377 | } 378 | 379 | #[test] 380 | fn test_repository_try_from() { 381 | let mut props = HashMap::new(); 382 | props.insert( 383 | String::from("url"), 384 | String::from("http://monitoring.stackable.demo:8000"), 385 | ); 386 | let test_repo_crd = Repository::new( 387 | "test", 388 | RepositorySpec { 389 | repo_type: Default::default(), 390 | properties: props, 391 | }, 392 | ); 393 | let converted_repo = StackableRepoProvider::try_from(&test_repo_crd).unwrap(); 394 | assert_eq!(converted_repo.name, "test"); 395 | assert_eq!( 396 | converted_repo.metadata_url.as_str(), 397 | "http://monitoring.stackable.demo:8000/metadata.json" 398 | ); 399 | } 400 | } 401 | -------------------------------------------------------------------------------- /src/provider/states.rs: -------------------------------------------------------------------------------- 1 | pub(crate) mod pod; 2 | 3 | /// When called in a state's `next` function, exits the state machine 4 | /// returns a fatal error to the kubelet. 5 | #[macro_export] 6 | macro_rules! 
fail_fatal { 7 | ($err:ident) => {{ 8 | let aerr = anyhow::Error::from($err); 9 | log::error!("{:?}", aerr); 10 | return Transition::Complete(Err(aerr)); 11 | }}; 12 | } 13 | -------------------------------------------------------------------------------- /src/provider/states/pod.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | 3 | use kubelet::backoff::ExponentialBackoffStrategy; 4 | use kubelet::pod::state::prelude::*; 5 | use kubelet::pod::{Pod, Status}; 6 | 7 | use crate::provider::repository::package::Package; 8 | use crate::provider::ProviderState; 9 | 10 | pub(crate) mod creating_config; 11 | pub(crate) mod creating_service; 12 | pub(crate) mod downloading; 13 | pub(crate) mod downloading_backoff; 14 | pub(crate) mod initializing; 15 | pub(crate) mod installing; 16 | pub(crate) mod running; 17 | pub(crate) mod setup_failed; 18 | pub(crate) mod starting; 19 | pub(crate) mod terminated; 20 | pub(crate) mod waiting_config_map; 21 | 22 | pub struct PodState { 23 | pub parcel_directory: PathBuf, 24 | pub download_directory: PathBuf, 25 | pub config_directory: PathBuf, 26 | pub log_directory: PathBuf, 27 | pub package_download_backoff_strategy: ExponentialBackoffStrategy, 28 | pub service_name: String, 29 | pub service_uid: String, 30 | pub package: Package, 31 | } 32 | 33 | impl PodState { 34 | pub fn get_service_config_directory(&self) -> PathBuf { 35 | self.config_directory 36 | .join(format!("{}-{}", &self.service_name, &self.service_uid)) 37 | } 38 | 39 | pub fn get_service_package_directory(&self) -> PathBuf { 40 | self.parcel_directory 41 | .join(&self.package.get_directory_name()) 42 | } 43 | 44 | pub fn get_service_log_directory(&self) -> PathBuf { 45 | self.log_directory.join(&self.service_name) 46 | } 47 | 48 | /// Resolve the directory in which the systemd unit files will be placed for this 49 | /// service. 
50 | /// This defaults to "{{config_root}}/_service" 51 | /// 52 | /// From this place the unit files will be symlinked to the relevant systemd 53 | /// unit directories so that they are picked up by systemd. 54 | pub fn get_service_service_directory(&self) -> PathBuf { 55 | self.get_service_config_directory().join("_service") 56 | } 57 | } 58 | 59 | // No cleanup state needed, we clean up when dropping PodState. 60 | #[async_trait::async_trait] 61 | impl ObjectState for PodState { 62 | type Manifest = Pod; 63 | type Status = Status; 64 | type SharedState = ProviderState; 65 | 66 | async fn async_drop(self, _provider_state: &mut ProviderState) {} 67 | } 68 | -------------------------------------------------------------------------------- /src/provider/states/pod/creating_service.rs: -------------------------------------------------------------------------------- 1 | use std::fs::create_dir_all; 2 | 3 | use anyhow::{Context, Error}; 4 | use kubelet::container::ContainerKey; 5 | use kubelet::pod::state::prelude::*; 6 | use kubelet::pod::{Pod, PodKey}; 7 | use log::{debug, error, info}; 8 | 9 | use super::setup_failed::SetupFailed; 10 | use super::starting::Starting; 11 | use crate::provider::systemdmanager::systemdunit::SystemDUnit; 12 | use crate::provider::{ContainerHandle, PodState, ProviderState}; 13 | 14 | #[derive(Default, Debug, TransitionTo)] 15 | #[transition_to(Starting, SetupFailed)] 16 | pub struct CreatingService; 17 | 18 | #[async_trait::async_trait] 19 | impl State for CreatingService { 20 | async fn next( 21 | self: Box, 22 | shared: SharedState, 23 | pod_state: &mut PodState, 24 | pod: Manifest, 25 | ) -> Transition { 26 | let pod = pod.latest(); 27 | 28 | let (systemd_manager, kubeconfig_path) = { 29 | let provider_state = shared.read().await; 30 | ( 31 | provider_state.systemd_manager.clone(), 32 | provider_state.kubeconfig_path.clone(), 33 | ) 34 | }; 35 | 36 | let service_name: &str = pod_state.service_name.as_ref(); 37 | info!( 38 | "Creating 
service unit for service {}", 39 | &pod_state.service_name 40 | ); 41 | let service_directory = &pod_state.get_service_service_directory(); 42 | if !service_directory.is_dir() { 43 | debug!( 44 | "Creating config directory for service [{}]: {:?}", 45 | pod_state.service_name, service_directory 46 | ); 47 | if let Err(error) = create_dir_all(service_directory) { 48 | return Transition::Complete(Err(Error::from(error))); 49 | } 50 | } 51 | 52 | // Each pod can map to multiple systemd units/services as each container will get its own 53 | // systemd unit file/service. 54 | // Map every container from the pod object to a systemdunit 55 | for container in &pod.containers() { 56 | let unit = match SystemDUnit::new( 57 | systemd_manager.is_user_mode(), 58 | pod_state, 59 | &kubeconfig_path, 60 | &pod, 61 | container, 62 | ) { 63 | Ok(unit) => unit, 64 | Err(err) => return Transition::Complete(Err(Error::from(err))), 65 | }; 66 | 67 | // Create the service 68 | // As per ADR005 we currently write the unit files directly in the systemd 69 | // unit directory (by passing None as [unit_file_path]). 70 | if let Err(e) = systemd_manager 71 | .create_unit(&unit, None, true, true) 72 | .await 73 | .with_context(|| format!("Unit file [{}] could not be created", unit)) 74 | { 75 | // TODO: We need to discuss what to do here, in theory we could have loaded 76 | // other services already, do we want to stop those? 
77 | return Transition::Complete(Err(e)); 78 | } 79 | 80 | let systemd_service = match systemd_manager 81 | .create_systemd_service(&unit.get_name()) 82 | .await 83 | { 84 | Ok(systemd_service) => systemd_service, 85 | Err(error) => { 86 | error!( 87 | "Proxy for the systemd service [{}] could not be created: {}", 88 | service_name, error 89 | ); 90 | return Transition::Complete(Err(error)); 91 | } 92 | }; 93 | 94 | { 95 | let provider_state = shared.write().await; 96 | let mut handles = provider_state.handles.write().await; 97 | handles.insert_container_handle( 98 | &PodKey::from(&pod), 99 | &ContainerKey::App(String::from(container.name())), 100 | &ContainerHandle { 101 | service_unit: unit.get_name(), 102 | systemd_service, 103 | }, 104 | ) 105 | }; 106 | 107 | // Done for now, if the service was created successfully we are happy 108 | // Starting and enabling comes in a later state after all service have been createddy 109 | } 110 | 111 | // All services were loaded successfully, otherwise we'd have returned early above 112 | Transition::next(self, Starting) 113 | } 114 | 115 | async fn status(&self, _pod_state: &mut PodState, _pod: &Pod) -> anyhow::Result { 116 | Ok(make_status(Phase::Pending, "CreatingService")) 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/provider/states/pod/downloading.rs: -------------------------------------------------------------------------------- 1 | use std::path::Path; 2 | 3 | use anyhow::Context; 4 | use kubelet::pod::state::prelude::*; 5 | use kubelet::pod::Pod; 6 | use log::{debug, error, info, warn}; 7 | use tokio::fs::create_dir_all; 8 | 9 | use super::downloading_backoff::DownloadingBackoff; 10 | use super::installing::Installing; 11 | use crate::provider::repository::find_repository; 12 | use crate::provider::repository::package::Package; 13 | use crate::provider::{PodState, ProviderState}; 14 | 15 | #[derive(Default, Debug, TransitionTo)] 16 | 
#[transition_to(Installing, DownloadingBackoff)] 17 | pub struct Downloading; 18 | 19 | impl Downloading { 20 | fn package_downloaded>(package: T, download_directory: &Path) -> bool { 21 | let package = package.into(); 22 | let package_file_name = download_directory.join(package.get_file_name()); 23 | debug!( 24 | "Checking if package {} has already been downloaded to {:?}", 25 | package, package_file_name 26 | ); 27 | Path::new(&package_file_name).exists() 28 | } 29 | } 30 | 31 | #[async_trait::async_trait] 32 | impl State for Downloading { 33 | async fn next( 34 | self: Box, 35 | provider_state: SharedState, 36 | pod_state: &mut PodState, 37 | _pod: Manifest, 38 | ) -> Transition { 39 | let package = pod_state.package.clone(); 40 | 41 | let client = { 42 | let provider_state = provider_state.read().await; 43 | provider_state.client.clone() 44 | }; 45 | 46 | info!("Looking for package: {} in known repositories", &package); 47 | debug!( 48 | "Checking if package {} has already been downloaded.", 49 | package 50 | ); 51 | if Downloading::package_downloaded(package.clone(), &pod_state.download_directory) { 52 | info!( 53 | "Package {} has already been downloaded to {:?}, continuing with installation", 54 | package, pod_state.download_directory 55 | ); 56 | return Transition::next( 57 | self, 58 | Installing { 59 | download_directory: pod_state.download_directory.clone(), 60 | parcel_directory: pod_state.parcel_directory.clone(), 61 | package: package.clone(), 62 | }, 63 | ); 64 | } 65 | let repo = find_repository(client, &package).await; 66 | return match repo { 67 | Ok(Some(mut repo)) => { 68 | // We found a repository providing the package, proceed with download 69 | // The repository has already downloaded its metadata at this time, as that 70 | // was used to check whether it provides the package 71 | info!( 72 | "Starting download of package {} from repository {}", 73 | &package, &repo 74 | ); 75 | let download_directory = pod_state.download_directory.clone(); 
76 | 77 | if !(download_directory.is_dir()) { 78 | if let Err(error) = create_download_directory(&download_directory).await { 79 | return Transition::Complete(Err(error)); 80 | } 81 | }; 82 | 83 | let download_result = repo 84 | .download_package(&package, download_directory.clone()) 85 | .await; 86 | match download_result { 87 | Ok(()) => { 88 | info!( 89 | "Successfully downloaded package {} to {:?}", 90 | package, 91 | download_directory.clone() 92 | ); 93 | Transition::next( 94 | self, 95 | Installing { 96 | download_directory: pod_state.download_directory.clone(), 97 | parcel_directory: pod_state.parcel_directory.clone(), 98 | package: package.clone(), 99 | }, 100 | ) 101 | } 102 | Err(e) => { 103 | warn!("Download of package {} failed: {}", package, e); 104 | Transition::next( 105 | self, 106 | DownloadingBackoff { 107 | package: package.clone(), 108 | }, 109 | ) 110 | } 111 | } 112 | } 113 | Ok(None) => { 114 | // No repository was found that provides this package 115 | let message = format!( 116 | "Cannot find package {} in any repository, aborting ..", 117 | &package 118 | ); 119 | error!("{}", &message); 120 | Transition::next( 121 | self, 122 | DownloadingBackoff { 123 | package: package.clone(), 124 | }, 125 | ) 126 | } 127 | Err(e) => { 128 | // An error occurred when looking for a repository providing this package 129 | error!( 130 | "Error occurred trying to find package [{}]: [{:?}]", 131 | &package, e 132 | ); 133 | Transition::next( 134 | self, 135 | DownloadingBackoff { 136 | package: package.clone(), 137 | }, 138 | ) 139 | } 140 | }; 141 | } 142 | 143 | async fn status(&self, _pod_state: &mut PodState, _pod: &Pod) -> anyhow::Result { 144 | Ok(make_status(Phase::Pending, "Downloading")) 145 | } 146 | } 147 | 148 | async fn create_download_directory(download_directory: &Path) -> anyhow::Result<()> { 149 | info!("Creating download directory [{:?}].", download_directory); 150 | create_dir_all(&download_directory).await.with_context(|| { 151 | 
format!( 152 | "Download directory [{}] could not be created.", 153 | download_directory.to_string_lossy() 154 | ) 155 | }) 156 | } 157 | -------------------------------------------------------------------------------- /src/provider/states/pod/downloading_backoff.rs: -------------------------------------------------------------------------------- 1 | use kubelet::backoff::BackoffStrategy; 2 | use kubelet::pod::state::prelude::*; 3 | use log::info; 4 | 5 | use super::downloading::Downloading; 6 | use crate::provider::repository::package::Package; 7 | use crate::provider::{PodState, ProviderState}; 8 | 9 | #[derive(Debug, TransitionTo)] 10 | #[transition_to(Downloading)] 11 | /// A setup step for the service failed. 12 | pub struct DownloadingBackoff { 13 | pub package: Package, 14 | } 15 | 16 | #[async_trait::async_trait] 17 | impl State for DownloadingBackoff { 18 | async fn next( 19 | self: Box, 20 | _provider_state: SharedState, 21 | pod_state: &mut PodState, 22 | _pod: Manifest, 23 | ) -> Transition { 24 | info!( 25 | "Backing of before retrying download of package {}", 26 | self.package 27 | ); 28 | pod_state.package_download_backoff_strategy.wait().await; 29 | Transition::next(self, Downloading) 30 | } 31 | 32 | async fn status(&self, _pod_state: &mut PodState, _pod: &Pod) -> anyhow::Result { 33 | Ok(make_status(Phase::Pending, "DownloadingBackoff")) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/provider/states/pod/initializing.rs: -------------------------------------------------------------------------------- 1 | use std::net::IpAddr; 2 | 3 | use anyhow::Result; 4 | use k8s_openapi::api::core::v1::Pod as KubePod; 5 | use k8s_openapi::api::core::v1::PodStatus as KubePodStatus; 6 | use kube::api::Patch; 7 | use kube::api::PatchParams; 8 | use kube::Api; 9 | use kubelet::pod::state::prelude::*; 10 | use log::trace; 11 | use log::warn; 12 | use serde_json::json; 13 | 14 | use 
super::downloading::Downloading; 15 | use crate::provider::{PodState, ProviderState}; 16 | 17 | #[derive(Default, Debug, TransitionTo)] 18 | #[transition_to(Downloading)] 19 | pub struct Initializing; 20 | 21 | #[async_trait::async_trait] 22 | impl State for Initializing { 23 | async fn next( 24 | self: Box, 25 | shared: SharedState, 26 | _pod_state: &mut PodState, 27 | pod: Manifest, 28 | ) -> Transition { 29 | let (client, server_ip_address) = { 30 | let provider_state = shared.read().await; 31 | ( 32 | provider_state.client.clone(), 33 | provider_state.server_ip_address, 34 | ) 35 | }; 36 | 37 | let pod = pod.latest(); 38 | 39 | let api = Api::namespaced(client, pod.namespace()); 40 | 41 | match patch_ip_address(&api, pod.name(), server_ip_address).await { 42 | Ok(_) => trace!( 43 | "Status of pod [{}] patched with hostIP and podIP [{}]", 44 | pod.name(), 45 | server_ip_address 46 | ), 47 | Err(error) => warn!( 48 | "Status of pod [{}] could not be patched with hostIP and podIP: {}", 49 | pod.name(), 50 | error 51 | ), 52 | } 53 | 54 | Transition::next(self, Downloading) 55 | } 56 | 57 | async fn status(&self, _pod_state: &mut PodState, _pod: &Pod) -> Result { 58 | Ok(make_status(Phase::Pending, "Initializing")) 59 | } 60 | } 61 | 62 | /// Patches the `hostIP` and `podIP` in the pod status. 
63 | async fn patch_ip_address(api: &Api, pod_name: &str, ip_address: IpAddr) -> Result<()> { 64 | let patch = json!({ 65 | "status": Some(KubePodStatus { 66 | host_ip: Some(ip_address.to_string()), 67 | pod_ip: Some(ip_address.to_string()), 68 | ..Default::default() 69 | }) 70 | }); 71 | 72 | api.patch_status(pod_name, &PatchParams::default(), &Patch::Strategic(patch)) 73 | .await?; 74 | 75 | Ok(()) 76 | } 77 | -------------------------------------------------------------------------------- /src/provider/states/pod/installing.rs: -------------------------------------------------------------------------------- 1 | use std::fs; 2 | use std::fs::File; 3 | use std::path::PathBuf; 4 | 5 | use flate2::read::GzDecoder; 6 | use kubelet::pod::state::prelude::*; 7 | use kubelet::pod::Pod; 8 | use log::{debug, error, info}; 9 | use tar::Archive; 10 | 11 | use super::creating_config::CreatingConfig; 12 | use super::setup_failed::SetupFailed; 13 | use crate::provider::error::StackableError; 14 | use crate::provider::repository::package::Package; 15 | use crate::provider::{PodState, ProviderState}; 16 | 17 | #[derive(Debug, TransitionTo)] 18 | #[transition_to(CreatingConfig, SetupFailed)] 19 | pub struct Installing { 20 | pub download_directory: PathBuf, 21 | pub parcel_directory: PathBuf, 22 | pub package: Package, 23 | } 24 | 25 | impl Installing { 26 | fn package_installed>(&self, package: T) -> bool { 27 | let package = package.into(); 28 | 29 | let target_directory = self.get_target_directory(&package); 30 | debug!( 31 | "Checking if package {:?} has already been installed to {:?}", 32 | package, target_directory 33 | ); 34 | target_directory.exists() 35 | } 36 | 37 | fn get_target_directory(&self, package: &Package) -> PathBuf { 38 | self.parcel_directory.join(package.get_directory_name()) 39 | } 40 | 41 | fn install_package>(&self, package: T) -> Result<(), StackableError> { 42 | let package: Package = package.into(); 43 | 44 | let archive_path = 
self.download_directory.join(package.get_file_name()); 45 | let tar_gz = File::open(&archive_path)?; 46 | let tar = GzDecoder::new(tar_gz); 47 | let mut archive = Archive::new(tar); 48 | 49 | let target_directory = self.get_target_directory(&package); 50 | 51 | info!( 52 | "Installing package: {:?} from {:?} into {:?}", 53 | package, archive_path, target_directory 54 | ); 55 | archive.unpack(target_directory)?; 56 | Ok(()) 57 | } 58 | } 59 | 60 | #[async_trait::async_trait] 61 | impl State for Installing { 62 | async fn next( 63 | self: Box, 64 | _provider_state: SharedState, 65 | _pod_state: &mut PodState, 66 | _pod: Manifest, 67 | ) -> Transition { 68 | let package = self.package.clone(); 69 | let package_name = &package.get_directory_name(); 70 | return if self.package_installed(package.clone()) { 71 | info!("Package {} has already been installed", package); 72 | return Transition::next( 73 | self, 74 | CreatingConfig { 75 | target_directory: None, 76 | }, 77 | ); 78 | } else { 79 | info!("Installing package {}", package); 80 | match self.install_package(package.clone()) { 81 | Ok(()) => Transition::next( 82 | self, 83 | CreatingConfig { 84 | target_directory: None, 85 | }, 86 | ), 87 | Err(e) => { 88 | error!( 89 | "Failed to install package [{}] due to: [{:?}]", 90 | &package_name, e 91 | ); 92 | // Clean up partially unpacked directory to avoid later iterations assuming 93 | // this install attempt was successful because the target directory exists. 
94 | let installation_directory = self.get_target_directory(&package); 95 | debug!( 96 | "Cleaning up partial installation by deleting directory [{}]", 97 | installation_directory.to_string_lossy() 98 | ); 99 | if let Err(error) = fs::remove_dir_all(&installation_directory) { 100 | error!( 101 | "Failed to clean up directory [{}] due to {}", 102 | installation_directory.to_string_lossy(), 103 | error 104 | ); 105 | }; 106 | Transition::next( 107 | self, 108 | SetupFailed { 109 | message: "PackageInstallationFailed".to_string(), 110 | }, 111 | ) 112 | } 113 | } 114 | }; 115 | } 116 | 117 | async fn status(&self, _pod_state: &mut PodState, _pod: &Pod) -> anyhow::Result { 118 | Ok(make_status(Phase::Pending, "Installing")) 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/provider/states/pod/running.rs: -------------------------------------------------------------------------------- 1 | use anyhow::anyhow; 2 | use k8s_openapi::api::core::v1::PodCondition; 3 | use k8s_openapi::apimachinery::pkg::apis::meta::v1::Time; 4 | use k8s_openapi::chrono; 5 | use krator::ObjectStatus; 6 | use kubelet::{ 7 | container::Status, 8 | pod::state::prelude::*, 9 | pod::{Pod, PodKey}, 10 | }; 11 | use log::{debug, info, trace, warn}; 12 | use tokio::time::Duration; 13 | 14 | use super::terminated::Terminated; 15 | use crate::provider::{ 16 | kubernetes::status::{patch_container_status, patch_restart_count}, 17 | systemdmanager::service::ServiceState, 18 | PodHandle, PodState, ProviderState, 19 | }; 20 | 21 | #[derive(Debug, TransitionTo)] 22 | #[transition_to(Terminated)] 23 | pub struct Running { 24 | pub transition_time: Time, 25 | } 26 | 27 | impl Default for Running { 28 | fn default() -> Self { 29 | Self { 30 | transition_time: Time(chrono::offset::Utc::now()), 31 | } 32 | } 33 | } 34 | 35 | #[async_trait::async_trait] 36 | impl State for Running { 37 | async fn next( 38 | mut self: Box, 39 | shared: SharedState, 40 | 
pod_state: &mut PodState, 41 | pod: Manifest, 42 | ) -> Transition { 43 | let pod = pod.latest(); 44 | let pod_key = &PodKey::from(&pod); 45 | 46 | let (client, pod_handle) = { 47 | let provider_state = shared.read().await; 48 | let handles = provider_state.handles.read().await; 49 | ( 50 | provider_state.client.clone(), 51 | handles.get(pod_key).map(PodHandle::to_owned), 52 | ) 53 | }; 54 | 55 | let mut running_containers = match &pod_handle { 56 | Some(containers) => containers.to_owned(), 57 | None => return Transition::Complete(Err(anyhow!("No systemd units found for service [{}], this should not happen, please report a bug for this!", pod_state.service_name))), 58 | }; 59 | 60 | let mut container_failed = false; 61 | 62 | // We loop here and "wake up" periodically to check if the service is still 63 | // up and running 64 | // Interruption of this loop is triggered externally by the Krustlet code when 65 | // - the pod which this state machine refers to gets deleted 66 | // - Krustlet shuts down 67 | while !running_containers.is_empty() { 68 | tokio::time::sleep(Duration::from_secs(10)).await; 69 | trace!( 70 | "Checking if service {} is still running.", 71 | &pod_state.service_name 72 | ); 73 | 74 | let mut succeeded_containers = Vec::new(); 75 | let mut failed_containers = Vec::new(); 76 | 77 | for (container_key, container_handle) in running_containers.iter() { 78 | let systemd_service = &container_handle.systemd_service; 79 | 80 | match systemd_service.service_state().await { 81 | Ok(ServiceState::Created) => { 82 | warn!( 83 | "The unit [{}] of service [{}] was not started. \ 84 | This should not happen. 
Ignoring this state for now.", 85 | systemd_service.file(), 86 | pod_state.service_name 87 | ); 88 | } 89 | Ok(ServiceState::Started) => {} 90 | Ok(ServiceState::Succeeded) => succeeded_containers 91 | .push((container_key.to_owned(), container_handle.to_owned())), 92 | Ok(ServiceState::Failed) => failed_containers 93 | .push((container_key.to_owned(), container_handle.to_owned())), 94 | Err(dbus_error) => { 95 | warn!( 96 | "Error querying state for unit [{}] of service [{}]: [{}].", 97 | systemd_service.file(), 98 | pod_state.service_name, 99 | dbus_error 100 | ); 101 | } 102 | } 103 | } 104 | 105 | for (container_key, container_handle) in &succeeded_containers { 106 | info!( 107 | "Unit [{}] for service [{}] terminated successfully.", 108 | pod_state.service_name, container_handle.service_unit 109 | ); 110 | patch_container_status( 111 | &client, 112 | &pod, 113 | container_key, 114 | &Status::terminated("Completed", false), 115 | ) 116 | .await; 117 | running_containers.remove(container_key); 118 | } 119 | 120 | for (container_key, container_handle) in &failed_containers { 121 | info!( 122 | "Unit [{}] for service [{}] failed unexpectedly.", 123 | pod_state.service_name, container_handle.service_unit 124 | ); 125 | patch_container_status( 126 | &client, 127 | &pod, 128 | container_key, 129 | &Status::terminated("Error", true), 130 | ) 131 | .await; 132 | running_containers.remove(container_key); 133 | container_failed = true; 134 | } 135 | 136 | for (container_key, container_handle) in running_containers.iter() { 137 | trace!( 138 | "Unit [{}] of service [{}] still running ...", 139 | container_handle.service_unit, 140 | pod_state.service_name 141 | ); 142 | 143 | match container_handle.systemd_service.restart_count().await { 144 | Ok(restart_count) => { 145 | if let Err(error) = 146 | patch_restart_count(&client, &pod, container_key, restart_count).await 147 | { 148 | warn!("Could not patch restart count: {}", error); 149 | } 150 | } 151 | Err(error) => warn!( 
152 | "Could retrieve restart count from unit [{}]: {}", 153 | container_handle.service_unit, error 154 | ), 155 | } 156 | } 157 | } 158 | 159 | Transition::next( 160 | self, 161 | Terminated { 162 | successful: !container_failed, 163 | }, 164 | ) 165 | } 166 | 167 | async fn status(&self, pod_state: &mut PodState, _pod: &Pod) -> anyhow::Result { 168 | let condition = PodCondition { 169 | last_probe_time: None, 170 | last_transition_time: Some(self.transition_time.clone()), 171 | message: Some(String::from("Service is running")), 172 | reason: Some(String::from("Running")), 173 | status: "True".to_string(), 174 | type_: "Ready".to_string(), 175 | }; 176 | 177 | let status = StatusBuilder::new() 178 | .phase(Phase::Running) 179 | .reason("Running") 180 | .conditions(vec![condition]) 181 | .build(); 182 | 183 | debug!( 184 | "Patching status for running service [{}] with: [{}]", 185 | pod_state.service_name, 186 | status.json_patch() 187 | ); 188 | Ok(status) 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /src/provider/states/pod/setup_failed.rs: -------------------------------------------------------------------------------- 1 | use kubelet::pod::state::prelude::*; 2 | use log::{error, info}; 3 | 4 | use super::downloading::Downloading; 5 | use crate::provider::{PodState, ProviderState}; 6 | 7 | #[derive(Default, Debug, TransitionTo)] 8 | #[transition_to(Downloading)] 9 | /// A setup step for the service failed, this can be one of the following: 10 | /// - Download Package 11 | /// - Install Package 12 | /// - Create Config 13 | /// - Create Service 14 | pub struct SetupFailed { 15 | pub message: String, 16 | } 17 | 18 | #[async_trait::async_trait] 19 | impl State for SetupFailed { 20 | async fn next( 21 | self: Box, 22 | _provider_state: SharedState, 23 | _pod_state: &mut PodState, 24 | pod: Manifest, 25 | ) -> Transition { 26 | let pod = pod.latest(); 27 | 28 | error!( 29 | "setup failed for pod {} due to: 
{}", 30 | pod.name(), 31 | self.message 32 | ); 33 | info!("Waiting for {} seconds before retrying..", 10); 34 | // TODO: make this configurable 35 | tokio::time::sleep(std::time::Duration::from_secs(10)).await; 36 | Transition::next(self, Downloading) 37 | } 38 | 39 | async fn status(&self, _pod_state: &mut PodState, _pod: &Pod) -> anyhow::Result { 40 | Ok(make_status(Phase::Pending, "SetupFailed")) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/provider/states/pod/starting.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use super::running::Running; 4 | use crate::provider::{ 5 | kubernetes::status::patch_container_status, systemdmanager::service::ServiceState, PodHandle, 6 | PodState, ProviderState, 7 | }; 8 | 9 | use anyhow::Result; 10 | use kube::{ 11 | api::{Patch, PatchParams}, 12 | Api, Client, 13 | }; 14 | use kubelet::pod::{Pod, PodKey}; 15 | use kubelet::{container::Status, pod::state::prelude::*}; 16 | use log::{debug, error, info}; 17 | use serde_json::json; 18 | 19 | #[derive(Default, Debug, TransitionTo)] 20 | #[transition_to(Running)] 21 | pub struct Starting; 22 | 23 | #[async_trait::async_trait] 24 | impl State for Starting { 25 | async fn next( 26 | self: Box, 27 | shared: SharedState, 28 | pod_state: &mut PodState, 29 | pod: Manifest, 30 | ) -> Transition { 31 | let pod = pod.latest(); 32 | 33 | match start_service_units(shared, pod_state, &pod).await { 34 | Ok(()) => Transition::next(self, Running::default()), 35 | Err(error) => { 36 | error!("{}", error); 37 | Transition::Complete(Err(error)) 38 | } 39 | } 40 | } 41 | 42 | async fn status(&self, _pod_state: &mut PodState, _pod: &Pod) -> Result { 43 | Ok(make_status(Phase::Pending, "Starting")) 44 | } 45 | } 46 | 47 | /// Starts the service units for the containers of the given pod. 
48 | /// 49 | /// The units are started and enabled if they were not already started. 50 | async fn start_service_units( 51 | shared: SharedState, 52 | pod_state: &PodState, 53 | pod: &Pod, 54 | ) -> Result<()> { 55 | let pod_key = &PodKey::from(pod); 56 | 57 | let (client, systemd_manager, pod_handle) = { 58 | let provider_state = shared.read().await; 59 | let handles = provider_state.handles.read().await; 60 | ( 61 | provider_state.client.clone(), 62 | provider_state.systemd_manager.clone(), 63 | handles.get(pod_key).map(PodHandle::to_owned), 64 | ) 65 | }; 66 | 67 | for (container_key, container_handle) in pod_handle.unwrap_or_default() { 68 | let systemd_service = &container_handle.systemd_service; 69 | let service_unit = &container_handle.service_unit; 70 | 71 | if systemd_service.service_state().await? == ServiceState::Created { 72 | info!("Starting systemd unit [{}]", service_unit); 73 | systemd_manager.start(service_unit).await?; 74 | 75 | info!("Enabling systemd unit [{}]", service_unit); 76 | systemd_manager.enable(service_unit).await?; 77 | } else { 78 | debug!( 79 | "Unit [{}] for service [{}] was already started. Skipping startup.", 80 | service_unit, &pod_state.service_name 81 | ); 82 | } 83 | 84 | let mut annotations = HashMap::new(); 85 | annotations.insert( 86 | "featureLogs", 87 | systemd_service.invocation_id().await.is_ok().to_string(), 88 | ); 89 | annotations.insert( 90 | "featureRestartCount", 91 | systemd_service.restart_count().await.is_ok().to_string(), 92 | ); 93 | 94 | add_annotations(&client, pod, &annotations).await?; 95 | 96 | patch_container_status(&client, pod, &container_key, &Status::running()).await; 97 | } 98 | 99 | Ok(()) 100 | } 101 | 102 | /// Adds annotations to the given pod. 103 | /// 104 | /// If there is already an annotation with the given key then the value 105 | /// is replaced. 106 | /// The function returns when the patch is sent. It does not await the 107 | /// changes to be visible to the watching clients. 
108 | async fn add_annotations( 109 | client: &Client, 110 | pod: &Pod, 111 | annotations: &HashMap<&str, String>, 112 | ) -> kube::Result { 113 | debug!( 114 | "Adding annotations [{:?}] to pod [{:?}]", 115 | annotations, 116 | PodKey::from(pod) 117 | ); 118 | 119 | let api: Api = Api::namespaced(client.clone(), pod.namespace()); 120 | 121 | let patch = json!({ 122 | "metadata": { 123 | "annotations": annotations 124 | } 125 | }); 126 | 127 | api.patch( 128 | pod.name(), 129 | &PatchParams::default(), 130 | &Patch::Strategic(patch), 131 | ) 132 | .await 133 | } 134 | -------------------------------------------------------------------------------- /src/provider/states/pod/terminated.rs: -------------------------------------------------------------------------------- 1 | use kubelet::pod::{state::prelude::*, PodKey}; 2 | use log::{debug, info, warn}; 3 | 4 | use crate::provider::{PodState, ProviderState}; 5 | 6 | #[derive(Default, Debug)] 7 | /// The pod object was deleted in Kubernetes 8 | pub struct Terminated { 9 | pub successful: bool, 10 | } 11 | 12 | #[async_trait::async_trait] 13 | impl State for Terminated { 14 | async fn next( 15 | self: Box, 16 | shared: SharedState, 17 | pod_state: &mut PodState, 18 | pod: Manifest, 19 | ) -> Transition { 20 | info!("Pod {} was terminated", &pod_state.service_name); 21 | 22 | let pod = pod.latest(); 23 | let pod_key = &PodKey::from(pod); 24 | 25 | let (systemd_manager, pod_handle) = { 26 | let provider_state = shared.write().await; 27 | let mut handles = provider_state.handles.write().await; 28 | ( 29 | provider_state.systemd_manager.clone(), 30 | handles.remove(pod_key), 31 | ) 32 | }; 33 | 34 | // TODO: We need some additional error handling here, wait for the services to actually 35 | // shut down and try to remove the rest of the services if one fails (tbd, do we want that?) 
36 | if let Some(containers) = pod_handle { 37 | for container_handle in containers.values() { 38 | let service_unit = &container_handle.service_unit; 39 | 40 | debug!("Stopping systemd unit [{}]", service_unit); 41 | if let Err(stop_error) = systemd_manager.stop(service_unit).await { 42 | warn!( 43 | "Error occurred stopping systemd unit [{}]: [{}]", 44 | service_unit, stop_error 45 | ); 46 | return Transition::Complete(Err(stop_error)); 47 | } 48 | 49 | // Daemon reload is false here, we'll do that once after all units have been removed 50 | debug!("Removing systemd unit [{}]", service_unit); 51 | if let Err(remove_error) = systemd_manager.remove_unit(service_unit, false).await { 52 | warn!( 53 | "Error occurred removing systemd unit [{}]: [{}]", 54 | service_unit, remove_error 55 | ); 56 | return Transition::Complete(Err(remove_error)); 57 | } 58 | } 59 | 60 | debug!("Performing daemon-reload"); 61 | if let Err(reload_error) = systemd_manager.reload().await { 62 | warn!("Failed to perform daemon-reload: [{}]", reload_error); 63 | return Transition::Complete(Err(reload_error)); 64 | }; 65 | } else { 66 | debug!("Pod [{}] was already terminated", pod_state.service_name); 67 | } 68 | 69 | Transition::Complete(Ok(())) 70 | } 71 | 72 | async fn status(&self, _pod_state: &mut PodState, pod: &Pod) -> anyhow::Result { 73 | let phase = pod 74 | .as_kube_pod() 75 | .status 76 | .as_ref() 77 | .and_then(|status| status.phase.as_ref()) 78 | .map(String::as_ref); 79 | 80 | let already_terminated = phase == Some("Succeeded") || phase == Some("Failed"); 81 | 82 | let status = if already_terminated { 83 | Default::default() // no changes to the current status 84 | } else if self.successful { 85 | make_status(Phase::Succeeded, "Completed") 86 | } else { 87 | make_status(Phase::Failed, "Error") 88 | }; 89 | 90 | Ok(status) 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/provider/states/pod/waiting_config_map.rs: 
--------------------------------------------------------------------------------
use kubelet::backoff::BackoffStrategy;
use kubelet::pod::state::prelude::*;
use log::info;

use super::creating_config::CreatingConfig;
use crate::provider::{PodState, ProviderState};

#[derive(Debug, TransitionTo)]
#[transition_to(CreatingConfig)]
/// A config map that was specified in the pod has not yet been created in the apiserver, back off
/// until this has been created
/// TODO: make this a watch instead of delay
pub struct WaitingConfigMap {
    pub missing_config_maps: Vec<String>,
}

#[async_trait::async_trait]
impl State<PodState> for WaitingConfigMap {
    async fn next(
        self: Box<Self>,
        _provider_state: SharedState<ProviderState>,
        pod_state: &mut PodState,
        _pod: Manifest<Pod>,
    ) -> Transition<PodState> {
        info!(
            "Delaying execution due to missing configmaps: {:?}",
            &self.missing_config_maps
        );
        // Reuse the package download backoff strategy to wait with increasing
        // delays between retries.
        pod_state.package_download_backoff_strategy.wait().await;

        Transition::next(
            self,
            CreatingConfig {
                target_directory: None,
            },
        )
    }

    async fn status(&self, _pod_state: &mut PodState, _pod: &Pod) -> anyhow::Result<PodStatus> {
        Ok(make_status(Phase::Pending, "WaitingConfigMap"))
    }
}
--------------------------------------------------------------------------------
/src/provider/systemdmanager/journal_reader.rs:
--------------------------------------------------------------------------------
//! This module provides functions for reading from the journal.

use anyhow::{Error, Result};
use kubelet::log::Sender;
use std::str;
use systemd::{journal, journal::JournalRef};

/// Reads journal entries with the given invocation ID and sends the
/// contained messages.
///
/// The options `tail` and `follow` in `sender` are taken into account.
12 | /// 13 | /// If `tail` is set with `Some(line_count)` then only the last 14 | /// `line_count` messages (or less if not enough available) are sent 15 | /// otherwise all available messages are sent. 16 | /// 17 | /// If `follow` is `true` then additionally all new messages are sent 18 | /// until the channel of `sender` is closed. In this case an 19 | /// [`Err(kubelet::log::SendError::ChannelClosed)`] will be returned. 20 | pub async fn send_messages(sender: &mut Sender, invocation_id: &str) -> Result<()> { 21 | let mut journal = journal::OpenOptions::default().open()?; 22 | let journal = journal.match_add("_SYSTEMD_INVOCATION_ID", invocation_id)?; 23 | 24 | if let Some(line_count) = sender.tail() { 25 | seek_journal_backwards(journal, line_count)?; 26 | 27 | if sender.follow() { 28 | send_remaining_messages(journal, sender).await?; 29 | } else { 30 | send_n_messages(journal, sender, line_count).await?; 31 | } 32 | } else { 33 | send_remaining_messages(journal, sender).await?; 34 | } 35 | 36 | while sender.follow() { 37 | journal.wait(None)?; 38 | send_remaining_messages(journal, sender).await?; 39 | } 40 | 41 | Ok(()) 42 | } 43 | 44 | /// Sets the cursor of the journal to the position before the last `count` 45 | /// entries so that the next entry is the first of `count` remaining 46 | /// entries. If the beginning of the journal is reached then the cursor is 47 | /// set to the position before the first entry. 48 | fn seek_journal_backwards(journal: &mut JournalRef, count: usize) -> Result<()> { 49 | journal.seek_tail()?; 50 | 51 | let entries_to_skip = count + 1; 52 | let skipped = journal.previous_skip(entries_to_skip as u64)?; 53 | let beginning_reached = skipped < entries_to_skip; 54 | if beginning_reached { 55 | journal.seek_head()?; 56 | } 57 | 58 | Ok(()) 59 | } 60 | 61 | /// Sends the given number of messages from the journal. 
62 | async fn send_n_messages( 63 | journal: &mut JournalRef, 64 | sender: &mut Sender, 65 | count: usize, 66 | ) -> Result<()> { 67 | let mut sent = 0; 68 | let mut message_available = true; 69 | while sent != count && message_available { 70 | if let Some(message) = next_message(journal)? { 71 | send_message(sender, &message).await?; 72 | sent += 1; 73 | } else { 74 | message_available = false; 75 | } 76 | } 77 | Ok(()) 78 | } 79 | 80 | /// Sends the remaining messages from the journal. 81 | async fn send_remaining_messages(journal: &mut JournalRef, sender: &mut Sender) -> Result<()> { 82 | while let Some(message) = next_message(journal)? { 83 | send_message(sender, &message).await?; 84 | } 85 | Ok(()) 86 | } 87 | 88 | /// Retrieves the message of the next entry from the journal. 89 | /// 90 | /// Returns [`Ok(Some(message))`] if a message could be successfully retrieved 91 | /// and advances the position in the journal. If the journal entry has no 92 | /// message assigned then `message` is an empty string. 93 | /// Returns [`Ok(None)`] if there are no new entries. 94 | /// Returns [`Err(error)`] if the journal could not be read. 95 | fn next_message(journal: &mut JournalRef) -> Result> { 96 | let maybe_message = if journal.next()? != 0 { 97 | let message = if let Some(entry) = journal.get_data("MESSAGE")? { 98 | if let Some(value) = entry.value() { 99 | String::from_utf8_lossy(value).into() 100 | } else { 101 | // The MESSAGE field contains no text, i.e. `MESSAGE=`. 102 | String::new() 103 | } 104 | } else { 105 | // The journal entry contains no MESSAGE field. 106 | String::new() 107 | }; 108 | Some(message) 109 | } else { 110 | None 111 | }; 112 | Ok(maybe_message) 113 | } 114 | 115 | /// Sends the given message with a newline character. 
116 | async fn send_message(sender: &mut Sender, message: &str) -> Result<()> { 117 | let mut line = message.to_owned(); 118 | line.push('\n'); 119 | sender.send(line).await.map_err(Error::new) 120 | } 121 | -------------------------------------------------------------------------------- /src/provider/systemdmanager/manager.rs: -------------------------------------------------------------------------------- 1 | //! Exposes methods of the systemd manager interface. 2 | //! 3 | //! The module offers the ability to create, remove, start, stop, enable and 4 | //! disable systemd units. 5 | //! 6 | use super::service::SystemdService; 7 | use super::systemd1_api::{ 8 | AsyncJobProxy, AsyncManagerProxy, AsyncUnitProxy, JobRemovedResult, JobRemovedSignal, 9 | ManagerSignals, StartMode, StopMode, 10 | }; 11 | use crate::provider::systemdmanager::systemdunit::SystemDUnit; 12 | use crate::provider::StackableError; 13 | use crate::provider::StackableError::RuntimeError; 14 | use anyhow::{anyhow, Context}; 15 | use futures_util::{future, stream::StreamExt}; 16 | use log::debug; 17 | use std::fs; 18 | use std::fs::File; 19 | use std::future::Future; 20 | use std::io::Write; 21 | use std::path::PathBuf; 22 | use zbus::azync::Connection; 23 | 24 | /// Enum that lists the supported unit types 25 | #[derive(Clone, Debug, Eq, PartialEq)] 26 | pub enum UnitTypes { 27 | Service, 28 | } 29 | 30 | /// The main way of interacting with this module, this struct offers 31 | /// the public methods for managing service units. 32 | /// 33 | /// Use [`SystemdManager::new`] to create a new instance. 34 | pub struct SystemdManager { 35 | units_directory: PathBuf, 36 | proxy: AsyncManagerProxy<'static>, 37 | user_mode: bool, // TODO Use the same naming (user_mode or session_mode) everywhere 38 | } 39 | 40 | impl SystemdManager { 41 | /// Creates a new instance, takes a flag whether to run within the 42 | /// user session or manage services system-wide. 
43 | pub async fn new(user_mode: bool, max_pods: u16) -> Result { 44 | // Connect to session or system bus depending on the value of [user_mode] 45 | let mut connection = if user_mode { 46 | Connection::session().await.map_err(|e| RuntimeError { 47 | msg: format!( 48 | "Could not create a connection to the systemd session bus: {}", 49 | e 50 | ), 51 | })? 52 | } else { 53 | Connection::system().await.map_err(|e| RuntimeError { 54 | msg: format!( 55 | "Could not create a connection to the systemd system-wide bus: {}", 56 | e 57 | ), 58 | })? 59 | }; 60 | 61 | // The maximum number of queued DBus messages must be higher 62 | // than the number of containers which can be started and 63 | // stopped simultaneously. 64 | connection.set_max_queued(max_pods as usize * 2); 65 | 66 | let proxy = AsyncManagerProxy::new(&connection) 67 | .await 68 | .map_err(|e| RuntimeError { 69 | msg: format!( 70 | "Proxy for org.freedesktop.systemd1.Manager could not be created: {}", 71 | e 72 | ), 73 | })?; 74 | 75 | // Depending on whether we are supposed to run in user space or system-wide 76 | // we'll pick the default directory to initialize the systemd manager with 77 | // This allows creating unit files either directly in the systemd folder by 78 | // passing in just a filename, or symlink them by passing in an absolute 79 | // path 80 | let units_directory = if user_mode { 81 | PathBuf::from(shellexpand::tilde("~/.config/systemd/user").to_string()) 82 | } else { 83 | PathBuf::from("/lib/systemd/system") 84 | }; 85 | 86 | Ok(SystemdManager { 87 | units_directory, 88 | proxy, 89 | user_mode, 90 | }) 91 | } 92 | 93 | pub fn is_user_mode(&self) -> bool { 94 | self.user_mode 95 | } 96 | 97 | // Internal helper method to remove an existing unit file or symlink 98 | fn delete_unit_file(&self, unit: &str) -> anyhow::Result<()> { 99 | let unit_file = self.units_directory.clone().join(&unit); 100 | debug!("Removing [{:?}]", unit_file); 101 | 102 | match fs::remove_file(&unit_file) { 103 | 
Ok(()) => Ok(()), 104 | Err(delete_error) => { 105 | debug!( 106 | "Failed to remove existing unit file [{:?}] for systemd unit [{}]", 107 | unit_file, unit 108 | ); 109 | Err(anyhow::Error::from(delete_error)) 110 | } 111 | } 112 | } 113 | 114 | /// Write the proper unit file for [unit] to disk. 115 | /// The location of the unit file is determined by the value of `unit_file_path`: 116 | /// 117 | /// * None, the unit file will be created in the base directory that this manager was initialized 118 | /// with, which is either /lib/systemd/system or ~/.config/systemd/user depending on the value of 119 | /// `session`. 120 | /// * Some, the unit file will be created at this location and linked into the proper 121 | /// systemd unit directory 122 | /// 123 | /// `force` determines if an existing unit file should be overwritten, if no external unit file 124 | /// path is specified in `unit_file_path`. If this is false and the target file exists an error 125 | /// is returned. 126 | /// 127 | /// The value of `daemon_reload` controls whether a daemon reload is triggered after creating or 128 | /// linking the unit file. 
129 | pub async fn create_unit( 130 | &self, 131 | unit: &SystemDUnit, 132 | unit_file_path: Option, 133 | force: bool, 134 | daemon_reload: bool, 135 | ) -> anyhow::Result<()> { 136 | // Appends .service to name if necessary 137 | let linked_unit_file = unit_file_path.is_some(); 138 | let unit_name = SystemdManager::get_unit_file_name(&unit.name, &unit.unit_type)?; 139 | 140 | // Check if a path was provided for the unit file, otherwise use the base directory 141 | let target_file = if let Some(path) = unit_file_path { 142 | path.join(&unit_name) 143 | } else { 144 | // TODO: I think we can get away with a reference here, but not sure yet, 145 | // that would mean looking into get_unit_file_name returning a &str, _I think_ 146 | self.units_directory.clone().join(&unit_name) 147 | }; 148 | 149 | debug!( 150 | "Target file for service [{}] : [{:?}]", 151 | &unit_name, &target_file 152 | ); 153 | 154 | // The following behavior distinguishes between a systemd unit that is defined in a file 155 | // external to the systemd units directory which is then symlinked to and a file that is 156 | // created directly in the systemd units dir. 157 | // 158 | // For the first case the _external_ file that will be symlinked to should have been written 159 | // or potentially overwritten above, which is why we bypass this entire conditional in that 160 | // case. 161 | // For the case where we need to symlink we check if a symlink already exists and if so 162 | // if force has been specified - only then do we remove an existing link before recreating 163 | // it. 
164 | 165 | // Perform some pre-flight checks to ensure that writing the unit file doesn't clash 166 | // with any existing files 167 | if !linked_unit_file 168 | && target_file.exists() 169 | && fs::symlink_metadata(&target_file)?.file_type().is_symlink() 170 | { 171 | // Handle the special case where we need to replace a symlink with an actual file 172 | // This only occurs when switching from using a linked file to writing the file 173 | // directly into the units folder - should not happen in practice 174 | // In this case we need to remove the symlink 175 | fs::remove_file(&target_file)?; 176 | } 177 | 178 | let unit_file = self.units_directory.join(&unit_name); 179 | if unit_file.exists() && unit_file.symlink_metadata()?.file_type().is_file() { 180 | // Handle the special case where we need to replace an actual file with a symlink 181 | // This only occurs when switching from writing the file 182 | // directly into the units folder to using a linked file - should not happen in practice 183 | // In this case we need to remove the file 184 | fs::remove_file(&unit_file)?; 185 | } 186 | 187 | // We have handled the special case above, if the target file does not exist 188 | // at this point in time we write the file - doesn't matter if inside or outside 189 | // the systemd folder 190 | if !target_file.exists() { 191 | // Write unit file, no matter where 192 | // TODO: implement check for content equality 193 | let mut unit_file = File::create(&target_file).with_context(|| { 194 | format!( 195 | "File [{}] could not be created", 196 | target_file.to_string_lossy() 197 | ) 198 | })?; 199 | unit_file.write_all(unit.get_unit_file_content().as_bytes())?; 200 | unit_file.flush()?; 201 | } 202 | 203 | // If this is a linked unit file we need to call out to systemd to link this file 204 | if linked_unit_file { 205 | self.link_unit_file(&target_file.into_os_string().to_string_lossy(), force) 206 | .await?; 207 | } 208 | 209 | // Perform daemon reload if requested 210 | 
if daemon_reload { 211 | self.reload().await?; 212 | } 213 | Ok(()) 214 | } 215 | 216 | /// Removes a unit from systemd. 217 | /// Depending on what is passed in the [unit] parameter this means one of two things: 218 | /// 219 | /// * if an absolute file path is passed, the symlink to this file is deleted from the 220 | /// systemd unit folder 221 | /// * if a unit name is passed an attempt is made to unlink the unit via a dbus call 222 | /// 223 | /// Calling this function means an implicit disabling of the service, if it was enabled. 224 | /// 225 | pub async fn remove_unit(&self, unit: &str, daemon_reload: bool) -> anyhow::Result<()> { 226 | debug!("Disabling unit [{}]", unit); 227 | if let Err(disable_error) = self.disable(unit).await { 228 | debug!( 229 | "Error disabling systemd unit [{}]: [{}]", 230 | unit, disable_error 231 | ); 232 | return Err(disable_error); 233 | } 234 | 235 | // If we are not linking to the unit file but writing it directly in the 236 | // units folder it won't be removed by the dbus method call to `DisableUnitFiles` 237 | //from [disable], so we delete explicitly 238 | let unit_file = self.units_directory.join(&unit); 239 | if unit_file.exists() { 240 | debug!("Removing unit [{}] from systemd", unit); 241 | self.delete_unit_file(unit)?; 242 | } 243 | 244 | if daemon_reload { 245 | self.reload().await?; 246 | } 247 | Ok(()) 248 | } 249 | 250 | /// Enables a systemd unit to be stared automatically at system boot - expects a fully named 251 | /// unit (which means: including the .service or other unit type). 252 | /// This either requires that the unit is known to systemd or an absolute path to a unit file 253 | /// to work. 
254 | /// 255 | /// For a unit file to be _known_ it needs to either be located in the systemd unit folder, or 256 | /// linked into that folder - both actions can be performed by calling [`SystemdManager::create_unit`] 257 | pub async fn enable(&self, unit: &str) -> anyhow::Result<()> { 258 | // We don't do any checking around this and simply trust the user that either the name 259 | // of an existing and linked service was provided or this is an absolute path 260 | debug!("Trying to enable systemd unit [{}]", unit); 261 | 262 | match self.proxy.enable_unit_files(&[unit], false, true).await { 263 | Ok(_) => { 264 | debug!("Successfully enabled service [{}]", unit); 265 | Ok(()) 266 | } 267 | Err(e) => Err(anyhow!("Error enabling service [{}]: {}", unit, e)), 268 | } 269 | } 270 | 271 | // Disable the systemd unit - which effectively means removing the symlink from the 272 | // multi-user.target subdirectory. 273 | pub async fn disable(&self, unit: &str) -> anyhow::Result<()> { 274 | debug!("Trying to disable systemd unit [{}]", unit); 275 | match self.proxy.disable_unit_files(&[unit], false).await { 276 | Ok(_) => { 277 | debug!("Successfully disabled service [{}]", unit); 278 | Ok(()) 279 | } 280 | Err(e) => Err(anyhow!("Error disabling service [{}]: {}", unit, e)), 281 | } 282 | } 283 | 284 | /// Attempts to start a systemd unit 285 | /// [unit] is expected to be the name (including .) of a service that is known to 286 | /// systemd at the time this is called. 287 | /// To make a service known please take a look at the [`SystemdManager::enable`] function. 
288 | pub async fn start(&self, unit: &str) -> anyhow::Result<()> { 289 | debug!("Trying to start unit [{}]", unit); 290 | 291 | let result = self 292 | .call_method(|proxy| proxy.start_unit(unit, StartMode::Fail)) 293 | .await; 294 | 295 | if result.is_ok() { 296 | debug!("Successfully started service [{}]", unit); 297 | } 298 | 299 | result.map_err(|e| anyhow!("Error starting service [{}]: {}", unit, e)) 300 | } 301 | 302 | /// Attempts to stop a systemd unit 303 | /// [unit] is expected to be the name (including .) of a service that is known to 304 | /// systemd at the time this is called. 305 | /// To make a service known please take a look at the [`SystemdManager::enable`] function. 306 | pub async fn stop(&self, unit: &str) -> anyhow::Result<()> { 307 | debug!("Trying to stop systemd unit [{}]", unit); 308 | 309 | let result = self 310 | .call_method(|proxy| proxy.stop_unit(unit, StopMode::Fail)) 311 | .await; 312 | 313 | if result.is_ok() { 314 | debug!("Successfully stopped service [{}]", unit); 315 | } 316 | 317 | result.map_err(|e| anyhow!("Error stopping service [{}]: {}", unit, e)) 318 | } 319 | 320 | /// Calls a systemd method and waits until the dependent job is 321 | /// finished. 322 | /// 323 | /// The given method enqueues a job in systemd and returns the job 324 | /// object. Systemd sends out a `JobRemoved` signal when the job is 325 | /// dequeued. The signal contains the reason for the dequeuing like 326 | /// `"done"`, `"failed"`, or `"canceled"`. 327 | /// 328 | /// This function subscribes to `JobRemoved` signals, calls the 329 | /// given method, awaits the signal for the corresponding job, and 330 | /// returns `Ok(())` if the result is [`JobRemovedResult::Done`]. 331 | /// If the signal contains another result or no signal is returned 332 | /// (which should never happen) then an error with a corresponding 333 | /// message is returned. 
    // NOTE(review): the generic bound `Fut: Future>>` and the turbofish `body::()` below
    // appear garbled by text extraction (angle-bracketed type parameters were stripped).
    // Restore the exact type arguments from version control before building.
    async fn call_method<'a, F, Fut>(&'a self, method: F) -> anyhow::Result<()>
    where
        F: Fn(&'a AsyncManagerProxy) -> Fut,
        Fut: Future>>,
    {
        // Subscribe to JobRemoved signals *before* enqueuing the job so the signal for
        // our job cannot slip through between the call and the subscription.
        let signals = self
            .proxy
            .receive_signal(ManagerSignals::JobRemoved)
            .await?
            .map(|message| message.body::().unwrap());

        // Enqueue the actual job in systemd.
        let job = method(&self.proxy).await?;

        // Only keep signals that refer to the job object we just enqueued.
        let mut signals = signals
            .filter(|signal| future::ready(&signal.job.to_owned().into_inner() == job.path()));

        // Await the dequeuing of our job; the signal carries the job result.
        let signal = signals.next().await;

        match signal {
            Some(message) if message.result == JobRemovedResult::Done => Ok(()),
            Some(message) => Err(anyhow!("The systemd job failed: {:?}", message)),
            None => Err(anyhow!(
                "No signal was returned for the systemd job: {:?}",
                job
            )),
        }
    }

    // Perform a daemon-reload, this causes systemd to re-read all unit files on disk and
    // discover changes that have been performed since the last reload
    // This needs to be done after creating a new service unit before it can be targeted by
    // start / stop and similar commands.
366 | pub async fn reload(&self) -> anyhow::Result<()> { 367 | debug!("Performing daemon-reload.."); 368 | 369 | match self.proxy.reload().await { 370 | Ok(_) => { 371 | debug!("Successfully performed daemon-reload"); 372 | Ok(()) 373 | } 374 | Err(e) => Err(anyhow!("Error performing daemon-reload: [{}]", e)), 375 | } 376 | } 377 | 378 | // Symlink a unit file into the systemd unit folder 379 | // This is not public on purpose, as [create] should be the normal way to link unit files 380 | // when using this crate 381 | async fn link_unit_file(&self, unit: &str, force: bool) -> anyhow::Result<()> { 382 | debug!("Linking [{}]", unit); 383 | self.proxy.link_unit_files(&[unit], false, force).await?; 384 | Ok(()) 385 | } 386 | 387 | pub async fn create_systemd_service(&self, unit: &str) -> anyhow::Result { 388 | SystemdService::new(unit, &self.proxy).await 389 | } 390 | 391 | /// Returns the file path of the given unit if there is one. 392 | pub async fn fragment_path(&self, unit: &str) -> anyhow::Result> { 393 | let unit_proxy = self.create_unit_proxy(unit).await?; 394 | let fragment_path = unit_proxy.fragment_path().await?; 395 | 396 | let file_path = if fragment_path.is_empty() { 397 | None 398 | } else { 399 | Some(fragment_path) 400 | }; 401 | 402 | Ok(file_path) 403 | } 404 | 405 | /// Returns the names of the units assigned to the given slice. 
406 | pub async fn slice_content(&self, slice: &str) -> anyhow::Result> { 407 | let unit_proxy = self.create_unit_proxy(slice).await?; 408 | let content = unit_proxy.required_by().await?; 409 | Ok(content) 410 | } 411 | 412 | async fn create_unit_proxy(&self, unit: &str) -> anyhow::Result> { 413 | let unit_object_path = self.proxy.load_unit(unit).await?; 414 | 415 | let unit_proxy = AsyncUnitProxy::builder(self.proxy.connection()) 416 | .cache_properties(false) 417 | .path(unit_object_path) 418 | .unwrap() // safe because load_unit always returns a valid path 419 | .build() 420 | .await 421 | .unwrap(); // safe because destination, path, and interface are set 422 | 423 | Ok(unit_proxy) 424 | } 425 | 426 | // Check if the unit name is valid and append .service if needed 427 | // Cannot currently fail, I'll need to dig into what is a valid unit 428 | // name before adding checks 429 | #[allow(clippy::unnecessary_wraps)] 430 | fn get_unit_file_name(name: &str, unit_type: &UnitTypes) -> anyhow::Result { 431 | // TODO: what are valid systemd unit names? 432 | 433 | // Append proper extension for unit type to file name 434 | let extension = match unit_type { 435 | UnitTypes::Service => ".service", 436 | }; 437 | 438 | let mut result = String::from(name); 439 | if !name.ends_with(extension) { 440 | result.push_str(extension); 441 | } 442 | Ok(result) 443 | } 444 | } 445 | -------------------------------------------------------------------------------- /src/provider/systemdmanager/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod journal_reader; 2 | pub mod manager; 3 | pub mod service; 4 | pub mod systemd1_api; 5 | pub mod systemdunit; 6 | -------------------------------------------------------------------------------- /src/provider/systemdmanager/service.rs: -------------------------------------------------------------------------------- 1 | //! Exposes methods from the systemd unit and service interfaces. 
2 | use super::systemd1_api::{ 3 | ActiveState, AsyncManagerProxy, AsyncServiceProxy, AsyncUnitProxy, SUB_STATE_SERVICE_EXITED, 4 | }; 5 | use anyhow::anyhow; 6 | 7 | /// Represents the state of a service unit object. 8 | #[derive(Clone, Debug, Eq, PartialEq)] 9 | pub enum ServiceState { 10 | /// The service was not started yet. 11 | Created, 12 | /// The service was started and is currently running or restarting. 13 | Started, 14 | /// The service terminated successfully and will not be restarted. 15 | Succeeded, 16 | /// The service terminated unsuccessfully and will not be restarted. 17 | Failed, 18 | } 19 | 20 | /// Stores proxies of a systemd unit and service 21 | #[derive(Clone, Debug)] 22 | pub struct SystemdService { 23 | file: String, 24 | unit_proxy: AsyncUnitProxy<'static>, 25 | service_proxy: AsyncServiceProxy<'static>, 26 | } 27 | 28 | impl SystemdService { 29 | pub async fn new( 30 | file: &str, 31 | manager_proxy: &AsyncManagerProxy<'static>, 32 | ) -> anyhow::Result { 33 | let unit_object_path = manager_proxy.load_unit(file).await?; 34 | 35 | // Caching of properties is disabled until it is more performant 36 | // (see https://gitlab.freedesktop.org/dbus/zbus/-/issues/184) 37 | 38 | let unit_proxy = AsyncUnitProxy::builder(manager_proxy.connection()) 39 | .cache_properties(false) 40 | .path(unit_object_path) 41 | .unwrap() // safe because load_unit always returns a valid path 42 | .build() 43 | .await 44 | .unwrap(); // safe because destination, path, and interface are set 45 | 46 | let service_proxy = AsyncServiceProxy::builder(unit_proxy.connection()) 47 | .cache_properties(false) 48 | .path(unit_proxy.path().to_owned()) 49 | .unwrap() // safe because the path is taken from an existing proxy 50 | .build() 51 | .await 52 | .unwrap(); // safe because destination, path, and interface are set 53 | 54 | Ok(SystemdService { 55 | file: file.into(), 56 | unit_proxy, 57 | service_proxy, 58 | }) 59 | } 60 | 61 | /// Returns the filename of the systemd unit. 
62 | pub fn file(&self) -> String { 63 | self.file.clone() 64 | } 65 | 66 | /// Returns a coarse-grained state of the service unit object. 67 | /// 68 | /// It is assumed that RemainAfterExit is set to "yes" in the given 69 | /// unit if the service can terminate. Otherwise it would not be 70 | /// possible to distinguish between "inactive and never run" and 71 | /// "inactive and terminated successfully". 72 | pub async fn service_state(&self) -> anyhow::Result { 73 | let active_state = self.unit_proxy.active_state().await?; 74 | 75 | let service_state = match active_state { 76 | ActiveState::Inactive => { 77 | // ActiveState "inactive" means in general that the 78 | // previous run was successful or no previous run has 79 | // taken place yet. If RemainAfterExit is set to "yes" 80 | // then a successfully terminated service stays in 81 | // ActiveState "active" and only a service which was not 82 | // started before is in ActiveState "inactive". It is 83 | // assumed here that RemainAfterExit is enabled. 84 | ServiceState::Created 85 | } 86 | ActiveState::Active => { 87 | let sub_state = self.unit_proxy.sub_state().await?; 88 | if sub_state == SUB_STATE_SERVICE_EXITED { 89 | // The service terminated successfully (otherwise 90 | // ActiveState would be set to "failed") and will 91 | // not be restarted (otherwise ActiveState would be 92 | // set to "activating") and RemainAfterExit is set 93 | // to "yes" (otherwise ActiveState would be set to 94 | // "inactive"). It is assumed here that 95 | // RemainAfterExit is enabled. 96 | ServiceState::Succeeded 97 | } else { 98 | ServiceState::Started 99 | } 100 | } 101 | ActiveState::Failed => { 102 | // The service terminated unsuccessfully and will not be 103 | // restarted (otherwise ActiveState would be set to 104 | // "activating"). 
105 | ServiceState::Failed 106 | } 107 | ActiveState::Reloading => ServiceState::Started, 108 | ActiveState::Activating => ServiceState::Started, 109 | ActiveState::Deactivating => ServiceState::Started, 110 | }; 111 | 112 | Ok(service_state) 113 | } 114 | 115 | /// Retrieves the current restart count. 116 | /// 117 | /// The restart counter was introduced in systemd version 235. 118 | pub async fn restart_count(&self) -> anyhow::Result { 119 | self.service_proxy 120 | .nrestarts() 121 | .await 122 | .map_err(|e| anyhow!("Error receiving NRestarts of unit [{}]. {}", self.file, e)) 123 | } 124 | 125 | /// Retrieves the current invocation ID. 126 | /// 127 | /// The invocation ID was introduced in systemd version 232. 128 | pub async fn invocation_id(&self) -> anyhow::Result { 129 | self.unit_proxy 130 | .invocation_id() 131 | .await 132 | .map(|invocation_id| invocation_id.to_string()) 133 | .map_err(|error| { 134 | anyhow!( 135 | "InvocationID of systemd unit [{}] cannot be retrieved: {}", 136 | self.file, 137 | error 138 | ) 139 | }) 140 | } 141 | } 142 | --------------------------------------------------------------------------------