├── .extra └── k3s-selinux-next.rpm ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── config.yml │ └── feature_request.yml ├── dependabot.yaml ├── release.yaml └── workflows │ ├── generate-docs.yaml │ ├── lint_pr.yaml │ └── publish-release.yaml ├── .gitignore ├── .images ├── kube-hetzner-logo.png └── kubectl-pod-all-17022022.png ├── .pre-commit-config.yaml ├── .terraform-docs.yml ├── LICENSE ├── README.md ├── SECURITY.md ├── agents.tf ├── autoscaler-agents.tf ├── control_planes.tf ├── data.tf ├── docs ├── add-robot-server.md ├── llms.md ├── ssh.md └── terraform.md ├── examples ├── kustomization_user_deploy │ ├── README.md │ ├── helm-chart │ │ ├── helm-chart.yaml.tpl │ │ ├── kustomization.yaml.tpl │ │ └── namespace.yaml.tpl │ ├── letsencrypt │ │ ├── kustomization.yaml.tpl │ │ └── letsencrypt.yaml.tpl │ ├── mutliple-namespaces │ │ ├── base │ │ │ ├── kustomization.yaml.tpl │ │ │ └── pod.yaml.tpl │ │ ├── kustomization.yaml.tpl │ │ ├── namespace-a │ │ │ ├── kustomization.yaml.tpl │ │ │ └── namespace-a.yaml.tpl │ │ └── namespace-b │ │ │ ├── kustomization.yaml.tpl │ │ │ └── namespace-b.yaml.tpl │ └── simple-resources │ │ ├── demo-config-map.yaml.tpl │ │ ├── demo-pod.yml.tpl │ │ └── kustomization.yaml.tpl ├── micro_os_rollback │ └── Readme.md └── tls │ ├── ingress.yaml │ ├── pod.yaml │ └── service.yaml ├── init.tf ├── kube.tf.example ├── kubeconfig.tf ├── kustomization_backup.tf ├── kustomization_user.tf ├── kustomize ├── flannel-rbac.yaml └── system-upgrade-controller.yaml ├── locals.tf ├── main.tf ├── modules └── host │ ├── locals.tf │ ├── main.tf │ ├── out.tf │ ├── templates │ └── cloudinit.yaml.tpl │ ├── variables.tf │ └── versions.tf ├── nat-router.tf ├── output.tf ├── packer-template └── hcloud-microos-snapshots.pkr.hcl ├── placement_groups.tf ├── scripts ├── cleanup.sh └── create.sh ├── templates ├── autoscaler-cloudinit.yaml.tpl ├── autoscaler.yaml.tpl ├── calico.yaml.tpl ├── ccm.yaml.tpl ├── cert_manager.yaml.tpl ├── cilium.yaml.tpl ├── csi-driver-smb.yaml.tpl ├── haproxy_ingress.yaml.tpl ├── hcloud-ccm-helm.yaml.tpl ├── hcloud-csi.yaml.tpl ├── kured.yaml.tpl ├── longhorn.yaml.tpl ├── nat-router-cloudinit.yaml.tpl ├── nginx_ingress.yaml.tpl ├── plans.yaml.tpl ├── rancher.yaml.tpl └── traefik_ingress.yaml.tpl ├── values-export.tf ├── variables.tf └── versions.tf /.extra/k3s-selinux-next.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kube-hetzner/terraform-hcloud-kube-hetzner/2613b1fa5cff0dd06e25e841fb4e09317ebb3fc3/.extra/k3s-selinux-next.rpm -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: kube-hetzner 4 | open_collective: kube-hetzner 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: File a bug report 3 | title: "[Bug]: " 4 | labels: ["bug", "triage"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | **Kube-Hetzner Bug Report** 10 | 11 | Before doing so, please research the project, the solution may be in the documentation, in other issues, or in the discussions. If none of the above gave you the answer, please explain the bug in detail and provide as much information as possible. 
12 | - type: textarea 13 | id: description 14 | attributes: 15 | label: Description 16 | description: A clear and concise description of what the bug is. 17 | placeholder: What's happening? 18 | validations: 19 | required: true 20 | - type: textarea 21 | id: kube_tf 22 | attributes: 23 | label: Kube.tf file 24 | description: Please share your kube.tf file, without sensitive values, and if possible, stripped of comments. 25 | placeholder: Enter your kube.tf content here 26 | render: terraform 27 | validations: 28 | required: true 29 | - type: textarea 30 | id: screenshots 31 | attributes: 32 | label: Screenshots 33 | description: If applicable, add screenshots of the errors. 34 | placeholder: Enter screenshots here 35 | - type: input 36 | id: platform 37 | attributes: 38 | label: Platform 39 | description: Windows, Linux, Mac 40 | placeholder: Enter platform here 41 | validations: 42 | required: true 43 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: If you have questions, use the discussions 4 | url: https://github.com/kube-hetzner/terraform-hcloud-kube-hetzner/discussions 5 | about: Please ask and answer questions here. -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature Request 2 | title: "[Feature Request]: " 3 | description: "Submit a feature request for consideration" 4 | labels: ["feature request"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | **Kube-Hetzner Feature Request** 10 | 11 | For feature exploration, please use our [discussions](https://github.com/kube-hetzner/terraform-hcloud-kube-hetzner/discussions) section. However, if you judge something to be extremely important or urgent, please let us know what you need and why, and even share tips on implementation. 12 | 13 | Also please know that we are very open to PRs and will work with you to make it happen if fully relevant to the project. So if you can make it happen, and it falls within the project, please do not hesitate to ping us in the discussions about your idea. 14 | - type: textarea 15 | id: feature-description 16 | attributes: 17 | label: Description 18 | description: Tell us more about your feature request. 19 | placeholder: "E.g. Adding support for XYZ would greatly improve the user experience..."
20 | validations: 21 | required: true -------------------------------------------------------------------------------- /.github/dependabot.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "terraform" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | 8 | - package-ecosystem: "github-actions" 9 | directory: "/" 10 | schedule: 11 | interval: "daily" 12 | -------------------------------------------------------------------------------- /.github/release.yaml: -------------------------------------------------------------------------------- 1 | changelog: 2 | exclude: 3 | labels: 4 | - ignore-for-release 5 | authors: 6 | - octocat 7 | categories: 8 | - title: Breaking Changes 🛠 9 | labels: 10 | - Semver-Major 11 | - breaking-change 12 | - title: New Features 🎉 13 | labels: 14 | - Semver-Minor 15 | - enhancement 16 | - title: Bug Fixes 🐛 17 | labels: 18 | - Semver-Patch 19 | - bug 20 | - title: Other Changes 21 | labels: 22 | - "*" 23 | -------------------------------------------------------------------------------- /.github/workflows/generate-docs.yaml: -------------------------------------------------------------------------------- 1 | name: Generate terraform docs 2 | on: 3 | push: 4 | branches: 5 | - master 6 | - staging 7 | 8 | jobs: 9 | docs: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v5 13 | with: 14 | ref: ${{ github.event.pull_request.head.ref }} 15 | fetch-depth: 0 # Necessary to fetch all history for create-pull-request to work correctly 16 | 17 | - name: Render terraform docs and push changes back to PR 18 | uses: terraform-docs/gh-actions@main 19 | with: 20 | working-dir: . 21 | output-file: docs/terraform.md 22 | output-method: inject 23 | config-file: ".terraform-docs.yml" 24 | 25 | - name: Create Pull Request 26 | uses: peter-evans/create-pull-request@v7 27 | with: 28 | token: ${{ secrets.GITHUB_TOKEN }} 29 | commit-message: Update Terraform documentation 30 | title: "[AUTO] Update Terraform Documentation" 31 | body: "Automated changes by GitHub Actions" 32 | branch: "docs/update-${{ github.head_ref }}" 33 | labels: documentation -------------------------------------------------------------------------------- /.github/workflows/lint_pr.yaml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | pull_request: 5 | 6 | jobs: 7 | tfsec: 8 | runs-on: ubuntu-latest 9 | permissions: 10 | contents: read 11 | pull-requests: write 12 | name: Scan terraform files with tfsec 13 | steps: 14 | - name: Clone repo 15 | uses: actions/checkout@v5 16 | 17 | - name: tfsec 18 | uses: aquasecurity/tfsec-pr-commenter-action@v1.3.1 19 | with: 20 | github_token: ${{ github.token }} 21 | tfsec_args: --ignore-hcl-errors 22 | 23 | - name: Run tfsec with reviewdog output on the PR 24 | uses: reviewdog/action-tfsec@v1.30.0 25 | with: 26 | github_token: ${{ secrets.github_token }} 27 | filter_mode: nofilter 28 | fail_on_error: true 29 | tfsec_flags: --ignore-hcl-errors 30 | 31 | validate: 32 | runs-on: ubuntu-latest 33 | name: Validate terraform configuration 34 | steps: 35 | - name: Checkout 36 | uses: actions/checkout@v5 37 | 38 | - name: terraform validate 39 | uses: dflook/terraform-validate@v2.2.2 40 | 41 | fmt-check: 42 | runs-on: ubuntu-latest 43 | name: Check formatting of terraform files 44 | steps: 45 | - name: Checkout 46 | uses: actions/checkout@v5 47 | 48 | - name: terraform fmt 49 | uses: 
dflook/terraform-fmt-check@v2.2.2 50 | -------------------------------------------------------------------------------- /.github/workflows/publish-release.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Publish a new Github Release 3 | 4 | on: 5 | push: 6 | tags: 7 | - '*' 8 | workflow_dispatch: 9 | tags: 10 | - '*' 11 | jobs: 12 | Release: 13 | name: Release 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v5 17 | - uses: ncipollo/release-action@v1 18 | with: 19 | generateReleaseNotes: true 20 | name: ${{ github.ref_name }} 21 | token: ${{ secrets.GITHUB_TOKEN }} 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Local .terraform directories 2 | **/.terraform/* 3 | 4 | # .tfstate files 5 | *.tfstate 6 | *.tfstate.* 7 | 8 | # Crash log files 9 | crash.log 10 | crash.*.log 11 | 12 | # Exclude all .tfvars files, which are likely to contain sensitive data, such as 13 | # password, private keys, and other secrets. These should not be part of version 14 | # control as they are data points which are potentially sensitive and subject 15 | # to change depending on the environment. 16 | *.tfvars 17 | *.tfvars.json 18 | 19 | # Ignore override files as they are usually used to override resources locally and so 20 | # are not checked in 21 | override.tf 22 | override.tf.json 23 | *_override.tf 24 | *_override.tf.json 25 | 26 | # Include override files you do wish to add to version control using negated pattern 27 | # !example_override.tf 28 | 29 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan 30 | # example: *tfplan* 31 | 32 | # Ignore CLI configuration files 33 | .terraformrc 34 | terraform.rc 35 | 36 | *_kubeconfig.yaml 37 | *_kubeconfig.yaml-e 38 | terraform.tfvars 39 | plans-custom.yaml 40 | kustomization.yaml 41 | *kustomization_backup.yaml 42 | kube.tf 43 | .terraform.lock.hcl 44 | issue_fix.patch 45 | 46 | # AI related files 47 | CLAUDE.md 48 | 49 | # Misc 50 | .DS_Store 51 | 52 | requirements/* -------------------------------------------------------------------------------- /.images/kube-hetzner-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kube-hetzner/terraform-hcloud-kube-hetzner/2613b1fa5cff0dd06e25e841fb4e09317ebb3fc3/.images/kube-hetzner-logo.png -------------------------------------------------------------------------------- /.images/kubectl-pod-all-17022022.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kube-hetzner/terraform-hcloud-kube-hetzner/2613b1fa5cff0dd06e25e841fb4e09317ebb3fc3/.images/kubectl-pod-all-17022022.png -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_install_hook_types: 2 | - pre-commit 3 | 4 | repos: 5 | - repo: https://github.com/antonbabenko/pre-commit-terraform 6 | rev: v1.97.3 7 | hooks: 8 | - id: terraform_fmt 9 | - id: terraform_validate 10 | - id: terraform_tfsec 11 | pass_filenames: false 12 | - id: terraform_docs 13 | args: 14 | - '--args=--lockfile=false' 15 | # - id: terraform_tflint 16 | # args: 17 | # - '--args=--only=terraform_deprecated_interpolation' 18 | # - '--args=--only=terraform_deprecated_index' 
19 | # - '--args=--only=terraform_unused_declarations' 20 | # - '--args=--only=terraform_comment_syntax' 21 | # - '--args=--only=terraform_documented_outputs' 22 | # - '--args=--only=terraform_documented_variables' 23 | # - '--args=--only=terraform_typed_variables' 24 | # - '--args=--only=terraform_module_pinned_source' 25 | # - '--args=--only=terraform_naming_convention' 26 | # - '--args=--only=terraform_required_version' 27 | # - '--args=--only=terraform_required_providers' 28 | # - '--args=--only=terraform_standard_module_structure' 29 | # - '--args=--only=terraform_workspace_remote' 30 | - repo: https://github.com/pre-commit/pre-commit-hooks 31 | rev: v5.0.0 32 | hooks: 33 | - id: end-of-file-fixer 34 | -------------------------------------------------------------------------------- /.terraform-docs.yml: -------------------------------------------------------------------------------- 1 | formatter: "markdown table" 2 | 3 | recursive: 4 | enabled: false 5 | path: modules 6 | 7 | output: 8 | file: docs/terraform.md 9 | mode: inject 10 | template: |- 11 | 12 | {{ .Content }} 13 | 14 | 15 | output-values: 16 | enabled: false 17 | from: "" 18 | 19 | sort: 20 | enabled: true 21 | by: name 22 | 23 | settings: 24 | anchor: true 25 | color: true 26 | default: true 27 | description: false 28 | escape: true 29 | hide-empty: false 30 | html: true 31 | indent: 3 32 | lockfile: true 33 | read-comments: true 34 | required: true 35 | sensitive: true 36 | type: true 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 2 | 3 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 4 | 5 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Reporting a Vulnerability 4 | 5 | Please report any vulnerability findings privately via email to the top three contributors to the projects. You can find our emails by grepping the git logs. 
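For example, one illustrative way to do that (a hypothetical one-liner, not an official project recommendation) is:

```bash
# List the three most frequent committers with their email addresses
git shortlog -sne | head -3
```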
6 | 7 | In case you can't find the emails: 8 | 9 | - [aleksasiriski](https://github.com/aleksasiriski): [sir@tmina.org](mailto:kube-hetzner@sir.tmina.org) 10 | -------------------------------------------------------------------------------- /agents.tf: -------------------------------------------------------------------------------- 1 | module "agents" { 2 | source = "./modules/host" 3 | 4 | providers = { 5 | hcloud = hcloud, 6 | } 7 | 8 | for_each = local.agent_nodes 9 | 10 | name = "${var.use_cluster_name_in_node_name ? "${var.cluster_name}-" : ""}${each.value.nodepool_name}${try(each.value.node_name_suffix, "")}" 11 | microos_snapshot_id = substr(each.value.server_type, 0, 3) == "cax" ? data.hcloud_image.microos_arm_snapshot.id : data.hcloud_image.microos_x86_snapshot.id 12 | base_domain = var.base_domain 13 | ssh_keys = length(var.ssh_hcloud_key_label) > 0 ? concat([local.hcloud_ssh_key_id], data.hcloud_ssh_keys.keys_by_selector[0].ssh_keys.*.id) : [local.hcloud_ssh_key_id] 14 | ssh_port = var.ssh_port 15 | ssh_public_key = var.ssh_public_key 16 | ssh_private_key = var.ssh_private_key 17 | ssh_additional_public_keys = length(var.ssh_hcloud_key_label) > 0 ? concat(var.ssh_additional_public_keys, data.hcloud_ssh_keys.keys_by_selector[0].ssh_keys.*.public_key) : var.ssh_additional_public_keys 18 | firewall_ids = each.value.disable_ipv4 && each.value.disable_ipv6 ? [] : [hcloud_firewall.k3s.id] # Cannot attach a firewall when public interfaces are disabled 19 | placement_group_id = var.placement_group_disable ? null : (each.value.placement_group == null ? hcloud_placement_group.agent[each.value.placement_group_compat_idx].id : hcloud_placement_group.agent_named[each.value.placement_group].id) 20 | location = each.value.location 21 | server_type = each.value.server_type 22 | backups = each.value.backups 23 | ipv4_subnet_id = hcloud_network_subnet.agent[[for i, v in var.agent_nodepools : i if v.name == each.value.nodepool_name][0]].id 24 | dns_servers = var.dns_servers 25 | k3s_registries = var.k3s_registries 26 | k3s_registries_update_script = local.k3s_registries_update_script 27 | cloudinit_write_files_common = local.cloudinit_write_files_common 28 | cloudinit_runcmd_common = local.cloudinit_runcmd_common 29 | swap_size = each.value.swap_size 30 | zram_size = each.value.zram_size 31 | keep_disk_size = var.keep_disk_agents 32 | disable_ipv4 = each.value.disable_ipv4 33 | disable_ipv6 = each.value.disable_ipv6 34 | ssh_bastion = local.ssh_bastion 35 | network_id = data.hcloud_network.k3s.id 36 | private_ipv4 = cidrhost(hcloud_network_subnet.agent[[for i, v in var.agent_nodepools : i if v.name == each.value.nodepool_name][0]].ip_range, each.value.index + 101) 37 | 38 | labels = merge(local.labels, local.labels_agent_node) 39 | 40 | automatically_upgrade_os = var.automatically_upgrade_os 41 | 42 | depends_on = [ 43 | hcloud_network_subnet.agent, 44 | hcloud_placement_group.agent, 45 | hcloud_server.nat_router, 46 | null_resource.nat_router_await_cloud_init, 47 | ] 48 | } 49 | 50 | locals { 51 | k3s-agent-config = { for k, v in local.agent_nodes : k => merge( 52 | { 53 | node-name = module.agents[k].name 54 | server = "https://${var.use_control_plane_lb ? 
hcloud_load_balancer_network.control_plane.*.ip[0] : module.control_planes[keys(module.control_planes)[0]].private_ipv4_address}:6443" 55 | token = local.k3s_token 56 | # Kubelet arg precedence (last wins): local.kubelet_arg > v.kubelet_args > k3s_global_kubelet_args > k3s_agent_kubelet_args 57 | kubelet-arg = concat( 58 | local.kubelet_arg, 59 | v.kubelet_args, 60 | var.k3s_global_kubelet_args, 61 | var.k3s_agent_kubelet_args 62 | ) 63 | flannel-iface = local.flannel_iface 64 | node-ip = module.agents[k].private_ipv4_address 65 | node-label = v.labels 66 | node-taint = v.taints 67 | }, 68 | var.agent_nodes_custom_config, 69 | local.prefer_bundled_bin_config, 70 | # Force selinux=false if disable_selinux = true. 71 | var.disable_selinux 72 | ? { selinux = false } 73 | : (v.selinux == true ? { selinux = true } : {}) 74 | ) } 75 | 76 | agent_ips = { 77 | for k, v in module.agents : k => coalesce( 78 | v.ipv4_address, 79 | v.ipv6_address, 80 | v.private_ipv4_address 81 | ) 82 | } 83 | } 84 | 85 | resource "null_resource" "agent_config" { 86 | for_each = local.agent_nodes 87 | 88 | triggers = { 89 | agent_id = module.agents[each.key].id 90 | config = sha1(yamlencode(local.k3s-agent-config[each.key])) 91 | } 92 | 93 | connection { 94 | user = "root" 95 | private_key = var.ssh_private_key 96 | agent_identity = local.ssh_agent_identity 97 | host = local.agent_ips[each.key] 98 | port = var.ssh_port 99 | 100 | bastion_host = local.ssh_bastion.bastion_host 101 | bastion_port = local.ssh_bastion.bastion_port 102 | bastion_user = local.ssh_bastion.bastion_user 103 | bastion_private_key = local.ssh_bastion.bastion_private_key 104 | 105 | } 106 | 107 | # Generating k3s agent config file 108 | provisioner "file" { 109 | content = yamlencode(local.k3s-agent-config[each.key]) 110 | destination = "/tmp/config.yaml" 111 | } 112 | 113 | provisioner "remote-exec" { 114 | inline = [local.k3s_config_update_script] 115 | } 116 | } 117 | 118 | resource "null_resource" "agents" { 119 | for_each = local.agent_nodes 120 | 121 | triggers = { 122 | agent_id = module.agents[each.key].id 123 | } 124 | 125 | connection { 126 | user = "root" 127 | private_key = var.ssh_private_key 128 | agent_identity = local.ssh_agent_identity 129 | host = local.agent_ips[each.key] 130 | port = var.ssh_port 131 | 132 | bastion_host = local.ssh_bastion.bastion_host 133 | bastion_port = local.ssh_bastion.bastion_port 134 | bastion_user = local.ssh_bastion.bastion_user 135 | bastion_private_key = local.ssh_bastion.bastion_private_key 136 | 137 | } 138 | 139 | # Install k3s agent 140 | provisioner "remote-exec" { 141 | inline = local.install_k3s_agent 142 | } 143 | 144 | # Start the k3s agent and wait for it to have started 145 | provisioner "remote-exec" { 146 | inline = concat(var.enable_longhorn || var.enable_iscsid ? ["systemctl enable --now iscsid"] : [], [ 147 | "timeout 120 systemctl start k3s-agent 2> /dev/null", 148 | <<-EOT 149 | timeout 120 bash <<EOF 2> /dev/null 150 | until systemctl status k3s-agent > /dev/null; do 151 | systemctl start k3s-agent 2> /dev/null 152 | echo "Waiting for the k3s agent to start..."
153 | sleep 2 154 | done 155 | EOF 156 | EOT 157 | ]) 158 | } 159 | 160 | depends_on = [ 161 | null_resource.first_control_plane, 162 | null_resource.agent_config, 163 | hcloud_network_subnet.agent 164 | ] 165 | } 166 | 167 | resource "hcloud_volume" "longhorn_volume" { 168 | for_each = { for k, v in local.agent_nodes : k => v if((v.longhorn_volume_size >= 10) && (v.longhorn_volume_size <= 10240) && var.enable_longhorn) } 169 | 170 | labels = { 171 | provisioner = "terraform" 172 | cluster = var.cluster_name 173 | scope = "longhorn" 174 | } 175 | name = "${var.cluster_name}-longhorn-${module.agents[each.key].name}" 176 | size = local.agent_nodes[each.key].longhorn_volume_size 177 | server_id = module.agents[each.key].id 178 | automount = true 179 | format = var.longhorn_fstype 180 | delete_protection = var.enable_delete_protection.volume 181 | } 182 | 183 | resource "null_resource" "configure_longhorn_volume" { 184 | for_each = { for k, v in local.agent_nodes : k => v if((v.longhorn_volume_size >= 10) && (v.longhorn_volume_size <= 10240) && var.enable_longhorn) } 185 | 186 | triggers = { 187 | agent_id = module.agents[each.key].id 188 | } 189 | 190 | # Start the k3s agent and wait for it to have started 191 | provisioner "remote-exec" { 192 | inline = [ 193 | "mkdir /var/longhorn >/dev/null 2>&1", 194 | "mount -o discard,defaults ${hcloud_volume.longhorn_volume[each.key].linux_device} /var/longhorn", 195 | "${var.longhorn_fstype == "ext4" ? "resize2fs" : "xfs_growfs"} ${hcloud_volume.longhorn_volume[each.key].linux_device}", 196 | "echo '${hcloud_volume.longhorn_volume[each.key].linux_device} /var/longhorn ${var.longhorn_fstype} discard,nofail,defaults 0 0' >> /etc/fstab" 197 | ] 198 | } 199 | 200 | connection { 201 | user = "root" 202 | private_key = var.ssh_private_key 203 | agent_identity = local.ssh_agent_identity 204 | host = local.agent_ips[each.key] 205 | port = var.ssh_port 206 | 207 | bastion_host = local.ssh_bastion.bastion_host 208 | bastion_port = local.ssh_bastion.bastion_port 209 | bastion_user = local.ssh_bastion.bastion_user 210 | bastion_private_key = local.ssh_bastion.bastion_private_key 211 | 212 | } 213 | 214 | depends_on = [ 215 | hcloud_volume.longhorn_volume 216 | ] 217 | } 218 | 219 | resource "hcloud_floating_ip" "agents" { 220 | for_each = { for k, v in local.agent_nodes : k => v if coalesce(lookup(v, "floating_ip"), false) } 221 | 222 | type = "ipv4" 223 | labels = local.labels 224 | home_location = each.value.location 225 | delete_protection = var.enable_delete_protection.floating_ip 226 | } 227 | 228 | resource "hcloud_floating_ip_assignment" "agents" { 229 | for_each = { for k, v in local.agent_nodes : k => v if coalesce(lookup(v, "floating_ip"), false) } 230 | 231 | floating_ip_id = hcloud_floating_ip.agents[each.key].id 232 | server_id = module.agents[each.key].id 233 | 234 | depends_on = [ 235 | null_resource.agents 236 | ] 237 | } 238 | 239 | resource "hcloud_rdns" "agents" { 240 | for_each = { for k, v in local.agent_nodes : k => v if lookup(v, "floating_ip_rdns", null) != null } 241 | 242 | floating_ip_id = hcloud_floating_ip.agents[each.key].id 243 | ip_address = hcloud_floating_ip.agents[each.key].ip_address 244 | dns_ptr = local.agent_nodes[each.key].floating_ip_rdns 245 | 246 | depends_on = [ 247 | hcloud_floating_ip.agents 248 | ] 249 | } 250 | 251 | resource "null_resource" "configure_floating_ip" { 252 | for_each = { for k, v in local.agent_nodes : k => v if coalesce(lookup(v, "floating_ip"), false) } 253 | 254 | triggers = { 255 | agent_id = 
module.agents[each.key].id 256 | floating_ip_id = hcloud_floating_ip.agents[each.key].id 257 | } 258 | 259 | provisioner "remote-exec" { 260 | inline = [ 261 | # Reconfigure eth0: 262 | # - add floating_ip as first and other IP as second address 263 | # - add 172.31.1.1 as default gateway (In the Hetzner Cloud, the 264 | # special private IP address 172.31.1.1 is the default 265 | # gateway for the public network) 266 | # The configuration is stored in file /etc/NetworkManager/system-connections/cloud-init-eth0.nmconnection 267 | <<-EOT 268 | ETH=eth1 269 | if ip link show eth0 &>/dev/null; then 270 | ETH=eth0 271 | fi 272 | 273 | NM_CONNECTION=$(nmcli -g GENERAL.CONNECTION device show "$ETH" 2>/dev/null) 274 | if [ -z "$NM_CONNECTION" ]; then 275 | echo "ERROR: No NetworkManager connection found for $ETH" >&2 276 | exit 1 277 | fi 278 | 279 | nmcli connection modify "$NM_CONNECTION" \ 280 | ipv4.method manual \ 281 | ipv4.addresses ${hcloud_floating_ip.agents[each.key].ip_address}/32,${local.agent_ips[each.key]}/32 gw4 172.31.1.1 \ 282 | ipv4.route-metric 100 \ 283 | && nmcli connection up "$NM_CONNECTION" 284 | EOT 285 | ] 286 | } 287 | 288 | connection { 289 | user = "root" 290 | private_key = var.ssh_private_key 291 | agent_identity = local.ssh_agent_identity 292 | host = local.agent_ips[each.key] 293 | port = var.ssh_port 294 | 295 | bastion_host = local.ssh_bastion.bastion_host 296 | bastion_port = local.ssh_bastion.bastion_port 297 | bastion_user = local.ssh_bastion.bastion_user 298 | bastion_private_key = local.ssh_bastion.bastion_private_key 299 | 300 | } 301 | 302 | depends_on = [ 303 | hcloud_floating_ip_assignment.agents 304 | ] 305 | } 306 | -------------------------------------------------------------------------------- /autoscaler-agents.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | cluster_prefix = var.use_cluster_name_in_node_name ? "${var.cluster_name}-" : "" 3 | first_nodepool_snapshot_id = length(var.autoscaler_nodepools) == 0 ? "" : ( 4 | substr(var.autoscaler_nodepools[0].server_type, 0, 3) == "cax" ? data.hcloud_image.microos_arm_snapshot.id : data.hcloud_image.microos_x86_snapshot.id 5 | ) 6 | 7 | imageList = { 8 | arm64 : tostring(data.hcloud_image.microos_arm_snapshot.id) 9 | amd64 : tostring(data.hcloud_image.microos_x86_snapshot.id) 10 | } 11 | 12 | nodeConfigName = var.use_cluster_name_in_node_name ? "${var.cluster_name}-" : "" 13 | cluster_config = { 14 | imagesForArch : local.imageList 15 | nodeConfigs : { 16 | for index, nodePool in var.autoscaler_nodepools : 17 | ("${local.nodeConfigName}${nodePool.name}") => { 18 | cloudInit = data.cloudinit_config.autoscaler_config[index].rendered 19 | labels = nodePool.labels 20 | taints = nodePool.taints 21 | } 22 | } 23 | } 24 | 25 | isUsingLegacyConfig = length(var.autoscaler_labels) > 0 || length(var.autoscaler_taints) > 0 26 | 27 | autoscaler_yaml = length(var.autoscaler_nodepools) == 0 ? "" : templatefile( 28 | "${path.module}/templates/autoscaler.yaml.tpl", 29 | { 30 | cloudinit_config = local.isUsingLegacyConfig ? 
base64encode(data.cloudinit_config.autoscaler_legacy_config[0].rendered) : "" 31 | ca_image = var.cluster_autoscaler_image 32 | ca_version = var.cluster_autoscaler_version 33 | cluster_autoscaler_extra_args = var.cluster_autoscaler_extra_args 34 | cluster_autoscaler_log_level = var.cluster_autoscaler_log_level 35 | cluster_autoscaler_log_to_stderr = var.cluster_autoscaler_log_to_stderr 36 | cluster_autoscaler_stderr_threshold = var.cluster_autoscaler_stderr_threshold 37 | cluster_autoscaler_server_creation_timeout = tostring(var.cluster_autoscaler_server_creation_timeout) 38 | ssh_key = local.hcloud_ssh_key_id 39 | ipv4_subnet_id = data.hcloud_network.k3s.id 40 | snapshot_id = local.first_nodepool_snapshot_id 41 | cluster_config = base64encode(jsonencode(local.cluster_config)) 42 | firewall_id = hcloud_firewall.k3s.id 43 | cluster_name = local.cluster_prefix 44 | node_pools = var.autoscaler_nodepools 45 | enable_ipv4 = !(var.autoscaler_disable_ipv4 || local.use_nat_router) 46 | enable_ipv6 = !(var.autoscaler_disable_ipv6 || local.use_nat_router) 47 | }) 48 | # A concatenated list of all autoscaled nodes 49 | autoscaled_nodes = length(var.autoscaler_nodepools) == 0 ? {} : { 50 | for v in concat([ 51 | for k, v in data. 52 | hcloud_servers.autoscaled_nodes : [for v in v.servers : v] 53 | ]...) : v.name => v 54 | } 55 | } 56 | 57 | resource "null_resource" "configure_autoscaler" { 58 | count = length(var.autoscaler_nodepools) > 0 ? 1 : 0 59 | 60 | triggers = { 61 | template = local.autoscaler_yaml 62 | } 63 | connection { 64 | user = "root" 65 | private_key = var.ssh_private_key 66 | agent_identity = local.ssh_agent_identity 67 | host = local.first_control_plane_ip 68 | port = var.ssh_port 69 | 70 | bastion_host = local.ssh_bastion.bastion_host 71 | bastion_port = local.ssh_bastion.bastion_port 72 | bastion_user = local.ssh_bastion.bastion_user 73 | bastion_private_key = local.ssh_bastion.bastion_private_key 74 | 75 | } 76 | 77 | # Upload the autoscaler resource defintion 78 | provisioner "file" { 79 | content = local.autoscaler_yaml 80 | destination = "/tmp/autoscaler.yaml" 81 | } 82 | 83 | # Create/Apply the definition 84 | provisioner "remote-exec" { 85 | inline = ["kubectl apply -f /tmp/autoscaler.yaml"] 86 | } 87 | 88 | depends_on = [ 89 | hcloud_load_balancer.cluster, 90 | null_resource.control_planes, 91 | random_password.rancher_bootstrap, 92 | hcloud_volume.longhorn_volume, 93 | data.hcloud_image.microos_x86_snapshot 94 | ] 95 | } 96 | 97 | data "cloudinit_config" "autoscaler_config" { 98 | count = length(var.autoscaler_nodepools) 99 | 100 | gzip = true 101 | base64_encode = true 102 | 103 | # Main cloud-config configuration file. 104 | part { 105 | filename = "init.cfg" 106 | content_type = "text/cloud-config" 107 | content = templatefile( 108 | "${path.module}/templates/autoscaler-cloudinit.yaml.tpl", 109 | { 110 | hostname = "autoscaler" 111 | dns_servers = var.dns_servers 112 | has_dns_servers = local.has_dns_servers 113 | sshAuthorizedKeys = concat([var.ssh_public_key], var.ssh_additional_public_keys) 114 | k3s_config = yamlencode(merge( 115 | { 116 | server = "https://${var.use_control_plane_lb ? 
hcloud_load_balancer_network.control_plane.*.ip[0] : module.control_planes[keys(module.control_planes)[0]].private_ipv4_address}:6443" 117 | token = local.k3s_token 118 | # Kubelet arg precedence (last wins): local.kubelet_arg > nodepool.kubelet_args > k3s_global_kubelet_args > k3s_autoscaler_kubelet_args 119 | kubelet-arg = concat(local.kubelet_arg, var.autoscaler_nodepools[count.index].kubelet_args, var.k3s_global_kubelet_args, var.k3s_autoscaler_kubelet_args) 120 | flannel-iface = local.flannel_iface 121 | node-label = concat(local.default_agent_labels, [for k, v in var.autoscaler_nodepools[count.index].labels : "${k}=${v}"]) 122 | node-taint = concat(local.default_agent_taints, [for taint in var.autoscaler_nodepools[count.index].taints : "${taint.key}=${tostring(taint.value)}:${taint.effect}"]) 123 | selinux = !var.disable_selinux 124 | }, 125 | var.agent_nodes_custom_config, 126 | local.prefer_bundled_bin_config 127 | )) 128 | install_k3s_agent_script = join("\n", concat(local.install_k3s_agent, ["systemctl start k3s-agent"])) 129 | cloudinit_write_files_common = local.cloudinit_write_files_common 130 | cloudinit_runcmd_common = local.cloudinit_runcmd_common, 131 | private_network_only = var.autoscaler_disable_ipv4 && var.autoscaler_disable_ipv6, 132 | } 133 | ) 134 | } 135 | } 136 | 137 | data "cloudinit_config" "autoscaler_legacy_config" { 138 | count = length(var.autoscaler_nodepools) > 0 && local.isUsingLegacyConfig ? 1 : 0 139 | 140 | gzip = true 141 | base64_encode = true 142 | 143 | # Main cloud-config configuration file. 144 | part { 145 | filename = "init.cfg" 146 | content_type = "text/cloud-config" 147 | content = templatefile( 148 | "${path.module}/templates/autoscaler-cloudinit.yaml.tpl", 149 | { 150 | hostname = "autoscaler" 151 | dns_servers = var.dns_servers 152 | has_dns_servers = local.has_dns_servers 153 | sshAuthorizedKeys = concat([var.ssh_public_key], var.ssh_additional_public_keys) 154 | k3s_config = yamlencode(merge( 155 | { 156 | server = "https://${var.use_control_plane_lb ? 
hcloud_load_balancer_network.control_plane.*.ip[0] : module.control_planes[keys(module.control_planes)[0]].private_ipv4_address}:6443" 157 | token = local.k3s_token 158 | kubelet-arg = local.kubelet_arg 159 | flannel-iface = local.flannel_iface 160 | node-label = concat(local.default_agent_labels, var.autoscaler_labels) 161 | node-taint = concat(local.default_agent_taints, var.autoscaler_taints) 162 | selinux = !var.disable_selinux 163 | }, 164 | var.agent_nodes_custom_config, 165 | local.prefer_bundled_bin_config 166 | )) 167 | install_k3s_agent_script = join("\n", concat(local.install_k3s_agent, ["systemctl start k3s-agent"])) 168 | cloudinit_write_files_common = local.cloudinit_write_files_common 169 | cloudinit_runcmd_common = local.cloudinit_runcmd_common, 170 | private_network_only = var.autoscaler_disable_ipv4 && var.autoscaler_disable_ipv6, 171 | } 172 | ) 173 | } 174 | } 175 | 176 | data "hcloud_servers" "autoscaled_nodes" { 177 | for_each = toset(var.autoscaler_nodepools[*].name) 178 | with_selector = "hcloud/node-group=${local.cluster_prefix}${each.value}" 179 | } 180 | 181 | resource "null_resource" "autoscaled_nodes_registries" { 182 | for_each = local.autoscaled_nodes 183 | triggers = { 184 | registries = var.k3s_registries 185 | } 186 | 187 | connection { 188 | user = "root" 189 | private_key = var.ssh_private_key 190 | agent_identity = local.ssh_agent_identity 191 | host = coalesce(each.value.ipv4_address, each.value.ipv6_address, try(one(each.value.network).ip, null)) 192 | port = var.ssh_port 193 | 194 | bastion_host = local.ssh_bastion.bastion_host 195 | bastion_port = local.ssh_bastion.bastion_port 196 | bastion_user = local.ssh_bastion.bastion_user 197 | bastion_private_key = local.ssh_bastion.bastion_private_key 198 | 199 | } 200 | 201 | provisioner "file" { 202 | content = var.k3s_registries 203 | destination = "/tmp/registries.yaml" 204 | } 205 | 206 | provisioner "remote-exec" { 207 | inline = [local.k3s_registries_update_script] 208 | } 209 | } 210 | -------------------------------------------------------------------------------- /control_planes.tf: -------------------------------------------------------------------------------- 1 | module "control_planes" { 2 | source = "./modules/host" 3 | 4 | providers = { 5 | hcloud = hcloud, 6 | } 7 | 8 | for_each = local.control_plane_nodes 9 | 10 | name = "${var.use_cluster_name_in_node_name ? "${var.cluster_name}-" : ""}${each.value.nodepool_name}" 11 | microos_snapshot_id = substr(each.value.server_type, 0, 3) == "cax" ? data.hcloud_image.microos_arm_snapshot.id : data.hcloud_image.microos_x86_snapshot.id 12 | base_domain = var.base_domain 13 | ssh_keys = length(var.ssh_hcloud_key_label) > 0 ? concat([local.hcloud_ssh_key_id], data.hcloud_ssh_keys.keys_by_selector[0].ssh_keys.*.id) : [local.hcloud_ssh_key_id] 14 | ssh_port = var.ssh_port 15 | ssh_public_key = var.ssh_public_key 16 | ssh_private_key = var.ssh_private_key 17 | ssh_additional_public_keys = length(var.ssh_hcloud_key_label) > 0 ? concat(var.ssh_additional_public_keys, data.hcloud_ssh_keys.keys_by_selector[0].ssh_keys.*.public_key) : var.ssh_additional_public_keys 18 | firewall_ids = each.value.disable_ipv4 && each.value.disable_ipv6 ? [] : [hcloud_firewall.k3s.id] # Cannot attach a firewall when public interfaces are disabled 19 | placement_group_id = var.placement_group_disable ? null : (each.value.placement_group == null ? 
hcloud_placement_group.control_plane[each.value.placement_group_compat_idx].id : hcloud_placement_group.control_plane_named[each.value.placement_group].id) 20 | location = each.value.location 21 | server_type = each.value.server_type 22 | backups = each.value.backups 23 | ipv4_subnet_id = hcloud_network_subnet.control_plane[[for i, v in var.control_plane_nodepools : i if v.name == each.value.nodepool_name][0]].id 24 | dns_servers = var.dns_servers 25 | k3s_registries = var.k3s_registries 26 | k3s_registries_update_script = local.k3s_registries_update_script 27 | cloudinit_write_files_common = local.cloudinit_write_files_common 28 | cloudinit_runcmd_common = local.cloudinit_runcmd_common 29 | swap_size = each.value.swap_size 30 | zram_size = each.value.zram_size 31 | keep_disk_size = var.keep_disk_cp 32 | disable_ipv4 = each.value.disable_ipv4 33 | disable_ipv6 = each.value.disable_ipv6 34 | ssh_bastion = local.ssh_bastion 35 | network_id = data.hcloud_network.k3s.id 36 | 37 | # We leave some room so 100 eventual Hetzner LBs that can be created perfectly safely 38 | # It leaves the subnet with 254 x 254 - 100 = 64416 IPs to use, so probably enough. 39 | private_ipv4 = cidrhost(hcloud_network_subnet.control_plane[[for i, v in var.control_plane_nodepools : i if v.name == each.value.nodepool_name][0]].ip_range, each.value.index + 101) 40 | 41 | labels = merge(local.labels, local.labels_control_plane_node) 42 | 43 | automatically_upgrade_os = var.automatically_upgrade_os 44 | 45 | depends_on = [ 46 | hcloud_network_subnet.control_plane, 47 | hcloud_placement_group.control_plane, 48 | hcloud_server.nat_router, 49 | null_resource.nat_router_await_cloud_init, 50 | ] 51 | } 52 | 53 | resource "hcloud_load_balancer" "control_plane" { 54 | count = var.use_control_plane_lb ? 1 : 0 55 | name = "${var.cluster_name}-control-plane" 56 | 57 | load_balancer_type = var.control_plane_lb_type 58 | location = var.load_balancer_location 59 | labels = merge(local.labels, local.labels_control_plane_lb) 60 | delete_protection = var.enable_delete_protection.load_balancer 61 | } 62 | 63 | resource "hcloud_load_balancer_network" "control_plane" { 64 | count = var.use_control_plane_lb ? 1 : 0 65 | 66 | load_balancer_id = hcloud_load_balancer.control_plane.*.id[0] 67 | subnet_id = hcloud_network_subnet.control_plane.*.id[0] 68 | enable_public_interface = var.control_plane_lb_enable_public_interface 69 | ip = cidrhost(hcloud_network_subnet.control_plane.*.ip_range[0], 254) 70 | 71 | # To ensure backwards compatibility, we ignore changes to the IP address 72 | # as before it was set manually. 73 | lifecycle { 74 | ignore_changes = [ip] 75 | } 76 | } 77 | 78 | resource "hcloud_load_balancer_target" "control_plane" { 79 | count = var.use_control_plane_lb ? 1 : 0 80 | 81 | depends_on = [hcloud_load_balancer_network.control_plane] 82 | type = "label_selector" 83 | load_balancer_id = hcloud_load_balancer.control_plane.*.id[0] 84 | label_selector = join(",", [for k, v in merge(local.labels, local.labels_control_plane_node) : "${k}=${v}"]) 85 | use_private_ip = true 86 | } 87 | 88 | resource "hcloud_load_balancer_service" "control_plane" { 89 | count = var.use_control_plane_lb ? 
1 : 0 90 | 91 | load_balancer_id = hcloud_load_balancer.control_plane.*.id[0] 92 | protocol = "tcp" 93 | destination_port = "6443" 94 | listen_port = "6443" 95 | } 96 | 97 | locals { 98 | control_plane_ips = { 99 | for k, v in module.control_planes : k => coalesce( 100 | v.ipv4_address, 101 | v.ipv6_address, 102 | v.private_ipv4_address 103 | ) 104 | } 105 | 106 | k3s-config = { for k, v in local.control_plane_nodes : k => merge( 107 | { 108 | node-name = module.control_planes[k].name 109 | server = length(module.control_planes) == 1 ? null : "https://${ 110 | var.use_control_plane_lb ? hcloud_load_balancer_network.control_plane.*.ip[0] : 111 | module.control_planes[k].private_ipv4_address == module.control_planes[keys(module.control_planes)[0]].private_ipv4_address ? 112 | module.control_planes[keys(module.control_planes)[1]].private_ipv4_address : 113 | module.control_planes[keys(module.control_planes)[0]].private_ipv4_address}:6443" 114 | token = local.k3s_token 115 | disable-cloud-controller = true 116 | disable-kube-proxy = var.disable_kube_proxy 117 | disable = local.disable_extras 118 | # Kubelet arg precedence (last wins): local.kubelet_arg > v.kubelet_args > k3s_global_kubelet_args > k3s_control_plane_kubelet_args 119 | kubelet-arg = concat(local.kubelet_arg, v.kubelet_args, var.k3s_global_kubelet_args, var.k3s_control_plane_kubelet_args) 120 | kube-apiserver-arg = local.kube_apiserver_arg 121 | kube-controller-manager-arg = local.kube_controller_manager_arg 122 | flannel-iface = local.flannel_iface 123 | node-ip = module.control_planes[k].private_ipv4_address 124 | advertise-address = module.control_planes[k].private_ipv4_address 125 | node-label = v.labels 126 | node-taint = v.taints 127 | selinux = var.disable_selinux ? false : (v.selinux == true ? true : false) 128 | cluster-cidr = var.cluster_ipv4_cidr 129 | service-cidr = var.service_ipv4_cidr 130 | cluster-dns = local.cluster_dns_ipv4 131 | write-kubeconfig-mode = "0644" # needed for import into rancher 132 | }, 133 | lookup(local.cni_k3s_settings, var.cni_plugin, {}), 134 | var.use_control_plane_lb ? { 135 | tls-san = concat([ 136 | hcloud_load_balancer.control_plane.*.ipv4[0], 137 | hcloud_load_balancer_network.control_plane.*.ip[0], 138 | var.kubeconfig_server_address != "" ? var.kubeconfig_server_address : null 139 | ], var.additional_tls_sans) 140 | } : { 141 | tls-san = concat( 142 | compact([ 143 | module.control_planes[k].ipv4_address != "" ? module.control_planes[k].ipv4_address : null, 144 | module.control_planes[k].ipv6_address != "" ? 
module.control_planes[k].ipv6_address : null, 145 | try(one(module.control_planes[k].network).ip, null) 146 | ]), 147 | var.additional_tls_sans) 148 | }, 149 | local.etcd_s3_snapshots, 150 | var.control_planes_custom_config, 151 | local.prefer_bundled_bin_config 152 | ) } 153 | } 154 | 155 | resource "null_resource" "control_plane_config" { 156 | for_each = local.control_plane_nodes 157 | 158 | triggers = { 159 | control_plane_id = module.control_planes[each.key].id 160 | config = sha1(yamlencode(local.k3s-config[each.key])) 161 | } 162 | 163 | connection { 164 | user = "root" 165 | private_key = var.ssh_private_key 166 | agent_identity = local.ssh_agent_identity 167 | host = local.control_plane_ips[each.key] 168 | port = var.ssh_port 169 | 170 | bastion_host = local.ssh_bastion.bastion_host 171 | bastion_port = local.ssh_bastion.bastion_port 172 | bastion_user = local.ssh_bastion.bastion_user 173 | bastion_private_key = local.ssh_bastion.bastion_private_key 174 | 175 | } 176 | 177 | # Generating k3s server config file 178 | provisioner "file" { 179 | content = yamlencode(local.k3s-config[each.key]) 180 | destination = "/tmp/config.yaml" 181 | } 182 | 183 | provisioner "remote-exec" { 184 | inline = [local.k3s_config_update_script] 185 | } 186 | 187 | depends_on = [ 188 | null_resource.first_control_plane, 189 | hcloud_network_subnet.control_plane 190 | ] 191 | } 192 | 193 | 194 | resource "null_resource" "authentication_config" { 195 | for_each = local.control_plane_nodes 196 | 197 | triggers = { 198 | control_plane_id = module.control_planes[each.key].id 199 | authentication_config = sha1(var.authentication_config) 200 | } 201 | 202 | connection { 203 | user = "root" 204 | private_key = var.ssh_private_key 205 | agent_identity = local.ssh_agent_identity 206 | host = local.control_plane_ips[each.key] 207 | port = var.ssh_port 208 | 209 | bastion_host = local.ssh_bastion.bastion_host 210 | bastion_port = local.ssh_bastion.bastion_port 211 | bastion_user = local.ssh_bastion.bastion_user 212 | bastion_private_key = local.ssh_bastion.bastion_private_key 213 | 214 | } 215 | 216 | provisioner "file" { 217 | content = var.authentication_config 218 | destination = "/tmp/authentication_config.yaml" 219 | } 220 | 221 | provisioner "remote-exec" { 222 | inline = [local.k3s_authentication_config_update_script] 223 | } 224 | 225 | depends_on = [ 226 | null_resource.first_control_plane, 227 | hcloud_network_subnet.control_plane 228 | ] 229 | } 230 | 231 | resource "null_resource" "control_planes" { 232 | for_each = local.control_plane_nodes 233 | 234 | triggers = { 235 | control_plane_id = module.control_planes[each.key].id 236 | } 237 | 238 | connection { 239 | user = "root" 240 | private_key = var.ssh_private_key 241 | agent_identity = local.ssh_agent_identity 242 | host = local.control_plane_ips[each.key] 243 | port = var.ssh_port 244 | 245 | bastion_host = local.ssh_bastion.bastion_host 246 | bastion_port = local.ssh_bastion.bastion_port 247 | bastion_user = local.ssh_bastion.bastion_user 248 | bastion_private_key = local.ssh_bastion.bastion_private_key 249 | 250 | } 251 | 252 | # Install k3s server 253 | provisioner "remote-exec" { 254 | inline = local.install_k3s_server 255 | } 256 | 257 | # Start the k3s server and wait for it to have started correctly 258 | provisioner "remote-exec" { 259 | inline = [ 260 | "systemctl start k3s 2> /dev/null", 261 | # prepare the needed directories 262 | "mkdir -p /var/post_install /var/user_kustomize", 263 | # wait for the server to be ready 264 | <<-EOT 265 | 
timeout 360 bash <<EOF 2> /dev/null 266 | until systemctl status k3s > /dev/null; do 267 | systemctl start k3s 2> /dev/null 268 | echo "Waiting for the k3s server to start..." 269 | sleep 3 270 | done 271 | EOF 272 | EOT 273 | ] 274 | } 275 | 276 | depends_on = [ 277 | null_resource.first_control_plane, 278 | null_resource.control_plane_config, 279 | null_resource.authentication_config, 280 | hcloud_network_subnet.control_plane 281 | ] 282 | } 283 | -------------------------------------------------------------------------------- /data.tf: -------------------------------------------------------------------------------- 1 | data "github_release" "hetzner_ccm" { 2 | count = var.hetzner_ccm_version == null ? 1 : 0 3 | repository = "hcloud-cloud-controller-manager" 4 | owner = "hetznercloud" 5 | retrieve_by = "latest" 6 | } 7 | 8 | data "github_release" "hetzner_csi" { 9 | count = var.hetzner_csi_version == null && !var.disable_hetzner_csi ? 1 : 0 10 | repository = "csi-driver" 11 | owner = "hetznercloud" 12 | retrieve_by = "latest" 13 | } 14 | 15 | // github_release for kured 16 | data "github_release" "kured" { 17 | count = var.kured_version == null ? 1 : 0 18 | repository = "kured" 19 | owner = "kubereboot" 20 | retrieve_by = "latest" 21 | } 22 | 23 | // github_release for calico 24 | data "github_release" "calico" { 25 | count = var.calico_version == null && var.cni_plugin == "calico" ? 1 : 0 26 | repository = "calico" 27 | owner = "projectcalico" 28 | retrieve_by = "latest" 29 | } 30 | 31 | data "hcloud_ssh_keys" "keys_by_selector" { 32 | count = length(var.ssh_hcloud_key_label) > 0 ? 1 : 0 33 | with_selector = var.ssh_hcloud_key_label 34 | } 35 | -------------------------------------------------------------------------------- /docs/add-robot-server.md: -------------------------------------------------------------------------------- 1 | # Hetzner Robot Server Integration using HCCM v1.19+ 2 | 3 | This guide describes how to add Hetzner **robot servers** to a Kubernetes cluster with the help of the [hcloud-cloud-controller-manager](https://github.com/hetznercloud/hcloud-cloud-controller-manager), version 1.19 or newer. 4 | It covers the setup of both k3s and Robot nodes, including networking, configuration, and caveats. 5 | 6 | --- 7 | 8 | ## Prerequisites 9 | 10 | - **vSwitch** set up for private networking between Cloud and Robot nodes 11 | - **Webservice User** created in Hetzner Robot account settings (for API access) 12 | - `hccm` version **1.19 or newer** 13 | - **Operating System**: Ideally use the MicroOS image created by this project. Otherwise, any Linux distribution that supports k3s will work 14 | 15 | --- 16 | 17 | ## 1. Networking: Private Communication 18 | 19 | - **Communication between Robot and Cloud servers happens over a private network.** 20 | - The recommended way is using a **vSwitch**. 21 | - Alternatives like WireGuard exist, but are not covered here. 22 | - Ensure all nodes can reach each other via internal IPs (e.g., `10.x.x.x`). 23 | 24 | --- 25 | 26 | ## 2. Hetzner Robot API Access 27 | 28 | - Create a **Webservice User** in your Hetzner Robot account. 29 | - This is required for `hccm` to list robot servers via the metadata endpoint: 30 | - `https://169.254.169.254/hetzner/v1/metadata/instance-id` 31 | 32 | --- 33 | 34 | ## 3. Robot Node Network Configuration 35 | 36 | - **Manually configure** the network interface and routes on the robot server. 37 | - For Ubuntu, see [Hetzner docs](https://docs.hetzner.com/cloud/networks/connect-dedi-vswitch#persistent-example-configurations).
38 | - For RHEL-based systems (e.g., AlmaLinux), use the following `nmcli` commands: 39 | 40 |
41 | **RHEL/AlmaLinux nmcli Example** 42 | 43 | Assumptions (change these to your values!): 44 | - vSwitch subnet: `10.1.0.0/24` 45 | - vSwitch ID: `9999` # arbitrary value, replace with your vSwitch ID 46 | - Main interface: `enp6s0` 47 | 48 | > [!CAUTION] 49 | > The routes and CIDR notations shown here depend on your local setup and may vary with your network configuration. 50 | 51 | ```bash 52 | nmcli connection add type vlan con-name vlan9999 ifname vlan9999 vlan.parent enp6s0 vlan.id 9999 53 | 54 | nmcli connection modify vlan9999 802-3-ethernet.mtu 1400 # Important: vSwitch requires MTU 1400 55 | nmcli connection modify vlan9999 ipv4.addresses '10.1.0.2/24' 56 | nmcli connection modify vlan9999 ipv4.gateway '10.1.0.1' 57 | nmcli connection modify vlan9999 ipv4.method manual 58 | # Route all 10.x IPs through the vSwitch gateway 59 | nmcli connection modify vlan9999 +ipv4.routes "10.0.0.0/8 10.1.0.1" 60 | 61 | # Apply the config 62 | nmcli connection down vlan9999 63 | nmcli connection up vlan9999 64 | ``` 65 | 66 |
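Before joining the node to the cluster, it can help to sanity-check the interface. The following is an illustrative check, assuming the example values above (`vlan9999`, gateway `10.1.0.1`); since the 1400-byte MTU includes 28 bytes of IP and ICMP headers, a 1372-byte ICMP payload is the largest that should pass unfragmented:

```bash
# Confirm the VLAN ID and that the 1400-byte MTU took effect
ip -d link show vlan9999

# Confirm the 10.0.0.0/8 route points at the vSwitch gateway
ip route show | grep '^10.0.0.0/8'

# Probe the path MTU with the don't-fragment bit set: 1400 - 28 = 1372
ping -c 3 -M do -s 1372 10.1.0.1
```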
67 | 68 | --- 69 | 70 | ## 4. HCCM Helm Chart Configuration 71 | 72 | - **Update the `hcloud` Kubernetes secret** with your `robot-user` and `robot-password`. 73 | - Set `networking.enabled: true` in `hetzner_ccm_values`. 74 | - Set the correct `cluster-cidr` (the pod subnet for your cluster). 75 | - Deploy `hccm` version **1.19 or newer**. 76 | 77 | Example `hetzner_ccm_values` for Helm: 78 | 79 | ```yaml 80 | networking: 81 | enabled: true 82 | robot: 83 | enabled: true 84 | 85 | args: 86 | allocate-node-cidrs: "true" 87 | cluster-cidr: "10.42.0.0/16" # Adjust to your pod subnet 88 | 89 | env: 90 | HCLOUD_LOAD_BALANCERS_ENABLED: 91 | value: "true" 92 | HCLOUD_LOAD_BALANCERS_LOCATION: 93 | value: "fsn1" # Adjust to your LB region 94 | HCLOUD_LOAD_BALANCERS_USE_PRIVATE_IP: 95 | value: "true" 96 | HCLOUD_LOAD_BALANCERS_DISABLE_PRIVATE_INGRESS: 97 | value: "true" 98 | HCLOUD_NETWORK_ROUTES_ENABLED: 99 | value: "false" 100 | 101 | HCLOUD_TOKEN: 102 | valueFrom: 103 | secretKeyRef: 104 | name: hcloud 105 | key: token 106 | 107 | ROBOT_USER: 108 | valueFrom: 109 | secretKeyRef: 110 | name: hcloud 111 | key: robot-user 112 | optional: true 113 | ROBOT_PASSWORD: 114 | valueFrom: 115 | secretKeyRef: 116 | name: hcloud 117 | key: robot-password 118 | optional: true 119 | ``` 120 | 121 | --- 122 | 123 | ## 5. Robot Node: k3s Agent Configuration 124 | 125 | 1. **Create `/etc/rancher/k3s/config.yaml`** on the robot node: 126 | 127 | ```yaml 128 | flannel-iface: enp6s0 # Set to your main interface (only needed for Flannel CNI) 129 | prefer-bundled-bin: true 130 | kubelet-arg: 131 | - cloud-provider=external 132 | - volume-plugin-dir=/var/lib/kubelet/volumeplugins 133 | - kube-reserved=cpu=50m,memory=300Mi,ephemeral-storage=1Gi 134 | - system-reserved=cpu=250m,memory=6000Mi # Optional: reserve some space for system 135 | node-label: 136 | - k3s_upgrade=true 137 | node-taint: [] 138 | selinux: true 139 | server: https://:6443 # Replace with your API server IP 140 | token: # Replace with your cluster token 141 | ``` 142 | 143 | 2. **Before starting the agent**, verify network connectivity: 144 | - You must be able to `ping` other nodes' internal IPs (e.g., `ping 10.255.0.101`). 145 | 146 | --- 147 | 148 | ## 6. Storage and Scheduling Notes 149 | 150 | - **Hetzner Cloud Volumes** do **not** work on robot servers (CSI driver limitation). 151 | - Use [Longhorn](https://longhorn.io/) or other external storage. 152 | - Pods using cloud volumes cannot be scheduled on robot nodes. 153 | - **Longhorn**: Install `open-iscsi` and start the service: 154 | ```bash 155 | sudo dnf install -y iscsi-initiator-utils 156 | sudo systemctl start iscsid 157 | ``` 158 | - **Node Scheduling**: 159 | - Use taints and labels to control pod placement. 160 | - To prevent Hetzner CSI pods from being scheduled on robot nodes, apply the label: 161 | ``` 162 | instance.hetzner.cloud/provided-by=robot 163 | ``` 164 | [Reference](https://github.com/hetznercloud/csi-driver/blob/main/docs/kubernetes/README.md#integration-with-root-servers) 165 | 166 | --- 167 | 168 | ## 7. Caveats & Warnings 169 | 170 | - This setup may not cover all edge cases (e.g., other CNIs, non-wireguard clusters, complex private networks). 171 | - **Test your network thoroughly** before adding robot nodes to production clusters. 
172 | - **MTU Issues**: When using vSwitch, MTU configuration is critical: 173 | - vSwitch has a maximum MTU of 1400 174 | - Some users report needing even lower MTU values (e.g., 1350 or less) for stable operation 175 | - This particularly affects Cilium CNI users 176 | - Without proper MTU configuration, you may experience: 177 | - Pods unable to connect to the Kubernetes API 178 | - Network instability for pods not using host networking 179 | - Intermittent connection issues 180 | - Test different MTU values if you encounter network issues 181 | 182 | --- 183 | 184 | ## References 185 | 186 | - [Hetzner Cloud Controller Manager](https://github.com/hetznercloud/hcloud-cloud-controller-manager) 187 | - [Hetzner vSwitch & Robot Networking](https://docs.hetzner.com/cloud/networks/connect-dedi-vswitch) 188 | - [Hetzner CSI Driver: Root Server Integration](https://github.com/hetznercloud/csi-driver/blob/main/docs/kubernetes/README.md#integration-with-root-servers) 189 | -------------------------------------------------------------------------------- /docs/ssh.md: -------------------------------------------------------------------------------- 1 | Kube-Hetzner requires you to have a recent version of OpenSSH (>=6.5) installed on your client, and the use of a key-pair generated with one of the following algorithms: 2 | 3 | - ssh-ed25519 (preferred, and simplest to use without a passphrase) 4 | - rsa-sha2-512 5 | - rsa-sha2-256 6 | 7 | If your key-pair is of the `ssh-ed25519` sort (useful command `ssh-keygen -t ed25519`), and without a passphrase, you do not need to do anything else. Just set `public_key` and `private_key` to their respective path values in your kube.tf file. 8 | 9 | --- 10 | 11 | Otherwise, for a key-pair with a passphrase or a device like a Yubikey, make sure you have an SSH agent running and your key is loaded with: 12 | 13 | ```bash 14 | eval ssh-agent $SHELL 15 | ssh-add ~/.ssh/my_private-key_id 16 | ``` 17 | 18 | Verify it is loaded with: 19 | 20 | ```bash 21 | ssh-add -l 22 | ``` 23 | 24 | Then set `private_key = null` in your kube.tf file, as it will be read from the ssh-agent automatically. 25 | -------------------------------------------------------------------------------- /examples/kustomization_user_deploy/README.md: -------------------------------------------------------------------------------- 1 | # How to Install and Deploy Additional Resources with Terraform and Kube-Hetzner 2 | 3 | Kube-Hetzner allows you to provide user-defined resources after the initial setup of the Kubernetes cluster. You can deploy additional resources using Kustomize scripts in the `extra-manifests` directory with the extension `.yaml.tpl`. These scripts are recursively copied onto the control plane and deployed with `kubectl apply -k`. The main entry point for these additional resources is the `kustomization.yaml.tpl` file. In this file, you need to list the names of the other manifests, without the `.tpl` extension, in the resources section. 4 | 5 | When you execute `terraform apply`, the manifests in the `extra-manifests` directory, including the rendered versions of the `*.yaml.tpl` files, will be automatically deployed to the cluster. 6 | 7 | ## Examples 8 | 9 | Here are some examples of common use cases for deploying additional resources: 10 | 11 | > **Note**: When trying out the demos, make sure that the files from the demo folders are located in the `extra-manifests` directory.
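For orientation before diving into the demos, a minimal `kustomization.yaml.tpl` matching the files of the [simple-resources](simple-resources/) demo could look like the following sketch (the demo folder ships its own version):

```yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  # Manifests are listed without the .tpl extension, i.e. as rendered on the control plane
  - demo-config-map.yaml
  - demo-pod.yml
```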
12 | 
13 | ### Deploying Simple Resources
14 | 
15 | The easiest use case is to deploy simple resources to the cluster. Since the Kustomize resources are [Terraform template](https://registry.terraform.io/providers/hashicorp/template/latest/docs/data-sources/file) files, they can make use of the parameters provided in the `extra_kustomize_parameters` map of the `kube.tf` file.
16 | 
17 | #### `kube.tf`
18 | 
19 | ```
20 | ...
21 | extra_kustomize_parameters = {
22 |   my_config_key = "somestring"
23 | }
24 | ...
25 | ```
26 | 
27 | The variable defined in `kube.tf` can then be used in any `.yaml.tpl` manifest.
28 | 
29 | #### `configmap.yaml.tpl`
30 | 
31 | ```
32 | apiVersion: v1
33 | kind: ConfigMap
34 | metadata:
35 |   name: demo-config
36 | data:
37 |   someConfigKey: ${my_config_key}
38 | ```
39 | 
40 | For a full demo see the [simple-resources](simple-resources/) example.
41 | 
42 | ### Deploying a Helm Chart
43 | 
44 | If you want to deploy a Helm chart to your cluster, you can use the [Helm Chart controller](https://docs.k3s.io/helm) included in K3s. The Helm Chart controller provides the CRDs `HelmChart` and `HelmChartConfig`.
45 | 
46 | For a full demo see the [helm-chart](helm-chart/) example.
47 | 
48 | ### Multiple Namespaces
49 | 
50 | In more complex use cases, you may want to deploy to multiple namespaces that share a common base. Kustomize supports this behavior, and it works with Kube-Hetzner because all subdirectories of `extra-manifests` are taken into account.
51 | 
52 | For a full demo see the [multiple-namespaces](multiple-namespaces/) example.
53 | 
54 | ### Using Let's Encrypt with cert-manager
55 | 
56 | You can use a Let's Encrypt issuer to issue TLS certificates, as shown in this [example](https://doc.traefik.io/traefik/user-guides/cert-manager/). Unlike in the Traefik example, you need to create an issuer of type `ClusterIssuer` to make it available in all namespaces. Also note that the `server` in the example is a staging server; to use it in, well, production, you need the production server, whose URL can be found at `https://letsencrypt.org/getting-started/`.
57 | 
58 | For a full demo see the [letsencrypt](letsencrypt/) example.
59 | 
60 | ## Debugging
61 | 
62 | To check the existing kustomization, you can run the following command:
63 | 
64 | ```
65 | $ terraform state list | grep kustom
66 | ...
67 | module.kube-hetzner.null_resource.kustomization
68 | module.kube-hetzner.null_resource.kustomization_user["demo-config-map.yaml.tpl"]
69 | module.kube-hetzner.null_resource.kustomization_user["demo-pod.yaml.tpl"]
70 | module.kube-hetzner.null_resource.kustomization_user["kustomization.yaml.tpl"]
71 | ...
72 | ``` 73 | 74 | If you want to rerun just the kustomization part, you can use the following command: 75 | 76 | ``` 77 | terraform apply -replace='module.kube-hetzner.null_resource.kustomization_user["kustomization.yaml.tpl"]' --auto-approve 78 | ``` 79 | -------------------------------------------------------------------------------- /examples/kustomization_user_deploy/helm-chart/helm-chart.yaml.tpl: -------------------------------------------------------------------------------- 1 | apiVersion: helm.cattle.io/v1 2 | kind: HelmChart 3 | metadata: 4 | name: argocd 5 | namespace: argocd 6 | spec: 7 | repo: https://argoproj.github.io/argo-helm 8 | chart: argo-cd 9 | targetNamespace: argocd 10 | valuesContent: |- 11 | global: 12 | domain: argocd.example.com 13 | -------------------------------------------------------------------------------- /examples/kustomization_user_deploy/helm-chart/kustomization.yaml.tpl: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | resources: 5 | - namespace.yaml 6 | - helm-chart.yaml 7 | -------------------------------------------------------------------------------- /examples/kustomization_user_deploy/helm-chart/namespace.yaml.tpl: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: argocd 5 | -------------------------------------------------------------------------------- /examples/kustomization_user_deploy/letsencrypt/kustomization.yaml.tpl: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | resources: 5 | - letsencrypt.yaml 6 | 7 | -------------------------------------------------------------------------------- /examples/kustomization_user_deploy/letsencrypt/letsencrypt.yaml.tpl: -------------------------------------------------------------------------------- 1 | apiVersion: cert-manager.io/v1 2 | kind: ClusterIssuer 3 | metadata: 4 | name: letsencrypt 5 | namespace: cert-manager 6 | spec: 7 | acme: 8 | email: <--- change this to your email 9 | server: https://acme-v02.api.letsencrypt.org/directory | https://acme-staging-v02.api.letsencrypt.org/directory <-- pick one 10 | privateKeySecretRef: 11 | name: letsencrypt-account-key 12 | solvers: 13 | - http01: 14 | ingress: 15 | ingressClassName: traefik 16 | -------------------------------------------------------------------------------- /examples/kustomization_user_deploy/mutliple-namespaces/base/kustomization.yaml.tpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kube-hetzner/terraform-hcloud-kube-hetzner/2613b1fa5cff0dd06e25e841fb4e09317ebb3fc3/examples/kustomization_user_deploy/mutliple-namespaces/base/kustomization.yaml.tpl -------------------------------------------------------------------------------- /examples/kustomization_user_deploy/mutliple-namespaces/base/pod.yaml.tpl: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: myapp-pod 5 | labels: 6 | app: myapp 7 | spec: 8 | containers: 9 | - name: nginx 10 | image: nginx:1.7.9 11 | -------------------------------------------------------------------------------- /examples/kustomization_user_deploy/mutliple-namespaces/kustomization.yaml.tpl: 
--------------------------------------------------------------------------------
1 | apiVersion: kustomize.config.k8s.io/v1beta1
2 | kind: Kustomization
3 | 
4 | resources:
5 | - namespace-a
6 | - namespace-b
7 | 
--------------------------------------------------------------------------------
/examples/kustomization_user_deploy/mutliple-namespaces/namespace-a/kustomization.yaml.tpl:
--------------------------------------------------------------------------------
1 | apiVersion: kustomize.config.k8s.io/v1beta1
2 | kind: Kustomization
3 | 
4 | resources:
5 | - namespace-a.yaml
6 | - ../base
7 | namespace: namespace-a
8 | 
--------------------------------------------------------------------------------
/examples/kustomization_user_deploy/mutliple-namespaces/namespace-a/namespace-a.yaml.tpl:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Namespace
3 | metadata:
4 |   name: namespace-a
5 | 
--------------------------------------------------------------------------------
/examples/kustomization_user_deploy/mutliple-namespaces/namespace-b/kustomization.yaml.tpl:
--------------------------------------------------------------------------------
1 | apiVersion: kustomize.config.k8s.io/v1beta1
2 | kind: Kustomization
3 | 
4 | resources:
5 | - namespace-b.yaml
6 | - ../base
7 | namespace: namespace-b
8 | 
--------------------------------------------------------------------------------
/examples/kustomization_user_deploy/mutliple-namespaces/namespace-b/namespace-b.yaml.tpl:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Namespace
3 | metadata:
4 |   name: namespace-b
5 | 
--------------------------------------------------------------------------------
/examples/kustomization_user_deploy/simple-resources/demo-config-map.yaml.tpl:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 |   name: demo-config
5 | data:
6 |   someConfigKey: ${sealed_secrets_crt}
7 | 
--------------------------------------------------------------------------------
/examples/kustomization_user_deploy/simple-resources/demo-pod.yml.tpl:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 |   name: demo
5 | spec:
6 |   containers:
7 |     - name: demo-container
8 |       image: registry.k8s.io/busybox
9 |       command: [ "/bin/sh", "-c", "env" ]
10 |       env:
11 |         - name: DEMO_ENVIRONMENT_VARIABLE
12 |           valueFrom:
13 |             configMapKeyRef:
14 |               name: demo-config
15 |               key: someConfigKey
16 |   restartPolicy: Never
17 | 
--------------------------------------------------------------------------------
/examples/kustomization_user_deploy/simple-resources/kustomization.yaml.tpl:
--------------------------------------------------------------------------------
1 | apiVersion: kustomize.config.k8s.io/v1beta1
2 | kind: Kustomization
3 | 
4 | resources:
5 | - demo-config-map.yaml
6 | 
--------------------------------------------------------------------------------
/examples/micro_os_rollback/Readme.md:
--------------------------------------------------------------------------------
1 | # Roll Back a MicroOS Node Manually
2 | 
3 | How to manually roll back a MicroOS node to the previous snapshot, or to a snapshot from a specific date.
4 | 
5 | ## Background
6 | 
7 | Certain versions of `util-linux` (e.g., > 2.40) may cause errors such as:
8 | 
9 | ```
10 | ...cannot mount subpath... file exists... unmount...
11 | ```
12 | 
13 | For more details, refer to the [Kubernetes issue #130999](https://github.com/kubernetes/kubernetes/issues/130999).
14 | 
15 | ## Step 1: Find Problematic Nodes
16 | 
17 | Run the following command to identify nodes with issues:
18 | 
19 | ```bash
20 | kubectl get pods -o wide --all-namespaces | grep CreateContainerConfigError | awk '{print $8}' | sort -u | paste -sd, -
21 | ```
22 | 
23 | **Note:** The output may still include irrelevant entries (e.g., pods that have been in this state for 12h for unrelated reasons), so review the list before acting on it.
24 | 
25 | ## Step 2: Manual Rollback Per Node
26 | 
27 | SSH into each problematic node and execute the following command:
28 | 
29 | ```bash
30 | snapper --iso list | tail -2 | head -1 | awk '{print $1}' | xargs -I{} snapper rollback {} && reboot
31 | ```
32 | 
33 | ### Explanation of the Command
34 | 
35 | 1. `snapper --iso list`: Lists all snapshots with ISO timestamps.
36 | 2. `tail -2`: Keeps the last two lines, i.e. the current snapshot and the one before it.
37 | 3. `head -1`: Selects the snapshot before the current one.
38 | 4. `awk '{print $1}'`: Extracts the snapshot ID.
39 | 5. `xargs -I{} snapper rollback {}`: Rolls back to the selected snapshot.
40 | 6. `&& reboot`: Reboots the node if the rollback was successful.
41 | 
42 | ## Step 3: Automate Rollback with Ansible (Work in Progress)
43 | 
44 | If you have an inventory file, you can automate the rollback process using Ansible:
45 | 
46 | ```bash
47 | export COMMA_SEPARATED_NODE_LIST=$(kubectl get pods -o wide --all-namespaces | grep CreateContainerConfigError | awk '{print $8}' | sort -u | paste -sd, -)
48 | echo $COMMA_SEPARATED_NODE_LIST
49 | 
50 | ansible ${COMMA_SEPARATED_NODE_LIST} -i ansible/inventory.yml \
51 |   -m shell \
52 |   -a 'snapper --iso list | tail -2 | head -1 | awk "{print \$1}" | xargs -I{} snapper rollback {} && reboot'
53 | ```
54 | 
55 | ## Additional Notes
56 | 
57 | ### Snapshot List Example
58 | 
59 | Below is an example output of `snapper --iso list`:
60 | 
61 | ```bash
62 |    # │ Type   │ Pre # │ Date                │ User │ Used Space │ Cleanup │ Description            │ Userdata
63 | ─────┼────────┼───────┼─────────────────────┼──────┼────────────┼─────────┼────────────────────────┼──────────────
64 |    0 │ single │       │                     │ root │            │         │ current                │
65 |   97 │ single │       │ 2025-06-03 00:33:31 │ root │ 274.03 MiB │ number  │ Snapshot Update of #96 │ important=yes
66 |   98 │ single │       │ 2025-06-05 00:59:06 │ root │ 59.11 MiB  │ number  │ Snapshot Update of #97 │ important=yes
67 |   99 │ single │       │ 2025-06-06 01:17:29 │ root │ 22.16 MiB  │ number  │ Snapshot Update of #98 │ important=yes
68 | 100* │ single │       │ 2025-06-08 01:49:48 │ root │ 38.02 MiB  │ number  │ Snapshot Update of #99 │
69 | ```
70 | 
71 | - `*`: Marks the current/running snapshot.
72 | - `+`: Marks the snapshot to be used on the next boot.
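After the rollback command has run, you can double-check which snapshot the node will boot into before rebooting. A small sketch based on the marker columns above:

```bash
# Show only the current ('*') and next-boot ('+') snapshots
snapper --iso list | awk '$1 ~ /[*+]$/'
```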
73 | 74 | ### Alternative: Select Snapshot by Date 75 | 76 | To rollback to a snapshot from a specific date (e.g., June 6, 2025): 77 | 78 | ```bash 79 | snapper --iso list | grep 06-06 | awk '{print $1}' | xargs -I{} snapper rollback {} && reboot 80 | ``` 81 | -------------------------------------------------------------------------------- /examples/tls/ingress.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | name: nginx-ingress 5 | annotations: 6 | traefik.ingress.kubernetes.io/router.tls: "true" 7 | traefik.ingress.kubernetes.io/router.tls.certresolver: le 8 | spec: 9 | tls: 10 | - hosts: 11 | - example.com 12 | rules: 13 | - host: example.com 14 | http: 15 | paths: 16 | - path: / 17 | pathType: Prefix 18 | backend: 19 | service: 20 | name: nginx-service 21 | port: 22 | number: 80 23 | 24 | -------------------------------------------------------------------------------- /examples/tls/pod.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | labels: 5 | run: nginx 6 | name: nginx 7 | spec: 8 | containers: 9 | - image: nginx 10 | name: nginx 11 | ports: 12 | - containerPort: 80 13 | 14 | -------------------------------------------------------------------------------- /examples/tls/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: nginx-service 5 | spec: 6 | ports: 7 | - port: 80 8 | protocol: TCP 9 | targetPort: 80 10 | selector: 11 | run: nginx 12 | -------------------------------------------------------------------------------- /init.tf: -------------------------------------------------------------------------------- 1 | resource "hcloud_load_balancer" "cluster" { 2 | count = local.has_external_load_balancer ? 0 : 1 3 | name = local.load_balancer_name 4 | 5 | load_balancer_type = var.load_balancer_type 6 | location = var.load_balancer_location 7 | labels = local.labels 8 | delete_protection = var.enable_delete_protection.load_balancer 9 | 10 | algorithm { 11 | type = var.load_balancer_algorithm_type 12 | } 13 | 14 | lifecycle { 15 | ignore_changes = [ 16 | # Ignore changes to hcloud-ccm/service-uid label that is managed by the CCM. 17 | labels["hcloud-ccm/service-uid"], 18 | ] 19 | } 20 | } 21 | 22 | resource "hcloud_load_balancer_network" "cluster" { 23 | count = local.has_external_load_balancer ? 0 : 1 24 | 25 | load_balancer_id = hcloud_load_balancer.cluster.*.id[0] 26 | ip = cidrhost( 27 | ( 28 | length(hcloud_network_subnet.agent) > 0 29 | ? hcloud_network_subnet.agent.*.ip_range[0] 30 | : hcloud_network_subnet.control_plane.*.ip_range[0] 31 | ) 32 | , 254) 33 | subnet_id = ( 34 | length(hcloud_network_subnet.agent) > 0 35 | ? hcloud_network_subnet.agent.*.id[0] 36 | : hcloud_network_subnet.control_plane.*.id[0] 37 | ) 38 | enable_public_interface = true 39 | 40 | lifecycle { 41 | create_before_destroy = false 42 | ignore_changes = [ 43 | ip, 44 | enable_public_interface 45 | ] 46 | } 47 | } 48 | 49 | resource "hcloud_load_balancer_target" "cluster" { 50 | count = local.has_external_load_balancer ? 
0 : 1 51 | 52 | depends_on = [hcloud_load_balancer_network.cluster] 53 | type = "label_selector" 54 | load_balancer_id = hcloud_load_balancer.cluster.*.id[0] 55 | label_selector = join(",", concat( 56 | [for k, v in local.labels : "${k}=${v}"], 57 | [ 58 | # Generic label merge from control plane and agent namespaces with "or", 59 | # resulting in: role in (control_plane_node,agent_node) 60 | for key in keys(merge(local.labels_control_plane_node, local.labels_agent_node)) : 61 | "${key} in (${ 62 | join(",", compact([ 63 | for labels in [local.labels_control_plane_node, local.labels_agent_node] : 64 | try(labels[key], "") 65 | ])) 66 | })" 67 | ] 68 | )) 69 | use_private_ip = true 70 | } 71 | 72 | locals { 73 | first_control_plane_ip = coalesce( 74 | module.control_planes[keys(module.control_planes)[0]].ipv4_address, 75 | module.control_planes[keys(module.control_planes)[0]].ipv6_address, 76 | module.control_planes[keys(module.control_planes)[0]].private_ipv4_address 77 | ) 78 | } 79 | 80 | resource "null_resource" "first_control_plane" { 81 | connection { 82 | user = "root" 83 | private_key = var.ssh_private_key 84 | agent_identity = local.ssh_agent_identity 85 | host = local.first_control_plane_ip 86 | port = var.ssh_port 87 | timeout = "10m" # Extended timeout to handle network migrations during upgrades 88 | 89 | bastion_host = local.ssh_bastion.bastion_host 90 | bastion_port = local.ssh_bastion.bastion_port 91 | bastion_user = local.ssh_bastion.bastion_user 92 | bastion_private_key = local.ssh_bastion.bastion_private_key 93 | 94 | } 95 | 96 | # Generating k3s master config file 97 | provisioner "file" { 98 | content = yamlencode( 99 | merge( 100 | { 101 | node-name = module.control_planes[keys(module.control_planes)[0]].name 102 | token = local.k3s_token 103 | cluster-init = true 104 | disable-cloud-controller = true 105 | disable-kube-proxy = var.disable_kube_proxy 106 | disable = local.disable_extras 107 | kubelet-arg = local.kubelet_arg 108 | kube-controller-manager-arg = local.kube_controller_manager_arg 109 | flannel-iface = local.flannel_iface 110 | node-ip = module.control_planes[keys(module.control_planes)[0]].private_ipv4_address 111 | advertise-address = module.control_planes[keys(module.control_planes)[0]].private_ipv4_address 112 | node-taint = local.control_plane_nodes[keys(module.control_planes)[0]].taints 113 | node-label = local.control_plane_nodes[keys(module.control_planes)[0]].labels 114 | cluster-cidr = var.cluster_ipv4_cidr 115 | service-cidr = var.service_ipv4_cidr 116 | cluster-dns = local.cluster_dns_ipv4 117 | }, 118 | lookup(local.cni_k3s_settings, var.cni_plugin, {}), 119 | var.use_control_plane_lb ? { 120 | tls-san = concat([hcloud_load_balancer.control_plane.*.ipv4[0], hcloud_load_balancer_network.control_plane.*.ip[0]], var.additional_tls_sans) 121 | } : { 122 | tls-san = concat([local.first_control_plane_ip], var.additional_tls_sans) 123 | }, 124 | local.etcd_s3_snapshots, 125 | var.control_planes_custom_config, 126 | (local.control_plane_nodes[keys(module.control_planes)[0]].selinux == true ? 
{ selinux = true } : {}),
127 | local.prefer_bundled_bin_config
128 | )
129 | )
130 | 
131 | destination = "/tmp/config.yaml"
132 | }
133 | 
134 | # Install k3s server
135 | provisioner "remote-exec" {
136 | inline = local.install_k3s_server
137 | }
138 | 
139 | # Upon reboot start k3s and wait for it to be ready to receive commands
140 | provisioner "remote-exec" {
141 | inline = [
142 | "systemctl start k3s",
143 | # prepare the needed directories
144 | "mkdir -p /var/post_install /var/user_kustomize",
145 | # wait for k3s to become ready
146 | <<-EOT
147 | timeout 120 bash <<EOF
148 | until systemctl status k3s > /dev/null; do
149 | systemctl start k3s
150 | echo "Waiting for the k3s server to start..."
151 | sleep 2
152 | done
153 | until [ -e /etc/rancher/k3s/k3s.yaml ]; do
154 | echo "Waiting for kubectl config..."
155 | sleep 2
156 | done
157 | until [[ "\$(kubectl get --raw='/readyz' 2> /dev/null)" == "ok" ]]; do
158 | echo "Waiting for the cluster to become ready..."
159 | sleep 2
160 | done
161 | EOF
162 | EOT
163 | ]
164 | }
165 | 
166 | depends_on = [
167 | hcloud_network_subnet.control_plane
168 | ]
169 | }
170 | 
171 | # Needed for rancher setup
172 | resource "random_password" "rancher_bootstrap" {
173 | count = length(var.rancher_bootstrap_password) == 0 ? 1 : 0
174 | length = 48
175 | special = false
176 | }
177 | 
178 | # This is where all the setup of Kubernetes components happens
179 | resource "null_resource" "kustomization" {
180 | triggers = {
181 | # Redeploy helm charts when the underlying values change
182 | helm_values_yaml = join("---\n", [
183 | local.traefik_values,
184 | local.nginx_values,
185 | local.haproxy_values,
186 | local.calico_values,
187 | local.cilium_values,
188 | local.longhorn_values,
189 | local.csi_driver_smb_values,
190 | local.cert_manager_values,
191 | local.rancher_values,
192 | local.hetzner_csi_values,
193 | local.hetzner_ccm_values,
194 | 
195 | ])
196 | # Redeploy when versions of addons need to be updated
197 | versions = join("\n", [
198 | coalesce(var.initial_k3s_channel, "N/A"),
199 | coalesce(var.install_k3s_version, "N/A"),
200 | coalesce(var.cluster_autoscaler_version, "N/A"),
201 | coalesce(var.hetzner_ccm_version, "N/A"),
202 | coalesce(var.hetzner_csi_version, "N/A"),
203 | coalesce(var.kured_version, "N/A"),
204 | coalesce(var.calico_version, "N/A"),
205 | coalesce(var.cilium_version, "N/A"),
206 | coalesce(var.traefik_version, "N/A"),
207 | coalesce(var.nginx_version, "N/A"),
208 | coalesce(var.haproxy_version, "N/A"),
209 | coalesce(var.cert_manager_version, "N/A"),
210 | coalesce(var.csi_driver_smb_version, "N/A"),
211 | coalesce(var.longhorn_version, "N/A"),
212 | coalesce(var.rancher_version, "N/A"),
213 | coalesce(var.sys_upgrade_controller_version, "N/A"),
214 | ])
215 | options = join("\n", [
216 | for option, value in local.kured_options : "${option}=${value}"
217 | ])
218 | ccm_use_helm = var.hetzner_ccm_use_helm
219 | }
220 | 
221 | connection {
222 | user = "root"
223 | private_key = var.ssh_private_key
224 | agent_identity = local.ssh_agent_identity
225 | host = local.first_control_plane_ip
226 | port = var.ssh_port
227 | timeout = "10m" # Extended timeout to handle network migrations during upgrades
228 | 
229 | bastion_host = local.ssh_bastion.bastion_host
230 | bastion_port = local.ssh_bastion.bastion_port
231 | bastion_user = local.ssh_bastion.bastion_user
232 | bastion_private_key = local.ssh_bastion.bastion_private_key
233 | 
234 | }
235 | 
236 | # Upload kustomization.yaml, containing Hetzner CSI & CCM, as well as kured.
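# Note: uploading the files below has no effect by itself; they are only staged
# under /var/post_install and get applied later by the remote-exec step that runs
# "kubectl apply -k /var/post_install".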
237 | provisioner "file" { 238 | content = local.kustomization_backup_yaml 239 | destination = "/var/post_install/kustomization.yaml" 240 | } 241 | 242 | # Upload the flannel RBAC fix 243 | provisioner "file" { 244 | content = file("${path.module}/kustomize/flannel-rbac.yaml") 245 | destination = "/var/post_install/flannel-rbac.yaml" 246 | } 247 | 248 | # Upload traefik ingress controller config 249 | provisioner "file" { 250 | content = templatefile( 251 | "${path.module}/templates/traefik_ingress.yaml.tpl", 252 | { 253 | version = var.traefik_version 254 | values = indent(4, local.traefik_values) 255 | target_namespace = local.ingress_controller_namespace 256 | }) 257 | destination = "/var/post_install/traefik_ingress.yaml" 258 | } 259 | 260 | # Upload nginx ingress controller config 261 | provisioner "file" { 262 | content = templatefile( 263 | "${path.module}/templates/nginx_ingress.yaml.tpl", 264 | { 265 | version = var.nginx_version 266 | values = indent(4, local.nginx_values) 267 | target_namespace = local.ingress_controller_namespace 268 | }) 269 | destination = "/var/post_install/nginx_ingress.yaml" 270 | } 271 | 272 | # Upload haproxy ingress controller config 273 | provisioner "file" { 274 | content = templatefile( 275 | "${path.module}/templates/haproxy_ingress.yaml.tpl", 276 | { 277 | version = var.haproxy_version 278 | values = indent(4, local.haproxy_values) 279 | target_namespace = local.ingress_controller_namespace 280 | }) 281 | destination = "/var/post_install/haproxy_ingress.yaml" 282 | } 283 | 284 | # Upload the CCM patch config using the legacy deployment 285 | provisioner "file" { 286 | content = var.hetzner_ccm_use_helm ? "" : templatefile( 287 | "${path.module}/templates/ccm.yaml.tpl", 288 | { 289 | cluster_cidr_ipv4 = var.cluster_ipv4_cidr 290 | default_lb_location = var.load_balancer_location 291 | using_klipper_lb = local.using_klipper_lb 292 | }) 293 | destination = "/var/post_install/ccm.yaml" 294 | } 295 | 296 | # Upload the CCM patch config using helm 297 | provisioner "file" { 298 | content = var.hetzner_ccm_use_helm ? 
templatefile( 299 | "${path.module}/templates/hcloud-ccm-helm.yaml.tpl", 300 | { 301 | values = indent(4, local.hetzner_ccm_values) 302 | version = coalesce(local.ccm_version, "*") 303 | using_klipper_lb = local.using_klipper_lb 304 | default_lb_location = var.load_balancer_location 305 | } 306 | ) : "" 307 | destination = "/var/post_install/hcloud-ccm-helm.yaml" 308 | } 309 | 310 | # Upload the calico patch config, for the kustomization of the calico manifest 311 | # This method is a stub which could be replaced by a more practical helm implementation 312 | provisioner "file" { 313 | content = templatefile( 314 | "${path.module}/templates/calico.yaml.tpl", 315 | { 316 | values = trimspace(local.calico_values) 317 | }) 318 | destination = "/var/post_install/calico.yaml" 319 | } 320 | 321 | # Upload the cilium install file 322 | provisioner "file" { 323 | content = templatefile( 324 | "${path.module}/templates/cilium.yaml.tpl", 325 | { 326 | values = indent(4, local.cilium_values) 327 | version = var.cilium_version 328 | }) 329 | destination = "/var/post_install/cilium.yaml" 330 | } 331 | 332 | # Upload the system upgrade controller plans config 333 | provisioner "file" { 334 | content = templatefile( 335 | "${path.module}/templates/plans.yaml.tpl", 336 | { 337 | channel = var.initial_k3s_channel 338 | version = var.install_k3s_version 339 | disable_eviction = !var.system_upgrade_enable_eviction 340 | drain = var.system_upgrade_use_drain 341 | }) 342 | destination = "/var/post_install/plans.yaml" 343 | } 344 | 345 | # Upload the Longhorn config 346 | provisioner "file" { 347 | content = templatefile( 348 | "${path.module}/templates/longhorn.yaml.tpl", 349 | { 350 | longhorn_namespace = var.longhorn_namespace 351 | longhorn_repository = var.longhorn_repository 352 | version = var.longhorn_version 353 | bootstrap = var.longhorn_helmchart_bootstrap 354 | values = indent(4, local.longhorn_values) 355 | }) 356 | destination = "/var/post_install/longhorn.yaml" 357 | } 358 | 359 | # Upload the csi-driver config (ignored if csi is disabled) 360 | provisioner "file" { 361 | content = var.disable_hetzner_csi ? 
"" : templatefile( 362 | "${path.module}/templates/hcloud-csi.yaml.tpl", 363 | { 364 | version = coalesce(local.csi_version, "*") 365 | values = indent(4, local.hetzner_csi_values) 366 | } 367 | ) 368 | destination = "/var/post_install/hcloud-csi.yaml" 369 | } 370 | 371 | # Upload the csi-driver-smb config 372 | provisioner "file" { 373 | content = templatefile( 374 | "${path.module}/templates/csi-driver-smb.yaml.tpl", 375 | { 376 | version = var.csi_driver_smb_version 377 | bootstrap = var.csi_driver_smb_helmchart_bootstrap 378 | values = indent(4, local.csi_driver_smb_values) 379 | }) 380 | destination = "/var/post_install/csi-driver-smb.yaml" 381 | } 382 | 383 | # Upload the cert-manager config 384 | provisioner "file" { 385 | content = templatefile( 386 | "${path.module}/templates/cert_manager.yaml.tpl", 387 | { 388 | version = var.cert_manager_version 389 | bootstrap = var.cert_manager_helmchart_bootstrap 390 | values = indent(4, local.cert_manager_values) 391 | }) 392 | destination = "/var/post_install/cert_manager.yaml" 393 | } 394 | 395 | # Upload the Rancher config 396 | provisioner "file" { 397 | content = templatefile( 398 | "${path.module}/templates/rancher.yaml.tpl", 399 | { 400 | rancher_install_channel = var.rancher_install_channel 401 | version = var.rancher_version 402 | bootstrap = var.rancher_helmchart_bootstrap 403 | values = indent(4, local.rancher_values) 404 | }) 405 | destination = "/var/post_install/rancher.yaml" 406 | } 407 | 408 | provisioner "file" { 409 | content = templatefile( 410 | "${path.module}/templates/kured.yaml.tpl", 411 | { 412 | options = local.kured_options 413 | } 414 | ) 415 | destination = "/var/post_install/kured.yaml" 416 | } 417 | 418 | # Deploy secrets, logging is automatically disabled due to sensitive variables 419 | provisioner "remote-exec" { 420 | inline = [ 421 | <<-EOT 422 | set -ex 423 | # Retry logic to handle temporary network connectivity issues during upgrades 424 | MAX_ATTEMPTS=30 425 | RETRY_INTERVAL=10 426 | for attempt in $(seq 1 $MAX_ATTEMPTS); do 427 | echo "Attempt $attempt: Checking kubectl connectivity..." 428 | if [ "$(kubectl get --raw='/readyz' 2>/dev/null)" = "ok" ]; then 429 | echo "kubectl connectivity established, deploying secrets..." 430 | kubectl -n kube-system create secret generic hcloud --from-literal=token=${var.hcloud_token} --from-literal=network=${data.hcloud_network.k3s.name} --dry-run=client -o yaml | kubectl apply -f - 431 | kubectl -n kube-system create secret generic hcloud-csi --from-literal=token=${var.hcloud_token} --dry-run=client -o yaml | kubectl apply -f - 432 | echo "Secrets deployed successfully" 433 | break 434 | else 435 | echo "kubectl not ready yet, waiting $RETRY_INTERVAL seconds..." 436 | sleep $RETRY_INTERVAL 437 | fi 438 | if [ $attempt -eq $MAX_ATTEMPTS ]; then 439 | echo "Failed to establish kubectl connectivity after $MAX_ATTEMPTS attempts" 440 | exit 1 441 | fi 442 | done 443 | EOT 444 | ] 445 | } 446 | 447 | # Deploy our post-installation kustomization 448 | provisioner "remote-exec" { 449 | inline = concat([ 450 | "set -ex", 451 | 452 | # This ugly hack is here, because terraform serializes the 453 | # embedded yaml files with "- |2", when there is more than 454 | # one yamldocument in the embedded file. Kustomize does not understand 455 | # that syntax and tries to parse the blocks content as a file, resulting 456 | # in weird errors. so gnu sed with funny escaping is used to 457 | # replace lines like "- |3" by "- |" (yaml block syntax). 
458 | # due to indentation this should not change the embedded
459 | # manifests themselves
460 | "sed -i 's/^- |[0-9]\\+$/- |/g' /var/post_install/kustomization.yaml",
461 | 
462 | # Wait for k3s to become ready (we check one more time) because in some edge cases,
463 | # the cluster had become unavailable for a few seconds, at this very instant.
464 | <<-EOT
465 | timeout 360 bash <<EOF
466 | until [[ "\$(kubectl get --raw='/readyz' 2> /dev/null)" == "ok" ]]; do
467 | echo "Waiting for the cluster to become ready..."
468 | sleep 2
469 | done
470 | EOF
471 | EOT
472 | ]
473 | ,
474 | var.hetzner_ccm_use_helm ? [
475 | "echo 'Remove legacy ccm manifests if they exist'",
476 | "kubectl delete serviceaccount,deployment -n kube-system --field-selector 'metadata.name=hcloud-cloud-controller-manager' --selector='app.kubernetes.io/managed-by!=Helm'",
477 | "kubectl delete clusterrolebinding -n kube-system --field-selector 'metadata.name=system:hcloud-cloud-controller-manager' --selector='app.kubernetes.io/managed-by!=Helm'",
478 | ] : [
479 | "echo 'Uninstall helm ccm manifests if they exist'",
480 | "kubectl delete --ignore-not-found -n kube-system helmchart.helm.cattle.io/hcloud-cloud-controller-manager",
481 | ],
482 | [
483 | # Ready, set, go for the kustomization
484 | "kubectl apply -k /var/post_install",
485 | "echo 'Waiting for the system-upgrade-controller deployment to become available...'",
486 | "kubectl -n system-upgrade wait --for=condition=available --timeout=900s deployment/system-upgrade-controller",
487 | "sleep 7", # important as the system upgrade controller CRDs sometimes don't get ready right away, especially with Cilium.
488 | "kubectl -n system-upgrade apply -f /var/post_install/plans.yaml"
489 | ],
490 | local.has_external_load_balancer ? [] : [
491 | <<-EOT
492 | timeout 360 bash < /dev/null)" ]; do
494 | echo "Waiting for load-balancer to get an IP..."
495 | sleep 2
496 | done
497 | EOF
498 | EOT
499 | ])
500 | }
501 | 
502 | depends_on = [
503 | hcloud_load_balancer.cluster,
504 | null_resource.control_planes,
505 | random_password.rancher_bootstrap,
506 | hcloud_volume.longhorn_volume
507 | ]
508 | }
509 | 
--------------------------------------------------------------------------------
/kubeconfig.tf:
--------------------------------------------------------------------------------
1 | resource "ssh_sensitive_resource" "kubeconfig" {
2 | # Note: moved from remote_file to ssh_sensitive_resource because
3 | # remote_file does not support bastion hosts and ssh_sensitive_resource does.
4 | # The default behaviour is to run file blocks and commands at create time
5 | # You can also specify 'destroy' to run the commands at destroy time
6 | when = "create"
7 | 
8 | bastion_host = local.ssh_bastion.bastion_host
9 | bastion_port = local.ssh_bastion.bastion_port
10 | bastion_user = local.ssh_bastion.bastion_user
11 | bastion_private_key = local.ssh_bastion.bastion_private_key
12 | 
13 | host = can(ipv6(local.first_control_plane_ip)) ? "[${local.first_control_plane_ip}]" : local.first_control_plane_ip
14 | port = var.ssh_port
15 | user = "root"
16 | private_key = var.ssh_private_key
17 | agent = var.ssh_private_key == null
18 | 
19 | # An ssh-agent with your SSH private keys should be running
20 | # Use 'private_key' to set the SSH key otherwise
21 | 
22 | timeout = "15m"
23 | 
24 | commands = [
25 | "cat /etc/rancher/k3s/k3s.yaml"
26 | ]
27 | 
28 | depends_on = [null_resource.control_planes[0]]
29 | }
30 | 
31 | locals {
32 | kubeconfig_server_address = var.kubeconfig_server_address != "" ?
var.kubeconfig_server_address : (var.use_control_plane_lb ? 33 | ( 34 | var.control_plane_lb_enable_public_interface ? 35 | hcloud_load_balancer.control_plane.*.ipv4[0] 36 | : hcloud_load_balancer_network.control_plane.*.ip[0] 37 | ) 38 | : 39 | (can(local.first_control_plane_ip) ? local.first_control_plane_ip : "unknown") 40 | ) 41 | kubeconfig_external = replace(replace(ssh_sensitive_resource.kubeconfig.result, "127.0.0.1", local.kubeconfig_server_address), "default", var.cluster_name) 42 | kubeconfig_parsed = yamldecode(local.kubeconfig_external) 43 | kubeconfig_data = { 44 | host = local.kubeconfig_parsed["clusters"][0]["cluster"]["server"] 45 | client_certificate = base64decode(local.kubeconfig_parsed["users"][0]["user"]["client-certificate-data"]) 46 | client_key = base64decode(local.kubeconfig_parsed["users"][0]["user"]["client-key-data"]) 47 | cluster_ca_certificate = base64decode(local.kubeconfig_parsed["clusters"][0]["cluster"]["certificate-authority-data"]) 48 | cluster_name = var.cluster_name 49 | } 50 | } 51 | 52 | resource "local_sensitive_file" "kubeconfig" { 53 | count = var.create_kubeconfig ? 1 : 0 54 | content = local.kubeconfig_external 55 | filename = "${var.cluster_name}_kubeconfig.yaml" 56 | file_permission = "600" 57 | } 58 | -------------------------------------------------------------------------------- /kustomization_backup.tf: -------------------------------------------------------------------------------- 1 | resource "local_file" "kustomization_backup" { 2 | count = var.create_kustomization ? 1 : 0 3 | content = local.kustomization_backup_yaml 4 | filename = "${var.cluster_name}_kustomization_backup.yaml" 5 | file_permission = "600" 6 | } 7 | -------------------------------------------------------------------------------- /kustomization_user.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | user_kustomization_templates = try(fileset(var.extra_kustomize_folder, "**/*.yaml.tpl"), toset([])) 3 | } 4 | 5 | resource "null_resource" "kustomization_user" { 6 | for_each = local.user_kustomization_templates 7 | 8 | connection { 9 | user = "root" 10 | private_key = var.ssh_private_key 11 | agent_identity = local.ssh_agent_identity 12 | host = local.first_control_plane_ip 13 | port = var.ssh_port 14 | 15 | bastion_host = local.ssh_bastion.bastion_host 16 | bastion_port = local.ssh_bastion.bastion_port 17 | bastion_user = local.ssh_bastion.bastion_user 18 | bastion_private_key = local.ssh_bastion.bastion_private_key 19 | 20 | } 21 | 22 | provisioner "remote-exec" { 23 | inline = [ 24 | "mkdir -p $(dirname /var/user_kustomize/${each.key})" 25 | ] 26 | } 27 | 28 | provisioner "file" { 29 | content = templatefile("${var.extra_kustomize_folder}/${each.key}", var.extra_kustomize_parameters) 30 | destination = replace("/var/user_kustomize/${each.key}", ".yaml.tpl", ".yaml") 31 | } 32 | 33 | triggers = { 34 | manifest_sha1 = "${sha1(templatefile("${var.extra_kustomize_folder}/${each.key}", var.extra_kustomize_parameters))}" 35 | } 36 | 37 | depends_on = [ 38 | null_resource.kustomization 39 | ] 40 | } 41 | 42 | resource "null_resource" "kustomization_user_deploy" { 43 | count = length(local.user_kustomization_templates) > 0 ? 
1 : 0 44 | 45 | connection { 46 | user = "root" 47 | private_key = var.ssh_private_key 48 | agent_identity = local.ssh_agent_identity 49 | host = local.first_control_plane_ip 50 | port = var.ssh_port 51 | 52 | bastion_host = local.ssh_bastion.bastion_host 53 | bastion_port = local.ssh_bastion.bastion_port 54 | bastion_user = local.ssh_bastion.bastion_user 55 | bastion_private_key = local.ssh_bastion.bastion_private_key 56 | 57 | } 58 | 59 | # Remove templates after rendering, and apply changes. 60 | provisioner "remote-exec" { 61 | # Debugging: "sh -c 'for file in $(find /var/user_kustomize -type f -name \"*.yaml\" | sort -n); do echo \"\n### Template $${file}.tpl after rendering:\" && cat $${file}; done'", 62 | inline = compact([ 63 | "rm -f /var/user_kustomize/**/*.yaml.tpl", 64 | "echo 'Applying user kustomization...'", 65 | "kubectl apply -k /var/user_kustomize/ --wait=true", 66 | var.extra_kustomize_deployment_commands 67 | ]) 68 | } 69 | 70 | lifecycle { 71 | replace_triggered_by = [ 72 | null_resource.kustomization_user 73 | ] 74 | } 75 | 76 | depends_on = [ 77 | null_resource.kustomization_user 78 | ] 79 | } 80 | -------------------------------------------------------------------------------- /kustomize/flannel-rbac.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: flannel-node-lister 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: system:node 9 | subjects: 10 | - kind: Group 11 | name: system:nodes 12 | apiGroup: rbac.authorization.k8s.io -------------------------------------------------------------------------------- /kustomize/system-upgrade-controller.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: system-upgrade-controller 5 | namespace: system-upgrade 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: system-upgrade-controller 11 | volumeMounts: 12 | - name: ca-certificates 13 | mountPath: /var/lib/ca-certificates 14 | volumes: 15 | - name: ca-certificates 16 | hostPath: 17 | path: /var/lib/ca-certificates 18 | type: Directory 19 | -------------------------------------------------------------------------------- /main.tf: -------------------------------------------------------------------------------- 1 | resource "random_password" "k3s_token" { 2 | length = 48 3 | special = false 4 | } 5 | 6 | data "hcloud_image" "microos_x86_snapshot" { 7 | with_selector = "microos-snapshot=yes" 8 | with_architecture = "x86" 9 | most_recent = true 10 | } 11 | 12 | data "hcloud_image" "microos_arm_snapshot" { 13 | with_selector = "microos-snapshot=yes" 14 | with_architecture = "arm" 15 | most_recent = true 16 | } 17 | 18 | resource "hcloud_ssh_key" "k3s" { 19 | count = var.hcloud_ssh_key_id == null ? 1 : 0 20 | name = var.cluster_name 21 | public_key = var.ssh_public_key 22 | labels = local.labels 23 | } 24 | 25 | resource "hcloud_network" "k3s" { 26 | count = local.use_existing_network ? 0 : 1 27 | name = var.cluster_name 28 | ip_range = var.network_ipv4_cidr 29 | labels = local.labels 30 | } 31 | 32 | data "hcloud_network" "k3s" { 33 | id = local.use_existing_network ? var.existing_network_id[0] : hcloud_network.k3s[0].id 34 | } 35 | 36 | 37 | # We start from the end of the subnets cidr array, 38 | # as we would have fewer control plane nodepools, than agent ones. 
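# For example (illustrative only; actual ranges depend on var.network_ipv4_cidr
# and local.network_ipv4_subnets): the first control plane pool takes subnet
# index 255, the second 254, and so on, while agent pools count up from index 0,
# so the two groups grow towards each other without colliding.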
39 | resource "hcloud_network_subnet" "control_plane" { 40 | count = length(var.control_plane_nodepools) 41 | network_id = data.hcloud_network.k3s.id 42 | type = "cloud" 43 | network_zone = var.network_region 44 | ip_range = local.network_ipv4_subnets[255 - count.index] 45 | } 46 | 47 | # Here we start at the beginning of the subnets cidr array 48 | resource "hcloud_network_subnet" "agent" { 49 | count = length(var.agent_nodepools) 50 | network_id = data.hcloud_network.k3s.id 51 | type = "cloud" 52 | network_zone = var.network_region 53 | ip_range = local.network_ipv4_subnets[count.index] 54 | } 55 | 56 | # Subnet for NAT router and other peripherals 57 | resource "hcloud_network_subnet" "nat_router" { 58 | count = var.nat_router != null ? 1 : 0 59 | network_id = data.hcloud_network.k3s.id 60 | type = "cloud" 61 | network_zone = var.network_region 62 | ip_range = local.network_ipv4_subnets[var.nat_router_subnet_index] 63 | } 64 | 65 | 66 | resource "hcloud_firewall" "k3s" { 67 | name = var.cluster_name 68 | labels = local.labels 69 | 70 | dynamic "rule" { 71 | for_each = local.firewall_rules_list 72 | content { 73 | description = rule.value.description 74 | direction = rule.value.direction 75 | protocol = rule.value.protocol 76 | port = lookup(rule.value, "port", null) 77 | destination_ips = lookup(rule.value, "destination_ips", []) 78 | source_ips = lookup(rule.value, "source_ips", []) 79 | } 80 | } 81 | } 82 | 83 | -------------------------------------------------------------------------------- /modules/host/locals.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | # ssh_agent_identity is not set if the private key is passed directly, but if ssh agent is used, the public key tells ssh agent which private key to use. 3 | # For terraforms provisioner.connection.agent_identity, we need the public key as a string. 4 | ssh_agent_identity = var.ssh_private_key == null ? var.ssh_public_key : null 5 | # shared flags for ssh to ignore host keys for all connections during provisioning. 6 | ssh_args = "-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentitiesOnly=yes -o PubkeyAuthentication=yes" 7 | 8 | ssh_proxy_jump = var.ssh_bastion.bastion_host != null ? " -o ProxyCommand=\"ssh -W %h:%p ${local.ssh_args} -p ${var.ssh_bastion.bastion_port} ${var.ssh_bastion.bastion_user}@${var.ssh_bastion.bastion_host} -i /tmp/${random_string.identity_file.id}\" " : "" 9 | 10 | # ssh_client_identity is used for ssh "-i" flag, its the private key if that is set, or a public key 11 | # if an ssh agent is used. 12 | ssh_client_identity = var.ssh_private_key == null ? var.ssh_public_key : var.ssh_private_key 13 | 14 | # the hosts name with its unique suffix attached 15 | name = "${var.name}-${random_string.server.id}" 16 | 17 | # check if the user has set dns servers 18 | has_dns_servers = length(var.dns_servers) > 0 19 | } 20 | -------------------------------------------------------------------------------- /modules/host/main.tf: -------------------------------------------------------------------------------- 1 | resource "random_string" "server" { 2 | length = 3 3 | lower = true 4 | special = false 5 | numeric = false 6 | upper = false 7 | 8 | keepers = { 9 | # We re-create the apart of the name changes. 
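# (i.e. the random suffix, and with it the server, is re-created whenever var.name changes)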
10 | name = var.name 11 | } 12 | } 13 | 14 | resource "random_string" "identity_file" { 15 | length = 20 16 | lower = true 17 | special = false 18 | numeric = true 19 | upper = false 20 | } 21 | 22 | variable "network" { 23 | type = object({ 24 | network_id = number 25 | ip = string 26 | alias_ips = list(string) 27 | }) 28 | default = null 29 | } 30 | 31 | resource "hcloud_server" "server" { 32 | name = local.name 33 | image = var.microos_snapshot_id 34 | server_type = var.server_type 35 | location = var.location 36 | ssh_keys = var.ssh_keys 37 | firewall_ids = var.firewall_ids 38 | placement_group_id = var.placement_group_id 39 | backups = var.backups 40 | user_data = data.cloudinit_config.config.rendered 41 | keep_disk = var.keep_disk_size 42 | public_net { 43 | ipv4_enabled = !var.disable_ipv4 44 | ipv6_enabled = !var.disable_ipv6 45 | } 46 | 47 | network { 48 | network_id = var.network_id 49 | ip = var.private_ipv4 50 | alias_ips = [] 51 | } 52 | 53 | labels = var.labels 54 | 55 | # Prevent destroying the whole cluster if the user changes 56 | # any of the attributes that force to recreate the servers. 57 | lifecycle { 58 | ignore_changes = [ 59 | location, 60 | ssh_keys, 61 | user_data, 62 | image, 63 | ] 64 | } 65 | 66 | connection { 67 | user = "root" 68 | private_key = var.ssh_private_key 69 | agent_identity = local.ssh_agent_identity 70 | host = coalesce(self.ipv4_address, self.ipv6_address, try(one(self.network).ip, null)) 71 | port = var.ssh_port 72 | 73 | bastion_host = var.ssh_bastion.bastion_host 74 | bastion_port = var.ssh_bastion.bastion_port 75 | bastion_user = var.ssh_bastion.bastion_user 76 | bastion_private_key = var.ssh_bastion.bastion_private_key 77 | 78 | } 79 | 80 | # Prepare ssh identity file 81 | provisioner "local-exec" { 82 | command = <<-EOT 83 | install -b -m 600 /dev/null /tmp/${random_string.identity_file.id} 84 | echo "${local.ssh_client_identity}" | sed 's/\r$//' > /tmp/${random_string.identity_file.id} 85 | EOT 86 | } 87 | 88 | # Wait for MicroOS to reboot and be ready. 89 | provisioner "local-exec" { 90 | command = <<-EOT 91 | timeout 600 bash < /dev/null 93 | do 94 | echo "Waiting for MicroOS to become available..." 95 | sleep 3 96 | done 97 | EOF 98 | EOT 99 | } 100 | 101 | # Cleanup ssh identity file 102 | provisioner "local-exec" { 103 | command = <<-EOT 104 | rm /tmp/${random_string.identity_file.id} 105 | EOT 106 | } 107 | 108 | 109 | provisioner "remote-exec" { 110 | inline = var.automatically_upgrade_os ? 
[ 111 | <<-EOT 112 | echo "Automatic OS updates are enabled" 113 | EOT 114 | ] : [ 115 | <<-EOT 116 | echo "Automatic OS updates are disabled" 117 | systemctl --now disable transactional-update.timer 118 | EOT 119 | ] 120 | } 121 | 122 | } 123 | 124 | resource "null_resource" "registries" { 125 | triggers = { 126 | registries = var.k3s_registries 127 | } 128 | 129 | connection { 130 | user = "root" 131 | private_key = var.ssh_private_key 132 | agent_identity = local.ssh_agent_identity 133 | host = coalesce(hcloud_server.server.ipv4_address, hcloud_server.server.ipv6_address, try(one(hcloud_server.server.network).ip, null)) 134 | port = var.ssh_port 135 | 136 | bastion_host = var.ssh_bastion.bastion_host 137 | bastion_port = var.ssh_bastion.bastion_port 138 | bastion_user = var.ssh_bastion.bastion_user 139 | bastion_private_key = var.ssh_bastion.bastion_private_key 140 | 141 | } 142 | 143 | provisioner "file" { 144 | content = var.k3s_registries 145 | destination = "/tmp/registries.yaml" 146 | } 147 | 148 | provisioner "remote-exec" { 149 | inline = [var.k3s_registries_update_script] 150 | } 151 | 152 | depends_on = [hcloud_server.server] 153 | } 154 | 155 | resource "hcloud_rdns" "server" { 156 | count = (var.base_domain != "" && !var.disable_ipv4) ? 1 : 0 157 | 158 | server_id = hcloud_server.server.id 159 | ip_address = coalesce(hcloud_server.server.ipv4_address, try(one(hcloud_server.server.network).ip, null)) 160 | dns_ptr = format("%s.%s", local.name, var.base_domain) 161 | } 162 | 163 | resource "hcloud_rdns" "server_ipv6" { 164 | count = (var.base_domain != "" && !var.disable_ipv6) ? 1 : 0 165 | 166 | server_id = hcloud_server.server.id 167 | ip_address = hcloud_server.server.ipv6_address 168 | dns_ptr = format("%s.%s", local.name, var.base_domain) 169 | } 170 | 171 | 172 | data "cloudinit_config" "config" { 173 | gzip = true 174 | base64_encode = true 175 | 176 | # Main cloud-config configuration file. 
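# The rendered payload (gzip'ed and base64-encoded, as configured above) is what
# hcloud_server.server passes to Hetzner as user_data.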
177 | part { 178 | filename = "init.cfg" 179 | content_type = "text/cloud-config" 180 | content = templatefile( 181 | "${path.module}/templates/cloudinit.yaml.tpl", 182 | { 183 | hostname = local.name 184 | dns_servers = var.dns_servers 185 | has_dns_servers = local.has_dns_servers 186 | sshAuthorizedKeys = concat([var.ssh_public_key], var.ssh_additional_public_keys) 187 | cloudinit_write_files_common = var.cloudinit_write_files_common 188 | cloudinit_runcmd_common = var.cloudinit_runcmd_common 189 | swap_size = var.swap_size 190 | private_network_only = (var.disable_ipv4 && var.disable_ipv6) 191 | } 192 | ) 193 | } 194 | } 195 | 196 | resource "null_resource" "zram" { 197 | triggers = { 198 | zram_size = var.zram_size 199 | } 200 | 201 | connection { 202 | user = "root" 203 | private_key = var.ssh_private_key 204 | agent_identity = local.ssh_agent_identity 205 | host = coalesce(hcloud_server.server.ipv4_address, hcloud_server.server.ipv6_address, try(one(hcloud_server.server.network).ip, null)) 206 | port = var.ssh_port 207 | 208 | bastion_host = var.ssh_bastion.bastion_host 209 | bastion_port = var.ssh_bastion.bastion_port 210 | bastion_user = var.ssh_bastion.bastion_user 211 | bastion_private_key = var.ssh_bastion.bastion_private_key 212 | 213 | } 214 | 215 | provisioner "file" { 216 | content = <<-EOT 217 | #!/bin/bash 218 | 219 | # Switching off swap 220 | swapoff /dev/zram0 221 | 222 | rmmod zram 223 | EOT 224 | destination = "/usr/local/bin/k3s-swapoff" 225 | } 226 | 227 | provisioner "file" { 228 | content = <<-EOT 229 | #!/bin/bash 230 | 231 | # get the amount of memory in the machine 232 | # load the dependency module 233 | modprobe zram 234 | 235 | # initialize the device with zstd compression algorithm 236 | echo zstd > /sys/block/zram0/comp_algorithm; 237 | echo ${var.zram_size} > /sys/block/zram0/disksize 238 | 239 | # Creating the swap filesystem 240 | mkswap /dev/zram0 241 | 242 | # Switch the swaps on 243 | swapon -p 100 /dev/zram0 244 | EOT 245 | destination = "/usr/local/bin/k3s-swapon" 246 | } 247 | 248 | # Setup zram if it's enabled 249 | provisioner "file" { 250 | content = <<-EOT 251 | [Unit] 252 | Description=Swap with zram 253 | After=multi-user.target 254 | 255 | [Service] 256 | Type=oneshot 257 | RemainAfterExit=true 258 | ExecStart=/usr/local/bin/k3s-swapon 259 | ExecStop=/usr/local/bin/k3s-swapoff 260 | 261 | [Install] 262 | WantedBy=multi-user.target 263 | EOT 264 | destination = "/etc/systemd/system/zram.service" 265 | } 266 | 267 | provisioner "remote-exec" { 268 | inline = concat(var.zram_size != "" ? [ 269 | "chmod +x /usr/local/bin/k3s-swapon", 270 | "chmod +x /usr/local/bin/k3s-swapoff", 271 | "systemctl disable --now zram.service", 272 | "systemctl enable --now zram.service", 273 | ] : [ 274 | "systemctl disable --now zram.service", 275 | ]) 276 | } 277 | 278 | depends_on = [hcloud_server.server] 279 | } 280 | 281 | # Resource to toggle transactional-update.timer based on automatically_upgrade_os setting 282 | resource "null_resource" "os_upgrade_toggle" { 283 | triggers = { 284 | os_upgrade_state = var.automatically_upgrade_os ? 
"enabled" : "disabled" 285 | server_id = hcloud_server.server.id 286 | } 287 | 288 | connection { 289 | user = "root" 290 | private_key = var.ssh_private_key 291 | agent_identity = local.ssh_agent_identity 292 | host = coalesce(hcloud_server.server.ipv4_address, hcloud_server.server.ipv6_address, try(one(hcloud_server.server.network).ip, null)) 293 | port = var.ssh_port 294 | 295 | bastion_host = var.ssh_bastion.bastion_host 296 | bastion_port = var.ssh_bastion.bastion_port 297 | bastion_user = var.ssh_bastion.bastion_user 298 | bastion_private_key = var.ssh_bastion.bastion_private_key 299 | 300 | } 301 | 302 | provisioner "remote-exec" { 303 | inline = [ 304 | <<-EOT 305 | if [ "${var.automatically_upgrade_os}" = "true" ]; then 306 | echo "automatically_upgrade_os changed to true, enabling transactional-update.timer" 307 | systemctl enable --now transactional-update.timer || true 308 | else 309 | echo "automatically_upgrade_os changed to false, disabling transactional-update.timer" 310 | systemctl disable --now transactional-update.timer || true 311 | fi 312 | EOT 313 | ] 314 | } 315 | 316 | depends_on = [ 317 | hcloud_server.server, 318 | null_resource.registries 319 | ] 320 | } 321 | -------------------------------------------------------------------------------- /modules/host/out.tf: -------------------------------------------------------------------------------- 1 | output "ipv4_address" { 2 | value = hcloud_server.server.ipv4_address 3 | } 4 | 5 | output "ipv6_address" { 6 | value = hcloud_server.server.ipv6_address 7 | } 8 | 9 | output "private_ipv4_address" { 10 | value = try(one(hcloud_server.server.network).ip, "") 11 | } 12 | 13 | output "name" { 14 | value = hcloud_server.server.name 15 | } 16 | 17 | output "id" { 18 | value = hcloud_server.server.id 19 | } 20 | 21 | output "domain_assignments" { 22 | description = "Assignment of domain to the primary IP of the server" 23 | value = [ 24 | for rdns in hcloud_rdns.server : { 25 | domain = rdns.dns_ptr 26 | ips = [rdns.ip_address] 27 | } 28 | ] 29 | } 30 | -------------------------------------------------------------------------------- /modules/host/templates/cloudinit.yaml.tpl: -------------------------------------------------------------------------------- 1 | #cloud-config 2 | 3 | write_files: 4 | 5 | ${cloudinit_write_files_common} 6 | 7 | # Apply DNS config 8 | %{ if has_dns_servers ~} 9 | manage_resolv_conf: true 10 | resolv_conf: 11 | nameservers: 12 | %{ for dns_server in dns_servers ~} 13 | - ${dns_server} 14 | %{ endfor ~} 15 | %{ endif ~} 16 | 17 | # Add ssh authorized keys 18 | ssh_authorized_keys: 19 | %{ for key in sshAuthorizedKeys ~} 20 | - ${key} 21 | %{ endfor ~} 22 | 23 | # Resize /var, not /, as that's the last partition in MicroOS image. 
24 | growpart: 25 | devices: ["/var"] 26 | 27 | # Make sure the hostname is set correctly 28 | hostname: ${hostname} 29 | preserve_hostname: true 30 | 31 | runcmd: 32 | 33 | ${cloudinit_runcmd_common} 34 | 35 | # Configure default routes based on public ip availability 36 | %{if private_network_only~} 37 | # Private-only setup: eth0 is the private interface 38 | - [ip, route, add, default, via, '10.0.0.1', dev, 'eth0', metric, '100'] 39 | %{else~} 40 | # Standard setup: eth0 is public, configure both IPv4 and IPv6 41 | - [ip, route, add, default, via, '172.31.1.1', dev, 'eth0', metric, '100'] 42 | - [ip, -6, route, add, default, via, 'fe80::1', dev, 'eth0', metric, '100'] 43 | %{endif~} 44 | 45 | %{if swap_size != ""~} 46 | - | 47 | btrfs subvolume create /var/lib/swap 48 | chmod 700 /var/lib/swap 49 | truncate -s 0 /var/lib/swap/swapfile 50 | chattr +C /var/lib/swap/swapfile 51 | fallocate -l ${swap_size} /var/lib/swap/swapfile 52 | chmod 600 /var/lib/swap/swapfile 53 | mkswap /var/lib/swap/swapfile 54 | swapon /var/lib/swap/swapfile 55 | echo "/var/lib/swap/swapfile none swap defaults 0 0" | sudo tee -a /etc/fstab 56 | cat << EOF >> /etc/systemd/system/swapon-late.service 57 | [Unit] 58 | Description=Activate all swap devices later 59 | After=default.target 60 | 61 | [Service] 62 | Type=oneshot 63 | ExecStart=/sbin/swapon -a 64 | 65 | [Install] 66 | WantedBy=default.target 67 | EOF 68 | systemctl daemon-reload 69 | systemctl enable swapon-late.service 70 | %{endif~} 71 | -------------------------------------------------------------------------------- /modules/host/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { 2 | description = "Host name" 3 | type = string 4 | } 5 | variable "microos_snapshot_id" { 6 | description = "MicroOS snapshot ID to be used. Per default empty, an initial snapshot will be created" 7 | type = string 8 | default = "" 9 | } 10 | variable "base_domain" { 11 | description = "Base domain used for reverse dns" 12 | type = string 13 | } 14 | 15 | variable "ssh_port" { 16 | description = "SSH port" 17 | type = number 18 | } 19 | 20 | variable "ssh_public_key" { 21 | description = "SSH public Key" 22 | type = string 23 | } 24 | 25 | variable "ssh_private_key" { 26 | description = "SSH private Key" 27 | type = string 28 | } 29 | 30 | variable "ssh_additional_public_keys" { 31 | description = "Additional SSH public Keys. 
Use them to grant other team members root access to your cluster nodes" 32 | type = list(string) 33 | default = [] 34 | } 35 | 36 | variable "ssh_keys" { 37 | description = "List of SSH key IDs" 38 | type = list(string) 39 | nullable = true 40 | } 41 | 42 | variable "firewall_ids" { 43 | description = "Set of firewall IDs" 44 | type = set(number) 45 | nullable = true 46 | } 47 | 48 | variable "placement_group_id" { 49 | description = "Placement group ID" 50 | type = number 51 | nullable = true 52 | } 53 | 54 | variable "labels" { 55 | description = "Labels" 56 | type = map(any) 57 | nullable = true 58 | } 59 | 60 | variable "location" { 61 | description = "The server location" 62 | type = string 63 | } 64 | 65 | variable "ipv4_subnet_id" { 66 | description = "The subnet id" 67 | type = string 68 | } 69 | 70 | variable "private_ipv4" { 71 | description = "Private IP for the server" 72 | type = string 73 | } 74 | 75 | variable "server_type" { 76 | description = "The server type" 77 | type = string 78 | } 79 | 80 | variable "backups" { 81 | description = "Enable automatic backups via Hetzner" 82 | type = bool 83 | default = false 84 | } 85 | 86 | variable "packages_to_install" { 87 | description = "Packages to install" 88 | type = list(string) 89 | default = [] 90 | } 91 | 92 | variable "dns_servers" { 93 | type = list(string) 94 | description = "IP Addresses to use for the DNS Servers, set to an empty list to use the ones provided by Hetzner" 95 | } 96 | 97 | variable "automatically_upgrade_os" { 98 | type = bool 99 | default = true 100 | } 101 | 102 | variable "k3s_registries" { 103 | default = "" 104 | type = string 105 | } 106 | 107 | variable "k3s_registries_update_script" { 108 | default = "" 109 | type = string 110 | } 111 | 112 | variable "cloudinit_write_files_common" { 113 | default = "" 114 | type = string 115 | } 116 | 117 | variable "cloudinit_runcmd_common" { 118 | default = "" 119 | type = string 120 | } 121 | 122 | variable "swap_size" { 123 | default = "" 124 | type = string 125 | 126 | validation { 127 | condition = can(regex("^$|[1-9][0-9]{0,3}(G|M)$", var.swap_size)) 128 | error_message = "Invalid swap size. Examples: 512M, 1G" 129 | } 130 | } 131 | 132 | variable "zram_size" { 133 | default = "" 134 | type = string 135 | 136 | validation { 137 | condition = can(regex("^$|[1-9][0-9]{0,3}(G|M)$", var.zram_size)) 138 | error_message = "Invalid zram size. Examples: 512M, 1G" 139 | } 140 | } 141 | 142 | variable "keep_disk_size" { 143 | type = bool 144 | default = false 145 | description = "Whether to keep OS disks of nodes the same size when upgrading a node" 146 | } 147 | 148 | variable "disable_ipv4" { 149 | type = bool 150 | default = false 151 | description = "Whether to disable ipv4 on the server. If you disable ipv4 and ipv6 make sure you have an access to your private network." 152 | } 153 | 154 | variable "disable_ipv6" { 155 | type = bool 156 | default = false 157 | description = "Whether to disable ipv4 on the server. If you disable ipv4 and ipv6 make sure you have an access to your private network." 158 | } 159 | 160 | variable "network_id" { 161 | type = number 162 | default = null 163 | description = "The network id to attach the server to." 
166 | variable "ssh_bastion" { 167 | type = object({ 168 | 169 | bastion_host = string 170 | bastion_port = number 171 | bastion_user = string 172 | bastion_private_key = string 173 | }) 174 | } -------------------------------------------------------------------------------- /modules/host/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_providers { 3 | hcloud = { 4 | source = "hetznercloud/hcloud" 5 | version = ">= 1.51.0" 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /nat-router.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | nat_router_ip = var.nat_router != null ? cidrhost(hcloud_network_subnet.nat_router[0].ip_range, 1) : "" 3 | nat_router_data_center = var.nat_router != null ? { 4 | "fsn1" : "fsn1-dc14", 5 | "nbg1" : "nbg1-dc3", 6 | "hel1" : "hel1-dc2", 7 | "ash" : "ash-dc1", 8 | "hil" : "hil-dc1", 9 | "sin" : "sin-dc1", 10 | }[var.nat_router.location] : null 11 | } 12 | 13 | data "cloudinit_config" "nat_router_config" { 14 | count = var.nat_router != null ? 1 : 0 15 | 16 | gzip = true 17 | base64_encode = true 18 | 19 | # Main cloud-config configuration file. 20 | part { 21 | filename = "init.cfg" 22 | content_type = "text/cloud-config" 23 | content = templatefile( 24 | "${path.module}/templates/nat-router-cloudinit.yaml.tpl", 25 | { 26 | hostname = "nat-router" 27 | dns_servers = var.dns_servers 28 | has_dns_servers = local.has_dns_servers 29 | sshAuthorizedKeys = concat([var.ssh_public_key], var.ssh_additional_public_keys) 30 | enable_sudo = var.nat_router.enable_sudo 31 | private_network_ipv4_range = data.hcloud_network.k3s.ip_range 32 | ssh_port = var.ssh_port 33 | ssh_max_auth_tries = var.ssh_max_auth_tries 34 | } 35 | ) 36 | } 37 | } 38 | 39 | resource "hcloud_network_route" "nat_route_public_internet" { 40 | count = var.nat_router != null ? 1 : 0 41 | network_id = data.hcloud_network.k3s.id 42 | destination = "0.0.0.0/0" 43 | gateway = local.nat_router_ip 44 | } 45 | 46 | resource "hcloud_primary_ip" "nat_router_primary_ipv4" { 47 | # explicitly declare the ipv4 address, such that the address 48 | # is stable against possible replacements of the nat router 49 | count = var.nat_router != null ? 1 : 0 50 | type = "ipv4" 51 | name = "${var.cluster_name}-nat-router-ipv4" 52 | datacenter = local.nat_router_data_center 53 | auto_delete = false 54 | assignee_type = "server" 55 | } 56 | 57 | resource "hcloud_primary_ip" "nat_router_primary_ipv6" { 58 | # explicitly declare the ipv6 address, such that the address 59 | # is stable against possible replacements of the nat router 60 | count = var.nat_router != null ? 1 : 0 61 | type = "ipv6" 62 | name = "${var.cluster_name}-nat-router-ipv6" 63 | datacenter = local.nat_router_data_center 64 | auto_delete = false 65 | assignee_type = "server" 66 | }
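The two primary IPs above exist so that the router's public addresses survive a replacement of the server itself. On the kube.tf side, enabling all of this is a single object (a sketch; the nat_router variable and its validation are declared in the root variables.tf further down):

    nat_router = {
      server_type = "cx22"
      location    = "fsn1"
      enable_sudo = false # set true only if you need sudo on the router
    }
    use_control_plane_lb = true # required by the nat_router validation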
67 | resource "hcloud_server" "nat_router" { 68 | count = var.nat_router != null ? 1 : 0 69 | name = "${var.cluster_name}-nat-router" 70 | image = "debian-12" 71 | server_type = var.nat_router.server_type 72 | location = var.nat_router.location 73 | ssh_keys = length(var.ssh_hcloud_key_label) > 0 ? concat([local.hcloud_ssh_key_id], data.hcloud_ssh_keys.keys_by_selector[0].ssh_keys.*.id) : [local.hcloud_ssh_key_id] 74 | firewall_ids = [hcloud_firewall.k3s.id] 75 | user_data = data.cloudinit_config.nat_router_config[0].rendered 76 | keep_disk = false 77 | public_net { 78 | ipv4_enabled = true 79 | ipv4 = hcloud_primary_ip.nat_router_primary_ipv4[0].id 80 | ipv6_enabled = true 81 | ipv6 = hcloud_primary_ip.nat_router_primary_ipv6[0].id 82 | } 83 | 84 | network { 85 | network_id = data.hcloud_network.k3s.id 86 | ip = local.nat_router_ip 87 | alias_ips = [] 88 | } 89 | 90 | labels = merge( 91 | { 92 | role = "nat_router" 93 | }, 94 | try(var.nat_router.labels, {}), 95 | ) 96 | 97 | } 98 | 99 | resource "null_resource" "nat_router_await_cloud_init" { 100 | count = var.nat_router != null ? 1 : 0 101 | 102 | depends_on = [ 103 | hcloud_network_route.nat_route_public_internet, 104 | hcloud_server.nat_router, 105 | ] 106 | 107 | triggers = { 108 | config = data.cloudinit_config.nat_router_config[0].rendered 109 | } 110 | 111 | connection { 112 | user = "nat-router" 113 | private_key = var.ssh_private_key 114 | agent_identity = local.ssh_agent_identity 115 | host = hcloud_server.nat_router[0].ipv4_address 116 | port = var.ssh_port 117 | } 118 | 119 | provisioner "remote-exec" { 120 | inline = ["cloud-init status --wait > /dev/null || echo 'Ready to move on'"] 121 | # on_failure = continue # this would fail because of the reboot 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /output.tf: -------------------------------------------------------------------------------- 1 | output "cluster_name" { 2 | value = var.cluster_name 3 | description = "Shared prefix for all resources belonging to this cluster." 4 | } 5 | 6 | output "network_id" { 7 | value = data.hcloud_network.k3s.id 8 | description = "The ID of the HCloud network." 9 | } 10 | 11 | output "ssh_key_id" { 12 | value = local.hcloud_ssh_key_id 13 | description = "The ID of the HCloud SSH key." 14 | } 15 | 16 | output "control_planes_public_ipv4" { 17 | value = [ 18 | for obj in module.control_planes : obj.ipv4_address 19 | ] 20 | description = "The public IPv4 addresses of the control plane servers." 21 | } 22 | 23 | output "control_planes_public_ipv6" { 24 | value = [ 25 | for obj in module.control_planes : obj.ipv6_address 26 | ] 27 | description = "The public IPv6 addresses of the control plane servers." 28 | } 29 | 30 | output "agents_public_ipv4" { 31 | value = [ 32 | for obj in module.agents : obj.ipv4_address 33 | ] 34 | description = "The public IPv4 addresses of the agent servers." 35 | } 36 | 37 | output "agents_public_ipv6" { 38 | value = [ 39 | for obj in module.agents : obj.ipv6_address 40 | ] 41 | description = "The public IPv6 addresses of the agent servers." 42 | } 43 | 44 | output "ingress_public_ipv4" { 45 | description = "The public IPv4 address of the Hetzner load balancer (with fallback to first control plane node)" 46 | value = local.has_external_load_balancer ? local.first_control_plane_ip : hcloud_load_balancer.cluster[0].ipv4 47 | } 48 | 49 | output "ingress_public_ipv6" { 50 | description = "The public IPv6 address of the Hetzner load balancer (with fallback to first control plane node)" 51 | value = local.has_external_load_balancer ? module.control_planes[keys(module.control_planes)[0]].ipv6_address : (var.load_balancer_disable_ipv6 ?
null : hcloud_load_balancer.cluster[0].ipv6) 52 | } 53 | 54 | output "lb_control_plane_ipv4" { 55 | description = "The public IPv4 address of the Hetzner control plane load balancer" 56 | value = one(hcloud_load_balancer.control_plane[*].ipv4) 57 | } 58 | 59 | output "lb_control_plane_ipv6" { 60 | description = "The public IPv6 address of the Hetzner control plane load balancer" 61 | value = one(hcloud_load_balancer.control_plane[*].ipv6) 62 | } 63 | 64 | 65 | output "k3s_endpoint" { 66 | description = "A controller endpoint to register new nodes" 67 | value = "https://${var.use_control_plane_lb ? hcloud_load_balancer_network.control_plane.*.ip[0] : module.control_planes[keys(module.control_planes)[0]].private_ipv4_address}:6443" 68 | } 69 | 70 | output "k3s_token" { 71 | description = "The k3s token to register new nodes" 72 | value = local.k3s_token 73 | sensitive = true 74 | } 75 | 76 | output "control_plane_nodes" { 77 | description = "The control plane nodes" 78 | value = [for node in module.control_planes : node] 79 | } 80 | 81 | output "agent_nodes" { 82 | description = "The agent nodes" 83 | value = [for node in module.agents : node] 84 | } 85 | 86 | output "domain_assignments" { 87 | description = "Assignments of domains to IPs based on reverse DNS" 88 | value = concat( 89 | # Propagate domain assignments from control plane and agent nodes. 90 | flatten([ 91 | for node in concat(values(module.control_planes), values(module.agents)) : 92 | node.domain_assignments 93 | ]), 94 | # Get assignments from floating IPs. 95 | [for rdns in hcloud_rdns.agents : { 96 | domain = rdns.dns_ptr 97 | ips = [rdns.ip_address] 98 | }] 99 | ) 100 | } 101 | 102 | # Keeping for backward compatibility 103 | output "kubeconfig_file" { 104 | value = local.kubeconfig_external 105 | description = "Kubeconfig file content with external IP address, or internal IP address if only private ips are available" 106 | sensitive = true 107 | } 108 | 109 | output "kubeconfig" { 110 | value = local.kubeconfig_external 111 | description = "Kubeconfig file content with external IP address, or internal IP address if only private ips are available" 112 | sensitive = true 113 | } 114 | 115 | output "kubeconfig_data" { 116 | description = "Structured kubeconfig data to supply to other providers" 117 | value = local.kubeconfig_data 118 | sensitive = true 119 | } 120 | 121 | output "cilium_values" { 122 | description = "Helm values.yaml used for Cilium" 123 | value = local.cilium_values 124 | sensitive = true 125 | } 126 | 127 | output "cert_manager_values" { 128 | description = "Helm values.yaml used for cert-manager" 129 | value = local.cert_manager_values 130 | sensitive = true 131 | } 132 | 133 | output "csi_driver_smb_values" { 134 | description = "Helm values.yaml used for SMB CSI driver" 135 | value = local.csi_driver_smb_values 136 | sensitive = true 137 | } 138 | 139 | output "longhorn_values" { 140 | description = "Helm values.yaml used for Longhorn" 141 | value = local.longhorn_values 142 | sensitive = true 143 | } 144 | 145 | output "traefik_values" { 146 | description = "Helm values.yaml used for Traefik" 147 | value = local.traefik_values 148 | sensitive = true 149 | } 150 | 151 | output "nginx_values" { 152 | description = "Helm values.yaml used for nginx-ingress" 153 | value = local.nginx_values 154 | sensitive = true 155 | } 156 | 157 | output "haproxy_values" { 158 | description = "Helm values.yaml used for HAProxy" 159 | value = local.haproxy_values 160 | sensitive = true 161 | } 162 | 163 | output 
"nat_router_public_ipv4" { 164 | description = "The address of the nat router, if it exists." 165 | value = try(hcloud_server.nat_router[0].ipv4_address, null) 166 | } 167 | output "nat_router_public_ipv6" { 168 | description = "The address of the nat router, if it exists." 169 | value = try(hcloud_server.nat_router[0].ipv6_address, null) 170 | } 171 | output "nat_router_username" { 172 | description = "The non-root user as which you can ssh into the router." 173 | value = "nat-router" # hard-coded in cloud-init template. 174 | } 175 | output "nat_router_ssh_port" { 176 | description = "The non-root user as which you can ssh into the router." 177 | value = var.ssh_port 178 | } 179 | -------------------------------------------------------------------------------- /packer-template/hcloud-microos-snapshots.pkr.hcl: -------------------------------------------------------------------------------- 1 | /* 2 | * Creates a MicroOS snapshot for Kube-Hetzner 3 | */ 4 | packer { 5 | required_plugins { 6 | hcloud = { 7 | version = ">= 1.0.5" 8 | source = "github.com/hashicorp/hcloud" 9 | } 10 | } 11 | } 12 | 13 | variable "hcloud_token" { 14 | type = string 15 | default = env("HCLOUD_TOKEN") 16 | sensitive = true 17 | } 18 | 19 | # We download the OpenSUSE MicroOS x86 image from an automatically selected mirror. 20 | variable "opensuse_microos_x86_mirror_link" { 21 | type = string 22 | default = "https://download.opensuse.org/tumbleweed/appliances/openSUSE-MicroOS.x86_64-ContainerHost-OpenStack-Cloud.qcow2" 23 | } 24 | 25 | # We download the OpenSUSE MicroOS ARM image from an automatically selected mirror. 26 | variable "opensuse_microos_arm_mirror_link" { 27 | type = string 28 | default = "https://download.opensuse.org/ports/aarch64/tumbleweed/appliances/openSUSE-MicroOS.aarch64-ContainerHost-OpenStack-Cloud.qcow2" 29 | } 30 | 31 | # If you need to add other packages to the OS, do it here in the default value, like ["vim", "curl", "wget"] 32 | # When looking for packages, you need to search for OpenSUSE Tumbleweed packages, as MicroOS is based on Tumbleweed. 33 | variable "packages_to_install" { 34 | type = list(string) 35 | default = [] 36 | } 37 | 38 | locals { 39 | needed_packages = join(" ", concat(["restorecond policycoreutils policycoreutils-python-utils setools-console audit bind-utils wireguard-tools fuse open-iscsi nfs-client xfsprogs cryptsetup lvm2 git cifs-utils bash-completion mtr tcpdump udica qemu-guest-agent"], var.packages_to_install)) 40 | 41 | # Add local variables for inline shell commands 42 | download_image = "wget --timeout=5 --waitretry=5 --tries=5 --retry-connrefused --inet4-only " 43 | 44 | write_image = <<-EOT 45 | set -ex 46 | echo 'MicroOS image loaded, writing to disk... ' 47 | qemu-img convert -p -f qcow2 -O host_device $(ls -a | grep -ie '^opensuse.*microos.*qcow2$') /dev/sda 48 | echo 'done. Rebooting...' 49 | sleep 1 && udevadm settle && reboot 50 | EOT 51 | 52 | install_packages = <<-EOT 53 | set -ex 54 | echo "First reboot successful, installing needed packages..." 
/packer-template/hcloud-microos-snapshots.pkr.hcl: -------------------------------------------------------------------------------- 1 | /* 2 | * Creates a MicroOS snapshot for Kube-Hetzner 3 | */ 4 | packer { 5 | required_plugins { 6 | hcloud = { 7 | version = ">= 1.0.5" 8 | source = "github.com/hashicorp/hcloud" 9 | } 10 | } 11 | } 12 | 13 | variable "hcloud_token" { 14 | type = string 15 | default = env("HCLOUD_TOKEN") 16 | sensitive = true 17 | } 18 | 19 | # We download the OpenSUSE MicroOS x86 image from an automatically selected mirror. 20 | variable "opensuse_microos_x86_mirror_link" { 21 | type = string 22 | default = "https://download.opensuse.org/tumbleweed/appliances/openSUSE-MicroOS.x86_64-ContainerHost-OpenStack-Cloud.qcow2" 23 | } 24 | 25 | # We download the OpenSUSE MicroOS ARM image from an automatically selected mirror. 26 | variable "opensuse_microos_arm_mirror_link" { 27 | type = string 28 | default = "https://download.opensuse.org/ports/aarch64/tumbleweed/appliances/openSUSE-MicroOS.aarch64-ContainerHost-OpenStack-Cloud.qcow2" 29 | } 30 | 31 | # If you need to add other packages to the OS, do it here in the default value, like ["vim", "curl", "wget"] 32 | # When looking for packages, you need to search for OpenSUSE Tumbleweed packages, as MicroOS is based on Tumbleweed. 33 | variable "packages_to_install" { 34 | type = list(string) 35 | default = [] 36 | } 37 | 38 | locals { 39 | needed_packages = join(" ", concat(["restorecond policycoreutils policycoreutils-python-utils setools-console audit bind-utils wireguard-tools fuse open-iscsi nfs-client xfsprogs cryptsetup lvm2 git cifs-utils bash-completion mtr tcpdump udica qemu-guest-agent"], var.packages_to_install)) 40 | 41 | # Add local variables for inline shell commands 42 | download_image = "wget --timeout=5 --waitretry=5 --tries=5 --retry-connrefused --inet4-only " 43 | 44 | write_image = <<-EOT 45 | set -ex 46 | echo 'MicroOS image loaded, writing to disk... ' 47 | qemu-img convert -p -f qcow2 -O host_device $(ls -a | grep -ie '^opensuse.*microos.*qcow2$') /dev/sda 48 | echo 'done. Rebooting...' 49 | sleep 1 && udevadm settle && reboot 50 | EOT 51 | 52 | install_packages = <<-EOT 53 | set -ex 54 | echo "First reboot successful, installing needed packages..." 55 | transactional-update --continue pkg install -y ${local.needed_packages} 56 | transactional-update --continue shell <<- EOF 57 | setenforce 0 58 | rpm --import https://rpm.rancher.io/public.key 59 | zypper install -y https://github.com/k3s-io/k3s-selinux/releases/download/v1.6.stable.1/k3s-selinux-1.6-1.sle.noarch.rpm 60 | zypper addlock k3s-selinux 61 | restorecon -Rv /etc/selinux/targeted/policy 62 | restorecon -Rv /var/lib 63 | setenforce 1 64 | EOF 65 | sleep 1 && udevadm settle && reboot 66 | EOT 67 | 68 | clean_up = <<-EOT 69 | set -ex 70 | echo "Second reboot successful, cleaning up..." 71 | rm -rf /etc/ssh/ssh_host_* 72 | echo "Make sure to use NetworkManager" 73 | touch /etc/NetworkManager/NetworkManager.conf 74 | sleep 1 && udevadm settle 75 | EOT 76 | } 77 | 78 | # Source for the MicroOS x86 snapshot 79 | source "hcloud" "microos-x86-snapshot" { 80 | image = "ubuntu-24.04" 81 | rescue = "linux64" 82 | location = "fsn1" 83 | server_type = "cx22" # disk size of >= 40GiB is needed to install the MicroOS image 84 | snapshot_labels = { 85 | microos-snapshot = "yes" 86 | creator = "kube-hetzner" 87 | } 88 | snapshot_name = "OpenSUSE MicroOS x86 by Kube-Hetzner" 89 | ssh_username = "root" 90 | token = var.hcloud_token 91 | } 92 | 93 | # Source for the MicroOS ARM snapshot 94 | source "hcloud" "microos-arm-snapshot" { 95 | image = "ubuntu-24.04" 96 | rescue = "linux64" 97 | location = "fsn1" 98 | server_type = "cax11" # disk size of >= 40GiB is needed to install the MicroOS image 99 | snapshot_labels = { 100 | microos-snapshot = "yes" 101 | creator = "kube-hetzner" 102 | } 103 | snapshot_name = "OpenSUSE MicroOS ARM by Kube-Hetzner" 104 | ssh_username = "root" 105 | token = var.hcloud_token 106 | } 107 | 108 | # Build the MicroOS x86 snapshot 109 | build { 110 | sources = ["source.hcloud.microos-x86-snapshot"] 111 | 112 | # Download the MicroOS x86 image 113 | provisioner "shell" { 114 | inline = ["${local.download_image}${var.opensuse_microos_x86_mirror_link}"] 115 | } 116 | 117 | # Write the MicroOS x86 image to disk 118 | provisioner "shell" { 119 | inline = [local.write_image] 120 | expect_disconnect = true 121 | } 122 | 123 | # Ensure connection to MicroOS x86 and install the needed packages 124 | provisioner "shell" { 125 | pause_before = "5s" 126 | inline = [local.install_packages] 127 | expect_disconnect = true 128 | } 129 | 130 | # Ensure connection to MicroOS x86 and clean up 131 | provisioner "shell" { 132 | pause_before = "5s" 133 | inline = [local.clean_up] 134 | } 135 | } 136 | 137 | # Build the MicroOS ARM snapshot 138 | build { 139 | sources = ["source.hcloud.microos-arm-snapshot"] 140 | 141 | # Download the MicroOS ARM image 142 | provisioner "shell" { 143 | inline = ["${local.download_image}${var.opensuse_microos_arm_mirror_link}"] 144 | } 145 | 146 | # Write the MicroOS ARM image to disk 147 | provisioner "shell" { 148 | inline = [local.write_image] 149 | expect_disconnect = true 150 | } 151 | 152 | # Ensure connection to MicroOS ARM and install the needed packages 153 | provisioner "shell" { 154 | pause_before = "5s" 155 | inline = [local.install_packages] 156 | expect_disconnect = true 157 | } 158 | 159 | # Ensure connection to MicroOS ARM and clean up 160 | provisioner "shell" { 161 | pause_before = "5s" 162 | inline = [local.clean_up] 163 | } 164 | } 165 | --------------------------------------------------------------------------------
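packages_to_install can be overridden without editing the template above, since Packer auto-loads *.auto.pkrvars.hcl files placed next to it (the file name below is illustrative; package names must exist in openSUSE Tumbleweed):

    # snapshots.auto.pkrvars.hcl (hypothetical file name)
    packages_to_install = ["vim", "htop"]

The list is appended to local.needed_packages via concat() and installed during the install_packages step.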
/placement_groups.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | control_plane_placement_compat_groups = max( 3 | 0, 4 | [ 5 | for cp_pool in var.control_plane_nodepools : 6 | cp_pool.placement_group_compat_idx + 1 if cp_pool.placement_group_compat_idx != null && cp_pool.placement_group == null 7 | ]... 8 | ) 9 | control_plane_groups = toset( 10 | [ 11 | for cp_pool in var.control_plane_nodepools : 12 | cp_pool.placement_group if cp_pool.placement_group != null 13 | ] 14 | ) 15 | agent_placement_compat_groups = max( 16 | 0, 17 | [ 18 | for ag_pool in var.agent_nodepools : 19 | ag_pool.placement_group_compat_idx + 1 if ag_pool.placement_group_compat_idx != null && ag_pool.placement_group == null 20 | ]... 21 | ) 22 | agent_placement_groups = toset( 23 | concat( 24 | [ 25 | for ag_pool in var.agent_nodepools : 26 | ag_pool.placement_group if ag_pool.placement_group != null 27 | ], 28 | concat( 29 | [ 30 | for ag_pool in var.agent_nodepools : 31 | [ 32 | for node, node_config in coalesce(ag_pool.nodes, {}) : 33 | node_config.placement_group if node_config.placement_group != null 34 | ] 35 | ] 36 | )... 37 | ) 38 | ) 39 | } 40 | 41 | resource "hcloud_placement_group" "control_plane" { 42 | count = local.control_plane_placement_compat_groups 43 | name = "${var.cluster_name}-control-plane-${count.index + 1}" 44 | labels = local.labels 45 | type = "spread" 46 | } 47 | 48 | resource "hcloud_placement_group" "control_plane_named" { 49 | for_each = local.control_plane_groups 50 | name = "${var.cluster_name}-control-plane-${each.key}" 51 | labels = local.labels 52 | type = "spread" 53 | } 54 | 55 | resource "hcloud_placement_group" "agent" { 56 | count = local.agent_placement_compat_groups 57 | name = "${var.cluster_name}-agent-${count.index + 1}" 58 | labels = local.labels 59 | type = "spread" 60 | } 61 | 62 | resource "hcloud_placement_group" "agent_named" { 63 | for_each = local.agent_placement_groups 64 | name = "${var.cluster_name}-agent-${each.key}" 65 | labels = local.labels 66 | type = "spread" 67 | } 68 | --------------------------------------------------------------------------------
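Named spread groups come from the placement_group field on a nodepool, while the legacy placement_group_compat_idx path keeps the old numbered groups stable. A sketch of opting a pool into a named group (kube.tf, other nodepool fields as in the root variables.tf):

    control_plane_nodepools = [
      {
        name            = "control-plane-fsn1"
        server_type     = "cx22"
        location        = "fsn1"
        labels          = []
        taints          = []
        count           = 3
        placement_group = "cp-spread" # materializes as "<cluster_name>-control-plane-cp-spread" above
      }
    ]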
/scripts/cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DRY_RUN=1 4 | 5 | echo "Welcome to the Kube-Hetzner cluster deletion script!" 6 | echo " " 7 | echo "We advise you to first run 'terraform destroy' and execute this script when it starts hanging because of resources still attached to the network." 8 | echo "In order to run this script you need to have the hcloud CLI installed and configured with a context for the cluster you want to delete." 9 | command -v hcloud >/dev/null 2>&1 || { echo "hcloud (Hetzner CLI) is not installed. Install it with 'brew install hcloud'."; exit 1; } 10 | echo "You can do so by running 'hcloud context create <context-name>' and inputting your HCLOUD_TOKEN." 11 | echo " " 12 | 13 | if command -v tofu >/dev/null 2>&1 ; then 14 | terraform_command=tofu 15 | elif command -v terraform >/dev/null 2>&1 ; then 16 | terraform_command=terraform 17 | else 18 | echo "terraform or tofu is not installed. Install it with 'brew install terraform' or 'brew install opentofu'." 19 | exit 1 20 | fi 21 | 22 | 23 | # Try to guess the cluster name 24 | GUESSED_CLUSTER_NAME=$(sed -n 's/^[[:space:]]*cluster_name[[:space:]]*=[[:space:]]*"\([^"]*\)".*/\1/p' kube.tf 2>/dev/null) 25 | 26 | if [ -n "$GUESSED_CLUSTER_NAME" ]; then 27 | echo "Cluster name '$GUESSED_CLUSTER_NAME' has been detected in the kube.tf file." 28 | read -p "Enter the name of the cluster to delete (default: $GUESSED_CLUSTER_NAME): " CLUSTER_NAME 29 | if [ -z "$CLUSTER_NAME" ]; then 30 | CLUSTER_NAME="$GUESSED_CLUSTER_NAME" 31 | fi 32 | else 33 | read -p "Enter the name of the cluster to delete: " CLUSTER_NAME 34 | fi 35 | 36 | while true; do 37 | read -p "Do you want to perform a dry run? (yes/no): " dry_run_input 38 | case $dry_run_input in 39 | [Yy]* ) DRY_RUN=1; break;; 40 | [Nn]* ) DRY_RUN=0; break;; 41 | * ) echo "Please answer yes or no.";; 42 | esac 43 | done 44 | 45 | read -p "Do you want to delete volumes? (yes/no, default: no): " delete_volumes_input 46 | DELETE_VOLUMES=0 47 | if [[ "$delete_volumes_input" =~ ^([Yy]es|[Yy])$ ]]; then 48 | DELETE_VOLUMES=1 49 | fi 50 | 51 | read -p "Do you want to delete MicroOS snapshots? (yes/no, default: no): " delete_snapshots_input 52 | DELETE_SNAPSHOTS=0 53 | if [[ "$delete_snapshots_input" =~ ^([Yy]es|[Yy])$ ]]; then 54 | DELETE_SNAPSHOTS=1 55 | fi 56 | 57 | if (( DRY_RUN == 0 )); then 58 | echo "WARNING: STUFF WILL BE DELETED!" 59 | else 60 | echo "Performing a dry run, nothing will be deleted." 61 | fi 62 | 63 | HCLOUD_SELECTOR=(--selector='provisioner=terraform' --selector="cluster=$CLUSTER_NAME") 64 | HCLOUD_OUTPUT_OPTIONS=(-o noheader -o 'columns=id') 65 | 66 | VOLUMES=() 67 | while IFS='' read -r line; do VOLUMES+=("$line"); done < <(hcloud volume list "${HCLOUD_SELECTOR[@]}" "${HCLOUD_OUTPUT_OPTIONS[@]}") 68 | 69 | SERVERS=() 70 | while IFS='' read -r line; do SERVERS+=("$line"); done < <(hcloud server list "${HCLOUD_SELECTOR[@]}" "${HCLOUD_OUTPUT_OPTIONS[@]}") 71 | 72 | PLACEMENT_GROUPS=() 73 | while IFS='' read -r line; do PLACEMENT_GROUPS+=("$line"); done < <(hcloud placement-group list "${HCLOUD_SELECTOR[@]}" "${HCLOUD_OUTPUT_OPTIONS[@]}") 74 | 75 | LOAD_BALANCERS=() 76 | while IFS='' read -r line; do LOAD_BALANCERS+=("$line"); done < <(hcloud load-balancer list "${HCLOUD_SELECTOR[@]}" "${HCLOUD_OUTPUT_OPTIONS[@]}") 77 | 78 | INGRESS_LB=$(hcloud load-balancer list -o noheader -o columns=id,name | grep "${CLUSTER_NAME}" | cut -d ' ' -f1 ) 79 | 80 | if [[ $INGRESS_LB != "" ]]; then 81 | LOAD_BALANCERS+=( "$INGRESS_LB" ) 82 | fi 83 | 84 | FIREWALLS=() 85 | while IFS='' read -r line; do FIREWALLS+=("$line"); done < <(hcloud firewall list "${HCLOUD_SELECTOR[@]}" "${HCLOUD_OUTPUT_OPTIONS[@]}") 86 | 87 | NETWORKS=() 88 | while IFS='' read -r line; do NETWORKS+=("$line"); done < <(hcloud network list "${HCLOUD_SELECTOR[@]}" "${HCLOUD_OUTPUT_OPTIONS[@]}") 89 | 90 | SSH_KEYS=() 91 | while IFS='' read -r line; do SSH_KEYS+=("$line"); done < <(hcloud ssh-key list "${HCLOUD_SELECTOR[@]}" "${HCLOUD_OUTPUT_OPTIONS[@]}") 92 | 93 | function detach_volumes() { 94 | for ID in "${VOLUMES[@]}"; do 95 | echo "Detach volume: $ID" 96 | if (( DRY_RUN == 0 )); then 97 | hcloud volume detach "$ID" 98 | fi 99 | done 100 | } 101 | 102 | function delete_volumes() { 103 | for ID in "${VOLUMES[@]}"; do 104 | echo "Delete volume: $ID" 105 | if (( DRY_RUN == 0 )); then 106 | hcloud volume delete "$ID" 107 | fi 108 | done 109 | } 110 | 111 | function delete_servers() { 112 | for ID in "${SERVERS[@]}"; do 113 | echo "Delete server: $ID" 114 | if (( DRY_RUN == 0 )); then 115 | hcloud server delete "$ID" 116 | fi 117 | done 118 | } 119 | 120 | function delete_placement_groups() { 121 | for ID in "${PLACEMENT_GROUPS[@]}"; do 122 | echo "Delete placement-group: $ID" 123 | if (( DRY_RUN == 0 )); then 124 | hcloud placement-group delete "$ID" 125 | fi 126 | done 127 | } 128 | 129 | function
delete_load_balancer() { 130 | for ID in "${LOAD_BALANCERS[@]}"; do 131 | echo "Delete load-balancer: $ID" 132 | if (( DRY_RUN == 0 )); then 133 | hcloud load-balancer delete "$ID" 134 | fi 135 | done 136 | } 137 | 138 | function delete_firewalls() { 139 | for ID in "${FIREWALLS[@]}"; do 140 | echo "Delete firewall: $ID" 141 | if (( DRY_RUN == 0 )); then 142 | hcloud firewall delete "$ID" 143 | fi 144 | done 145 | } 146 | 147 | function delete_networks() { 148 | for ID in "${NETWORKS[@]}"; do 149 | echo "Delete network: $ID" 150 | if (( DRY_RUN == 0 )); then 151 | hcloud network delete "$ID" 152 | fi 153 | done 154 | } 155 | 156 | function delete_ssh_keys() { 157 | for ID in "${SSH_KEYS[@]}"; do 158 | echo "Delete ssh-key: $ID" 159 | if (( DRY_RUN == 0 )); then 160 | hcloud ssh-key delete "$ID" 161 | fi 162 | done 163 | } 164 | 165 | function delete_autoscaled_nodes() { 166 | local servers 167 | while IFS='' read -r line; do servers+=("$line"); done < <(hcloud server list -o noheader -o 'columns=id,name' | grep "${CLUSTER_NAME}") 168 | 169 | for server_info in "${servers[@]}"; do 170 | local ID=$(echo "$server_info" | awk '{print $1}') 171 | local server_name=$(echo "$server_info" | awk '{print $2}') 172 | echo "Delete autoscaled server: $ID (Name: $server_name)" 173 | if (( DRY_RUN == 0 )); then 174 | hcloud server delete "$ID" 175 | fi 176 | done 177 | } 178 | 179 | function delete_snapshots() { 180 | local snapshots 181 | while IFS='' read -r line; do snapshots+=("$line"); done < <(hcloud image list --selector 'microos-snapshot=yes' -o noheader -o 'columns=id,name') 182 | 183 | for snapshot_info in "${snapshots[@]}"; do 184 | local ID=$(echo "$snapshot_info" | awk '{print $1}') 185 | local snapshot_name=$(echo "$snapshot_info" | awk '{print $2}') 186 | echo "Delete snapshot: $ID (Name: $snapshot_name)" 187 | if (( DRY_RUN == 0 )); then 188 | hcloud image delete "$ID" 189 | fi 190 | done 191 | } 192 | 193 | if (( DRY_RUN > 0 )); then 194 | echo "Dry run, nothing will be deleted!" 195 | fi 196 | 197 | detach_volumes 198 | if (( DELETE_VOLUMES == 1 )); then 199 | delete_volumes 200 | fi 201 | delete_servers 202 | delete_autoscaled_nodes 203 | delete_placement_groups 204 | delete_load_balancer 205 | delete_networks 206 | delete_firewalls 207 | delete_ssh_keys 208 | 209 | 210 | if (( DELETE_SNAPSHOTS == 1 )); then 211 | delete_snapshots 212 | fi 213 | 214 | -------------------------------------------------------------------------------- /scripts/create.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Check if terraform, packer and hcloud CLIs are present 4 | command -v ssh >/dev/null 2>&1 || { 5 | echo "openssh is not installed. Install it with 'brew install openssh'." 6 | exit 1 7 | } 8 | 9 | if command -v tofu >/dev/null 2>&1 ; then 10 | terraform_command=tofu 11 | elif command -v terraform >/dev/null 2>&1 ; then 12 | terraform_command=terraform 13 | else 14 | echo "terraform or tofu is not installed. Install it with 'brew tap hashicorp/tap && brew install hashicorp/tap/terraform' or 'brew install opentofu'." 15 | exit 1 16 | fi 17 | 18 | command -v packer >/dev/null 2>&1 || { 19 | echo "packer is not installed. Install it with 'brew install packer'." 20 | exit 1 21 | } 22 | command -v hcloud >/dev/null 2>&1 || { 23 | echo "hcloud (Hetzner CLI) is not installed. Install it with 'brew install hcloud'." 
24 | exit 1 25 | } 26 | 27 | # Ask for the folder name 28 | if [ -z "${folder_name}" ] ; then 29 | read -p "Enter the name of the folder you want to create (leave empty to use the current directory instead, useful for upgrades): " folder_name 30 | fi 31 | 32 | # Ask for the folder path only if folder_name is provided 33 | if [ -n "$folder_name" ] && [ -z "${folder_path}" ]; then 34 | read -p "Enter the path to create the folder in (default: current path): " folder_path 35 | fi 36 | 37 | # Set default path if not provided 38 | if [ -z "$folder_path" ]; then 39 | folder_path="." 40 | fi 41 | 42 | # Create the folder if folder_name is provided 43 | if [ -n "$folder_name" ]; then 44 | mkdir -p "${folder_path}/${folder_name}" 45 | folder_path="${folder_path}/${folder_name}" 46 | fi 47 | 48 | # Download the required files only if they don't exist 49 | if [ ! -e "${folder_path}/kube.tf" ]; then 50 | curl -sL https://raw.githubusercontent.com/kube-hetzner/terraform-hcloud-kube-hetzner/master/kube.tf.example -o "${folder_path}/kube.tf" 51 | else 52 | echo "kube.tf already exists. Skipping download." 53 | fi 54 | 55 | if [ ! -e "${folder_path}/hcloud-microos-snapshots.pkr.hcl" ]; then 56 | curl -sL https://raw.githubusercontent.com/kube-hetzner/terraform-hcloud-kube-hetzner/master/packer-template/hcloud-microos-snapshots.pkr.hcl -o "${folder_path}/hcloud-microos-snapshots.pkr.hcl" 57 | else 58 | echo "hcloud-microos-snapshots.pkr.hcl already exists. Skipping download." 59 | fi 60 | 61 | # Ask if they want to create the MicroOS snapshots 62 | if [ -z "${create_snapshots}" ] ; then 63 | echo " " 64 | echo "The snapshots are required and deployed using packer. If you need specific extra packages, choose no and edit the hcloud-microos-snapshots.pkr.hcl file manually. This is not needed in 99% of cases, as we already include the most common packages." 65 | echo " " 66 | read -p "Do you want to create the MicroOS snapshots (we create one for x86 and one for ARM architectures) with packer now? (yes/no): " create_snapshots 67 | fi 68 | 69 | if [[ "$create_snapshots" =~ ^([Yy]es|[Yy])$ ]]; then 70 | if [[ -z "$HCLOUD_TOKEN" ]]; then 71 | read -p "Enter your HCLOUD_TOKEN: " hcloud_token 72 | export HCLOUD_TOKEN=$hcloud_token 73 | fi 74 | echo "Running packer build for hcloud-microos-snapshots.pkr.hcl" 75 | cd "${folder_path}" && packer init hcloud-microos-snapshots.pkr.hcl && packer build hcloud-microos-snapshots.pkr.hcl 76 | else 77 | echo " " 78 | echo "You can create the snapshots later by running 'packer build hcloud-microos-snapshots.pkr.hcl' in the folder." 79 | fi 80 | 81 | # Output commands 82 | echo " " 83 | echo "Remember not to skip the hcloud CLI; to activate it, run 'hcloud context create <context-name>'. It is ideal for quick debugging and allows targeted cleanup when needed!" 84 | echo " " 85 | echo "Before running '${terraform_command} apply', go through the kube.tf file and fill it with your desired values."
86 | -------------------------------------------------------------------------------- /templates/autoscaler-cloudinit.yaml.tpl: -------------------------------------------------------------------------------- 1 | #cloud-config 2 | 3 | debug: True 4 | 5 | write_files: 6 | 7 | ${cloudinit_write_files_common} 8 | 9 | - content: ${base64encode(k3s_config)} 10 | encoding: base64 11 | path: /tmp/config.yaml 12 | 13 | - content: ${base64encode(install_k3s_agent_script)} 14 | encoding: base64 15 | path: /var/pre_install/install-k3s-agent.sh 16 | 17 | # Apply DNS config 18 | %{ if has_dns_servers ~} 19 | manage_resolv_conf: true 20 | resolv_conf: 21 | nameservers: 22 | %{ for dns_server in dns_servers ~} 23 | - ${dns_server} 24 | %{ endfor ~} 25 | %{ endif ~} 26 | 27 | # Add ssh authorized keys 28 | ssh_authorized_keys: 29 | %{ for key in sshAuthorizedKeys ~} 30 | - ${key} 31 | %{ endfor ~} 32 | 33 | # Resize /var, not /, as that's the last partition in MicroOS image. 34 | growpart: 35 | devices: ["/var"] 36 | 37 | # Make sure the hostname is set correctly 38 | hostname: ${hostname} 39 | preserve_hostname: true 40 | 41 | runcmd: 42 | 43 | ${cloudinit_runcmd_common} 44 | 45 | # Configure default routes based on public ip availability 46 | %{if private_network_only~} 47 | # Private-only setup: eth0 is the private interface 48 | - [ip, route, add, default, via, '10.0.0.1', dev, 'eth0', metric, '100'] 49 | %{else~} 50 | # Standard setup: eth0 is public, configure both IPv4 and IPv6 51 | - [ip, route, add, default, via, '172.31.1.1', dev, 'eth0', metric, '100'] 52 | - [ip, -6, route, add, default, via, 'fe80::1', dev, 'eth0', metric, '100'] 53 | %{endif~} 54 | 55 | # Start the install-k3s-agent service 56 | - ['/bin/bash', '/var/pre_install/install-k3s-agent.sh'] 57 | -------------------------------------------------------------------------------- /templates/autoscaler.yaml.tpl: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | labels: 6 | k8s-addon: cluster-autoscaler.addons.k8s.io 7 | k8s-app: cluster-autoscaler 8 | name: cluster-autoscaler 9 | namespace: kube-system 10 | --- 11 | apiVersion: rbac.authorization.k8s.io/v1 12 | kind: ClusterRole 13 | metadata: 14 | name: cluster-autoscaler 15 | labels: 16 | k8s-addon: cluster-autoscaler.addons.k8s.io 17 | k8s-app: cluster-autoscaler 18 | rules: 19 | - apiGroups: [""] 20 | resources: ["events", "endpoints"] 21 | verbs: ["create", "patch"] 22 | - apiGroups: [""] 23 | resources: ["pods/eviction"] 24 | verbs: ["create"] 25 | - apiGroups: [""] 26 | resources: ["pods/status"] 27 | verbs: ["update"] 28 | - apiGroups: [""] 29 | resources: ["endpoints"] 30 | resourceNames: ["cluster-autoscaler"] 31 | verbs: ["get", "update"] 32 | - apiGroups: [""] 33 | resources: ["nodes"] 34 | verbs: ["watch", "list", "get", "update"] 35 | - apiGroups: [""] 36 | resources: 37 | - "namespaces" 38 | - "pods" 39 | - "services" 40 | - "replicationcontrollers" 41 | - "persistentvolumeclaims" 42 | - "persistentvolumes" 43 | verbs: ["watch", "list", "get"] 44 | - apiGroups: ["extensions"] 45 | resources: ["replicasets", "daemonsets"] 46 | verbs: ["watch", "list", "get"] 47 | - apiGroups: ["policy"] 48 | resources: ["poddisruptionbudgets"] 49 | verbs: ["watch", "list"] 50 | - apiGroups: ["apps"] 51 | resources: ["statefulsets", "replicasets", "daemonsets"] 52 | verbs: ["watch", "list", "get"] 53 | - apiGroups: ["storage.k8s.io"] 54 | resources: ["storageclasses", "csinodes", 
"csistoragecapacities", "csidrivers", "volumeattachments"] 55 | verbs: ["watch", "list", "get"] 56 | - apiGroups: ["batch", "extensions"] 57 | resources: ["jobs"] 58 | verbs: ["get", "list", "watch", "patch"] 59 | - apiGroups: ["coordination.k8s.io"] 60 | resources: ["leases"] 61 | verbs: ["create"] 62 | - apiGroups: ["coordination.k8s.io"] 63 | resourceNames: ["cluster-autoscaler"] 64 | resources: ["leases"] 65 | verbs: ["get", "update"] 66 | --- 67 | apiVersion: rbac.authorization.k8s.io/v1 68 | kind: Role 69 | metadata: 70 | name: cluster-autoscaler 71 | namespace: kube-system 72 | labels: 73 | k8s-addon: cluster-autoscaler.addons.k8s.io 74 | k8s-app: cluster-autoscaler 75 | rules: 76 | - apiGroups: [""] 77 | resources: ["configmaps"] 78 | verbs: ["create","list","watch"] 79 | - apiGroups: [""] 80 | resources: ["configmaps"] 81 | resourceNames: ["cluster-autoscaler-status", "cluster-autoscaler-priority-expander"] 82 | verbs: ["delete", "get", "update", "watch"] 83 | 84 | --- 85 | apiVersion: rbac.authorization.k8s.io/v1 86 | kind: ClusterRoleBinding 87 | metadata: 88 | name: cluster-autoscaler 89 | labels: 90 | k8s-addon: cluster-autoscaler.addons.k8s.io 91 | k8s-app: cluster-autoscaler 92 | roleRef: 93 | apiGroup: rbac.authorization.k8s.io 94 | kind: ClusterRole 95 | name: cluster-autoscaler 96 | subjects: 97 | - kind: ServiceAccount 98 | name: cluster-autoscaler 99 | namespace: kube-system 100 | 101 | --- 102 | apiVersion: rbac.authorization.k8s.io/v1 103 | kind: RoleBinding 104 | metadata: 105 | name: cluster-autoscaler 106 | namespace: kube-system 107 | labels: 108 | k8s-addon: cluster-autoscaler.addons.k8s.io 109 | k8s-app: cluster-autoscaler 110 | roleRef: 111 | apiGroup: rbac.authorization.k8s.io 112 | kind: Role 113 | name: cluster-autoscaler 114 | subjects: 115 | - kind: ServiceAccount 116 | name: cluster-autoscaler 117 | namespace: kube-system 118 | 119 | --- 120 | apiVersion: apps/v1 121 | kind: Deployment 122 | metadata: 123 | name: cluster-autoscaler 124 | namespace: kube-system 125 | labels: 126 | app: cluster-autoscaler 127 | spec: 128 | replicas: 1 129 | selector: 130 | matchLabels: 131 | app: cluster-autoscaler 132 | template: 133 | metadata: 134 | labels: 135 | app: cluster-autoscaler 136 | annotations: 137 | prometheus.io/scrape: 'true' 138 | prometheus.io/port: '8085' 139 | spec: 140 | serviceAccountName: cluster-autoscaler 141 | tolerations: 142 | - effect: NoSchedule 143 | key: node-role.kubernetes.io/control-plane 144 | 145 | # Node affinity is used to force cluster-autoscaler to stick 146 | # to the control-plane node. This allows the cluster to reliably downscale 147 | # to zero worker nodes when needed. 
148 | affinity: 149 | nodeAffinity: 150 | requiredDuringSchedulingIgnoredDuringExecution: 151 | nodeSelectorTerms: 152 | - matchExpressions: 153 | - key: node-role.kubernetes.io/control-plane 154 | operator: Exists 155 | containers: 156 | - image: ${ca_image}:${ca_version} 157 | name: cluster-autoscaler 158 | resources: 159 | limits: 160 | cpu: 100m 161 | memory: 300Mi 162 | requests: 163 | cpu: 100m 164 | memory: 300Mi 165 | ports: 166 | - containerPort: 8085 167 | command: 168 | - ./cluster-autoscaler 169 | - --v=${cluster_autoscaler_log_level} 170 | - --logtostderr=${cluster_autoscaler_log_to_stderr} 171 | - --stderrthreshold=${cluster_autoscaler_stderr_threshold} 172 | - --cloud-provider=hetzner 173 | %{~ for pool in node_pools ~} 174 | - --nodes=${pool.min_nodes}:${pool.max_nodes}:${pool.server_type}:${pool.location}:${cluster_name}${pool.name} 175 | %{~ endfor ~} 176 | %{~ for extra_arg in cluster_autoscaler_extra_args ~} 177 | - ${extra_arg} 178 | %{~ endfor ~} 179 | env: 180 | - name: HCLOUD_TOKEN 181 | valueFrom: 182 | secretKeyRef: 183 | name: hcloud 184 | key: token 185 | - name: HCLOUD_CLOUD_INIT 186 | value: ${cloudinit_config} 187 | - name: HCLOUD_CLUSTER_CONFIG 188 | value: ${cluster_config} 189 | - name: HCLOUD_SSH_KEY 190 | value: '${ssh_key}' 191 | - name: HCLOUD_IMAGE 192 | value: '${snapshot_id}' 193 | - name: HCLOUD_NETWORK 194 | value: '${ipv4_subnet_id}' 195 | - name: HCLOUD_FIREWALL 196 | value: '${firewall_id}' 197 | - name: HCLOUD_PUBLIC_IPV4 198 | value: '${enable_ipv4}' 199 | - name: HCLOUD_PUBLIC_IPV6 200 | value: '${enable_ipv6}' 201 | %{~ if cluster_autoscaler_server_creation_timeout != "" ~} 202 | - name: HCLOUD_SERVER_CREATION_TIMEOUT 203 | value: '${cluster_autoscaler_server_creation_timeout}' 204 | %{~ endif ~} 205 | volumeMounts: 206 | - name: ssl-certs 207 | mountPath: /etc/ssl/certs 208 | readOnly: true 209 | imagePullPolicy: "Always" 210 | volumes: 211 | - name: ssl-certs 212 | hostPath: 213 | path: "/etc/ssl/certs" # right place on MicroOS? 
214 | -------------------------------------------------------------------------------- /templates/calico.yaml.tpl: -------------------------------------------------------------------------------- 1 | ${values} -------------------------------------------------------------------------------- /templates/ccm.yaml.tpl: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: hcloud-cloud-controller-manager 6 | namespace: kube-system 7 | spec: 8 | template: 9 | spec: 10 | containers: 11 | - name: hcloud-cloud-controller-manager 12 | args: 13 | - "--cloud-provider=hcloud" 14 | - "--leader-elect=false" 15 | - "--allow-untagged-cloud" 16 | - "--route-reconciliation-period=30s" 17 | - "--allocate-node-cidrs=true" 18 | - "--cluster-cidr=${cluster_cidr_ipv4}" 19 | - "--webhook-secure-port=0" 20 | %{if using_klipper_lb~} 21 | - "--secure-port=10288" 22 | %{endif~} 23 | env: 24 | - name: "HCLOUD_LOAD_BALANCERS_LOCATION" 25 | value: "${default_lb_location}" 26 | - name: "HCLOUD_LOAD_BALANCERS_USE_PRIVATE_IP" 27 | value: "true" 28 | - name: "HCLOUD_LOAD_BALANCERS_ENABLED" 29 | value: "${!using_klipper_lb}" 30 | - name: "HCLOUD_LOAD_BALANCERS_DISABLE_PRIVATE_INGRESS" 31 | value: "true" 32 | 33 | -------------------------------------------------------------------------------- /templates/cert_manager.yaml.tpl: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: cert-manager 6 | --- 7 | apiVersion: helm.cattle.io/v1 8 | kind: HelmChart 9 | metadata: 10 | name: cert-manager 11 | namespace: kube-system 12 | spec: 13 | chart: cert-manager 14 | repo: https://charts.jetstack.io 15 | version: "${version}" 16 | targetNamespace: cert-manager 17 | bootstrap: ${bootstrap} 18 | valuesContent: |- 19 | ${values} 20 | -------------------------------------------------------------------------------- /templates/cilium.yaml.tpl: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.cattle.io/v1 3 | kind: HelmChart 4 | metadata: 5 | name: cilium 6 | namespace: kube-system 7 | spec: 8 | chart: cilium 9 | repo: https://helm.cilium.io/ 10 | version: "${version}" 11 | targetNamespace: kube-system 12 | bootstrap: true 13 | valuesContent: |- 14 | ${values} -------------------------------------------------------------------------------- /templates/csi-driver-smb.yaml.tpl: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.cattle.io/v1 3 | kind: HelmChart 4 | metadata: 5 | name: csi-driver-smb 6 | namespace: kube-system 7 | spec: 8 | chart: csi-driver-smb 9 | repo: https://raw.githubusercontent.com/kubernetes-csi/csi-driver-smb/master/charts 10 | version: "${version}" 11 | targetNamespace: kube-system 12 | bootstrap: ${bootstrap} 13 | valuesContent: |- 14 | ${values} -------------------------------------------------------------------------------- /templates/haproxy_ingress.yaml.tpl: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: ${target_namespace} 6 | --- 7 | apiVersion: helm.cattle.io/v1 8 | kind: HelmChart 9 | metadata: 10 | name: haproxy 11 | namespace: kube-system 12 | spec: 13 | chart: kubernetes-ingress 14 | version: "${version}" 15 | repo: https://haproxytech.github.io/helm-charts 16 | targetNamespace: 
${target_namespace} 17 | bootstrap: true 18 | valuesContent: |- 19 | ${values} 20 | -------------------------------------------------------------------------------- /templates/hcloud-ccm-helm.yaml.tpl: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.cattle.io/v1 3 | kind: HelmChart 4 | metadata: 5 | name: hcloud-cloud-controller-manager 6 | namespace: kube-system 7 | spec: 8 | chart: hcloud-cloud-controller-manager 9 | repo: https://charts.hetzner.cloud 10 | version: "${version}" 11 | targetNamespace: kube-system 12 | bootstrap: true 13 | valuesContent: |- 14 | ${values} -------------------------------------------------------------------------------- /templates/hcloud-csi.yaml.tpl: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: helm.cattle.io/v1 3 | kind: HelmChart 4 | metadata: 5 | name: hcloud-csi 6 | namespace: kube-system 7 | spec: 8 | chart: hcloud-csi 9 | repo: https://charts.hetzner.cloud 10 | version: "${version}" 11 | targetNamespace: kube-system 12 | bootstrap: true 13 | valuesContent: |- 14 | ${values} -------------------------------------------------------------------------------- /templates/kured.yaml.tpl: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: DaemonSet 4 | metadata: 5 | name: kured 6 | namespace: kube-system 7 | spec: 8 | selector: 9 | matchLabels: 10 | name: kured 11 | template: 12 | metadata: 13 | labels: 14 | name: kured 15 | spec: 16 | serviceAccountName: kured 17 | containers: 18 | - name: kured 19 | command: 20 | - /usr/bin/kured 21 | %{~ for key, value in options ~} 22 | - --${key}=${value} 23 | %{~ endfor ~} 24 | 25 | -------------------------------------------------------------------------------- /templates/longhorn.yaml.tpl: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: ${longhorn_namespace} 6 | --- 7 | apiVersion: helm.cattle.io/v1 8 | kind: HelmChart 9 | metadata: 10 | name: longhorn 11 | namespace: kube-system 12 | spec: 13 | chart: longhorn 14 | repo: ${longhorn_repository} 15 | version: "${version}" 16 | targetNamespace: ${longhorn_namespace} 17 | bootstrap: ${bootstrap} 18 | valuesContent: |- 19 | ${values} -------------------------------------------------------------------------------- /templates/nat-router-cloudinit.yaml.tpl: -------------------------------------------------------------------------------- 1 | #cloud-config 2 | package_reboot_if_required: false 3 | package_update: true 4 | package_upgrade: true 5 | packages: 6 | - fail2ban 7 | 8 | write_files: 9 | - path: /etc/network/interfaces 10 | content: | 11 | auto eth0 12 | iface eth0 inet dhcp 13 | post-up echo 1 > /proc/sys/net/ipv4/ip_forward 14 | post-up iptables -t nat -A POSTROUTING -s '${ private_network_ipv4_range }' -o eth0 -j MASQUERADE 15 | append: true 16 | 17 | # Disable ssh password authentication 18 | - content: | 19 | Port ${ ssh_port } 20 | PasswordAuthentication no 21 | X11Forwarding no 22 | MaxAuthTries ${ ssh_max_auth_tries } 23 | AllowTcpForwarding yes 24 | AllowAgentForwarding yes 25 | AuthorizedKeysFile .ssh/authorized_keys 26 | # PermitRootLogin no 27 | path: /etc/ssh/sshd_config.d/kube-hetzner.conf 28 | - path: /etc/fail2ban/jail.d/sshd.local 29 | content: | 30 | [sshd] 31 | enabled = true 32 | port = ssh 33 | logpath = %(sshd_log)s 34 | maxretry = 5 35 | bantime = 86400 
36 | 37 | users: 38 | - name: nat-router 39 | groups: 40 | %{ if enable_sudo ~} 41 | - sudo 42 | %{ endif ~} 43 | %{ if enable_sudo ~} 44 | sudo: 45 | - ALL=(ALL) NOPASSWD:ALL 46 | %{ endif ~} 47 | # Add ssh authorized keys 48 | ssh_authorized_keys: 49 | %{ for key in sshAuthorizedKeys ~} 50 | - ${key} 51 | %{ endfor ~} 52 | 53 | 54 | # Apply DNS config 55 | %{ if has_dns_servers ~} 56 | manage_resolv_conf: true 57 | resolv_conf: 58 | nameservers: 59 | %{ for dns_server in dns_servers ~} 60 | - ${dns_server} 61 | %{ endfor ~} 62 | %{ endif ~} 63 | 64 | 65 | runcmd: 66 | - [systemctl, 'enable', 'fail2ban'] 67 | - [systemctl, 'restart', 'sshd'] 68 | - [systemctl, 'restart', 'networking'] -------------------------------------------------------------------------------- /templates/nginx_ingress.yaml.tpl: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: ${target_namespace} 6 | --- 7 | apiVersion: helm.cattle.io/v1 8 | kind: HelmChart 9 | metadata: 10 | name: nginx 11 | namespace: kube-system 12 | spec: 13 | chart: ingress-nginx 14 | version: "${version}" 15 | repo: https://kubernetes.github.io/ingress-nginx 16 | targetNamespace: ${target_namespace} 17 | bootstrap: true 18 | valuesContent: |- 19 | ${values} 20 | -------------------------------------------------------------------------------- /templates/plans.yaml.tpl: -------------------------------------------------------------------------------- 1 | --- 2 | # Doc: https://rancher.com/docs/k3s/latest/en/upgrades/automated/ 3 | # agent plan 4 | apiVersion: upgrade.cattle.io/v1 5 | kind: Plan 6 | metadata: 7 | name: k3s-agent 8 | namespace: system-upgrade 9 | labels: 10 | k3s_upgrade: agent 11 | spec: 12 | concurrency: 1 13 | %{~ if version == "" ~} 14 | channel: https://update.k3s.io/v1-release/channels/${channel} 15 | %{~ else ~} 16 | version: ${version} 17 | %{~ endif ~} 18 | serviceAccountName: system-upgrade 19 | nodeSelector: 20 | matchExpressions: 21 | - {key: k3s_upgrade, operator: Exists} 22 | - {key: k3s_upgrade, operator: NotIn, values: ["disabled", "false"]} 23 | - {key: node-role.kubernetes.io/control-plane, operator: NotIn, values: ["true"]} 24 | - {key: kured, operator: NotIn, values: ["rebooting"]} 25 | tolerations: 26 | - {key: server-usage, effect: NoSchedule, operator: Equal, value: storage} 27 | - {operator: Exists} 28 | prepare: 29 | image: rancher/k3s-upgrade 30 | args: ["prepare", "k3s-server"] 31 | %{ if drain }drain: 32 | force: true 33 | disableEviction: ${disable_eviction} 34 | skipWaitForDeleteTimeout: 60%{ endif } 35 | %{ if !drain }cordon: true%{ endif } 36 | upgrade: 37 | image: rancher/k3s-upgrade 38 | --- 39 | # server plan 40 | apiVersion: upgrade.cattle.io/v1 41 | kind: Plan 42 | metadata: 43 | name: k3s-server 44 | namespace: system-upgrade 45 | labels: 46 | k3s_upgrade: server 47 | spec: 48 | concurrency: 1 49 | %{~ if version == "" ~} 50 | channel: https://update.k3s.io/v1-release/channels/${channel} 51 | %{~ else ~} 52 | version: ${version} 53 | %{~ endif ~} 54 | serviceAccountName: system-upgrade 55 | nodeSelector: 56 | matchExpressions: 57 | - {key: k3s_upgrade, operator: Exists} 58 | - {key: k3s_upgrade, operator: NotIn, values: ["disabled", "false"]} 59 | - {key: node-role.kubernetes.io/control-plane, operator: In, values: ["true"]} 60 | - {key: kured, operator: NotIn, values: ["rebooting"]} 61 | tolerations: 62 | - {key: node-role.kubernetes.io/control-plane, effect: NoSchedule, operator: Exists} 63 | - 
{key: CriticalAddonsOnly, effect: NoExecute, operator: Exists} 64 | cordon: true 65 | upgrade: 66 | image: rancher/k3s-upgrade 67 | -------------------------------------------------------------------------------- /templates/rancher.yaml.tpl: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: cattle-system 6 | --- 7 | apiVersion: helm.cattle.io/v1 8 | kind: HelmChart 9 | metadata: 10 | name: rancher 11 | namespace: kube-system 12 | spec: 13 | chart: rancher 14 | repo: https://releases.rancher.com/server-charts/${rancher_install_channel} 15 | version: "${version}" 16 | targetNamespace: cattle-system 17 | bootstrap: ${bootstrap} 18 | valuesContent: |- 19 | ${values} -------------------------------------------------------------------------------- /templates/traefik_ingress.yaml.tpl: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: ${target_namespace} 6 | --- 7 | apiVersion: helm.cattle.io/v1 8 | kind: HelmChart 9 | metadata: 10 | name: traefik 11 | namespace: kube-system 12 | spec: 13 | chart: traefik 14 | version: "${version}" 15 | repo: https://traefik.github.io/charts 16 | targetNamespace: ${target_namespace} 17 | bootstrap: true 18 | valuesContent: |- 19 | ${values} 20 | -------------------------------------------------------------------------------- /values-export.tf: -------------------------------------------------------------------------------- 1 | resource "local_file" "cilium_values" { 2 | count = var.export_values && var.cni_plugin == "cilium" ? 1 : 0 3 | content = local.cilium_values 4 | filename = "cilium_values.yaml" 5 | file_permission = "600" 6 | } 7 | 8 | resource "local_file" "cert_manager_values" { 9 | count = var.export_values && var.enable_cert_manager ? 1 : 0 10 | content = local.cert_manager_values 11 | filename = "cert_manager_values.yaml" 12 | file_permission = "600" 13 | } 14 | 15 | resource "local_file" "hetzner_ccm_values" { 16 | count = var.export_values && var.hetzner_ccm_use_helm ? 1 : 0 17 | content = local.hetzner_ccm_values 18 | filename = "hetzner_ccm_values.yaml" 19 | file_permission = "600" 20 | } 21 | 22 | resource "local_file" "csi_driver_smb_values" { 23 | count = var.export_values && var.enable_csi_driver_smb ? 1 : 0 24 | content = local.csi_driver_smb_values 25 | filename = "csi_driver_smb_values.yaml" 26 | file_permission = "600" 27 | } 28 | 29 | resource "local_file" "longhorn_values" { 30 | count = var.export_values && var.enable_longhorn ? 1 : 0 31 | content = local.longhorn_values 32 | filename = "longhorn_values.yaml" 33 | file_permission = "600" 34 | } 35 | 36 | resource "local_file" "traefik_values" { 37 | count = var.export_values && var.ingress_controller == "traefik" ? 1 : 0 38 | content = local.traefik_values 39 | filename = "traefik_values.yaml" 40 | file_permission = "600" 41 | } 42 | 43 | resource "local_file" "nginx_values" { 44 | count = var.export_values && var.ingress_controller == "nginx" ? 1 : 0 45 | content = local.nginx_values 46 | filename = "nginx_values.yaml" 47 | file_permission = "600" 48 | } 49 | 50 | resource "local_file" "haproxy_values" { 51 | count = var.export_values && var.ingress_controller == "haproxy" ? 
1 : 0 52 | content = local.haproxy_values 53 | filename = "haproxy_values.yaml" 54 | file_permission = "600" 55 | } 56 | -------------------------------------------------------------------------------- /variables.tf: -------------------------------------------------------------------------------- 1 | variable "hcloud_token" { 2 | description = "Hetzner Cloud API Token." 3 | type = string 4 | sensitive = true 5 | } 6 | 7 | variable "k3s_token" { 8 | description = "k3s master token (must match when restoring a cluster)." 9 | type = string 10 | sensitive = true 11 | default = null 12 | } 13 | 14 | variable "microos_x86_snapshot_id" { 15 | description = "MicroOS x86 snapshot ID to be used. If empty (the default), the most recent image created using createkh will be used" 16 | type = string 17 | default = "" 18 | } 19 | 20 | variable "microos_arm_snapshot_id" { 21 | description = "MicroOS ARM snapshot ID to be used. If empty (the default), the most recent image created using createkh will be used" 22 | type = string 23 | default = "" 24 | } 25 | 26 | variable "ssh_port" { 27 | description = "The main SSH port to connect to the nodes." 28 | type = number 29 | default = 22 30 | 31 | validation { 32 | condition = var.ssh_port >= 0 && var.ssh_port <= 65535 33 | error_message = "The SSH port must be within the valid range from 0 to 65535." 34 | } 35 | } 36 | 37 | variable "ssh_public_key" { 38 | description = "SSH public Key." 39 | type = string 40 | } 41 | 42 | variable "ssh_private_key" { 43 | description = "SSH private Key." 44 | type = string 45 | sensitive = true 46 | } 47 | 48 | variable "ssh_hcloud_key_label" { 49 | description = "Additional SSH public Keys by hcloud label. e.g. role=admin" 50 | type = string 51 | default = "" 52 | } 53 | 54 | variable "ssh_additional_public_keys" { 55 | description = "Additional SSH public Keys. Use them to grant other team members root access to your cluster nodes." 56 | type = list(string) 57 | default = [] 58 | } 59 | 60 | variable "authentication_config" { 61 | description = "Structured authentication configuration. This can be used to define external authentication providers." 62 | type = string 63 | default = "" 64 | } 65 | 66 | variable "hcloud_ssh_key_id" { 67 | description = "If passed, a key already registered within hetzner is used. Otherwise, a new one will be created by the module." 68 | type = string 69 | default = null 70 | } 71 | 72 | variable "ssh_max_auth_tries" { 73 | description = "The maximum number of authentication attempts permitted per connection." 74 | type = number 75 | default = 2 76 | } 77 |
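When several snapshots exist in a project, pinning them avoids surprises on node replacement. A sketch of pinning both architectures in kube.tf (the IDs are illustrative; list yours with the hcloud CLI):

    microos_x86_snapshot_id = "123456789"
    microos_arm_snapshot_id = "123456790"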
78 | variable "network_region" { 79 | description = "Default region for network." 80 | type = string 81 | default = "eu-central" 82 | } 83 | variable "existing_network_id" { 84 | # Unfortunately, we need this to be a list or null. If we only use a plain 85 | # string here, and check that existing_network_id is null, terraform will 86 | # complain that it cannot set `count` variables based on existing_network_id 87 | # != null, because that id is an output value from 88 | # hcloud_network.your_network.id, which terraform will only know after its 89 | # construction. 90 | description = "If you want to create the private network before calling this module, you can do so and pass its id here. NOTE: make sure to adapt network_ipv4_cidr accordingly to a range which does not collide with your other nodes." 91 | type = list(string) 92 | default = [] 93 | nullable = false 94 | validation { 95 | condition = length(var.existing_network_id) == 0 || (can(var.existing_network_id[0]) && length(var.existing_network_id) == 1) 96 | error_message = "If you pass an existing_network_id, it must be enclosed in square brackets: [id]. This is necessary to be able to unambiguously distinguish between an empty network id (default) and a user-supplied network id." 97 | } 98 | } 99 | variable "network_ipv4_cidr" { 100 | description = "The main network cidr that all subnets will be created upon." 101 | type = string 102 | default = "10.0.0.0/8" 103 | } 104 | 105 | variable "cluster_ipv4_cidr" { 106 | description = "Internal Pod CIDR, used for the controller and currently for calico/cilium." 107 | type = string 108 | default = "10.42.0.0/16" 109 | } 110 | 111 | variable "service_ipv4_cidr" { 112 | description = "Internal Service CIDR, used for the controller and currently for calico/cilium." 113 | type = string 114 | default = "10.43.0.0/16" 115 | } 116 | 117 | variable "cluster_dns_ipv4" { 118 | description = "Internal Service IPv4 address of core-dns." 119 | type = string 120 | default = null 121 | } 122 | 123 | 124 | variable "nat_router" { 125 | description = "Whether to route all egress traffic through a single NAT router, which will be created by this module." 126 | nullable = true 127 | default = null 128 | type = object({ 129 | server_type = string 130 | location = string 131 | labels = optional(map(string), {}) 132 | enable_sudo = optional(bool, false) 133 | }) 134 | validation { 135 | condition = (var.nat_router != null && var.use_control_plane_lb) || (var.nat_router == null) 136 | error_message = "If you enable the use of a NAT router, you must set use_control_plane_lb=true." 137 | } 138 | } 139 | 140 | variable "nat_router_subnet_index" { 141 | type = number 142 | default = 200 143 | description = "Subnet index (0-255) for NAT router. Default 200 is safe for most deployments. Must not conflict with control plane (counting down from 255) or agent pools (counting up from 0)." 144 | 145 | validation { 146 | condition = var.nat_router_subnet_index >= 0 && var.nat_router_subnet_index <= 255 147 | error_message = "NAT router subnet index must be between 0 and 255." 148 | } 149 | } 150 | 151 |
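cluster_ipv4_cidr and service_ipv4_cidr must not collide with the node subnets the module carves out of network_ipv4_cidr, and cluster_dns_ipv4, if set, has to fall inside the service CIDR. A sketch restating the default layout (10.43.0.10 is the usual k3s CoreDNS address, shown only as an example):

    network_ipv4_cidr = "10.0.0.0/8"    # node subnets are carved out of this range
    cluster_ipv4_cidr = "10.42.0.0/16"  # pod network
    service_ipv4_cidr = "10.43.0.0/16"  # service network
    cluster_dns_ipv4  = "10.43.0.10"    # must lie within service_ipv4_cidr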
190 | type = string 191 | default = "10s" 192 | } 193 | 194 | variable "load_balancer_health_check_retries" { 195 | description = "Specifies the number of times a health check is retried before a target is marked as unhealthy." 196 | type = number 197 | default = 3 198 | } 199 | 200 | variable "control_plane_nodepools" { 201 | description = "List of control plane nodepools." 202 | type = list(object({ 203 | name = string 204 | server_type = string 205 | location = string 206 | backups = optional(bool) 207 | labels = list(string) 208 | taints = list(string) 209 | count = number 210 | swap_size = optional(string, "") 211 | zram_size = optional(string, "") 212 | kubelet_args = optional(list(string), ["kube-reserved=cpu=250m,memory=1500Mi,ephemeral-storage=1Gi", "system-reserved=cpu=250m,memory=300Mi"]) 213 | selinux = optional(bool, true) 214 | placement_group_compat_idx = optional(number, 0) 215 | placement_group = optional(string, null) 216 | disable_ipv4 = optional(bool, false) 217 | disable_ipv6 = optional(bool, false) 218 | network_id = optional(number, 0) 219 | })) 220 | default = [] 221 | validation { 222 | condition = length( 223 | [for control_plane_nodepool in var.control_plane_nodepools : control_plane_nodepool.name] 224 | ) == length( 225 | distinct( 226 | [for control_plane_nodepool in var.control_plane_nodepools : control_plane_nodepool.name] 227 | ) 228 | ) 229 | error_message = "Names in control_plane_nodepools must be unique." 230 | } 231 | } 232 | 233 | variable "agent_nodepools" { 234 | description = "List of agent nodepools." 235 | type = list(object({ 236 | name = string 237 | server_type = string 238 | location = string 239 | backups = optional(bool) 240 | floating_ip = optional(bool) 241 | floating_ip_rdns = optional(string, null) 242 | labels = list(string) 243 | taints = list(string) 244 | longhorn_volume_size = optional(number) 245 | swap_size = optional(string, "") 246 | zram_size = optional(string, "") 247 | kubelet_args = optional(list(string), ["kube-reserved=cpu=50m,memory=300Mi,ephemeral-storage=1Gi", "system-reserved=cpu=250m,memory=300Mi"]) 248 | selinux = optional(bool, true) 249 | placement_group_compat_idx = optional(number, 0) 250 | placement_group = optional(string, null) 251 | count = optional(number, null) 252 | disable_ipv4 = optional(bool, false) 253 | disable_ipv6 = optional(bool, false) 254 | network_id = optional(number, 0) 255 | nodes = optional(map(object({ 256 | server_type = optional(string) 257 | location = optional(string) 258 | backups = optional(bool) 259 | floating_ip = optional(bool) 260 | floating_ip_rdns = optional(string, null) 261 | labels = optional(list(string)) 262 | taints = optional(list(string)) 263 | longhorn_volume_size = optional(number) 264 | swap_size = optional(string, "") 265 | zram_size = optional(string, "") 266 | kubelet_args = optional(list(string), ["kube-reserved=cpu=50m,memory=300Mi,ephemeral-storage=1Gi", "system-reserved=cpu=250m,memory=300Mi"]) 267 | selinux = optional(bool, true) 268 | placement_group_compat_idx = optional(number, 0) 269 | placement_group = optional(string, null) 270 | append_index_to_node_name = optional(bool, true) 271 | }))) 272 | })) 273 | default = [] 274 | 275 | validation { 276 | condition = length( 277 | [for agent_nodepool in var.agent_nodepools : agent_nodepool.name] 278 | ) == length( 279 | distinct( 280 | [for agent_nodepool in var.agent_nodepools : agent_nodepool.name] 281 | ) 282 | ) 283 | error_message = "Names in agent_nodepools must be unique."
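    # Illustrative sketch of two valid pools, one sized via "count" and one via
    # a "nodes" map with stable integer keys (all names, server types and
    # locations below are placeholders):
    #
    #   agent_nodepools = [
    #     { name = "pool-a", server_type = "cpx21", location = "fsn1",
    #       labels = [], taints = [], count = 2 },
    #     { name = "pool-b", server_type = "cpx21", location = "fsn1",
    #       labels = [], taints = [],
    #       nodes = { "0" = {}, "1" = { server_type = "cpx31" } } },
    #   ]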
284 | } 285 | 286 | validation { 287 | condition = alltrue([for agent_nodepool in var.agent_nodepools : (agent_nodepool.count == null) != (agent_nodepool.nodes == null)]) 288 | error_message = "Set either nodes or count per agent_nodepool, not both." 289 | } 290 | 291 | 292 | validation { 293 | condition = alltrue([for agent_nodepool in var.agent_nodepools : 294 | alltrue([for agent_key, agent_node in coalesce(agent_nodepool.nodes, {}) : can(tonumber(agent_key)) && tonumber(agent_key) == floor(tonumber(agent_key)) && 0 <= tonumber(agent_key) && tonumber(agent_key) < 154]) 295 | ]) 296 | # 154 because the private ip is derived from tonumber(key) + 101. See private_ipv4 in agents.tf 297 | error_message = "The key for each individual node in a nodepool must be a stable integer in the range [0, 153] cast as a string." 298 | } 299 | 300 | validation { 301 | condition = length(var.agent_nodepools) == 0 ? true : sum([for agent_nodepool in var.agent_nodepools : length(coalesce(agent_nodepool.nodes, {})) + coalesce(agent_nodepool.count, 0)]) <= 100 302 | # Hetzner private networks are limited to 100 attached servers, as stated in the error message below. 303 | error_message = "Hetzner does not support networks with more than 100 servers." 304 | } 305 | 306 | } 307 | 308 | variable "cluster_autoscaler_image" { 309 | type = string 310 | default = "registry.k8s.io/autoscaling/cluster-autoscaler" 311 | description = "Image of Kubernetes Cluster Autoscaler for Hetzner Cloud to be used." 312 | } 313 | 314 | variable "cluster_autoscaler_version" { 315 | type = string 316 | default = "v1.32.0" 317 | description = "Version of Kubernetes Cluster Autoscaler for Hetzner Cloud. Should be aligned with Kubernetes version. Available versions for the official image can be found at https://explore.ggcr.dev/?repo=registry.k8s.io%2Fautoscaling%2Fcluster-autoscaler." 318 | } 319 | 320 | variable "cluster_autoscaler_log_level" { 321 | description = "Verbosity level of the logs for cluster-autoscaler." 322 | type = number 323 | default = 4 324 | 325 | validation { 326 | condition = var.cluster_autoscaler_log_level >= 0 && var.cluster_autoscaler_log_level <= 5 327 | error_message = "The log level must be between 0 and 5." 328 | } 329 | } 330 | 331 | variable "cluster_autoscaler_log_to_stderr" { 332 | description = "Determines whether to log to stderr or not." 333 | type = bool 334 | default = true 335 | } 336 | 337 | variable "cluster_autoscaler_stderr_threshold" { 338 | description = "Severity level above which logs are sent to stderr instead of stdout." 339 | type = string 340 | default = "INFO" 341 | 342 | validation { 343 | condition = var.cluster_autoscaler_stderr_threshold == "INFO" || var.cluster_autoscaler_stderr_threshold == "WARNING" || var.cluster_autoscaler_stderr_threshold == "ERROR" || var.cluster_autoscaler_stderr_threshold == "FATAL" 344 | error_message = "The stderr threshold must be one of the following: INFO, WARNING, ERROR, FATAL." 345 | } 346 | } 347 | 348 | variable "cluster_autoscaler_extra_args" { 349 | type = list(string) 350 | default = [] 351 | description = "Extra arguments for the Cluster Autoscaler deployment." 352 | } 353 | 354 | variable "cluster_autoscaler_server_creation_timeout" { 355 | type = number 356 | default = 15 357 | description = "Timeout (in minutes) within which a newly created server/node must become available before it is given up on and destroyed." 358 | } 359 | 360 | variable "autoscaler_nodepools" { 361 | description = "Cluster autoscaler nodepools."
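  # A hedged sketch of a single autoscaler pool (all values are placeholders,
  # not recommendations):
  #
  #   autoscaler_nodepools = [
  #     {
  #       name        = "autoscaled-small"
  #       server_type = "cpx21"
  #       location    = "fsn1"
  #       min_nodes   = 0
  #       max_nodes   = 3
  #       labels      = { "node.kubernetes.io/role" = "autoscaled" }
  #       taints      = [{ key = "autoscaled", value = "true", effect = "NoExecute" }]
  #     }
  #   ]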
362 | type = list(object({ 363 | name = string 364 | server_type = string 365 | location = string 366 | min_nodes = number 367 | max_nodes = number 368 | labels = optional(map(string), {}) 369 | kubelet_args = optional(list(string), ["kube-reserved=cpu=50m,memory=300Mi,ephemeral-storage=1Gi", "system-reserved=cpu=250m,memory=300Mi"]) 370 | taints = optional(list(object({ 371 | key = string 372 | value = string 373 | effect = string 374 | })), []) 375 | })) 376 | default = [] 377 | } 378 | 379 | variable "autoscaler_labels" { 380 | description = "Labels for nodes created by the Cluster Autoscaler." 381 | type = list(string) 382 | default = [] 383 | } 384 | 385 | variable "autoscaler_taints" { 386 | description = "Taints for nodes created by the Cluster Autoscaler." 387 | type = list(string) 388 | default = [] 389 | } 390 | 391 | variable "autoscaler_disable_ipv4" { 392 | description = "Disable IPv4 on nodes created by the Cluster Autoscaler." 393 | type = bool 394 | default = false 395 | } 396 | 397 | variable "autoscaler_disable_ipv6" { 398 | description = "Disable IPv6 on nodes created by the Cluster Autoscaler." 399 | type = bool 400 | default = false 401 | } 402 | 403 | variable "hetzner_ccm_version" { 404 | type = string 405 | default = null 406 | description = "Version of Kubernetes Cloud Controller Manager for Hetzner Cloud. See https://github.com/hetznercloud/hcloud-cloud-controller-manager/releases for the available versions." 407 | } 408 | 409 | variable "hetzner_ccm_use_helm" { 410 | type = bool 411 | default = false 412 | description = "Whether to use the Helm chart for the Hetzner CCM instead of the legacy manifest, which is the default." 413 | } 414 | 415 | variable "hetzner_csi_version" { 416 | type = string 417 | default = null 418 | description = "Version of Container Storage Interface driver for Hetzner Cloud. See https://github.com/hetznercloud/csi-driver/releases for the available versions." 419 | } 420 | 421 | variable "hetzner_csi_values" { 422 | type = string 423 | default = "" 424 | description = "Additional helm values file to pass to hetzner csi as 'valuesContent' at the HelmChart." 425 | } 426 | 427 | 428 | variable "restrict_outbound_traffic" { 429 | type = bool 430 | default = true 431 | description = "Whether or not to restrict the outbound traffic." 432 | } 433 | 434 | variable "enable_klipper_metal_lb" { 435 | type = bool 436 | default = false 437 | description = "Use klipper load balancer." 438 | } 439 | 440 | variable "etcd_s3_backup" { 441 | description = "Etcd cluster state backup to S3 storage." 442 | type = map(any) 443 | sensitive = true 444 | default = {} 445 | } 446 | 447 | variable "ingress_controller" { 448 | type = string 449 | default = "traefik" 450 | description = "The name of the ingress controller." 451 | 452 | validation { 453 | condition = contains(["traefik", "nginx", "haproxy", "none"], var.ingress_controller) 454 | error_message = "Must be one of \"traefik\", \"nginx\", \"haproxy\" or \"none\"." 455 | } 456 | } 457 | 458 | variable "ingress_replica_count" { 459 | type = number 460 | default = 0 461 | description = "Number of replicas per ingress controller. 0 means autodetect based on the number of agent nodes." 462 | 463 | validation { 464 | condition = var.ingress_replica_count >= 0 465 | error_message = "Number of ingress replicas can't be below 0." 466 | } 467 | } 468 | 469 | variable "ingress_max_replica_count" { 470 | type = number 471 | default = 10 472 | description = "Number of maximum replicas per ingress controller.
Used for ingress HPA. Must be higher than number of replicas." 473 | 474 | validation { 475 | condition = var.ingress_max_replica_count >= 0 476 | error_message = "Number of ingress maximum replicas can't be below 0." 477 | } 478 | } 479 | 480 | variable "traefik_image_tag" { 481 | type = string 482 | default = "" 483 | description = "Traefik image tag. Useful to use the beta version for new features. Example: v3.0.0-beta5" 484 | } 485 | 486 | variable "traefik_autoscaling" { 487 | type = bool 488 | default = true 489 | description = "Should traefik enable Horizontal Pod Autoscaler." 490 | } 491 | 492 | variable "traefik_redirect_to_https" { 493 | type = bool 494 | default = true 495 | description = "Should traefik redirect http traffic to https." 496 | } 497 | 498 | variable "traefik_pod_disruption_budget" { 499 | type = bool 500 | default = true 501 | description = "Should traefik enable pod disruption budget. Default values are maxUnavailable: 33% and minAvailable: 1." 502 | } 503 | 504 | variable "traefik_resource_limits" { 505 | type = bool 506 | default = true 507 | description = "Should traefik enable default resource requests and limits. Default values are requests: 100m & 50Mi and limits: 300m & 150Mi." 508 | } 509 | 510 | variable "traefik_resource_values" { 511 | type = object({ 512 | requests = object({ 513 | cpu = string 514 | memory = string 515 | }) 516 | limits = object({ 517 | cpu = string 518 | memory = string 519 | }) 520 | }) 521 | default = { 522 | requests = { 523 | memory = "50Mi" 524 | cpu = "100m" 525 | } 526 | limits = { 527 | memory = "150Mi" 528 | cpu = "300m" 529 | } 530 | } 531 | description = "Requests and limits for Traefik." 532 | } 533 | 534 | variable "traefik_additional_ports" { 535 | type = list(object({ 536 | name = string 537 | port = number 538 | exposedPort = number 539 | })) 540 | default = [] 541 | description = "Additional ports to pass to Traefik. These are the ones that go into the ports section of the Traefik helm values file." 542 | } 543 | 544 | variable "traefik_additional_options" { 545 | type = list(string) 546 | default = [] 547 | description = "Additional options to pass to Traefik as a list of strings. These are the ones that go into the additionalArguments section of the Traefik helm values file." 548 | } 549 | 550 | variable "traefik_additional_trusted_ips" { 551 | type = list(string) 552 | default = [] 553 | description = "Additional Trusted IPs to pass to Traefik. These are the ones that go into the trustedIPs section of the Traefik helm values file." 554 | } 555 | 556 | variable "traefik_version" { 557 | type = string 558 | default = "" 559 | description = "Version of Traefik helm chart. See https://github.com/traefik/traefik-helm-chart/releases for the available versions." 560 | } 561 | 562 | variable "traefik_values" { 563 | type = string 564 | default = "" 565 | description = "Additional helm values file to pass to Traefik as 'valuesContent' at the HelmChart." 566 | } 567 | 568 | variable "nginx_version" { 569 | type = string 570 | default = "" 571 | description = "Version of Nginx helm chart. See https://github.com/kubernetes/ingress-nginx?tab=readme-ov-file#supported-versions-table for the available versions." 572 | } 573 | 574 | variable "nginx_values" { 575 | type = string 576 | default = "" 577 | description = "Additional helm values file to pass to nginx as 'valuesContent' at the HelmChart." 
578 | } 579 | 580 | variable "haproxy_requests_cpu" { 581 | type = string 582 | default = "250m" 583 | description = "Setting for HAProxy controller.resources.requests.cpu" 584 | } 585 | 586 | variable "haproxy_requests_memory" { 587 | type = string 588 | default = "400Mi" 589 | description = "Setting for HAProxy controller.resources.requests.memory" 590 | } 591 | 592 | variable "haproxy_additional_proxy_protocol_ips" { 593 | type = list(string) 594 | default = [] 595 | description = "Additional trusted proxy protocol IPs to pass to haproxy." 596 | } 597 | 598 | variable "haproxy_version" { 599 | type = string 600 | default = "" 601 | description = "Version of HAProxy helm chart." 602 | } 603 | 604 | variable "haproxy_values" { 605 | type = string 606 | default = "" 607 | description = "Helm values file to pass to haproxy as 'valuesContent' at the HelmChart, overriding the default." 608 | } 609 | 610 | variable "allow_scheduling_on_control_plane" { 611 | type = bool 612 | default = false 613 | description = "Whether to allow non-control-plane workloads to run on the control-plane nodes." 614 | } 615 | 616 | variable "enable_metrics_server" { 617 | type = bool 618 | default = true 619 | description = "Whether to enable or disable the k3s metrics server." 620 | } 621 | 622 | variable "initial_k3s_channel" { 623 | type = string 624 | default = "v1.31" # Please update kube.tf.example too when changing this variable 625 | description = "Allows you to specify an initial k3s channel. See https://update.k3s.io/v1-release/channels for available channels." 626 | 627 | validation { 628 | condition = contains(["stable", "latest", "testing", "v1.16", "v1.17", "v1.18", "v1.19", "v1.20", "v1.21", "v1.22", "v1.23", "v1.24", "v1.25", "v1.26", "v1.27", "v1.28", "v1.29", "v1.30", "v1.31", "v1.32", "v1.33"], var.initial_k3s_channel) 629 | error_message = "The initial k3s channel must be one of stable, latest or testing, or any of the minor kube versions like v1.26." 630 | } 631 | } 632 | 633 | variable "install_k3s_version" { 634 | type = string 635 | default = "" 636 | description = "Allows you to specify the k3s version (Example: v1.29.6+k3s2). Supersedes initial_k3s_channel. See https://github.com/k3s-io/k3s/releases for available versions." 637 | } 638 | 639 | variable "system_upgrade_enable_eviction" { 640 | type = bool 641 | default = true 642 | description = "Whether to directly delete pods during system upgrade (k3s) or evict them. Defaults to true. Disable this on small clusters to avoid system upgrades hanging, since pods resisting eviction keep the node unschedulable forever. NOTE: turning this off introduces potential downtime of services on the upgraded nodes." 643 | } 644 | 645 | variable "system_upgrade_use_drain" { 646 | type = bool 647 | default = true 648 | description = "Whether to use drain (true, the default), which deletes and transfers all pods to other nodes before a node is upgraded, or cordon (false), which just prevents scheduling new pods on the node during the upgrade and keeps all pods running." 649 | } 650 | 651 | variable "automatically_upgrade_k3s" { 652 | type = bool 653 | default = true 654 | description = "Whether to automatically upgrade k3s based on the selected channel." 655 | } 656 | 657 | variable "automatically_upgrade_os" { 658 | type = bool 659 | default = true 660 | description = "Whether to enable or disable automatic os updates. Defaults to true.
Should be disabled for single-node clusters" 661 | } 662 | 663 | variable "extra_firewall_rules" { 664 | type = list(any) 665 | default = [] 666 | description = "Additional firewall rules to apply to the cluster." 667 | } 668 | 669 | variable "firewall_kube_api_source" { 670 | type = list(string) 671 | default = ["0.0.0.0/0", "::/0"] 672 | description = "Source networks that have Kube API access to the servers." 673 | } 674 | 675 | variable "firewall_ssh_source" { 676 | type = list(string) 677 | default = ["0.0.0.0/0", "::/0"] 678 | description = "Source networks that have SSH access to the servers." 679 | } 680 | 681 | variable "use_cluster_name_in_node_name" { 682 | type = bool 683 | default = true 684 | description = "Whether to use the cluster name in the node name." 685 | } 686 | 687 | variable "cluster_name" { 688 | type = string 689 | default = "k3s" 690 | description = "Name of the cluster." 691 | 692 | validation { 693 | condition = can(regex("^[a-z0-9\\-]+$", var.cluster_name)) 694 | error_message = "The cluster name must be in the form of lowercase alphanumeric characters and/or dashes." 695 | } 696 | } 697 | 698 | variable "base_domain" { 699 | type = string 700 | default = "" 701 | description = "Base domain of the cluster, used for reverse dns." 702 | 703 | validation { 704 | condition = can(regex("^(?:(?:(?:[A-Za-z0-9])|(?:[A-Za-z0-9](?:[A-Za-z0-9\\-]+)?[A-Za-z0-9]))+(\\.))+([A-Za-z]{2,})([\\/?])?([\\/?][A-Za-z0-9\\-%._~:\\/?#\\[\\]@!\\$&\\'\\(\\)\\*\\+,;=]+)?$", var.base_domain)) || var.base_domain == "" 705 | error_message = "It must be a valid domain name (FQDN)." 706 | } 707 | } 708 | 709 | variable "placement_group_disable" { 710 | type = bool 711 | default = false 712 | description = "Whether to disable placement groups." 713 | } 714 | 715 | variable "disable_kube_proxy" { 716 | type = bool 717 | default = false 718 | description = "Disable kube-proxy in K3s (default false)." 719 | } 720 | 721 | variable "disable_network_policy" { 722 | type = bool 723 | default = false 724 | description = "Disable k3s default network policy controller (default false, automatically true for calico and cilium)." 725 | } 726 | 727 | variable "cni_plugin" { 728 | type = string 729 | default = "flannel" 730 | description = "CNI plugin for k3s." 731 | 732 | validation { 733 | condition = contains(["flannel", "calico", "cilium"], var.cni_plugin) 734 | error_message = "The cni_plugin must be one of \"flannel\", \"calico\", or \"cilium\"." 735 | } 736 | } 737 | 738 | variable "cilium_egress_gateway_enabled" { 739 | type = bool 740 | default = false 741 | description = "Enables egress gateway to redirect and SNAT the traffic that leaves the cluster." 742 | } 743 | 744 | variable "cilium_hubble_enabled" { 745 | type = bool 746 | default = false 747 | description = "Enables Hubble Observability to collect and visualize network traffic." 748 | } 749 | 750 | variable "cilium_hubble_metrics_enabled" { 751 | type = list(string) 752 | default = [] 753 | description = "Configures the list of Hubble metrics to collect" 754 | } 755 | 756 | variable "cilium_ipv4_native_routing_cidr" { 757 | type = string 758 | default = null 759 | description = "Used when Cilium is configured in native routing mode. The CNI assumes that the underlying network stack will forward packets to this destination without the need to apply SNAT. 
Default: value of \"cluster_ipv4_cidr\"" 760 | } 761 | 762 | variable "cilium_routing_mode" { 763 | type = string 764 | default = "tunnel" 765 | description = "Set native-routing mode (\"native\") or tunneling mode (\"tunnel\")." 766 | 767 | validation { 768 | condition = contains(["tunnel", "native"], var.cilium_routing_mode) 769 | error_message = "The cilium_routing_mode must be one of \"tunnel\" or \"native\"." 770 | } 771 | } 772 | 773 | variable "cilium_values" { 774 | type = string 775 | default = "" 776 | description = "Additional helm values file to pass to Cilium as 'valuesContent' at the HelmChart." 777 | } 778 | 779 | variable "cilium_version" { 780 | type = string 781 | default = "1.17.0" 782 | description = "Version of Cilium. See https://github.com/cilium/cilium/releases for the available versions." 783 | } 784 | 785 | variable "calico_values" { 786 | type = string 787 | default = "" 788 | description = "Currently just a stub for a future Helm implementation. For now, it can be used to replace the calico kustomize patch of the calico manifest." 789 | } 790 | 791 | variable "enable_iscsid" { 792 | type = bool 793 | default = false 794 | description = "This is always true when enable_longhorn=true; however, you may also want this enabled if you perform your own installation of Longhorn after this module runs." 795 | } 796 | 797 | variable "enable_longhorn" { 798 | type = bool 799 | default = false 800 | description = "Whether or not to enable Longhorn." 801 | } 802 | 803 | variable "longhorn_version" { 804 | type = string 805 | default = "*" 806 | description = "Version of Longhorn." 807 | } 808 | 809 | variable "longhorn_helmchart_bootstrap" { 810 | type = bool 811 | default = false 812 | description = "Whether the HelmChart longhorn shall be run on control-plane nodes." 813 | } 814 | 815 | variable "longhorn_repository" { 816 | type = string 817 | default = "https://charts.longhorn.io" 818 | description = "By default the official chart is used, which may be incompatible with Rancher. If you need full Rancher support, switch to https://charts.rancher.io." 819 | } 820 | 821 | variable "longhorn_namespace" { 822 | type = string 823 | default = "longhorn-system" 824 | description = "Namespace for the Longhorn deployment. Defaults to 'longhorn-system'." 825 | } 826 | 827 | variable "longhorn_fstype" { 828 | type = string 829 | default = "ext4" 830 | description = "The longhorn fstype." 831 | 832 | validation { 833 | condition = contains(["ext4", "xfs"], var.longhorn_fstype) 834 | error_message = "Must be one of \"ext4\" or \"xfs\"." 835 | } 836 | } 837 | 838 | variable "longhorn_replica_count" { 839 | type = number 840 | default = 3 841 | description = "Number of replicas per longhorn volume." 842 | 843 | validation { 844 | condition = var.longhorn_replica_count > 0 845 | error_message = "Number of longhorn replicas can't be below 1." 846 | } 847 | } 848 | 849 | variable "longhorn_values" { 850 | type = string 851 | default = "" 852 | description = "Additional helm values file to pass to longhorn as 'valuesContent' at the HelmChart." 853 | } 854 | 855 | variable "disable_hetzner_csi" { 856 | type = bool 857 | default = false 858 | description = "Disable hetzner csi driver." 859 | } 860 | 861 | variable "enable_csi_driver_smb" { 862 | type = bool 863 | default = false 864 | description = "Whether or not to enable csi-driver-smb." 865 | } 866 | 867 | variable "csi_driver_smb_version" { 868 | type = string 869 | default = "*" 870 | description = "Version of csi_driver_smb.
See https://github.com/kubernetes-csi/csi-driver-smb/releases for the available versions." 871 | } 872 | 873 | variable "csi_driver_smb_helmchart_bootstrap" { 874 | type = bool 875 | default = false 876 | description = "Whether the HelmChart csi_driver_smb shall be run on control-plane nodes." 877 | } 878 | 879 | variable "csi_driver_smb_values" { 880 | type = string 881 | default = "" 882 | description = "Additional helm values file to pass to csi-driver-smb as 'valuesContent' at the HelmChart." 883 | } 884 | 885 | variable "enable_cert_manager" { 886 | type = bool 887 | default = true 888 | description = "Enable cert manager." 889 | } 890 | 891 | variable "cert_manager_version" { 892 | type = string 893 | default = "*" 894 | description = "Version of cert_manager." 895 | } 896 | 897 | variable "cert_manager_helmchart_bootstrap" { 898 | type = bool 899 | default = false 900 | description = "Whether the HelmChart cert_manager shall be run on control-plane nodes." 901 | } 902 | 903 | variable "cert_manager_values" { 904 | type = string 905 | default = "" 906 | description = "Additional helm values file to pass to Cert-Manager as 'valuesContent' at the HelmChart. Defaults are set in locals.tf. For cert-manager versions prior to v1.15.0, you need to set 'installCRDs: true'." 907 | } 908 | 909 | variable "enable_rancher" { 910 | type = bool 911 | default = false 912 | description = "Enable rancher." 913 | } 914 | 915 | variable "rancher_version" { 916 | type = string 917 | default = "*" 918 | description = "Version of rancher." 919 | } 920 | 921 | variable "rancher_helmchart_bootstrap" { 922 | type = bool 923 | default = false 924 | description = "Whether the HelmChart rancher shall be run on control-plane nodes." 925 | } 926 | 927 | variable "rancher_install_channel" { 928 | type = string 929 | default = "stable" 930 | description = "The rancher installation channel." 931 | 932 | validation { 933 | condition = contains(["stable", "latest"], var.rancher_install_channel) 934 | error_message = "The allowed values for the Rancher install channel are stable or latest." 935 | } 936 | } 937 | 938 | variable "rancher_hostname" { 939 | type = string 940 | default = "" 941 | description = "The rancher hostname." 942 | 943 | validation { 944 | condition = can(regex("^(?:(?:(?:[A-Za-z0-9])|(?:[A-Za-z0-9](?:[A-Za-z0-9\\-]+)?[A-Za-z0-9]))+(\\.))+([A-Za-z]{2,})([\\/?])?([\\/?][A-Za-z0-9\\-%._~:\\/?#\\[\\]@!\\$&\\'\\(\\)\\*\\+,;=]+)?$", var.rancher_hostname)) || var.rancher_hostname == "" 945 | error_message = "It must be a valid domain name (FQDN)." 946 | } 947 | } 948 | 949 | variable "lb_hostname" { 950 | type = string 951 | default = "" 952 | description = "The Hetzner Load Balancer hostname, for either Traefik, HAProxy or Ingress-Nginx." 953 | 954 | validation { 955 | condition = can(regex("^(?:(?:(?:[A-Za-z0-9])|(?:[A-Za-z0-9](?:[A-Za-z0-9\\-]+)?[A-Za-z0-9]))+(\\.))+([A-Za-z]{2,})([\\/?])?([\\/?][A-Za-z0-9\\-%._~:\\/?#\\[\\]@!\\$&\\'\\(\\)\\*\\+,;=]+)?$", var.lb_hostname)) || var.lb_hostname == "" 956 | error_message = "It must be a valid domain name (FQDN)." 957 | } 958 | } 959 | 960 | variable "kubeconfig_server_address" { 961 | type = string 962 | default = "" 963 | description = "The hostname used for kubeconfig." 964 | } 965 | 966 | variable "rancher_registration_manifest_url" { 967 | type = string 968 | description = "The url of a rancher registration manifest to apply. (see https://rancher.com/docs/rancher/v2.6/en/cluster-provisioning/registered-clusters/)." 
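  # Illustrative placeholder: Rancher shows such a URL when you register an
  # existing cluster, e.g. "https://rancher.example.com/v3/import/<token>.yaml"
  # (hostname and token are made up here). It is marked sensitive because the
  # URL typically embeds a cluster registration token.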
969 | default = "" 970 | sensitive = true 971 | } 972 | 973 | variable "rancher_bootstrap_password" { 974 | type = string 975 | default = "" 976 | description = "Rancher bootstrap password." 977 | sensitive = true 978 | 979 | validation { 980 | condition = (length(var.rancher_bootstrap_password) >= 48) || (length(var.rancher_bootstrap_password) == 0) 981 | error_message = "The Rancher bootstrap password must be at least 48 characters long." 982 | } 983 | } 984 | 985 | variable "rancher_values" { 986 | type = string 987 | default = "" 988 | description = "Additional helm values file to pass to Rancher as 'valuesContent' at the HelmChart." 989 | } 990 | 991 | variable "kured_version" { 992 | type = string 993 | default = null 994 | description = "Version of Kured. See https://github.com/kubereboot/kured/releases for the available versions." 995 | } 996 | 997 | variable "kured_options" { 998 | type = map(string) 999 | default = {} 1000 | } 1001 | 1002 | variable "block_icmp_ping_in" { 1003 | type = bool 1004 | default = false 1005 | description = "Block incoming ICMP ping." 1006 | } 1007 | 1008 | variable "use_control_plane_lb" { 1009 | type = bool 1010 | default = false 1011 | description = "When this is enabled, all external traffic will be routed via a control-plane load balancer rather than via the first node, allowing for high availability." 1012 | } 1013 | 1014 | variable "control_plane_lb_type" { 1015 | type = string 1016 | default = "lb11" 1017 | description = "The type of load balancer to use for the control plane load balancer. Defaults to lb11, which is the cheapest one." 1018 | } 1019 | 1020 | variable "control_plane_lb_enable_public_interface" { 1021 | type = bool 1022 | default = true 1023 | description = "Enable or disable the public interface for the control plane load balancer. Defaults to true." 1024 | } 1025 | 1026 | variable "dns_servers" { 1027 | type = list(string) 1028 | 1029 | default = [ 1030 | "185.12.64.1", 1031 | "185.12.64.2", 1032 | "2a01:4ff:ff00::add:1", 1033 | ] 1034 | description = "IP addresses to use for the DNS servers; set to an empty list to use the ones provided by Hetzner. The length is limited to 3 entries; more entries are not supported by Kubernetes." 1035 | 1036 | validation { 1037 | condition = length(var.dns_servers) <= 3 1038 | error_message = "The list must have no more than 3 items." 1039 | } 1040 | 1041 | validation { 1042 | condition = alltrue([for ip in var.dns_servers : provider::assert::ip(ip)]) 1043 | error_message = "Some IP addresses are incorrect." 1044 | } 1045 | } 1046 | 1047 | variable "address_for_connectivity_test" { 1048 | description = "The address to test for external connectivity before proceeding with the installation. Defaults to Google's public DNS." 1049 | type = string 1050 | default = "8.8.8.8" 1051 | } 1052 | 1053 | variable "additional_k3s_environment" { 1054 | type = map(any) 1055 | default = {} 1056 | description = "Additional environment variables for the k3s binary. See for example https://docs.k3s.io/advanced#configuring-an-http-proxy ." 1057 | } 1058 | 1059 | variable "preinstall_exec" { 1060 | type = list(string) 1061 | default = [] 1062 | description = "Additional commands to execute before the install calls, for example fetching and installing certs." 1063 | } 1064 | 1065 | variable "postinstall_exec" { 1066 | type = list(string) 1067 | default = [] 1068 | description = "Additional commands to execute after the install calls, for example restoring a backup."
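  # A hedged example: restoring etcd from a local snapshot on first boot. The
  # snapshot path is a placeholder; the flags are standard k3s server flags:
  #
  #   postinstall_exec = [
  #     "k3s server --cluster-reset --cluster-reset-restore-path=/root/etcd-snapshot",
  #   ]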
1069 | } 1070 | 1071 | 1072 | variable "extra_kustomize_deployment_commands" { 1073 | type = string 1074 | default = "" 1075 | description = "Commands to be executed after the `kubectl apply -k ` step." 1076 | } 1077 | 1078 | variable "extra_kustomize_parameters" { 1079 | type = any 1080 | default = {} 1081 | description = "All values will be passed to the `kustomization.tmp.yml` template." 1082 | } 1083 | 1084 | variable "extra_kustomize_folder" { 1085 | type = string 1086 | default = "extra-manifests" 1087 | description = "Folder from which to upload extra manifests." 1088 | } 1089 | 1090 | variable "create_kubeconfig" { 1091 | type = bool 1092 | default = true 1093 | description = "Create the kubeconfig as a local file resource. Should be disabled for automatic runs." 1094 | } 1095 | 1096 | variable "create_kustomization" { 1097 | type = bool 1098 | default = true 1099 | description = "Create the kustomization backup as a local file resource. Should be disabled for automatic runs." 1100 | } 1101 | 1102 | variable "export_values" { 1103 | type = bool 1104 | default = false 1105 | description = "Export the values.yaml files used for deployment as local files." 1106 | } 1107 | 1108 | variable "enable_wireguard" { 1109 | type = bool 1110 | default = false 1111 | description = "Use wireguard-native as the backend for CNI." 1112 | } 1113 | 1114 | variable "control_planes_custom_config" { 1115 | type = any 1116 | default = {} 1117 | description = "Additional configuration for control planes that will be added to k3s's config.yaml. E.g. to allow etcd monitoring." 1118 | } 1119 | 1120 | variable "agent_nodes_custom_config" { 1121 | type = any 1122 | default = {} 1123 | description = "Additional configuration for agent nodes and autoscaler nodes that will be added to k3s's config.yaml. E.g. to allow kube-proxy monitoring." 1124 | } 1125 | 1126 | variable "k3s_registries" { 1127 | description = "K3S registries.yml contents. It is used to access private Docker registries." 1128 | default = " " 1129 | type = string 1130 | } 1131 | 1132 | variable "additional_tls_sans" { 1133 | description = "Additional TLS SANs to allow connections to the control plane through them." 1134 | default = [] 1135 | type = list(string) 1136 | } 1137 | 1138 | variable "calico_version" { 1139 | type = string 1140 | default = null 1141 | description = "Version of Calico. See https://github.com/projectcalico/calico/releases for the available versions." 1142 | } 1143 | 1144 | variable "k3s_exec_server_args" { 1145 | type = string 1146 | default = "" 1147 | description = "The control plane is started with `k3s server {k3s_exec_server_args}`. Use this to add kube-apiserver-arg for example." 1148 | } 1149 | 1150 | variable "k3s_exec_agent_args" { 1151 | type = string 1152 | default = "" 1153 | description = "Agent nodes are started with `k3s agent {k3s_exec_agent_args}`. Use this to add kubelet-arg for example." 1154 | } 1155 | 1156 | variable "k3s_prefer_bundled_bin" { 1157 | type = bool 1158 | default = false 1159 | description = "Whether to use the bundled k3s mount binary instead of the one from the distro's util-linux package." 1160 | } 1161 | 1162 | variable "k3s_global_kubelet_args" { 1163 | type = list(string) 1164 | default = [] 1165 | description = "Global kubelet args for all nodes." 1166 | } 1167 | 1168 | variable "k3s_control_plane_kubelet_args" { 1169 | type = list(string) 1170 | default = [] 1171 | description = "Kubelet args for control plane nodes."
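  # Illustrative example; each entry is a "key=value" string handed to the
  # kubelet, in the same format as the nodepool kubelet_args defaults above
  # (values below are placeholders, not recommendations):
  #
  #   k3s_control_plane_kubelet_args = [
  #     "system-reserved=cpu=250m,memory=300Mi",
  #     "kube-reserved=cpu=250m,memory=1500Mi",
  #   ]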
1172 | } 1173 | 1174 | variable "k3s_agent_kubelet_args" { 1175 | type = list(string) 1176 | default = [] 1177 | description = "Kubelet args for agent nodes." 1178 | } 1179 | 1180 | variable "k3s_autoscaler_kubelet_args" { 1181 | type = list(string) 1182 | default = [] 1183 | description = "Kubelet args for autoscaler nodes." 1184 | } 1185 | 1186 | variable "ingress_target_namespace" { 1187 | type = string 1188 | default = "" 1189 | description = "The namespace to deploy the ingress controller to. Defaults to ingress name." 1190 | } 1191 | 1192 | variable "enable_local_storage" { 1193 | type = bool 1194 | default = false 1195 | description = "Whether to enable or disable k3s local-storage. Warning: when enabled, there will be two default storage classes: \"local-path\" and \"hcloud-volumes\"!" 1196 | } 1197 | 1198 | variable "disable_selinux" { 1199 | type = bool 1200 | default = false 1201 | description = "Disable SELinux on all nodes." 1202 | } 1203 | 1204 | variable "enable_delete_protection" { 1205 | type = object({ 1206 | floating_ip = optional(bool, false) 1207 | load_balancer = optional(bool, false) 1208 | volume = optional(bool, false) 1209 | }) 1210 | default = { 1211 | floating_ip = false 1212 | load_balancer = false 1213 | volume = false 1214 | } 1215 | description = "Enable or disable delete protection for resources in Hetzner Cloud." 1216 | } 1217 | 1218 | variable "keep_disk_agents" { 1219 | type = bool 1220 | default = false 1221 | description = "Whether to keep OS disks of nodes the same size when upgrading an agent node" 1222 | } 1223 | 1224 | variable "keep_disk_cp" { 1225 | type = bool 1226 | default = false 1227 | description = "Whether to keep OS disks of nodes the same size when upgrading a control-plane node" 1228 | } 1229 | 1230 | 1231 | variable "sys_upgrade_controller_version" { 1232 | type = string 1233 | default = "v0.14.2" 1234 | description = "Version of the System Upgrade Controller for automated upgrades of k3s. See https://github.com/rancher/system-upgrade-controller/releases for the available versions." 1235 | } 1236 | 1237 | variable "hetzner_ccm_values" { 1238 | type = string 1239 | default = "" 1240 | description = "Additional helm values file to pass to Hetzner Controller Manager as 'valuesContent' at the HelmChart." 1241 | } 1242 | -------------------------------------------------------------------------------- /versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.8.0" 3 | required_providers { 4 | github = { 5 | source = "integrations/github" 6 | version = ">= 6.4.0" 7 | } 8 | hcloud = { 9 | source = "hetznercloud/hcloud" 10 | version = ">= 1.51.0" 11 | } 12 | local = { 13 | source = "hashicorp/local" 14 | version = ">= 2.5.2" 15 | } 16 | ssh = { 17 | source = "loafoe/ssh" 18 | version = "2.7.0" 19 | } 20 | assert = { 21 | source = "hashicorp/assert" 22 | version = ">= 0.16.0" 23 | } 24 | semvers = { 25 | source = "anapsix/semvers" 26 | version = ">= 0.7.1" 27 | } 28 | } 29 | } 30 | --------------------------------------------------------------------------------