├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── feature_request.yml │ └── bug_report.yml └── renovate.json ├── packer ├── requirements.pkr.hcl ├── image_amd64.pkr.hcl └── image_arm64.pkr.hcl ├── providers.tf ├── ssh_key.tf ├── terraform.tf ├── placement_group.tf ├── LICENSE ├── rbac.tf ├── floating_ip.tf ├── longhorn.tf ├── metrics_server.tf ├── oidc.tf ├── nodepool.tf ├── outputs.tf ├── cert_manager.tf ├── firewall.tf ├── autoscaler.tf ├── talos_backup.tf ├── hcloud.tf ├── cilium.tf ├── network.tf ├── ingress_nginx.tf ├── image.tf ├── client.tf ├── server.tf ├── rdns.tf ├── load_balancer.tf ├── talos_config.tf ├── talos.tf └── README.md /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: hcloud-k8s 4 | liberapay: hcloud-k8s 5 | -------------------------------------------------------------------------------- /packer/requirements.pkr.hcl: -------------------------------------------------------------------------------- 1 | packer { 2 | required_plugins { 3 | hcloud = { 4 | version = "1.7.0" 5 | source = "github.com/hetznercloud/hcloud" 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /providers.tf: -------------------------------------------------------------------------------- 1 | provider "hcloud" { 2 | token = var.hcloud_token 3 | poll_interval = "2s" 4 | } 5 | 6 | provider "helm" { 7 | repository_config_path = "${path.module}/.helm/repositories.yaml" 8 | 9 | kubernetes = { 10 | config_path = "${path.module}/.helm/kubeconfig" 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /ssh_key.tf: -------------------------------------------------------------------------------- 1 | resource "tls_private_key" "ssh_key" { 2 | algorithm = "ED25519" 3 | } 4 | 5 | resource "hcloud_ssh_key" "this" { 6 | name = "${var.cluster_name}-default" 7 | public_key = tls_private_key.ssh_key.public_key_openssh 8 | 9 | labels = { 10 | cluster = var.cluster_name 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | 3 | contact_links: 4 | - name: 💬 Discussions 5 | url: https://github.com/hcloud-k8s/terraform-hcloud-kubernetes/discussions 6 | about: Please use Discussions for general questions and community support 7 | 8 | - name: ☁️ Hetzner Cloud Docs 9 | url: https://docs.hetzner.com/cloud 10 | about: Check Hetzner Cloud documentation for provider-specific details 11 | 12 | - name: 🐧 Talos Linux Docs 13 | url: https://www.talos.dev/docs/ 14 | about: Consult the Talos Linux documentation for OS and cluster configuration topics 15 | -------------------------------------------------------------------------------- /terraform.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">=1.9.0" 3 | 4 | required_providers { 5 | talos = { 6 | source = "siderolabs/talos" 7 | version = "0.9.0" 8 | } 9 | 10 | hcloud = { 11 | source = "hetznercloud/hcloud" 12 | version = "1.57.0" 13 | } 14 | 15 | helm = { 16 | source = "hashicorp/helm" 17 | version = "~> 3.1.0" 18 | } 19 | 20 | http = { 21 | source = "hashicorp/http" 22 | version = "~> 3.5.0" 23 | } 24 | 25 | tls = { 26 | source = "hashicorp/tls" 27 | version = "~> 4.1.0" 28 | } 29 | 
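# Hedged note (comment added for clarity): in this module the random provider supplies the
# random_bytes used below for the Cilium IPsec key (cilium.tf) and the fallback hcloud CSI
# encryption passphrase (hcloud.tf).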
30 | random = { 31 | source = "hashicorp/random" 32 | version = "~>3.7.2" 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /placement_group.tf: -------------------------------------------------------------------------------- 1 | resource "hcloud_placement_group" "control_plane" { 2 | name = "${var.cluster_name}-control-plane-pg" 3 | type = "spread" 4 | 5 | labels = { 6 | cluster = var.cluster_name, 7 | role = "control-plane" 8 | } 9 | } 10 | 11 | resource "hcloud_placement_group" "worker" { 12 | for_each = merge([ 13 | for np in local.worker_nodepools : { 14 | for i in range(ceil(np.count / 10.0)) : "${var.cluster_name}-${np.name}-pg-${i + 1}" => { 15 | nodepool = np.name 16 | } 17 | } if np.placement_group && np.count > 0 18 | ]...) 19 | 20 | name = each.key 21 | type = "spread" 22 | 23 | labels = { 24 | cluster = var.cluster_name, 25 | nodepool = each.value.nodepool, 26 | role = "worker" 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: ✨ Feature Request 2 | description: Suggest an idea or improvement for the module 3 | labels: [enhancement] 4 | assignees: [] 5 | 6 | body: 7 | - type: textarea 8 | id: problem 9 | attributes: 10 | label: Problem 11 | placeholder: What problem would this feature solve? 12 | validations: 13 | required: true 14 | 15 | - type: textarea 16 | id: proposal 17 | attributes: 18 | label: Proposed Solution 19 | placeholder: How should this be implemented? 20 | validations: 21 | required: true 22 | 23 | - type: textarea 24 | id: context 25 | attributes: 26 | label: Additional Context 27 | description: Links, references, or alternatives considered 28 | 29 | - type: checkboxes 30 | id: confirmations 31 | attributes: 32 | label: Confirmation 33 | options: 34 | - label: I searched existing issues and discussions for similar requests 35 | required: true 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 VantaLabs 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /rbac.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | # Generate Kubernetes RBAC manifests 3 | rbac_manifests = concat( 4 | # Kubernetes namespaced roles 5 | [for role in var.rbac_roles : yamlencode({ 6 | apiVersion = "rbac.authorization.k8s.io/v1" 7 | kind = "Role" 8 | metadata = { 9 | name = role.name 10 | namespace = role.namespace 11 | } 12 | rules = [for rule in role.rules : { 13 | apiGroups = rule.api_groups 14 | resources = rule.resources 15 | verbs = rule.verbs 16 | }] 17 | })], 18 | # Kubernetes cluster roles 19 | [for role in var.rbac_cluster_roles : yamlencode({ 20 | apiVersion = "rbac.authorization.k8s.io/v1" 21 | kind = "ClusterRole" 22 | metadata = { 23 | name = role.name 24 | } 25 | rules = [for rule in role.rules : { 26 | apiGroups = rule.api_groups 27 | resources = rule.resources 28 | verbs = rule.verbs 29 | }] 30 | })] 31 | ) 32 | 33 | rbac_manifest = length(local.rbac_manifests) > 0 ? { 34 | name = "kube-rbac" 35 | contents = join("\n---\n", local.rbac_manifests) 36 | } : null 37 | } 38 | 39 | -------------------------------------------------------------------------------- /floating_ip.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | control_plane_public_vip_ipv4_enabled = ( 3 | local.network_public_ipv4_enabled && ( 4 | var.control_plane_public_vip_ipv4_id != null || 5 | var.control_plane_public_vip_ipv4_enabled 6 | ) 7 | ) 8 | } 9 | 10 | resource "hcloud_floating_ip" "control_plane_ipv4" { 11 | count = local.control_plane_public_vip_ipv4_enabled && var.control_plane_public_vip_ipv4_id == null ? 1 : 0 12 | 13 | name = "${var.cluster_name}-control-plane-ipv4" 14 | type = "ipv4" 15 | home_location = hcloud_server.control_plane[local.talos_primary_node_name].location 16 | description = "Control Plane Public VIP" 17 | delete_protection = var.cluster_delete_protection 18 | 19 | labels = { 20 | cluster = var.cluster_name, 21 | role = "control-plane" 22 | } 23 | } 24 | 25 | data "hcloud_floating_ip" "control_plane_ipv4" { 26 | count = local.control_plane_public_vip_ipv4_enabled ? 1 : 0 27 | 28 | id = coalesce( 29 | can(var.control_plane_public_vip_ipv4_id) ? var.control_plane_public_vip_ipv4_id : null, 30 | local.control_plane_public_vip_ipv4_enabled ? try(hcloud_floating_ip.control_plane_ipv4[0].id, null) : null 31 | ) 32 | } 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: 🐛 Bug Report 2 | description: Report a problem with the Hetzner Kubernetes Terraform module 3 | labels: [bug] 4 | 5 | body: 6 | - type: textarea 7 | id: description 8 | attributes: 9 | label: Description 10 | placeholder: | 11 | Describe the issue 12 | 13 | Steps to reproduce: 14 | 1. ... 15 | 2. ... 16 | 3. ... 
17 | validations: 18 | required: true 19 | 20 | - type: textarea 21 | id: expected 22 | attributes: 23 | label: Expected Behavior 24 | validations: 25 | required: true 26 | 27 | - type: textarea 28 | id: actual 29 | attributes: 30 | label: Actual Behavior 31 | validations: 32 | required: true 33 | 34 | - type: textarea 35 | id: config 36 | attributes: 37 | label: Minimal Module Configuration 38 | description: Show the smallest possible config that reproduces the issue (redact secrets) 39 | value: |- 40 | ```hcl 41 | module "kubernetes" { 42 | # Configuration 43 | } 44 | ``` 45 | validations: 46 | required: true 47 | 48 | - type: textarea 49 | id: logs 50 | attributes: 51 | label: Relevant Output 52 | description: terraform plan/apply errors, logs, or stack traces 53 | render: shell 54 | 55 | - type: checkboxes 56 | id: confirmations 57 | attributes: 58 | label: Confirmation 59 | options: 60 | - label: I checked existing issues, discussions, and the web for similar problems 61 | required: true 62 | -------------------------------------------------------------------------------- /packer/image_amd64.pkr.hcl: -------------------------------------------------------------------------------- 1 | variable "cluster_name" { 2 | type = string 3 | } 4 | 5 | variable "server_location" { 6 | type = string 7 | } 8 | 9 | variable "talos_version" { 10 | type = string 11 | } 12 | 13 | variable "talos_schematic_id" { 14 | type = string 15 | } 16 | 17 | variable "talos_image_url" { 18 | type = string 19 | } 20 | 21 | variable "server_type" { 22 | type = string 23 | } 24 | 25 | # Source for the Talos AMD64 image 26 | source "hcloud" "talos_amd64_image" { 27 | rescue = "linux64" 28 | image = "debian-13" 29 | location = var.server_location 30 | server_type = var.server_type 31 | ssh_username = "root" 32 | 33 | snapshot_name = "Talos Linux AMD64 for ${var.cluster_name}" 34 | snapshot_labels = { 35 | cluster = var.cluster_name, 36 | os = "talos", 37 | talos_version = var.talos_version, 38 | talos_schematic_id = substr(var.talos_schematic_id, 0, 32) 39 | } 40 | } 41 | 42 | # Build the Talos AMD64 snapshot 43 | build { 44 | sources = ["source.hcloud.talos_amd64_image"] 45 | 46 | provisioner "shell" { 47 | inline_shebang = "/bin/bash -e" 48 | 49 | inline = [ 50 | <<-EOT 51 | set -euo pipefail 52 | 53 | printf '%s\n' 'Zeroing disk first before writing Talos image' 54 | blkdiscard -v /dev/sda 2>/dev/null 55 | 56 | printf '%s\n' 'Downloading Talos ${var.talos_version} (${var.talos_schematic_id}) ...' 
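# Stream the image over IPv4 with retries, decompress it on the fly, and write it directly to /dev/sda.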
57 | wget \ 58 | --quiet \ 59 | --timeout=20 \ 60 | --waitretry=5 \ 61 | --tries=5 \ 62 | --retry-connrefused \ 63 | --inet4-only \ 64 | --output-document=- \ 65 | '${var.talos_image_url}' \ 66 | | xz -T0 -dc \ 67 | | dd of=/dev/sda bs=1M iflag=fullblock oflag=direct conv=fsync status=none 68 | 69 | printf '%s\n' 'Talos ${var.talos_version} (${var.talos_schematic_id}) downloaded' 70 | EOT 71 | ] 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /packer/image_arm64.pkr.hcl: -------------------------------------------------------------------------------- 1 | variable "cluster_name" { 2 | type = string 3 | } 4 | 5 | variable "server_location" { 6 | type = string 7 | } 8 | 9 | variable "talos_version" { 10 | type = string 11 | } 12 | 13 | variable "talos_schematic_id" { 14 | type = string 15 | } 16 | 17 | variable "talos_image_url" { 18 | type = string 19 | } 20 | 21 | variable "server_type" { 22 | type = string 23 | } 24 | 25 | # Source for the Talos ARM64 image 26 | source "hcloud" "talos_arm64_image" { 27 | rescue = "linux64" 28 | image = "debian-13" 29 | location = var.server_location 30 | server_type = var.server_type 31 | ssh_username = "root" 32 | 33 | snapshot_name = "Talos Linux ARM64 for ${var.cluster_name}" 34 | snapshot_labels = { 35 | cluster = var.cluster_name, 36 | os = "talos", 37 | talos_version = var.talos_version, 38 | talos_schematic_id = substr(var.talos_schematic_id, 0, 32) 39 | } 40 | } 41 | 42 | # Build the Talos ARM64 snapshot 43 | build { 44 | sources = ["source.hcloud.talos_arm64_image"] 45 | 46 | provisioner "shell" { 47 | inline_shebang = "/bin/bash -e" 48 | 49 | inline = [ 50 | <<-EOT 51 | set -euo pipefail 52 | 53 | printf '%s\n' 'Zeroing disk first before writing Talos image' 54 | blkdiscard -v /dev/sda 2>/dev/null 55 | 56 | printf '%s\n' 'Downloading Talos ${var.talos_version} (${var.talos_schematic_id}) ...' 57 | wget \ 58 | --quiet \ 59 | --timeout=20 \ 60 | --waitretry=5 \ 61 | --tries=5 \ 62 | --retry-connrefused \ 63 | --inet4-only \ 64 | --output-document=- \ 65 | '${var.talos_image_url}' \ 66 | | xz -T0 -dc \ 67 | | dd of=/dev/sda bs=1M iflag=fullblock oflag=direct conv=fsync status=none 68 | 69 | printf '%s\n' 'Talos ${var.talos_version} (${var.talos_schematic_id}) downloaded' 70 | EOT 71 | ] 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /longhorn.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | longhorn_namespace = var.longhorn_enabled ? { 3 | apiVersion = "v1" 4 | kind = "Namespace" 5 | metadata = { 6 | name = data.helm_template.longhorn[0].namespace 7 | labels = { 8 | "pod-security.kubernetes.io/enforce" = "privileged" 9 | "pod-security.kubernetes.io/audit" = "privileged" 10 | "pod-security.kubernetes.io/warn" = "privileged" 11 | } 12 | } 13 | } : null 14 | } 15 | 16 | data "helm_template" "longhorn" { 17 | count = var.longhorn_enabled ? 
1 : 0 18 | 19 | name = "longhorn" 20 | namespace = "longhorn-system" 21 | 22 | repository = var.longhorn_helm_repository 23 | chart = var.longhorn_helm_chart 24 | version = var.longhorn_helm_version 25 | kube_version = var.kubernetes_version 26 | 27 | disable_webhooks = true 28 | 29 | values = [ 30 | yamlencode({ 31 | # Temporary Hotfix: https://github.com/longhorn/longhorn/issues/12259 32 | image = { 33 | longhorn = { 34 | manager = { 35 | tag = "v1.10.1-hotfix-1" 36 | } 37 | } 38 | } 39 | preUpgradeChecker = { 40 | upgradeVersionCheck = false 41 | } 42 | 43 | defaultSettings = { 44 | allowCollectingLonghornUsageMetrics = false 45 | kubernetesClusterAutoscalerEnabled = local.cluster_autoscaler_enabled 46 | upgradeChecker = false 47 | } 48 | networkPolicies = { 49 | enabled = true 50 | type = "rke1" # rke1 = ingress-nginx 51 | } 52 | persistence = { 53 | defaultClass = var.longhorn_default_storage_class 54 | } 55 | }), 56 | yamlencode(var.longhorn_helm_values) 57 | ] 58 | } 59 | 60 | locals { 61 | longhorn_manifest = var.longhorn_enabled ? { 62 | name = "longhorn" 63 | contents = <<-EOF 64 | ${yamlencode(local.longhorn_namespace)} 65 | --- 66 | ${data.helm_template.longhorn[0].manifest} 67 | EOF 68 | } : null 69 | } 70 | -------------------------------------------------------------------------------- /.github/renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": [ 4 | "config:recommended" 5 | ], 6 | "separateMinorPatch": true, 7 | "packageRules": [ 8 | { 9 | "description": "Wait 3 days before raising PRs for patch updates", 10 | "matchUpdateTypes": [ 11 | "patch" 12 | ], 13 | "minimumReleaseAge": "3 days" 14 | }, 15 | { 16 | "description": "Wait 7 days before raising PRs for minor updates", 17 | "matchUpdateTypes": [ 18 | "minor" 19 | ], 20 | "minimumReleaseAge": "7 days" 21 | }, 22 | { 23 | "description": "Wait 14 days before raising PRs for major updates", 24 | "matchUpdateTypes": [ 25 | "major" 26 | ], 27 | "minimumReleaseAge": "14 days" 28 | }, 29 | { 30 | "description": "Kubernetes & Talos — block minor/major updates", 31 | "matchDatasources": [ 32 | "github-tags" 33 | ], 34 | "matchDepNames": [ 35 | "kubernetes/kubernetes", 36 | "siderolabs/talos" 37 | ], 38 | "matchUpdateTypes": [ 39 | "minor", 40 | "major" 41 | ], 42 | "enabled": false 43 | } 44 | ], 45 | "customManagers": [ 46 | { 47 | "description": "Update Helm Charts used in Terraform", 48 | "customType": "regex", 49 | "managerFilePatterns": [ 50 | "/^variables\\.tf$/" 51 | ], 52 | "matchStrings": [ 53 | "variable\\s+\"[^\"]*helm_repository\"\\s*{[^}]*default\\s*=\\s*\"(?[^\"]+)\"[^}]*?}\\s*variable\\s+\"[^\"]*helm_chart\"\\s*{[^}]*default\\s*=\\s*\"(?[^\"]+)\"[^}]*?}\\s*variable\\s+\"[^\"]*helm_version\"\\s*{[^}]*default\\s*=\\s*\"(?[^\"]+)\"[^}]*?}" 54 | ], 55 | "datasourceTemplate": "helm" 56 | }, 57 | { 58 | "description": "Update GitHub Dependencies", 59 | "customType": "regex", 60 | "managerFilePatterns": [ 61 | "/^variables\\.tf$/" 62 | ], 63 | "matchStrings": [ 64 | "variable\\s*\"[^\"]+_version\"\\s*{[^}]*\\s*default\\s*=\\s*\"(?[^\"]+)\"\\s*#\\s*https://github.com/(?[^\\s]+)[^}]*}" 65 | ], 66 | "datasourceTemplate": "github-tags" 67 | }, 68 | { 69 | "description": "Update Packer Dependencies from GitHub", 70 | "customType": "regex", 71 | "managerFilePatterns": [ 72 | "/\\.pkr\\.hcl$/" 73 | ], 74 | "matchStrings": [ 75 | 
"\\s*version\\s*=\\s*\"(?[^\"]+)\"\\s*source\\s*=\\s*\"github.com/(?[^\"]+)\"" 76 | ], 77 | "datasourceTemplate": "github-tags", 78 | "depNameTemplate": "{{{replace '^(.+)/(.+)$' '$1/packer-plugin-$2' depName}}}" 79 | } 80 | ] 81 | } 82 | -------------------------------------------------------------------------------- /metrics_server.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | metrics_server_schedule_on_control_plane = coalesce( 3 | var.metrics_server_schedule_on_control_plane, 4 | local.worker_sum == 0, 5 | false 6 | ) 7 | metrics_server_node_sum = ( 8 | local.metrics_server_schedule_on_control_plane ? 9 | local.control_plane_sum : 10 | local.worker_sum > 0 ? local.worker_sum : local.cluster_autoscaler_max_sum 11 | ) 12 | metrics_server_replicas = coalesce( 13 | var.metrics_server_replicas, 14 | local.metrics_server_node_sum > 1 ? 2 : 1 15 | ) 16 | } 17 | 18 | data "helm_template" "metrics_server" { 19 | count = var.metrics_server_enabled ? 1 : 0 20 | 21 | name = "metrics-server" 22 | namespace = "kube-system" 23 | 24 | repository = var.metrics_server_helm_repository 25 | chart = var.metrics_server_helm_chart 26 | version = var.metrics_server_helm_version 27 | kube_version = var.kubernetes_version 28 | 29 | values = [ 30 | yamlencode({ 31 | replicas = local.metrics_server_replicas 32 | podDisruptionBudget = { 33 | enabled = true 34 | minAvailable = null 35 | maxUnavailable = 1 36 | } 37 | topologySpreadConstraints = [ 38 | { 39 | topologyKey = "kubernetes.io/hostname" 40 | maxSkew = 1 41 | whenUnsatisfiable = "DoNotSchedule" 42 | labelSelector = { 43 | matchLabels = { 44 | "app.kubernetes.io/instance" = "metrics-server" 45 | "app.kubernetes.io/name" = "metrics-server" 46 | } 47 | } 48 | matchLabelKeys = ["pod-template-hash"] 49 | }, 50 | { 51 | topologyKey = "topology.kubernetes.io/zone" 52 | maxSkew = 1 53 | whenUnsatisfiable = "ScheduleAnyway" 54 | labelSelector = { 55 | matchLabels = { 56 | "app.kubernetes.io/instance" = "metrics-server" 57 | "app.kubernetes.io/name" = "metrics-server" 58 | } 59 | } 60 | matchLabelKeys = ["pod-template-hash"] 61 | } 62 | ] 63 | nodeSelector = local.metrics_server_schedule_on_control_plane ? { 64 | "node-role.kubernetes.io/control-plane" = "" 65 | } : {} 66 | tolerations = local.metrics_server_schedule_on_control_plane ? [ 67 | { 68 | key = "node-role.kubernetes.io/control-plane" 69 | effect = "NoSchedule" 70 | operator = "Exists" 71 | } 72 | ] : [] 73 | }), 74 | yamlencode(var.metrics_server_helm_values) 75 | ] 76 | } 77 | 78 | locals { 79 | metrics_server_manifest = var.metrics_server_enabled ? { 80 | name = "metrics-server" 81 | contents = data.helm_template.metrics_server[0].manifest 82 | } : null 83 | } 84 | -------------------------------------------------------------------------------- /oidc.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | # Collect all unique k8s cluster roles used across OIDC group mappings 3 | k8s_cluster_roles = var.oidc_enabled ? toset(flatten([ 4 | for group_mapping in var.oidc_group_mappings : group_mapping.cluster_roles 5 | ])) : toset([]) 6 | 7 | # Collect all unique k8s roles used across OIDC group mappings (grouped by namespace/role) 8 | k8s_roles = var.oidc_enabled ? { 9 | for role_key, role_info in merge([ 10 | for group_mapping in var.oidc_group_mappings : { 11 | for role in group_mapping.roles : "${role.namespace}/${role.name}" => role 12 | } 13 | ]...) 
: role_key => role_info 14 | } : {} 15 | 16 | # Create one ClusterRoleBinding per cluster role with all groups as subjects 17 | cluster_role_binding_manifests = [ 18 | for cluster_role in local.k8s_cluster_roles : yamlencode({ 19 | apiVersion = "rbac.authorization.k8s.io/v1" 20 | kind = "ClusterRoleBinding" 21 | metadata = { 22 | name = "oidc-${cluster_role}" 23 | } 24 | roleRef = { 25 | apiGroup = "rbac.authorization.k8s.io" 26 | kind = "ClusterRole" 27 | name = cluster_role 28 | } 29 | subjects = [ 30 | for group_mapping in var.oidc_group_mappings : { 31 | apiGroup = "rbac.authorization.k8s.io" 32 | kind = "Group" 33 | name = "${var.oidc_groups_prefix}${group_mapping.group}" 34 | } 35 | if contains(group_mapping.cluster_roles, cluster_role) 36 | ] 37 | }) 38 | if length([ 39 | for group_mapping in var.oidc_group_mappings : group_mapping 40 | if contains(group_mapping.cluster_roles, cluster_role) 41 | ]) > 0 42 | ] 43 | 44 | # Create one RoleBinding per role with all groups as subjects 45 | role_binding_manifests = [ 46 | for role_key, role_info in local.k8s_roles : yamlencode({ 47 | apiVersion = "rbac.authorization.k8s.io/v1" 48 | kind = "RoleBinding" 49 | metadata = { 50 | name = "oidc-${role_info.name}" 51 | namespace = role_info.namespace 52 | } 53 | roleRef = { 54 | apiGroup = "rbac.authorization.k8s.io" 55 | kind = "Role" 56 | name = role_info.name 57 | } 58 | subjects = [ 59 | for group_mapping in var.oidc_group_mappings : { 60 | apiGroup = "rbac.authorization.k8s.io" 61 | kind = "Group" 62 | name = "${var.oidc_groups_prefix}${group_mapping.group}" 63 | } 64 | if contains([for role in group_mapping.roles : "${role.namespace}/${role.name}"], role_key) 65 | ] 66 | }) 67 | if length([ 68 | for group_mapping in var.oidc_group_mappings : group_mapping 69 | if contains([for role in group_mapping.roles : "${role.namespace}/${role.name}"], role_key) 70 | ]) > 0 71 | ] 72 | 73 | # Combine all OIDC manifests 74 | oidc_manifests = var.oidc_enabled ? concat( 75 | local.cluster_role_binding_manifests, 76 | local.role_binding_manifests 77 | ) : [] 78 | 79 | # Final manifest 80 | oidc_manifest = length(local.oidc_manifests) > 0 ? { 81 | name = "kube-oidc-rbac" 82 | contents = join("\n---\n", local.oidc_manifests) 83 | } : null 84 | } 85 | -------------------------------------------------------------------------------- /nodepool.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | control_plane_nodepools = [ 3 | for np in var.control_plane_nodepools : { 4 | name = np.name, 5 | location = np.location, 6 | server_type = np.type, 7 | backups = np.backups, 8 | keep_disk = np.keep_disk, 9 | rdns_ipv4 = var.talos_public_ipv4_enabled ? ( 10 | np.rdns_ipv4 != null ? np.rdns_ipv4 : 11 | np.rdns != null ? np.rdns : 12 | local.cluster_rdns_ipv4 13 | ) : null, 14 | rdns_ipv6 = var.talos_public_ipv6_enabled ? ( 15 | np.rdns_ipv6 != null ? np.rdns_ipv6 : 16 | np.rdns != null ? np.rdns : 17 | local.cluster_rdns_ipv6 18 | ) : null, 19 | labels = merge( 20 | np.labels, 21 | { nodepool = np.name } 22 | ), 23 | annotations = np.annotations, 24 | taints = concat( 25 | [for taint in np.taints : regex( 26 | "^(?P[^=:]+)=?(?P[^=:]*?):(?P.+)$", 27 | taint 28 | )], 29 | local.talos_allow_scheduling_on_control_planes ? 
[] : [ 30 | { key = "node-role.kubernetes.io/control-plane", value = "", effect = "NoSchedule" } 31 | ] 32 | ), 33 | count = np.count, 34 | } 35 | ] 36 | 37 | worker_nodepools = [ 38 | for np in var.worker_nodepools : { 39 | name = np.name, 40 | location = np.location, 41 | server_type = np.type, 42 | backups = np.backups, 43 | keep_disk = np.keep_disk, 44 | rdns_ipv4 = var.talos_public_ipv4_enabled ? ( 45 | np.rdns_ipv4 != null ? np.rdns_ipv4 : 46 | np.rdns != null ? np.rdns : 47 | local.cluster_rdns_ipv4 48 | ) : null, 49 | rdns_ipv6 = var.talos_public_ipv6_enabled ? ( 50 | np.rdns_ipv6 != null ? np.rdns_ipv6 : 51 | np.rdns != null ? np.rdns : 52 | local.cluster_rdns_ipv6 53 | ) : null, 54 | labels = merge( 55 | np.labels, 56 | { nodepool = np.name } 57 | ), 58 | annotations = np.annotations, 59 | taints = [for taint in np.taints : regex( 60 | "^(?P[^=:]+)=?(?P[^=:]*?):(?P.+)$", 61 | taint 62 | )], 63 | count = np.count, 64 | placement_group = np.placement_group 65 | } 66 | ] 67 | 68 | cluster_autoscaler_nodepools = [ 69 | for np in var.cluster_autoscaler_nodepools : { 70 | name = np.name, 71 | location = np.location, 72 | server_type = np.type, 73 | labels = merge( 74 | np.labels, 75 | { nodepool = np.name } 76 | ), 77 | annotations = np.annotations, 78 | taints = [for taint in np.taints : regex( 79 | "^(?P[^=:]+)=?(?P[^=:]*?):(?P.+)$", 80 | taint 81 | )], 82 | min = np.min, 83 | max = np.max 84 | } 85 | ] 86 | 87 | control_plane_nodepools_map = { for np in local.control_plane_nodepools : np.name => np } 88 | worker_nodepools_map = { for np in local.worker_nodepools : np.name => np } 89 | cluster_autoscaler_nodepools_map = { for np in local.cluster_autoscaler_nodepools : np.name => np } 90 | 91 | control_plane_sum = sum(concat( 92 | [for np in local.control_plane_nodepools : np.count], [0] 93 | )) 94 | worker_sum = sum(concat( 95 | [for np in local.worker_nodepools : np.count if length(np.taints) == 0], [0] 96 | )) 97 | cluster_autoscaler_min_sum = sum(concat( 98 | [for np in local.cluster_autoscaler_nodepools : np.min if length(np.taints) == 0], [0] 99 | )) 100 | cluster_autoscaler_max_sum = sum(concat( 101 | [for np in local.cluster_autoscaler_nodepools : np.max if length(np.taints) == 0], [0] 102 | )) 103 | } 104 | -------------------------------------------------------------------------------- /outputs.tf: -------------------------------------------------------------------------------- 1 | output "talosconfig" { 2 | description = "Raw Talos OS configuration file used for cluster access and management." 3 | value = local.talosconfig 4 | sensitive = true 5 | } 6 | 7 | output "kubeconfig" { 8 | description = "Raw kubeconfig file for authenticating with the Kubernetes cluster." 9 | value = local.kubeconfig 10 | sensitive = true 11 | } 12 | 13 | output "kubeconfig_data" { 14 | description = "Structured kubeconfig data, suitable for use with other Terraform providers or tools." 15 | value = local.kubeconfig_data 16 | sensitive = true 17 | } 18 | 19 | output "talosconfig_data" { 20 | description = "Structured Talos configuration data, suitable for use with other Terraform providers or tools." 21 | value = local.talosconfig_data 22 | sensitive = true 23 | } 24 | 25 | output "talos_client_configuration" { 26 | description = "Detailed configuration data for the Talos client." 27 | value = data.talos_client_configuration.this 28 | } 29 | 30 | output "talos_machine_configurations_control_plane" { 31 | description = "Talos machine configurations for all control plane nodes." 
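# Marked sensitive: generated Talos machine configurations embed cluster certificates and tokens.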
32 | value = data.talos_machine_configuration.control_plane 33 | sensitive = true 34 | } 35 | 36 | output "talos_machine_configurations_worker" { 37 | description = "Talos machine configurations for all worker nodes." 38 | value = data.talos_machine_configuration.worker 39 | sensitive = true 40 | } 41 | 42 | output "control_plane_private_ipv4_list" { 43 | description = "List of private IPv4 addresses assigned to control plane nodes." 44 | value = local.control_plane_private_ipv4_list 45 | } 46 | 47 | output "control_plane_public_ipv4_list" { 48 | description = "List of public IPv4 addresses assigned to control plane nodes." 49 | value = local.control_plane_public_ipv4_list 50 | } 51 | 52 | output "control_plane_public_ipv6_list" { 53 | description = "List of public IPv6 addresses assigned to control plane nodes." 54 | value = local.control_plane_public_ipv6_list 55 | } 56 | 57 | output "worker_private_ipv4_list" { 58 | description = "List of private IPv4 addresses assigned to worker nodes." 59 | value = local.worker_private_ipv4_list 60 | } 61 | 62 | output "worker_public_ipv4_list" { 63 | description = "List of public IPv4 addresses assigned to worker nodes." 64 | value = local.worker_public_ipv4_list 65 | } 66 | 67 | output "worker_public_ipv6_list" { 68 | description = "List of public IPv6 addresses assigned to worker nodes." 69 | value = local.worker_public_ipv6_list 70 | } 71 | 72 | output "cilium_encryption_info" { 73 | description = "Cilium traffic encryption settings, including current state and IPsec details if enabled." 74 | value = { 75 | encryption_enabled = var.cilium_encryption_enabled 76 | encryption_type = var.cilium_encryption_type 77 | 78 | ipsec = local.cilium_ipsec_enabled ? { 79 | current_key_id = var.cilium_ipsec_key_id 80 | next_key_id = local.cilium_ipsec_key_config["next_id"] 81 | algorithm = var.cilium_ipsec_algorithm 82 | key_size_bits = var.cilium_ipsec_key_size 83 | secret_name = local.cilium_ipsec_keys_manifest.metadata["name"] 84 | namespace = local.cilium_ipsec_keys_manifest.metadata["namespace"] 85 | } : {} 86 | } 87 | } 88 | 89 | output "kube_api_load_balancer" { 90 | description = "Details about the Kubernetes API load balancer" 91 | value = var.kube_api_load_balancer_enabled ? { 92 | id = hcloud_load_balancer.kube_api[0].id 93 | name = local.kube_api_load_balancer_name 94 | public_ipv4 = local.kube_api_load_balancer_public_ipv4 95 | public_ipv6 = local.kube_api_load_balancer_public_ipv6 96 | private_ipv4 = local.kube_api_load_balancer_private_ipv4 97 | } : null 98 | } 99 | -------------------------------------------------------------------------------- /cert_manager.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | cert_manager_namespace = var.cert_manager_enabled ? { 3 | apiVersion = "v1" 4 | kind = "Namespace" 5 | metadata = { 6 | name = data.helm_template.cert_manager[0].namespace 7 | } 8 | } : null 9 | 10 | cert_manager_values = { 11 | replicaCount = local.control_plane_sum > 1 ? 
2 : 1 12 | podDisruptionBudget = { 13 | enabled = true 14 | minAvailable = null 15 | maxUnavailable = 1 16 | } 17 | topologySpreadConstraints = [ 18 | { 19 | topologyKey = "kubernetes.io/hostname" 20 | maxSkew = 1 21 | whenUnsatisfiable = "DoNotSchedule" 22 | labelSelector = { 23 | matchLabels = { 24 | "app.kubernetes.io/instance" = "cert-manager" 25 | "app.kubernetes.io/component" = "controller" 26 | } 27 | } 28 | matchLabelKeys = ["pod-template-hash"] 29 | } 30 | ], 31 | nodeSelector = { "node-role.kubernetes.io/control-plane" : "" } 32 | tolerations = [ 33 | { 34 | key = "node-role.kubernetes.io/control-plane" 35 | effect = "NoSchedule" 36 | operator = "Exists" 37 | } 38 | ] 39 | } 40 | } 41 | 42 | data "helm_template" "cert_manager" { 43 | count = var.cert_manager_enabled ? 1 : 0 44 | 45 | name = "cert-manager" 46 | namespace = "cert-manager" 47 | 48 | repository = var.cert_manager_helm_repository 49 | chart = var.cert_manager_helm_chart 50 | version = var.cert_manager_helm_version 51 | kube_version = var.kubernetes_version 52 | 53 | values = [ 54 | yamlencode( 55 | merge( 56 | { 57 | crds = { enabled = true } 58 | startupapicheck = { enabled = false } 59 | config = { 60 | featureGates = { 61 | # Disable the use of Exact PathType in Ingress resources, to work around a bug in ingress-nginx 62 | # https://github.com/kubernetes/ingress-nginx/issues/11176 63 | ACMEHTTP01IngressPathTypeExact = !var.ingress_nginx_enabled 64 | } 65 | } 66 | }, 67 | local.cert_manager_values, 68 | { 69 | webhook = merge( 70 | local.cert_manager_values, 71 | { 72 | topologySpreadConstraints = [ 73 | for constraint in local.cert_manager_values.topologySpreadConstraints : 74 | merge( 75 | constraint, 76 | { 77 | labelSelector = { 78 | matchLabels = { 79 | "app.kubernetes.io/instance" = "cert-manager" 80 | "app.kubernetes.io/component" = "webhook" 81 | } 82 | } 83 | } 84 | ) 85 | ] 86 | } 87 | ) 88 | cainjector = merge( 89 | local.cert_manager_values, 90 | { 91 | topologySpreadConstraints = [ 92 | for constraint in local.cert_manager_values.topologySpreadConstraints : 93 | merge( 94 | constraint, 95 | { 96 | labelSelector = { 97 | matchLabels = { 98 | "app.kubernetes.io/instance" = "cert-manager" 99 | "app.kubernetes.io/component" = "cainjector" 100 | } 101 | } 102 | } 103 | ) 104 | ] 105 | } 106 | ) 107 | } 108 | ) 109 | ), 110 | yamlencode(var.cert_manager_helm_values) 111 | ] 112 | } 113 | 114 | locals { 115 | cert_manager_manifest = var.cert_manager_enabled ? { 116 | name = "cert-manager" 117 | contents = <<-EOF 118 | ${yamlencode(local.cert_manager_namespace)} 119 | --- 120 | ${data.helm_template.cert_manager[0].manifest} 121 | EOF 122 | } : null 123 | } 124 | -------------------------------------------------------------------------------- /firewall.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | firewall_external = var.firewall_id != null 3 | 4 | firewall_kube_api_source = ( 5 | var.firewall_kube_api_source != null ? 6 | var.firewall_kube_api_source : 7 | var.firewall_api_source 8 | ) 9 | firewall_talos_api_source = ( 10 | var.firewall_talos_api_source != null ? 
11 | var.firewall_talos_api_source : 12 | var.firewall_api_source 13 | ) 14 | 15 | firewall_use_current_ipv4 = !local.firewall_external && local.network_public_ipv4_enabled && coalesce( 16 | var.firewall_use_current_ipv4, 17 | var.cluster_access == "public" && local.firewall_kube_api_source == null && local.firewall_talos_api_source == null 18 | ) 19 | firewall_use_current_ipv6 = !local.firewall_external && local.network_public_ipv6_enabled && coalesce( 20 | var.firewall_use_current_ipv6, 21 | var.cluster_access == "public" && local.firewall_kube_api_source == null && local.firewall_talos_api_source == null 22 | ) 23 | 24 | current_ip = concat( 25 | local.firewall_use_current_ipv4 ? ["${chomp(data.http.current_ipv4[0].response_body)}/32"] : [], 26 | local.firewall_use_current_ipv6 ? ( 27 | strcontains(data.http.current_ipv6[0].response_body, ":") ? 28 | [cidrsubnet("${chomp(data.http.current_ipv6[0].response_body)}/64", 0, 0)] : 29 | [] 30 | ) : [] 31 | ) 32 | 33 | firewall_kube_api_sources = distinct(compact(concat( 34 | coalesce(local.firewall_kube_api_source, []), 35 | coalesce(local.current_ip, []) 36 | ))) 37 | firewall_talos_api_sources = distinct(compact(concat( 38 | coalesce(local.firewall_talos_api_source, []), 39 | coalesce(local.current_ip, []) 40 | ))) 41 | 42 | firewall_default_rules = concat( 43 | length(local.firewall_kube_api_sources) > 0 ? [ 44 | { 45 | description = "Allow Incoming Requests to Kube API" 46 | direction = "in" 47 | source_ips = local.firewall_kube_api_sources 48 | protocol = "tcp" 49 | port = local.kube_api_port 50 | } 51 | ] : [], 52 | length(local.firewall_talos_api_sources) > 0 ? [ 53 | { 54 | description = "Allow Incoming Requests to Talos API" 55 | direction = "in" 56 | source_ips = local.firewall_talos_api_sources 57 | protocol = "tcp" 58 | port = local.talos_api_port 59 | } 60 | ] : [], 61 | ) 62 | 63 | firewall_rules = { 64 | for rule in local.firewall_default_rules : 65 | format("%s-%s-%s", 66 | lookup(rule, "direction", "null"), 67 | lookup(rule, "protocol", "null"), 68 | lookup(rule, "port", "null") 69 | ) => rule 70 | } 71 | firewall_extra_rules = { 72 | for rule in var.firewall_extra_rules : 73 | format("%s-%s-%s", 74 | lookup(rule, "direction", "null"), 75 | lookup(rule, "protocol", "null"), 76 | coalesce(lookup(rule, "port", "null"), "null") 77 | ) => rule 78 | } 79 | 80 | firewall_rules_list = values( 81 | merge(local.firewall_extra_rules, local.firewall_rules) 82 | ) 83 | 84 | firewall_id = local.firewall_external ? var.firewall_id : hcloud_firewall.this[0].id 85 | } 86 | 87 | data "http" "current_ipv4" { 88 | count = local.firewall_use_current_ipv4 ? 1 : 0 89 | url = "https://ipv4.icanhazip.com" 90 | 91 | retry { 92 | attempts = 10 93 | min_delay_ms = 1000 94 | max_delay_ms = 1000 95 | } 96 | 97 | lifecycle { 98 | postcondition { 99 | condition = contains([200], self.status_code) 100 | error_message = "HTTP status code invalid" 101 | } 102 | } 103 | } 104 | 105 | data "http" "current_ipv6" { 106 | count = local.firewall_use_current_ipv6 ? 1 : 0 107 | url = "https://${var.firewall_use_current_ipv6 == true ? "ipv6." : ""}icanhazip.com" 108 | 109 | retry { 110 | attempts = 10 111 | min_delay_ms = 1000 112 | max_delay_ms = 1000 113 | } 114 | 115 | lifecycle { 116 | postcondition { 117 | condition = contains([200], self.status_code) 118 | error_message = "HTTP status code invalid" 119 | } 120 | } 121 | } 122 | 123 | resource "hcloud_firewall" "this" { 124 | count = local.firewall_external ? 
0 : 1 125 | name = var.cluster_name 126 | 127 | dynamic "rule" { 128 | for_each = local.firewall_rules_list 129 | //noinspection HILUnresolvedReference 130 | content { 131 | description = rule.value.description 132 | direction = rule.value.direction 133 | source_ips = lookup(rule.value, "source_ips", []) 134 | destination_ips = lookup(rule.value, "destination_ips", []) 135 | protocol = rule.value.protocol 136 | port = lookup(rule.value, "port", null) 137 | } 138 | } 139 | 140 | labels = { 141 | cluster = var.cluster_name 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /autoscaler.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | cluster_autoscaler_enabled = length(local.cluster_autoscaler_nodepools) > 0 3 | cluster_autoscaler_hostname_pattern = "^${var.cluster_name}-(${join("|", distinct([for np in local.cluster_autoscaler_nodepools : np.name]))})-[0-9a-f]+$" 4 | 5 | cluster_autoscaler_release_name = "cluster-autoscaler" 6 | cluster_autoscaler_cloud_provider = "hetzner" 7 | cluster_autoscaler_config_secret_name = "${local.cluster_autoscaler_release_name}-${local.cluster_autoscaler_cloud_provider}-config" 8 | 9 | cluster_autoscaler_cluster_config_manifest = local.cluster_autoscaler_enabled ? { 10 | apiVersion = "v1" 11 | kind = "Secret" 12 | type = "Opaque" 13 | metadata = { 14 | name = local.cluster_autoscaler_config_secret_name 15 | namespace = "kube-system" 16 | } 17 | data = { 18 | cluster-config = base64encode(jsonencode( 19 | { 20 | imagesForArch = { 21 | arm64 = local.image_label_selector, 22 | amd64 = local.image_label_selector 23 | }, 24 | nodeConfigs = { 25 | for nodepool in local.cluster_autoscaler_nodepools : "${var.cluster_name}-${nodepool.name}" => { 26 | cloudInit = data.talos_machine_configuration.cluster_autoscaler[nodepool.name].machine_configuration, 27 | labels = nodepool.labels 28 | taints = nodepool.taints 29 | } 30 | } 31 | } 32 | )) 33 | } 34 | } : null 35 | } 36 | 37 | data "helm_template" "cluster_autoscaler" { 38 | count = local.cluster_autoscaler_enabled ? 1 : 0 39 | 40 | name = local.cluster_autoscaler_release_name 41 | namespace = "kube-system" 42 | 43 | repository = var.cluster_autoscaler_helm_repository 44 | chart = var.cluster_autoscaler_helm_chart 45 | version = var.cluster_autoscaler_helm_version 46 | kube_version = var.kubernetes_version 47 | 48 | values = [ 49 | yamlencode({ 50 | cloudProvider = local.cluster_autoscaler_cloud_provider 51 | replicaCount = local.control_plane_sum > 1 ? 
2 : 1 52 | podDisruptionBudget = { 53 | minAvailable = null 54 | maxUnavailable = 1 55 | } 56 | topologySpreadConstraints = [ 57 | { 58 | topologyKey = "kubernetes.io/hostname" 59 | maxSkew = 1 60 | whenUnsatisfiable = "DoNotSchedule" 61 | labelSelector = { 62 | matchLabels = { 63 | "app.kubernetes.io/instance" = local.cluster_autoscaler_release_name 64 | "app.kubernetes.io/name" = "${local.cluster_autoscaler_cloud_provider}-${var.cluster_autoscaler_helm_chart}" 65 | } 66 | } 67 | matchLabelKeys = ["pod-template-hash"] 68 | } 69 | ] 70 | nodeSelector = { "node-role.kubernetes.io/control-plane" : "" } 71 | tolerations = [ 72 | { 73 | key = "node-role.kubernetes.io/control-plane" 74 | effect = "NoSchedule" 75 | operator = "Exists" 76 | } 77 | ] 78 | autoscalingGroups = [ 79 | for np in local.cluster_autoscaler_nodepools : { 80 | name = "${var.cluster_name}-${np.name}" 81 | minSize = np.min 82 | maxSize = np.max 83 | instanceType = np.server_type 84 | region = np.location 85 | } 86 | ] 87 | extraEnv = { 88 | HCLOUD_CLUSTER_CONFIG_FILE = "/config/cluster-config" 89 | HCLOUD_SERVER_CREATION_TIMEOUT = "10" 90 | HCLOUD_FIREWALL = tostring(local.firewall_id) 91 | HCLOUD_SSH_KEY = tostring(hcloud_ssh_key.this.id) 92 | HCLOUD_PUBLIC_IPV4 = tostring(var.talos_public_ipv4_enabled) 93 | HCLOUD_PUBLIC_IPV6 = tostring(var.talos_public_ipv6_enabled) 94 | HCLOUD_NETWORK = tostring(hcloud_network_subnet.autoscaler.network_id) 95 | } 96 | extraEnvSecrets = { 97 | HCLOUD_TOKEN = { 98 | name = "hcloud" 99 | key = "token" 100 | } 101 | } 102 | extraVolumeSecrets = { 103 | "${local.cluster_autoscaler_config_secret_name}" = { 104 | name = local.cluster_autoscaler_config_secret_name 105 | mountPath = "/config" 106 | } 107 | } 108 | }), 109 | yamlencode(var.cluster_autoscaler_helm_values) 110 | ] 111 | 112 | depends_on = [ 113 | terraform_data.amd64_image, 114 | terraform_data.arm64_image, 115 | ] 116 | } 117 | 118 | locals { 119 | cluster_autoscaler_manifest = local.cluster_autoscaler_enabled ? { 120 | name = "cluster-autoscaler" 121 | contents = <<-EOF 122 | ${data.helm_template.cluster_autoscaler[0].manifest} 123 | --- 124 | ${yamlencode(local.cluster_autoscaler_cluster_config_manifest)} 125 | EOF 126 | } : null 127 | } 128 | -------------------------------------------------------------------------------- /talos_backup.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | talos_backup_s3_hcloud = var.talos_backup_s3_hcloud_url != null ? regex("^(?:https?://)?(?P[^.]+)\\.(?P[^.]+)\\.your-objectstorage\\.com\\.?$", var.talos_backup_s3_hcloud_url) : {} 3 | talos_backup_s3_bucket = var.talos_backup_s3_hcloud_url != null ? local.talos_backup_s3_hcloud.bucket : var.talos_backup_s3_bucket 4 | talos_backup_s3_region = var.talos_backup_s3_hcloud_url != null ? local.talos_backup_s3_hcloud.region : var.talos_backup_s3_region 5 | talos_backup_s3_endpoint = var.talos_backup_s3_hcloud_url != null ? 
"https://${local.talos_backup_s3_region}.your-objectstorage.com" : var.talos_backup_s3_endpoint 6 | 7 | talos_backup_service_account = { 8 | apiVersion = "talos.dev/v1alpha1" 9 | kind = "ServiceAccount" 10 | metadata = { 11 | name = "talos-backup-secrets" 12 | namespace = "kube-system" 13 | } 14 | spec = { 15 | roles = [ 16 | "os:etcd:backup" 17 | ] 18 | } 19 | } 20 | 21 | talos_backup_s3_secrets = { 22 | apiVersion = "v1" 23 | kind = "Secret" 24 | metadata = { 25 | name = "talos-backup-s3-secrets" 26 | namespace = "kube-system" 27 | } 28 | type = "Opaque" 29 | data = { 30 | access_key = base64encode(var.talos_backup_s3_access_key) 31 | secret_key = base64encode(var.talos_backup_s3_secret_key) 32 | } 33 | } 34 | 35 | talos_backup_cronjob = { 36 | apiVersion = "batch/v1" 37 | kind = "CronJob" 38 | metadata = { 39 | name = "talos-backup" 40 | namespace = "kube-system" 41 | } 42 | spec = { 43 | schedule = var.talos_backup_schedule 44 | suspend = local.talos_backup_s3_bucket == null 45 | concurrencyPolicy = "Forbid" 46 | jobTemplate = { 47 | spec = { 48 | template = { 49 | spec = { 50 | containers = [{ 51 | name = "talos-backup" 52 | image = "ghcr.io/siderolabs/talos-backup:${var.talos_backup_version}" 53 | workingDir = "/tmp" 54 | imagePullPolicy = "IfNotPresent" 55 | env = [ 56 | { name = "AWS_ACCESS_KEY_ID", valueFrom = { secretKeyRef = { name = "talos-backup-s3-secrets", key = "access_key" } } }, 57 | { name = "AWS_SECRET_ACCESS_KEY", valueFrom = { secretKeyRef = { name = "talos-backup-s3-secrets", key = "secret_key" } } }, 58 | { name = "AGE_X25519_PUBLIC_KEY", value = var.talos_backup_age_x25519_public_key }, 59 | { name = "DISABLE_ENCRYPTION", value = tostring(var.talos_backup_age_x25519_public_key == null) }, 60 | { name = "AWS_REGION", value = local.talos_backup_s3_region }, 61 | { name = "CUSTOM_S3_ENDPOINT", value = local.talos_backup_s3_endpoint }, 62 | { name = "BUCKET", value = local.talos_backup_s3_bucket }, 63 | { name = "CLUSTER_NAME", value = var.cluster_name }, 64 | { name = "S3_PREFIX", value = var.talos_backup_s3_prefix }, 65 | { name = "USE_PATH_STYLE", value = tostring(var.talos_backup_s3_path_style) }, 66 | { name = "ENABLE_COMPRESSION", value = tostring(var.talos_backup_enable_compression) } 67 | ] 68 | volumeMounts = [ 69 | { name = "tmp", mountPath = "/tmp" }, 70 | { name = "talos-secrets", mountPath = "/var/run/secrets/talos.dev" } 71 | ] 72 | resources = { 73 | requests = { memory = "128Mi", cpu = "250m" } 74 | limits = { memory = "256Mi", cpu = "500m" } 75 | } 76 | securityContext = { 77 | runAsUser = 1000 78 | runAsGroup = 1000 79 | allowPrivilegeEscalation = false 80 | runAsNonRoot = true 81 | capabilities = { drop = ["ALL"] } 82 | seccompProfile = { type = "RuntimeDefault" } 83 | } 84 | }] 85 | restartPolicy = "OnFailure" 86 | volumes = [ 87 | { emptyDir = {}, name = "tmp" }, 88 | { name = "talos-secrets", secret = { secretName = "talos-backup-secrets" } } 89 | ] 90 | tolerations = [ 91 | { key = "node-role.kubernetes.io/control-plane", operator = "Exists", effect = "NoSchedule" } 92 | ] 93 | } 94 | } 95 | } 96 | } 97 | } 98 | } 99 | 100 | talos_backup_manifest = var.talos_backup_s3_enabled ? 
{ 101 | name = "talos-backup" 102 | contents = <<-EOF 103 | ${yamlencode(local.talos_backup_service_account)} 104 | --- 105 | ${yamlencode(local.talos_backup_s3_secrets)} 106 | --- 107 | ${yamlencode(local.talos_backup_cronjob)} 108 | EOF 109 | } : null 110 | } 111 | -------------------------------------------------------------------------------- /hcloud.tf: -------------------------------------------------------------------------------- 1 | # Hcloud Secret 2 | locals { 3 | hcloud_secret_manifest = { 4 | name = "hcloud-secret" 5 | contents = yamlencode({ 6 | apiVersion = "v1" 7 | kind = "Secret" 8 | type = "Opaque" 9 | metadata = { 10 | name = "hcloud" 11 | namespace = "kube-system" 12 | } 13 | data = { 14 | network = base64encode(local.hcloud_network_id) 15 | token = base64encode(var.hcloud_token) 16 | } 17 | }) 18 | } 19 | } 20 | 21 | # Hcloud CCM 22 | data "helm_template" "hcloud_ccm" { 23 | name = "hcloud-cloud-controller-manager" 24 | namespace = "kube-system" 25 | 26 | repository = var.hcloud_ccm_helm_repository 27 | chart = var.hcloud_ccm_helm_chart 28 | version = var.hcloud_ccm_helm_version 29 | kube_version = var.kubernetes_version 30 | 31 | values = [ 32 | yamlencode({ 33 | kind = "DaemonSet" 34 | nodeSelector = { "node-role.kubernetes.io/control-plane" : "" } 35 | networking = { 36 | enabled = true 37 | clusterCIDR = local.network_pod_ipv4_cidr 38 | } 39 | env = { 40 | HCLOUD_LOAD_BALANCERS_ENABLED = { value = tostring(var.hcloud_ccm_load_balancers_enabled) } 41 | HCLOUD_LOAD_BALANCERS_USE_PRIVATE_IP = { value = "true" } 42 | HCLOUD_LOAD_BALANCERS_DISABLE_PRIVATE_INGRESS = { value = "true" } 43 | HCLOUD_LOAD_BALANCERS_LOCATION = { value = local.hcloud_load_balancer_location } 44 | HCLOUD_NETWORK_ROUTES_ENABLED = { value = tostring(var.hcloud_ccm_network_routes_enabled) } 45 | } 46 | }), 47 | yamlencode(var.hcloud_ccm_helm_values) 48 | ] 49 | } 50 | 51 | locals { 52 | hcloud_ccm_manifest = var.hcloud_ccm_enabled ? { 53 | name = "hcloud-ccm" 54 | contents = data.helm_template.hcloud_ccm.manifest 55 | } : null 56 | } 57 | 58 | # Hcloud CSI 59 | resource "random_bytes" "hcloud_csi_encryption_key" { 60 | count = var.hcloud_csi_enabled ? 1 : 0 61 | length = 32 62 | } 63 | 64 | locals { 65 | hcloud_csi_secret_manifest = var.hcloud_csi_enabled ? { 66 | apiVersion = "v1" 67 | kind = "Secret" 68 | type = "Opaque" 69 | metadata = { 70 | name = "hcloud-csi-secret" 71 | namespace = "kube-system" 72 | } 73 | data = { 74 | encryption-passphrase = ( 75 | var.hcloud_csi_encryption_passphrase != null ? 76 | base64encode(var.hcloud_csi_encryption_passphrase) : 77 | base64encode(random_bytes.hcloud_csi_encryption_key[0].hex) 78 | ) 79 | } 80 | } : null 81 | 82 | hcloud_csi_storage_classes = [ 83 | for class in var.hcloud_csi_storage_classes : { 84 | name = class.name 85 | reclaimPolicy = class.reclaimPolicy 86 | defaultStorageClass = class.defaultStorageClass 87 | 88 | extraParameters = merge( 89 | class.encrypted ? { 90 | "csi.storage.k8s.io/node-publish-secret-name" = "hcloud-csi-secret" 91 | "csi.storage.k8s.io/node-publish-secret-namespace" = "kube-system" 92 | } : {}, 93 | class.extraParameters 94 | ) 95 | } 96 | ] 97 | } 98 | 99 | data "helm_template" "hcloud_csi" { 100 | count = var.hcloud_csi_enabled ? 
1 : 0 101 | 102 | name = "hcloud-csi" 103 | namespace = "kube-system" 104 | 105 | repository = var.hcloud_csi_helm_repository 106 | chart = var.hcloud_csi_helm_chart 107 | version = var.hcloud_csi_helm_version 108 | kube_version = var.kubernetes_version 109 | 110 | values = [ 111 | yamlencode({ 112 | controller = { 113 | replicaCount = local.control_plane_sum > 1 ? 2 : 1 114 | podDisruptionBudget = { 115 | create = true 116 | minAvailable = null 117 | maxUnavailable = "1" 118 | } 119 | topologySpreadConstraints = [ 120 | { 121 | topologyKey = "kubernetes.io/hostname" 122 | maxSkew = 1 123 | whenUnsatisfiable = "DoNotSchedule" 124 | labelSelector = { 125 | matchLabels = { 126 | "app.kubernetes.io/name" = "hcloud-csi" 127 | "app.kubernetes.io/instance" = "hcloud-csi" 128 | "app.kubernetes.io/component" = "controller" 129 | } 130 | } 131 | matchLabelKeys = ["pod-template-hash"] 132 | } 133 | ] 134 | nodeSelector = { "node-role.kubernetes.io/control-plane" : "" } 135 | tolerations = [ 136 | { 137 | key = "node-role.kubernetes.io/control-plane" 138 | effect = "NoSchedule" 139 | operator = "Exists" 140 | } 141 | ] 142 | volumeExtraLabels = var.hcloud_csi_volume_extra_labels 143 | } 144 | storageClasses = local.hcloud_csi_storage_classes 145 | }), 146 | yamlencode(var.hcloud_csi_helm_values) 147 | ] 148 | } 149 | 150 | locals { 151 | hcloud_csi_manifest = var.hcloud_csi_enabled ? { 152 | name = "hcloud-csi" 153 | contents = <<-EOF 154 | ${yamlencode(local.hcloud_csi_secret_manifest)} 155 | --- 156 | ${data.helm_template.hcloud_csi[0].manifest} 157 | EOF 158 | } : null 159 | } 160 | -------------------------------------------------------------------------------- /cilium.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | # Cilium IPSec Configuration 3 | cilium_ipsec_enabled = var.cilium_encryption_enabled && var.cilium_encryption_type == "ipsec" 4 | 5 | # Key configuration when IPSec is enabled 6 | cilium_ipsec_key_config = local.cilium_ipsec_enabled ? { 7 | next_id = var.cilium_ipsec_key_id % 15 + 1 8 | format = "${var.cilium_ipsec_key_id}+ ${var.cilium_ipsec_algorithm} ${random_bytes.cilium_ipsec_key[0].hex} 128" 9 | } : null 10 | 11 | # Kubernetes Secret manifest 12 | cilium_ipsec_keys_manifest = local.cilium_ipsec_enabled ? { 13 | apiVersion = "v1" 14 | kind = "Secret" 15 | type = "Opaque" 16 | 17 | metadata = { 18 | name = "cilium-ipsec-keys" 19 | namespace = "kube-system" 20 | 21 | annotations = { 22 | "cilium.io/key-id" = tostring(var.cilium_ipsec_key_id) 23 | "cilium.io/key-algorithm" = var.cilium_ipsec_algorithm 24 | "cilium.io/key-size" = tostring(var.cilium_ipsec_key_size) 25 | } 26 | } 27 | 28 | data = { 29 | keys = base64encode(local.cilium_ipsec_key_config.format) 30 | } 31 | } : null 32 | } 33 | 34 | # Generate random key when IPSec is enabled 35 | resource "random_bytes" "cilium_ipsec_key" { 36 | count = local.cilium_ipsec_enabled ? 
1 : 0 37 | length = ((var.cilium_ipsec_key_size / 8) + 4) # AES Key + 4 bytes salt 38 | 39 | # Keepers to force regeneration when key_id changes 40 | keepers = { 41 | key_id = var.cilium_ipsec_key_id 42 | } 43 | } 44 | 45 | data "helm_template" "cilium" { 46 | name = "cilium" 47 | namespace = "kube-system" 48 | 49 | repository = var.cilium_helm_repository 50 | chart = var.cilium_helm_chart 51 | version = var.cilium_helm_version 52 | kube_version = var.kubernetes_version 53 | 54 | values = [ 55 | yamlencode({ 56 | ipam = { 57 | mode = "kubernetes" 58 | } 59 | routingMode = var.cilium_routing_mode 60 | ipv4NativeRoutingCIDR = local.network_native_routing_ipv4_cidr 61 | bpf = { 62 | masquerade = var.cilium_kube_proxy_replacement_enabled 63 | datapathMode = var.cilium_bpf_datapath_mode 64 | hostLegacyRouting = local.cilium_ipsec_enabled 65 | } 66 | encryption = { 67 | enabled = var.cilium_encryption_enabled 68 | type = var.cilium_encryption_type 69 | } 70 | k8s = { 71 | requireIPv4PodCIDR = true 72 | } 73 | k8sServiceHost = local.kube_prism_host 74 | k8sServicePort = local.kube_prism_port 75 | kubeProxyReplacement = var.cilium_kube_proxy_replacement_enabled 76 | kubeProxyReplacementHealthzBindAddr = var.cilium_kube_proxy_replacement_enabled ? "0.0.0.0:10256" : "" 77 | installNoConntrackIptablesRules = var.cilium_kube_proxy_replacement_enabled && var.cilium_routing_mode == "native" 78 | socketLB = { 79 | hostNamespaceOnly = var.cilium_socket_lb_host_namespace_only_enabled 80 | } 81 | cgroup = { 82 | autoMount = { enabled = false } 83 | hostRoot = "/sys/fs/cgroup" 84 | } 85 | securityContext = { 86 | capabilities = { 87 | ciliumAgent = ["CHOWN", "KILL", "NET_ADMIN", "NET_RAW", "IPC_LOCK", "SYS_ADMIN", "SYS_RESOURCE", "DAC_OVERRIDE", "FOWNER", "SETGID", "SETUID"] 88 | cleanCiliumState = ["NET_ADMIN", "SYS_ADMIN", "SYS_RESOURCE"] 89 | } 90 | } 91 | dnsProxy = { 92 | enableTransparentMode = true 93 | } 94 | egressGateway = { 95 | enabled = var.cilium_egress_gateway_enabled 96 | } 97 | loadBalancer = { 98 | acceleration = "native" 99 | } 100 | hubble = { 101 | enabled = var.cilium_hubble_enabled 102 | relay = { enabled = var.cilium_hubble_relay_enabled } 103 | ui = { enabled = var.cilium_hubble_ui_enabled } 104 | peerService = { 105 | clusterDomain = var.cluster_domain 106 | } 107 | } 108 | prometheus = { 109 | enabled = true 110 | serviceMonitor = { 111 | enabled = var.cilium_service_monitor_enabled 112 | trustCRDsExist = var.cilium_service_monitor_enabled 113 | interval = "15s" 114 | } 115 | } 116 | operator = { 117 | nodeSelector = { "node-role.kubernetes.io/control-plane" : "" } 118 | replicas = local.control_plane_sum > 1 ? 2 : 1 119 | podDisruptionBudget = { 120 | enabled = true 121 | minAvailable = null 122 | maxUnavailable = 1 123 | } 124 | topologySpreadConstraints = [ 125 | { 126 | topologyKey = "kubernetes.io/hostname" 127 | maxSkew = 1 128 | whenUnsatisfiable = "DoNotSchedule" 129 | labelSelector = { 130 | matchLabels = { 131 | "app.kubernetes.io/name" = "cilium-operator" 132 | } 133 | } 134 | matchLabelKeys = ["pod-template-hash"] 135 | } 136 | ] 137 | prometheus = { 138 | enabled = true 139 | serviceMonitor = { 140 | enabled = var.cilium_service_monitor_enabled 141 | interval = "15s" 142 | } 143 | } 144 | } 145 | }), 146 | yamlencode(var.cilium_helm_values) 147 | ] 148 | } 149 | 150 | locals { 151 | cilium_manifest = var.cilium_enabled ? 
{ 152 | name = "cilium" 153 | contents = <<-EOF 154 | ${yamlencode(local.cilium_ipsec_keys_manifest)} 155 | --- 156 | ${data.helm_template.cilium.manifest} 157 | EOF 158 | } : null 159 | } 160 | -------------------------------------------------------------------------------- /network.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | network_public_ipv4_enabled = var.talos_public_ipv4_enabled 3 | network_public_ipv6_enabled = var.talos_public_ipv6_enabled && var.talos_ipv6_enabled 4 | 5 | hcloud_network_id = length(data.hcloud_network.this) > 0 ? data.hcloud_network.this[0].id : hcloud_network.this[0].id 6 | hcloud_network_zone = data.hcloud_location.this.network_zone 7 | 8 | # Network ranges 9 | network_ipv4_cidr = length(data.hcloud_network.this) > 0 ? data.hcloud_network.this[0].ip_range : var.network_ipv4_cidr 10 | network_node_ipv4_cidr = coalesce(var.network_node_ipv4_cidr, cidrsubnet(local.network_ipv4_cidr, 3, 2)) 11 | network_service_ipv4_cidr = coalesce(var.network_service_ipv4_cidr, cidrsubnet(local.network_ipv4_cidr, 3, 3)) 12 | network_pod_ipv4_cidr = coalesce(var.network_pod_ipv4_cidr, cidrsubnet(local.network_ipv4_cidr, 1, 1)) 13 | network_native_routing_ipv4_cidr = coalesce(var.network_native_routing_ipv4_cidr, local.network_ipv4_cidr) 14 | 15 | network_node_ipv4_cidr_skip_first_subnet = cidrhost(local.network_ipv4_cidr, 0) == cidrhost(local.network_node_ipv4_cidr, 0) 16 | network_ipv4_gateway = cidrhost(local.network_ipv4_cidr, 1) 17 | 18 | # Subnet mask sizes 19 | network_pod_ipv4_subnet_mask_size = 24 20 | network_node_ipv4_subnet_mask_size = coalesce( 21 | var.network_node_ipv4_subnet_mask_size, 22 | 32 - (local.network_pod_ipv4_subnet_mask_size - split("/", local.network_pod_ipv4_cidr)[1]) 23 | ) 24 | 25 | # Lists for control plane nodes 26 | control_plane_public_ipv4_list = compact(distinct([for server in hcloud_server.control_plane : server.ipv4_address])) 27 | control_plane_public_ipv6_list = compact(distinct([for server in hcloud_server.control_plane : server.ipv6_address])) 28 | control_plane_private_ipv4_list = compact(distinct([for server in hcloud_server.control_plane : tolist(server.network)[0].ip])) 29 | 30 | # Control plane VIPs 31 | control_plane_public_vip_ipv4 = local.control_plane_public_vip_ipv4_enabled ? 
data.hcloud_floating_ip.control_plane_ipv4[0].ip_address : null 32 | control_plane_private_vip_ipv4 = cidrhost(hcloud_network_subnet.control_plane.ip_range, -2) 33 | 34 | # Lists for worker nodes 35 | worker_public_ipv4_list = compact(distinct([for server in hcloud_server.worker : server.ipv4_address])) 36 | worker_public_ipv6_list = compact(distinct([for server in hcloud_server.worker : server.ipv6_address])) 37 | worker_private_ipv4_list = compact(distinct([for server in hcloud_server.worker : tolist(server.network)[0].ip])) 38 | 39 | # Lists for cluster autoscaler nodes 40 | cluster_autoscaler_public_ipv4_list = compact(distinct([for server in local.talos_discovery_cluster_autoscaler : server.public_ipv4_address])) 41 | cluster_autoscaler_public_ipv6_list = compact(distinct([for server in local.talos_discovery_cluster_autoscaler : server.public_ipv6_address])) 42 | cluster_autoscaler_private_ipv4_list = compact(distinct([for server in local.talos_discovery_cluster_autoscaler : server.private_ipv4_address])) 43 | } 44 | 45 | data "hcloud_location" "this" { 46 | name = local.control_plane_nodepools[0].location 47 | } 48 | 49 | data "hcloud_network" "this" { 50 | count = var.hcloud_network != null || var.hcloud_network_id != null ? 1 : 0 51 | 52 | id = var.hcloud_network != null ? var.hcloud_network.id : var.hcloud_network_id 53 | } 54 | 55 | resource "hcloud_network" "this" { 56 | count = length(data.hcloud_network.this) > 0 ? 0 : 1 57 | 58 | name = var.cluster_name 59 | ip_range = local.network_ipv4_cidr 60 | delete_protection = var.cluster_delete_protection 61 | 62 | labels = { 63 | cluster = var.cluster_name 64 | } 65 | } 66 | 67 | resource "hcloud_network_subnet" "control_plane" { 68 | network_id = local.hcloud_network_id 69 | type = "cloud" 70 | network_zone = local.hcloud_network_zone 71 | 72 | ip_range = cidrsubnet( 73 | local.network_node_ipv4_cidr, 74 | local.network_node_ipv4_subnet_mask_size - split("/", local.network_node_ipv4_cidr)[1], 75 | 0 + (local.network_node_ipv4_cidr_skip_first_subnet ? 1 : 0) 76 | ) 77 | } 78 | 79 | resource "hcloud_network_subnet" "load_balancer" { 80 | network_id = local.hcloud_network_id 81 | type = "cloud" 82 | network_zone = local.hcloud_network_zone 83 | 84 | ip_range = cidrsubnet( 85 | local.network_node_ipv4_cidr, 86 | local.network_node_ipv4_subnet_mask_size - split("/", local.network_node_ipv4_cidr)[1], 87 | 1 + (local.network_node_ipv4_cidr_skip_first_subnet ? 1 : 0) 88 | ) 89 | } 90 | 91 | resource "hcloud_network_subnet" "worker" { 92 | for_each = { for np in local.worker_nodepools : np.name => np } 93 | 94 | network_id = local.hcloud_network_id 95 | type = "cloud" 96 | network_zone = local.hcloud_network_zone 97 | 98 | ip_range = cidrsubnet( 99 | local.network_node_ipv4_cidr, 100 | local.network_node_ipv4_subnet_mask_size - split("/", local.network_node_ipv4_cidr)[1], 101 | 2 + (local.network_node_ipv4_cidr_skip_first_subnet ? 
1 : 0) + index(local.worker_nodepools, each.value) 102 | ) 103 | } 104 | 105 | resource "hcloud_network_subnet" "autoscaler" { 106 | network_id = local.hcloud_network_id 107 | type = "cloud" 108 | network_zone = local.hcloud_network_zone 109 | 110 | ip_range = cidrsubnet( 111 | local.network_node_ipv4_cidr, 112 | local.network_node_ipv4_subnet_mask_size - split("/", local.network_node_ipv4_cidr)[1], 113 | pow(2, local.network_node_ipv4_subnet_mask_size - split("/", local.network_node_ipv4_cidr)[1]) - 1 114 | ) 115 | 116 | depends_on = [ 117 | hcloud_network_subnet.control_plane, 118 | hcloud_network_subnet.load_balancer, 119 | hcloud_network_subnet.worker 120 | ] 121 | } 122 | -------------------------------------------------------------------------------- /ingress_nginx.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | ingress_nginx_namespace = var.ingress_nginx_enabled ? { 3 | apiVersion = "v1" 4 | kind = "Namespace" 5 | metadata = { 6 | name = data.helm_template.ingress_nginx[0].namespace 7 | } 8 | } : null 9 | 10 | ingress_nginx_replicas = coalesce( 11 | var.ingress_nginx_replicas, 12 | local.worker_sum < 3 ? 2 : 3 13 | ) 14 | 15 | ingress_nginx_service_load_balancer_required = ( 16 | var.ingress_nginx_enabled && 17 | length(var.ingress_load_balancer_pools) == 0 18 | ) 19 | ingress_nginx_service_type = ( 20 | local.ingress_nginx_service_load_balancer_required ? 21 | "LoadBalancer" : 22 | "NodePort" 23 | ) 24 | ingress_nginx_service_node_port_http = 30000 25 | ingress_nginx_service_node_port_https = 30001 26 | } 27 | 28 | data "helm_template" "ingress_nginx" { 29 | count = var.ingress_nginx_enabled ? 1 : 0 30 | 31 | name = "ingress-nginx" 32 | namespace = "ingress-nginx" 33 | 34 | repository = var.ingress_nginx_helm_repository 35 | chart = var.ingress_nginx_helm_chart 36 | version = var.ingress_nginx_helm_version 37 | kube_version = var.kubernetes_version 38 | 39 | values = [ 40 | yamlencode({ 41 | controller = { 42 | admissionWebhooks = { 43 | certManager = { 44 | enabled = true 45 | } 46 | } 47 | kind = var.ingress_nginx_kind 48 | replicaCount = local.ingress_nginx_replicas 49 | minAvailable = null 50 | maxUnavailable = 1 51 | topologySpreadConstraints = var.ingress_nginx_kind == "Deployment" ? [ 52 | { 53 | topologyKey = "kubernetes.io/hostname" 54 | maxSkew = 1 55 | whenUnsatisfiable = local.worker_sum > 1 ? "DoNotSchedule" : "ScheduleAnyway" 56 | labelSelector = { 57 | matchLabels = { 58 | "app.kubernetes.io/instance" = "ingress-nginx" 59 | "app.kubernetes.io/name" = "ingress-nginx" 60 | "app.kubernetes.io/component" = "controller" 61 | } 62 | } 63 | matchLabelKeys = ["pod-template-hash"] 64 | }, 65 | { 66 | topologyKey = "topology.kubernetes.io/zone" 67 | maxSkew = 1 68 | whenUnsatisfiable = "ScheduleAnyway" 69 | labelSelector = { 70 | matchLabels = { 71 | "app.kubernetes.io/instance" = "ingress-nginx" 72 | "app.kubernetes.io/name" = "ingress-nginx" 73 | "app.kubernetes.io/component" = "controller" 74 | } 75 | } 76 | matchLabelKeys = ["pod-template-hash"] 77 | } 78 | ] : [] 79 | enableTopologyAwareRouting = var.ingress_nginx_topology_aware_routing 80 | watchIngressWithoutClass = true 81 | service = merge( 82 | { 83 | type = local.ingress_nginx_service_type 84 | externalTrafficPolicy = var.ingress_nginx_service_external_traffic_policy 85 | }, 86 | local.ingress_nginx_service_type == "NodePort" ? 
87 | { 88 | nodePorts = { 89 | http = local.ingress_nginx_service_node_port_http 90 | https = local.ingress_nginx_service_node_port_https 91 | } 92 | } : {}, 93 | local.ingress_nginx_service_type == "LoadBalancer" ? 94 | { 95 | annotations = { 96 | "load-balancer.hetzner.cloud/algorithm-type" = var.ingress_load_balancer_algorithm 97 | "load-balancer.hetzner.cloud/disable-private-ingress" = true 98 | "load-balancer.hetzner.cloud/disable-public-network" = !var.ingress_load_balancer_public_network_enabled 99 | "load-balancer.hetzner.cloud/health-check-interval" = "${var.ingress_load_balancer_health_check_interval}s" 100 | "load-balancer.hetzner.cloud/health-check-retries" = var.ingress_load_balancer_health_check_retries 101 | "load-balancer.hetzner.cloud/health-check-timeout" = "${var.ingress_load_balancer_health_check_timeout}s" 102 | "load-balancer.hetzner.cloud/hostname" = local.ingress_service_load_balancer_hostname 103 | "load-balancer.hetzner.cloud/ipv6-disabled" = false 104 | "load-balancer.hetzner.cloud/location" = local.ingress_service_load_balancer_location 105 | "load-balancer.hetzner.cloud/name" = local.ingress_service_load_balancer_name 106 | "load-balancer.hetzner.cloud/type" = var.ingress_load_balancer_type 107 | "load-balancer.hetzner.cloud/use-private-ip" = true 108 | "load-balancer.hetzner.cloud/uses-proxyprotocol" = true 109 | } 110 | } : {} 111 | ) 112 | config = merge( 113 | { 114 | proxy-real-ip-cidr = ( 115 | var.ingress_nginx_service_external_traffic_policy == "Local" ? 116 | hcloud_network_subnet.load_balancer.ip_range : 117 | local.network_node_ipv4_cidr 118 | ) 119 | compute-full-forwarded-for = true 120 | use-proxy-protocol = true 121 | }, 122 | var.ingress_nginx_config 123 | ) 124 | networkPolicy = { 125 | enabled = true 126 | } 127 | } 128 | }), 129 | yamlencode(var.ingress_nginx_helm_values) 130 | ] 131 | 132 | depends_on = [hcloud_load_balancer_network.ingress] 133 | } 134 | 135 | locals { 136 | ingress_nginx_manifest = var.ingress_nginx_enabled ? 
{ 137 | name = "ingress-nginx" 138 | contents = <<-EOF 139 | ${yamlencode(local.ingress_nginx_namespace)} 140 | --- 141 | ${data.helm_template.ingress_nginx[0].manifest} 142 | EOF 143 | } : null 144 | } 145 | -------------------------------------------------------------------------------- /image.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | talos_schematic_id = coalesce(var.talos_schematic_id, talos_image_factory_schematic.this[0].id) 3 | 4 | talos_installer_image_url = data.talos_image_factory_urls.amd64.urls.installer 5 | talos_amd64_image_url = data.talos_image_factory_urls.amd64.urls.disk_image 6 | talos_arm64_image_url = data.talos_image_factory_urls.arm64.urls.disk_image 7 | 8 | amd64_image_required = anytrue([ 9 | for np in concat( 10 | local.control_plane_nodepools, 11 | local.worker_nodepools, 12 | local.cluster_autoscaler_nodepools 13 | ) : substr(np.server_type, 0, 3) != "cax" 14 | ]) 15 | arm64_image_required = anytrue([ 16 | for np in concat( 17 | local.control_plane_nodepools, 18 | local.worker_nodepools, 19 | local.cluster_autoscaler_nodepools 20 | ) : substr(np.server_type, 0, 3) == "cax" 21 | ]) 22 | 23 | image_label_selector = join(",", 24 | [ 25 | "os=talos", 26 | "cluster=${var.cluster_name}", 27 | "talos_version=${var.talos_version}", 28 | "talos_schematic_id=${substr(local.talos_schematic_id, 0, 32)}" 29 | ] 30 | ) 31 | 32 | talos_image_extentions_longhorn = [ 33 | "siderolabs/iscsi-tools", 34 | "siderolabs/util-linux-tools" 35 | ] 36 | 37 | talos_image_extensions = distinct( 38 | concat( 39 | ["siderolabs/qemu-guest-agent"], 40 | var.talos_image_extensions, 41 | var.longhorn_enabled ? local.talos_image_extentions_longhorn : [] 42 | ) 43 | ) 44 | } 45 | 46 | data "talos_image_factory_extensions_versions" "this" { 47 | count = var.talos_schematic_id == null ? 1 : 0 48 | 49 | talos_version = var.talos_version 50 | filters = { 51 | names = local.talos_image_extensions 52 | } 53 | } 54 | 55 | resource "talos_image_factory_schematic" "this" { 56 | count = var.talos_schematic_id == null ? 1 : 0 57 | 58 | schematic = yamlencode( 59 | { 60 | customization = { 61 | extraKernelArgs = var.talos_extra_kernel_args 62 | systemExtensions = { 63 | officialExtensions = ( 64 | length(local.talos_image_extensions) > 0 ? 65 | data.talos_image_factory_extensions_versions.this[0].extensions_info.*.name : 66 | [] 67 | ) 68 | } 69 | } 70 | } 71 | ) 72 | } 73 | 74 | data "talos_image_factory_urls" "amd64" { 75 | talos_version = var.talos_version 76 | schematic_id = local.talos_schematic_id 77 | platform = "hcloud" 78 | architecture = "amd64" 79 | } 80 | 81 | data "talos_image_factory_urls" "arm64" { 82 | talos_version = var.talos_version 83 | schematic_id = local.talos_schematic_id 84 | platform = "hcloud" 85 | architecture = "arm64" 86 | } 87 | 88 | data "hcloud_images" "amd64" { 89 | count = local.amd64_image_required ? 1 : 0 90 | 91 | with_selector = local.image_label_selector 92 | with_architecture = ["x86"] 93 | most_recent = true 94 | } 95 | 96 | data "hcloud_images" "arm64" { 97 | count = local.arm64_image_required ? 
1 : 0 98 | 99 | with_selector = local.image_label_selector 100 | with_architecture = ["arm"] 101 | most_recent = true 102 | } 103 | 104 | resource "terraform_data" "packer_init" { 105 | triggers_replace = [ 106 | "${sha1(file("${path.module}/packer/requirements.pkr.hcl"))}", 107 | var.cluster_name, 108 | var.talos_version, 109 | local.talos_schematic_id, 110 | local.amd64_image_required, 111 | local.arm64_image_required 112 | ] 113 | 114 | provisioner "local-exec" { 115 | when = create 116 | quiet = true 117 | working_dir = "${path.module}/packer/" 118 | command = "packer init -upgrade requirements.pkr.hcl" 119 | } 120 | 121 | depends_on = [data.external.client_prerequisites_check] 122 | } 123 | 124 | resource "terraform_data" "amd64_image" { 125 | count = local.amd64_image_required ? 1 : 0 126 | 127 | triggers_replace = [ 128 | var.cluster_name, 129 | var.talos_version, 130 | local.talos_schematic_id 131 | ] 132 | 133 | provisioner "local-exec" { 134 | when = create 135 | quiet = true 136 | working_dir = "${path.module}/packer/" 137 | command = join(" ", 138 | [ 139 | "${length(data.hcloud_images.amd64[0].images) > 0} ||", 140 | "packer build -force", 141 | "-var 'cluster_name=${var.cluster_name}'", 142 | "-var 'server_type=${var.packer_amd64_builder.server_type}'", 143 | "-var 'server_location=${var.packer_amd64_builder.server_location}'", 144 | "-var 'talos_version=${var.talos_version}'", 145 | "-var 'talos_schematic_id=${local.talos_schematic_id}'", 146 | "-var 'talos_image_url=${local.talos_amd64_image_url}'", 147 | "image_amd64.pkr.hcl" 148 | ] 149 | ) 150 | environment = { 151 | HCLOUD_TOKEN = nonsensitive(var.hcloud_token) 152 | } 153 | } 154 | 155 | depends_on = [ 156 | data.external.client_prerequisites_check, 157 | terraform_data.packer_init 158 | ] 159 | } 160 | 161 | resource "terraform_data" "arm64_image" { 162 | count = local.arm64_image_required ? 1 : 0 163 | 164 | triggers_replace = [ 165 | var.cluster_name, 166 | var.talos_version, 167 | local.talos_schematic_id 168 | ] 169 | 170 | provisioner "local-exec" { 171 | when = create 172 | quiet = true 173 | working_dir = "${path.module}/packer/" 174 | command = join(" ", 175 | [ 176 | "${length(data.hcloud_images.arm64[0].images) > 0} ||", 177 | "packer build -force", 178 | "-var 'cluster_name=${var.cluster_name}'", 179 | "-var 'server_type=${var.packer_arm64_builder.server_type}'", 180 | "-var 'server_location=${var.packer_arm64_builder.server_location}'", 181 | "-var 'talos_version=${var.talos_version}'", 182 | "-var 'talos_schematic_id=${local.talos_schematic_id}'", 183 | "-var 'talos_image_url=${local.talos_arm64_image_url}'", 184 | "image_arm64.pkr.hcl" 185 | ] 186 | ) 187 | environment = { 188 | HCLOUD_TOKEN = nonsensitive(var.hcloud_token) 189 | } 190 | } 191 | 192 | depends_on = [ 193 | data.external.client_prerequisites_check, 194 | terraform_data.packer_init 195 | ] 196 | } 197 | 198 | data "hcloud_image" "amd64" { 199 | count = local.amd64_image_required ? 1 : 0 200 | 201 | with_selector = local.image_label_selector 202 | with_architecture = "x86" 203 | most_recent = true 204 | 205 | depends_on = [terraform_data.amd64_image] 206 | } 207 | 208 | data "hcloud_image" "arm64" { 209 | count = local.arm64_image_required ? 
1 : 0 210 | 211 | with_selector = local.image_label_selector 212 | with_architecture = "arm" 213 | most_recent = true 214 | 215 | depends_on = [terraform_data.arm64_image] 216 | } 217 | -------------------------------------------------------------------------------- /client.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | kubeconfig = replace( 3 | talos_cluster_kubeconfig.this.kubeconfig_raw, 4 | "/(\\s+server:).*/", 5 | "$1 ${local.kube_api_url_external}" 6 | ) 7 | talosconfig = data.talos_client_configuration.this.talos_config 8 | 9 | kubeconfig_data = { 10 | name = var.cluster_name 11 | server = local.kube_api_url_external 12 | ca = base64decode(talos_cluster_kubeconfig.this.kubernetes_client_configuration.ca_certificate) 13 | cert = base64decode(talos_cluster_kubeconfig.this.kubernetes_client_configuration.client_certificate) 14 | key = base64decode(talos_cluster_kubeconfig.this.kubernetes_client_configuration.client_key) 15 | } 16 | 17 | talosconfig_data = { 18 | name = data.talos_client_configuration.this.cluster_name 19 | endpoints = data.talos_client_configuration.this.endpoints 20 | ca = base64decode(data.talos_client_configuration.this.client_configuration.ca_certificate) 21 | cert = base64decode(data.talos_client_configuration.this.client_configuration.client_certificate) 22 | key = base64decode(data.talos_client_configuration.this.client_configuration.client_key) 23 | } 24 | } 25 | 26 | data "talos_client_configuration" "this" { 27 | cluster_name = var.cluster_name 28 | client_configuration = talos_machine_secrets.this.client_configuration 29 | endpoints = local.talos_endpoints 30 | nodes = [local.talos_primary_node_private_ipv4] 31 | } 32 | 33 | resource "talos_cluster_kubeconfig" "this" { 34 | client_configuration = talos_machine_secrets.this.client_configuration 35 | node = local.talos_primary_endpoint 36 | 37 | depends_on = [talos_machine_configuration_apply.control_plane] 38 | } 39 | 40 | resource "terraform_data" "create_talosconfig" { 41 | count = var.cluster_talosconfig_path != null ? 1 : 0 42 | 43 | triggers_replace = [ 44 | nonsensitive(sha1(local.talosconfig)), 45 | var.cluster_talosconfig_path 46 | ] 47 | 48 | input = { 49 | cluster_talosconfig_path = var.cluster_talosconfig_path 50 | } 51 | 52 | provisioner "local-exec" { 53 | when = create 54 | quiet = true 55 | command = <<-EOT 56 | set -eu 57 | 58 | printf '%s' "$TALOSCONFIG_CONTENT" > "$CLUSTER_TALOSCONFIG_PATH" 59 | EOT 60 | environment = { 61 | TALOSCONFIG_CONTENT = local.talosconfig 62 | CLUSTER_TALOSCONFIG_PATH = var.cluster_talosconfig_path 63 | } 64 | } 65 | 66 | provisioner "local-exec" { 67 | when = destroy 68 | quiet = true 69 | on_failure = continue 70 | command = <<-EOT 71 | set -eu 72 | 73 | if [ -f "$CLUSTER_TALOSCONFIG_PATH" ]; then 74 | cp -f "$CLUSTER_TALOSCONFIG_PATH" "$CLUSTER_TALOSCONFIG_PATH.bak" 75 | fi 76 | EOT 77 | environment = { 78 | CLUSTER_TALOSCONFIG_PATH = self.input.cluster_talosconfig_path 79 | } 80 | } 81 | 82 | depends_on = [talos_machine_configuration_apply.control_plane] 83 | } 84 | 85 | resource "terraform_data" "create_kubeconfig" { 86 | count = var.cluster_kubeconfig_path != null ? 
1 : 0 87 | 88 | triggers_replace = [ 89 | nonsensitive(sha1(local.kubeconfig)), 90 | var.cluster_kubeconfig_path 91 | ] 92 | 93 | input = { 94 | cluster_kubeconfig_path = var.cluster_kubeconfig_path 95 | } 96 | 97 | provisioner "local-exec" { 98 | when = create 99 | quiet = true 100 | command = <<-EOT 101 | set -eu 102 | 103 | printf '%s' "$KUBECONFIG_CONTENT" > "$CLUSTER_KUBECONFIG_PATH" 104 | EOT 105 | environment = { 106 | KUBECONFIG_CONTENT = local.kubeconfig 107 | CLUSTER_KUBECONFIG_PATH = var.cluster_kubeconfig_path 108 | } 109 | } 110 | 111 | provisioner "local-exec" { 112 | when = destroy 113 | quiet = true 114 | on_failure = continue 115 | command = <<-EOT 116 | set -eu 117 | 118 | if [ -f "$CLUSTER_KUBECONFIG_PATH" ]; then 119 | cp -f "$CLUSTER_KUBECONFIG_PATH" "$CLUSTER_KUBECONFIG_PATH.bak" 120 | fi 121 | EOT 122 | environment = { 123 | CLUSTER_KUBECONFIG_PATH = self.input.cluster_kubeconfig_path 124 | } 125 | } 126 | 127 | depends_on = [talos_machine_configuration_apply.control_plane] 128 | } 129 | 130 | data "external" "client_prerequisites_check" { 131 | count = var.client_prerequisites_check_enabled ? 1 : 0 132 | 133 | program = [ 134 | "sh", "-c", <<-EOT 135 | set -eu 136 | 137 | missing=0 138 | 139 | if ! command -v packer >/dev/null 2>&1; then 140 | printf '\n%s' ' - packer is not installed or not in PATH. Install it at https://developer.hashicorp.com/packer/install' >&2 141 | missing=1 142 | fi 143 | 144 | if ! command -v jq >/dev/null 2>&1; then 145 | printf '\n%s' ' - jq is not installed or not in PATH. Install it at https://jqlang.org/download/' >&2 146 | missing=1 147 | fi 148 | 149 | if ! command -v talosctl >/dev/null 2>&1; then 150 | printf '\n%s' ' - talosctl is not installed or not in PATH. Install it at https://www.talos.dev/latest/talos-guides/install/talosctl' >&2 151 | missing=1 152 | fi 153 | 154 | printf '%s' '{}' 155 | exit "$missing" 156 | EOT 157 | ] 158 | } 159 | 160 | data "external" "talosctl_version_check" { 161 | count = var.talosctl_version_check_enabled ? 
1 : 0 162 | 163 | program = [ 164 | "sh", "-c", <<-EOT 165 | set -eu 166 | 167 | parse() { 168 | case $1 in 169 | *[vV][0-9]*.[0-9]*.[0-9]*) 170 | v=$${1##*[vV]} 171 | maj=$${v%%.*} 172 | r=$${v#*.} 173 | min=$${r%%.*} 174 | patch=$${r#*.} 175 | patch=$${patch%%[!0-9]*} 176 | printf '%s %s %s\n' "$maj" "$min" "$patch" 177 | return 0 178 | ;; 179 | esac 180 | return 1 181 | } 182 | 183 | parsed_version=$( 184 | talosctl version --client --short | 185 | while IFS= read -r line; do 186 | if out=$(parse "$line"); then 187 | printf '%s\n' "$out" 188 | break 189 | fi 190 | done 191 | ) 192 | 193 | if [ -z "$parsed_version" ]; then 194 | printf '%s\n' "Could not parse talosctl client version" >&2 195 | exit 1 196 | fi 197 | 198 | set -- $parsed_version; major=$1; minor=$2; patch=$3 199 | if [ "$major" -lt "${local.talos_version_major}" ] || 200 | { [ "$major" -eq "${local.talos_version_major}" ] && [ "$minor" -lt "${local.talos_version_minor}" ]; } || 201 | { [ "$major" -eq "${local.talos_version_major}" ] && [ "$minor" -eq "${local.talos_version_minor}" ] && [ "$patch" -lt "${local.talos_version_patch}" ]; } 202 | then 203 | printf '%s\n' "talosctl version ($major.$minor.$patch) is lower than Talos target version: ${local.talos_version_major}.${local.talos_version_minor}.${local.talos_version_patch}" >&2 204 | exit 1 205 | fi 206 | 207 | printf '%s' "{\"talosctl_version\": \"$major.$minor.$patch\"}" 208 | EOT 209 | ] 210 | 211 | depends_on = [data.external.client_prerequisites_check] 212 | } 213 | -------------------------------------------------------------------------------- /server.tf: -------------------------------------------------------------------------------- 1 | resource "hcloud_server" "control_plane" { 2 | for_each = merge([ 3 | for np_index in range(length(local.control_plane_nodepools)) : { 4 | for cp_index in range(local.control_plane_nodepools[np_index].count) : "${var.cluster_name}-${local.control_plane_nodepools[np_index].name}-${cp_index + 1}" => { 5 | server_type = local.control_plane_nodepools[np_index].server_type, 6 | location = local.control_plane_nodepools[np_index].location, 7 | backups = local.control_plane_nodepools[np_index].backups, 8 | keep_disk = local.control_plane_nodepools[np_index].keep_disk, 9 | labels = local.control_plane_nodepools[np_index].labels, 10 | placement_group_id = hcloud_placement_group.control_plane.id, 11 | subnet = hcloud_network_subnet.control_plane, 12 | ipv4_private = cidrhost( 13 | hcloud_network_subnet.control_plane.ip_range, 14 | np_index * 10 + cp_index + 1 15 | ) 16 | } 17 | } 18 | ]...) 19 | 20 | name = each.key 21 | image = substr(each.value.server_type, 0, 3) == "cax" ? 
data.hcloud_image.arm64[0].id : data.hcloud_image.amd64[0].id 22 | server_type = each.value.server_type 23 | location = each.value.location 24 | placement_group_id = each.value.placement_group_id 25 | backups = each.value.backups 26 | keep_disk = each.value.keep_disk 27 | ssh_keys = [hcloud_ssh_key.this.id] 28 | shutdown_before_deletion = true 29 | delete_protection = var.cluster_delete_protection 30 | rebuild_protection = var.cluster_delete_protection 31 | 32 | labels = merge( 33 | each.value.labels, 34 | { 35 | cluster = var.cluster_name, 36 | role = "control-plane" 37 | } 38 | ) 39 | 40 | firewall_ids = [local.firewall_id] 41 | 42 | public_net { 43 | ipv4_enabled = var.talos_public_ipv4_enabled 44 | ipv6_enabled = var.talos_public_ipv6_enabled 45 | } 46 | 47 | network { 48 | network_id = each.value.subnet.network_id 49 | ip = each.value.ipv4_private 50 | alias_ips = [] 51 | } 52 | 53 | depends_on = [ 54 | hcloud_network_subnet.control_plane, 55 | hcloud_placement_group.control_plane 56 | ] 57 | 58 | lifecycle { 59 | ignore_changes = [ 60 | image, 61 | user_data, 62 | network, 63 | ssh_keys 64 | ] 65 | } 66 | } 67 | 68 | resource "hcloud_server" "worker" { 69 | for_each = merge([ 70 | for np_index in range(length(local.worker_nodepools)) : { 71 | for wkr_index in range(local.worker_nodepools[np_index].count) : "${var.cluster_name}-${local.worker_nodepools[np_index].name}-${wkr_index + 1}" => { 72 | server_type = local.worker_nodepools[np_index].server_type, 73 | location = local.worker_nodepools[np_index].location, 74 | backups = local.worker_nodepools[np_index].backups, 75 | keep_disk = local.worker_nodepools[np_index].keep_disk, 76 | labels = local.worker_nodepools[np_index].labels, 77 | placement_group_id = local.worker_nodepools[np_index].placement_group ? hcloud_placement_group.worker["${var.cluster_name}-${local.worker_nodepools[np_index].name}-pg-${ceil((wkr_index + 1) / 10.0)}"].id : null, 78 | subnet = hcloud_network_subnet.worker[local.worker_nodepools[np_index].name], 79 | ipv4_private = cidrhost(hcloud_network_subnet.worker[local.worker_nodepools[np_index].name].ip_range, wkr_index + 1) 80 | } 81 | } 82 | ]...) 83 | 84 | name = each.key 85 | image = substr(each.value.server_type, 0, 3) == "cax" ? 
data.hcloud_image.arm64[0].id : data.hcloud_image.amd64[0].id 86 | server_type = each.value.server_type 87 | location = each.value.location 88 | placement_group_id = each.value.placement_group_id 89 | backups = each.value.backups 90 | keep_disk = each.value.keep_disk 91 | ssh_keys = [hcloud_ssh_key.this.id] 92 | shutdown_before_deletion = true 93 | delete_protection = var.cluster_delete_protection 94 | rebuild_protection = var.cluster_delete_protection 95 | 96 | labels = merge( 97 | each.value.labels, 98 | { 99 | cluster = var.cluster_name, 100 | role = "worker" 101 | } 102 | ) 103 | 104 | firewall_ids = [local.firewall_id] 105 | 106 | public_net { 107 | ipv4_enabled = var.talos_public_ipv4_enabled 108 | ipv6_enabled = var.talos_public_ipv6_enabled 109 | } 110 | 111 | network { 112 | network_id = each.value.subnet.network_id 113 | ip = each.value.ipv4_private 114 | alias_ips = [] 115 | } 116 | 117 | depends_on = [ 118 | hcloud_network_subnet.worker, 119 | hcloud_placement_group.worker 120 | ] 121 | 122 | lifecycle { 123 | ignore_changes = [ 124 | image, 125 | user_data, 126 | ssh_keys 127 | ] 128 | } 129 | } 130 | 131 | locals { 132 | # IPv4 private (RFC1918) 133 | ipv4_private_pattern = "^(10\\.|192\\.168\\.|172\\.(1[6-9]|2\\d|3[0-1])\\.)" 134 | 135 | # IPv4 special or non-public 136 | # 0/8, 127/8, 169.254/16, 100.64/10, 192.0.0/24, 192.0.2/24, 192.88.99/24, 137 | # 198.18/15, 198.51.100/24, 203.0.113/24, 224/4 multicast, 240/4 reserved 138 | ipv4_special_pattern = "^(0\\.|127\\.|169\\.254\\.|100\\.(6[4-9]|[7-9]\\d|1[01]\\d|12[0-7])\\.|192\\.0\\.0\\.|192\\.0\\.2\\.|192\\.88\\.99\\.|198\\.(1[8-9])\\.|198\\.51\\.100\\.|203\\.0\\.113\\.|22[4-9]\\.|23\\d\\.|24\\d\\.|25[0-5]\\.)" 139 | 140 | # IPv6 private (ULA only: fc00::/7) 141 | ipv6_private_pattern = "^f[cd][0-9a-f]{2}:" 142 | 143 | # IPv6 non-public or special 144 | # ::, ::1, link-local fe80::/10 (fe80..febf), unique local fc00::/7, multicast ff00::/8, 145 | # documentation 2001:db8::/32, IPv4-mapped ::ffff:0:0/96 146 | ipv6_non_public_pattern = "^(::$|::1$|fe[89ab][0-9a-f]:|f[cd][0-9a-f]*:|ff[0-9a-f]*:|2001:db8:|::ffff:)" 147 | 148 | talos_discovery_cluster_autoscaler = var.cluster_autoscaler_discovery_enabled ? { 149 | for m in jsondecode(data.external.talos_member[0].result.cluster_autoscaler) : m.spec.hostname => { 150 | nodepool = regex(local.cluster_autoscaler_hostname_pattern, m.spec.hostname)[0] 151 | 152 | private_ipv4_address = try( 153 | [ 154 | for a in m.spec.addresses : a 155 | if can(cidrnetmask("${a}/32")) 156 | && can(regex(local.ipv4_private_pattern, a)) 157 | ][0], null 158 | ) 159 | public_ipv4_address = try( 160 | [ 161 | for a in m.spec.addresses : a 162 | if can(cidrnetmask("${a}/32")) 163 | && !can(regex(local.ipv4_private_pattern, a)) 164 | && !can(regex(local.ipv4_special_pattern, a)) 165 | ][0], null 166 | ) 167 | private_ipv6_address = try( 168 | [ 169 | for a in m.spec.addresses : lower(a) 170 | if can(cidrsubnet("${a}/128", 0, 0)) 171 | && can(regex(local.ipv6_private_pattern, lower(a))) 172 | ][0], null 173 | ) 174 | public_ipv6_address = try( 175 | [ 176 | for a in m.spec.addresses : lower(a) 177 | if can(cidrsubnet("${a}/128", 0, 0)) 178 | && !can(regex(local.ipv6_non_public_pattern, lower(a))) 179 | ][0], null 180 | ) 181 | } 182 | } : {} 183 | } 184 | 185 | data "external" "talos_member" { 186 | count = var.cluster_autoscaler_discovery_enabled ? 
1 : 0 187 | 188 | program = [ 189 | "sh", "-c", <<-EOT 190 | set -eu 191 | 192 | talosconfig=$(mktemp) 193 | trap 'rm -f "$talosconfig"' EXIT HUP INT TERM QUIT PIPE 194 | jq -r '.talosconfig' > "$talosconfig" 195 | 196 | if ${local.cluster_initialized}; then 197 | if talos_member_json=$(talosctl --talosconfig "$talosconfig" get member -n '${terraform_data.talos_access_data.output.talos_primary_node}' -o json); then 198 | printf '%s' "$talos_member_json" | jq -c -s '{ 199 | control_plane: ( 200 | map(select(.spec.machineType == "controlplane")) | tostring 201 | ), 202 | worker: ( 203 | map(select( 204 | .spec.machineType == "worker" 205 | and (.spec.hostname | test("${local.cluster_autoscaler_hostname_pattern}") | not) 206 | )) | tostring 207 | ), 208 | cluster_autoscaler: ( 209 | map(select( 210 | .spec.machineType == "worker" 211 | and (.spec.hostname | test("${local.cluster_autoscaler_hostname_pattern}")) 212 | )) | tostring 213 | ) 214 | }' 215 | else 216 | printf '%s\n' "talosctl failed" >&2 217 | exit 1 218 | fi 219 | else 220 | printf '%s\n' '{"control_plane":"[]","cluster_autoscaler":"[]","worker":"[]"}' 221 | fi 222 | EOT 223 | ] 224 | 225 | query = { 226 | talosconfig = data.talos_client_configuration.this.talos_config 227 | } 228 | 229 | depends_on = [ 230 | data.external.client_prerequisites_check, 231 | data.external.talosctl_version_check, 232 | data.talos_machine_configuration.control_plane, 233 | data.talos_machine_configuration.worker, 234 | data.talos_machine_configuration.cluster_autoscaler 235 | ] 236 | } 237 | -------------------------------------------------------------------------------- /rdns.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | rdns_cluster_domain_pattern = "/{{\\s*cluster-domain\\s*}}/" 3 | rdns_cluster_name_pattern = "/{{\\s*cluster-name\\s*}}/" 4 | rdns_hostname_pattern = "/{{\\s*hostname\\s*}}/" 5 | rdns_id_pattern = "/{{\\s*id\\s*}}/" 6 | rdns_ip_labels_pattern = "/{{\\s*ip-labels\\s*}}/" 7 | rdns_ip_type_pattern = "/{{\\s*ip-type\\s*}}/" 8 | rdns_pool_pattern = "/{{\\s*pool\\s*}}/" 9 | rdns_role_pattern = "/{{\\s*role\\s*}}/" 10 | 11 | cluster_rdns_ipv4 = var.cluster_rdns_ipv4 != null ? var.cluster_rdns_ipv4 : var.cluster_rdns 12 | cluster_rdns_ipv6 = var.cluster_rdns_ipv6 != null ? var.cluster_rdns_ipv6 : var.cluster_rdns 13 | 14 | ingress_load_balancer_rdns_ipv4 = ( 15 | var.ingress_load_balancer_rdns_ipv4 != null ? var.ingress_load_balancer_rdns_ipv4 : 16 | var.ingress_load_balancer_rdns != null ? var.ingress_load_balancer_rdns : 17 | local.cluster_rdns_ipv4 18 | ) 19 | ingress_load_balancer_rdns_ipv6 = ( 20 | var.ingress_load_balancer_rdns_ipv6 != null ? var.ingress_load_balancer_rdns_ipv6 : 21 | var.ingress_load_balancer_rdns != null ? var.ingress_load_balancer_rdns : 22 | local.cluster_rdns_ipv6 23 | ) 24 | } 25 | 26 | resource "hcloud_rdns" "control_plane" { 27 | for_each = { 28 | for entry in flatten([ 29 | for server in hcloud_server.control_plane : [ 30 | for ip_type in concat( 31 | local.control_plane_nodepools_map[server.labels.nodepool].rdns_ipv4 != null ? ["ipv4"] : [], 32 | local.control_plane_nodepools_map[server.labels.nodepool].rdns_ipv6 != null ? ["ipv6"] : [], 33 | ) : { 34 | key = "${server.name}-${ip_type}" 35 | value = { 36 | ip_address = ip_type == "ipv4" ? server.ipv4_address : server.ipv6_address 37 | rdns = ( 38 | ip_type == "ipv4" ? 
39 | local.control_plane_nodepools_map[server.labels.nodepool].rdns_ipv4 : 40 | local.control_plane_nodepools_map[server.labels.nodepool].rdns_ipv6 41 | ) 42 | hostname = server.name 43 | id = server.id 44 | ip_labels = ( 45 | ip_type == "ipv4" ? 46 | join(".", reverse(split(".", server.ipv4_address))) : 47 | join(".", reverse(flatten([ 48 | for part in split(":", replace( 49 | server.ipv6_address, "::", ":${join(":", 50 | slice( 51 | [0, 0, 0, 0, 0, 0, 0, 0], 52 | 0, 8 - length(compact(split(":", server.ipv6_address))) 53 | ) 54 | )}:" 55 | )) : [for char in split("", format("%04s", part)) : char] 56 | ]))) 57 | ) 58 | ip_type = ip_type 59 | pool = server.labels.nodepool 60 | role = server.labels.role 61 | } 62 | } 63 | ] 64 | ]) : entry.key => entry.value 65 | } 66 | 67 | server_id = each.value.id 68 | ip_address = each.value.ip_address 69 | dns_ptr = ( 70 | replace(replace(replace(replace(replace(replace(replace(replace((each.value.rdns 71 | ), local.rdns_cluster_domain_pattern, var.cluster_domain 72 | ), local.rdns_cluster_name_pattern, var.cluster_name 73 | ), local.rdns_hostname_pattern, each.value.hostname 74 | ), local.rdns_id_pattern, each.value.id 75 | ), local.rdns_ip_labels_pattern, each.value.ip_labels 76 | ), local.rdns_ip_type_pattern, each.value.ip_type 77 | ), local.rdns_pool_pattern, each.value.pool 78 | ), local.rdns_role_pattern, each.value.role 79 | ) 80 | ) 81 | } 82 | 83 | resource "hcloud_rdns" "worker" { 84 | for_each = { 85 | for entry in flatten([ 86 | for server in hcloud_server.worker : [ 87 | for ip_type in concat( 88 | local.worker_nodepools_map[server.labels.nodepool].rdns_ipv4 != null ? ["ipv4"] : [], 89 | local.worker_nodepools_map[server.labels.nodepool].rdns_ipv6 != null ? ["ipv6"] : [], 90 | ) : { 91 | key = "${server.name}-${ip_type}" 92 | value = { 93 | ip_address = ip_type == "ipv4" ? server.ipv4_address : server.ipv6_address 94 | rdns = ( 95 | ip_type == "ipv4" ? 96 | local.worker_nodepools_map[server.labels.nodepool].rdns_ipv4 : 97 | local.worker_nodepools_map[server.labels.nodepool].rdns_ipv6 98 | ) 99 | hostname = server.name 100 | id = server.id 101 | ip_labels = ( 102 | ip_type == "ipv4" ? 
103 | join(".", reverse(split(".", server.ipv4_address))) : 104 | join(".", reverse(flatten([ 105 | for part in split(":", replace( 106 | server.ipv6_address, "::", ":${join(":", 107 | slice( 108 | [0, 0, 0, 0, 0, 0, 0, 0], 109 | 0, 8 - length(compact(split(":", server.ipv6_address))) 110 | ) 111 | )}:" 112 | )) : [for char in split("", format("%04s", part)) : char] 113 | ]))) 114 | ) 115 | ip_type = ip_type 116 | pool = server.labels.nodepool 117 | role = server.labels.role 118 | } 119 | } 120 | ] 121 | ]) : entry.key => entry.value 122 | } 123 | 124 | server_id = each.value.id 125 | ip_address = each.value.ip_address 126 | dns_ptr = ( 127 | replace(replace(replace(replace(replace(replace(replace(replace((each.value.rdns 128 | ), local.rdns_cluster_domain_pattern, var.cluster_domain 129 | ), local.rdns_cluster_name_pattern, var.cluster_name 130 | ), local.rdns_hostname_pattern, each.value.hostname 131 | ), local.rdns_id_pattern, each.value.id 132 | ), local.rdns_ip_labels_pattern, each.value.ip_labels 133 | ), local.rdns_ip_type_pattern, each.value.ip_type 134 | ), local.rdns_pool_pattern, each.value.pool 135 | ), local.rdns_role_pattern, each.value.role 136 | ) 137 | ) 138 | } 139 | 140 | resource "hcloud_rdns" "ingress" { 141 | for_each = { 142 | for entry in flatten([ 143 | for lb in hcloud_load_balancer.ingress : [ 144 | for ip_type in concat( 145 | local.ingress_load_balancer_rdns_ipv4 != null ? ["ipv4"] : [], 146 | local.ingress_load_balancer_rdns_ipv6 != null ? ["ipv6"] : [], 147 | ) : { 148 | key = "${lb.name}-${ip_type}" 149 | value = { 150 | ip_address = ip_type == "ipv4" ? lb.ipv4 : lb.ipv6 151 | rdns = ( 152 | ip_type == "ipv4" ? 153 | local.ingress_load_balancer_rdns_ipv4 : 154 | local.ingress_load_balancer_rdns_ipv6 155 | ) 156 | hostname = lb.name 157 | id = lb.id 158 | ip_labels = ( 159 | ip_type == "ipv4" ? 160 | join(".", reverse(split(".", lb.ipv4))) : 161 | join(".", reverse(flatten([ 162 | for part in split(":", replace( 163 | lb.ipv6, "::", ":${join(":", 164 | slice( 165 | [0, 0, 0, 0, 0, 0, 0, 0], 166 | 0, 8 - length(compact(split(":", lb.ipv6))) 167 | ) 168 | )}:" 169 | )) : [for char in split("", format("%04s", part)) : char] 170 | ]))) 171 | ) 172 | ip_type = ip_type 173 | pool = "ingress" 174 | role = lb.labels.role 175 | } 176 | } 177 | ] 178 | ]) : entry.key => entry.value 179 | } 180 | 181 | load_balancer_id = each.value.id 182 | ip_address = each.value.ip_address 183 | dns_ptr = ( 184 | replace(replace(replace(replace(replace(replace(replace(replace((each.value.rdns 185 | ), local.rdns_cluster_domain_pattern, var.cluster_domain 186 | ), local.rdns_cluster_name_pattern, var.cluster_name 187 | ), local.rdns_hostname_pattern, each.value.hostname 188 | ), local.rdns_id_pattern, each.value.id 189 | ), local.rdns_ip_labels_pattern, each.value.ip_labels 190 | ), local.rdns_ip_type_pattern, each.value.ip_type 191 | ), local.rdns_pool_pattern, each.value.pool 192 | ), local.rdns_role_pattern, each.value.role 193 | ) 194 | ) 195 | } 196 | 197 | resource "hcloud_rdns" "ingress_pool" { 198 | for_each = { 199 | for entry in flatten([ 200 | for lb in hcloud_load_balancer.ingress_pool : [ 201 | for ip_type in concat( 202 | local.ingress_load_balancer_pools_map[lb.labels.pool].rdns_ipv4 != null ? ["ipv4"] : [], 203 | local.ingress_load_balancer_pools_map[lb.labels.pool].rdns_ipv6 != null ? ["ipv6"] : [], 204 | ) : { 205 | key = "${lb.name}-${ip_type}" 206 | value = { 207 | ip_address = ip_type == "ipv4" ? lb.ipv4 : lb.ipv6 208 | rdns = ( 209 | ip_type == "ipv4" ? 
210 | local.ingress_load_balancer_pools_map[lb.labels.pool].rdns_ipv4 : 211 | local.ingress_load_balancer_pools_map[lb.labels.pool].rdns_ipv6 212 | ) 213 | hostname = lb.name 214 | id = lb.id 215 | ip_labels = ( 216 | ip_type == "ipv4" ? 217 | join(".", reverse(split(".", lb.ipv4))) : 218 | join(".", reverse(flatten([ 219 | for part in split(":", replace( 220 | lb.ipv6, "::", ":${join(":", 221 | slice( 222 | [0, 0, 0, 0, 0, 0, 0, 0], 223 | 0, 8 - length(compact(split(":", lb.ipv6))) 224 | ) 225 | )}:" 226 | )) : [for char in split("", format("%04s", part)) : char] 227 | ]))) 228 | ) 229 | ip_type = ip_type 230 | pool = lb.labels.pool 231 | role = lb.labels.role 232 | } 233 | } 234 | ] 235 | ]) : entry.key => entry.value 236 | } 237 | 238 | load_balancer_id = each.value.id 239 | ip_address = each.value.ip_address 240 | dns_ptr = ( 241 | replace(replace(replace(replace(replace(replace(replace(replace((each.value.rdns 242 | ), local.rdns_cluster_domain_pattern, var.cluster_domain 243 | ), local.rdns_cluster_name_pattern, var.cluster_name 244 | ), local.rdns_hostname_pattern, each.value.hostname 245 | ), local.rdns_id_pattern, each.value.id 246 | ), local.rdns_ip_labels_pattern, each.value.ip_labels 247 | ), local.rdns_ip_type_pattern, each.value.ip_type 248 | ), local.rdns_pool_pattern, each.value.pool 249 | ), local.rdns_role_pattern, each.value.role 250 | ) 251 | ) 252 | } 253 | 254 | -------------------------------------------------------------------------------- /load_balancer.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | hcloud_load_balancer_location = coalesce( 3 | var.hcloud_load_balancer_location, 4 | length(local.worker_nodepools) > 0 ? local.worker_nodepools[0].location : null, 5 | length(local.cluster_autoscaler_nodepools) > 0 ? local.cluster_autoscaler_nodepools[0].location : null, 6 | local.control_plane_nodepools[0].location 7 | ) 8 | } 9 | 10 | # Kubernetes API Load Balancer 11 | locals { 12 | kube_api_load_balancer_private_ipv4 = cidrhost(hcloud_network_subnet.load_balancer.ip_range, -2) 13 | kube_api_load_balancer_public_ipv4 = var.kube_api_load_balancer_enabled ? hcloud_load_balancer.kube_api[0].ipv4 : null 14 | kube_api_load_balancer_public_ipv6 = var.kube_api_load_balancer_enabled ? hcloud_load_balancer.kube_api[0].ipv6 : null 15 | kube_api_load_balancer_name = "${var.cluster_name}-kube-api" 16 | kube_api_load_balancer_location = local.control_plane_nodepools[0].location 17 | 18 | kube_api_load_balancer_public_network_enabled = coalesce( 19 | var.kube_api_load_balancer_public_network_enabled, 20 | var.cluster_access == "public" 21 | ) 22 | } 23 | 24 | resource "hcloud_load_balancer" "kube_api" { 25 | count = var.kube_api_load_balancer_enabled ? 1 : 0 26 | 27 | name = local.kube_api_load_balancer_name 28 | location = local.kube_api_load_balancer_location 29 | load_balancer_type = "lb11" 30 | delete_protection = var.cluster_delete_protection 31 | 32 | algorithm { 33 | type = "round_robin" 34 | } 35 | 36 | labels = { 37 | cluster = var.cluster_name 38 | role = "kube-api" 39 | } 40 | } 41 | 42 | resource "hcloud_load_balancer_network" "kube_api" { 43 | count = var.kube_api_load_balancer_enabled ? 
1 : 0 44 | 45 | load_balancer_id = hcloud_load_balancer.kube_api[0].id 46 | enable_public_interface = local.kube_api_load_balancer_public_network_enabled 47 | subnet_id = hcloud_network_subnet.load_balancer.id 48 | ip = local.kube_api_load_balancer_private_ipv4 49 | 50 | depends_on = [hcloud_network_subnet.load_balancer] 51 | } 52 | 53 | resource "hcloud_load_balancer_target" "kube_api" { 54 | count = var.kube_api_load_balancer_enabled ? 1 : 0 55 | 56 | load_balancer_id = hcloud_load_balancer.kube_api[0].id 57 | use_private_ip = true 58 | 59 | type = "label_selector" 60 | label_selector = join(",", 61 | [ 62 | "cluster=${var.cluster_name}", 63 | "role=control-plane" 64 | ] 65 | ) 66 | 67 | lifecycle { 68 | replace_triggered_by = [ 69 | hcloud_load_balancer_network.kube_api 70 | ] 71 | } 72 | 73 | depends_on = [hcloud_load_balancer_network.kube_api] 74 | } 75 | 76 | resource "hcloud_load_balancer_service" "kube_api" { 77 | count = var.kube_api_load_balancer_enabled ? 1 : 0 78 | 79 | load_balancer_id = hcloud_load_balancer.kube_api[0].id 80 | protocol = "tcp" 81 | listen_port = local.kube_api_port 82 | destination_port = local.kube_api_port 83 | 84 | health_check { 85 | protocol = "http" 86 | port = local.kube_api_port 87 | interval = 3 88 | timeout = 2 89 | retries = 2 90 | 91 | http { 92 | path = "/version" 93 | response = "Status" 94 | tls = true 95 | status_codes = ["401"] 96 | } 97 | } 98 | 99 | depends_on = [hcloud_load_balancer_target.kube_api] 100 | } 101 | 102 | # Ingress Service Load Balancer 103 | locals { 104 | ingress_service_load_balancer_private_ipv4 = cidrhost(hcloud_network_subnet.load_balancer.ip_range, -4) 105 | ingress_service_load_balancer_public_ipv4 = local.ingress_nginx_service_load_balancer_required ? hcloud_load_balancer.ingress[0].ipv4 : null 106 | ingress_service_load_balancer_public_ipv6 = local.ingress_nginx_service_load_balancer_required ? hcloud_load_balancer.ingress[0].ipv6 : null 107 | ingress_service_load_balancer_hostname = local.ingress_nginx_service_load_balancer_required ? "static.${join(".", reverse(split(".", local.ingress_service_load_balancer_public_ipv4)))}.clients.your-server.de" : "" 108 | ingress_service_load_balancer_name = "${var.cluster_name}-ingress" 109 | ingress_service_load_balancer_location = local.hcloud_load_balancer_location 110 | } 111 | 112 | resource "hcloud_load_balancer" "ingress" { 113 | count = local.ingress_nginx_service_load_balancer_required ? 1 : 0 114 | 115 | name = local.ingress_service_load_balancer_name 116 | location = local.ingress_service_load_balancer_location 117 | load_balancer_type = var.ingress_load_balancer_type 118 | delete_protection = var.cluster_delete_protection 119 | 120 | algorithm { 121 | type = var.ingress_load_balancer_algorithm 122 | } 123 | 124 | labels = { 125 | cluster = var.cluster_name 126 | role = "ingress" 127 | } 128 | 129 | lifecycle { 130 | ignore_changes = [ 131 | labels 132 | ] 133 | } 134 | } 135 | 136 | resource "hcloud_load_balancer_network" "ingress" { 137 | count = local.ingress_nginx_service_load_balancer_required ? 
1 : 0 138 | 139 | load_balancer_id = hcloud_load_balancer.ingress[0].id 140 | enable_public_interface = var.ingress_load_balancer_public_network_enabled 141 | subnet_id = hcloud_network_subnet.load_balancer.id 142 | ip = local.ingress_service_load_balancer_private_ipv4 143 | 144 | depends_on = [hcloud_network_subnet.load_balancer] 145 | } 146 | 147 | # Ingress Load Balancer Pools 148 | locals { 149 | ingress_load_balancer_pools = [ 150 | for lp in var.ingress_load_balancer_pools : { 151 | name = lp.name 152 | location = lp.location 153 | load_balancer_type = coalesce(lp.type, var.ingress_load_balancer_type) 154 | count = lp.count 155 | labels = merge( 156 | lp.labels, 157 | { pool = lp.name } 158 | ) 159 | rdns_ipv4 = ( 160 | lp.rdns_ipv4 != null ? lp.rdns_ipv4 : 161 | lp.rdns != null ? lp.rdns : 162 | local.ingress_load_balancer_rdns_ipv4 163 | ) 164 | rdns_ipv6 = ( 165 | lp.rdns_ipv6 != null ? lp.rdns_ipv6 : 166 | lp.rdns != null ? lp.rdns : 167 | local.ingress_load_balancer_rdns_ipv6 168 | ) 169 | target_label_selector = length(lp.target_label_selector) > 0 ? lp.target_label_selector : concat( 170 | [ 171 | for np in concat( 172 | local.talos_allow_scheduling_on_control_planes ? local.control_plane_nodepools : [], 173 | local.worker_nodepools 174 | ) : "cluster=${var.cluster_name},nodepool=${np.labels.nodepool}" 175 | if(lp.local_traffic ? np.location == lp.location : true) && 176 | lookup(np.labels, "node.kubernetes.io/exclude-from-external-load-balancers", null) == null 177 | ], 178 | [ 179 | for np in local.cluster_autoscaler_nodepools : 180 | "hcloud/node-group=${var.cluster_name}-${np.name}" 181 | if(lp.local_traffic ? np.location == lp.location : true) && 182 | lookup(np.labels, "node.kubernetes.io/exclude-from-external-load-balancers", null) == null 183 | ] 184 | ) 185 | load_balancer_algorithm = coalesce(lp.load_balancer_algorithm, var.ingress_load_balancer_algorithm) 186 | public_network_enabled = coalesce(lp.public_network_enabled, var.ingress_load_balancer_public_network_enabled) 187 | } 188 | ] 189 | ingress_load_balancer_pools_map = { for lp in local.ingress_load_balancer_pools : lp.name => lp } 190 | } 191 | 192 | resource "hcloud_load_balancer" "ingress_pool" { 193 | for_each = merge([ 194 | for pool_index in range(length(local.ingress_load_balancer_pools)) : { 195 | for lb_index in range(local.ingress_load_balancer_pools[pool_index].count) : "${var.cluster_name}-${local.ingress_load_balancer_pools[pool_index].name}-${lb_index + 1}" => { 196 | location = local.ingress_load_balancer_pools[pool_index].location, 197 | load_balancer_type = local.ingress_load_balancer_pools[pool_index].load_balancer_type, 198 | load_balancer_algorithm = local.ingress_load_balancer_pools[pool_index].load_balancer_algorithm, 199 | labels = local.ingress_load_balancer_pools[pool_index].labels 200 | } 201 | } 202 | ]...) 
203 | 204 | name = each.key 205 | location = each.value.location 206 | load_balancer_type = each.value.load_balancer_type 207 | 208 | algorithm { 209 | type = each.value.load_balancer_algorithm 210 | } 211 | 212 | labels = merge( 213 | each.value.labels, 214 | { 215 | cluster = var.cluster_name, 216 | role = "ingress" 217 | } 218 | ) 219 | } 220 | 221 | resource "hcloud_load_balancer_network" "ingress_pool" { 222 | for_each = merge([ 223 | for pool_index in range(length(local.ingress_load_balancer_pools)) : { 224 | for lb_index in range(local.ingress_load_balancer_pools[pool_index].count) : "${var.cluster_name}-${local.ingress_load_balancer_pools[pool_index].name}-${lb_index + 1}" => { 225 | public_network_enabled = local.ingress_load_balancer_pools[pool_index].public_network_enabled 226 | ipv4_private = cidrhost( 227 | hcloud_network_subnet.load_balancer.ip_range, 228 | -5 - lb_index - ( 229 | pool_index > 0 ? 230 | sum([for prior_pool_index in range(0, pool_index) : local.ingress_load_balancer_pools[prior_pool_index].count]) : 231 | 0 232 | ) 233 | ) 234 | } 235 | } 236 | ]...) 237 | 238 | load_balancer_id = hcloud_load_balancer.ingress_pool[each.key].id 239 | enable_public_interface = each.value.public_network_enabled 240 | subnet_id = hcloud_network_subnet.load_balancer.id 241 | ip = each.value.ipv4_private 242 | } 243 | 244 | resource "hcloud_load_balancer_target" "ingress_pool" { 245 | for_each = { 246 | for entry in flatten([ 247 | for pool in local.ingress_load_balancer_pools : [ 248 | for lb_index in range(pool.count) : [ 249 | for target_index, target_label_selector in pool.target_label_selector : { 250 | key = "${var.cluster_name}-${pool.name}-${lb_index + 1}-${target_index + 1}" 251 | value = { 252 | lb_name = "${var.cluster_name}-${pool.name}-${lb_index + 1}" 253 | label_selector = target_label_selector 254 | } 255 | } 256 | ] 257 | ] 258 | ]) : entry.key => entry.value 259 | } 260 | 261 | load_balancer_id = hcloud_load_balancer.ingress_pool[each.value.lb_name].id 262 | use_private_ip = true 263 | 264 | type = "label_selector" 265 | label_selector = each.value.label_selector 266 | 267 | lifecycle { 268 | replace_triggered_by = [ 269 | # Can't reference a specific network, only count.index or each.key are supported here 270 | hcloud_load_balancer_network.ingress_pool 271 | ] 272 | } 273 | 274 | depends_on = [hcloud_load_balancer_network.ingress_pool] 275 | } 276 | 277 | resource "hcloud_load_balancer_service" "ingress_pool" { 278 | for_each = { 279 | for entry in flatten([ 280 | for pool in local.ingress_load_balancer_pools : [ 281 | for lb_index in range(pool.count) : [ 282 | for protocol in ["http", "https"] : { 283 | key = "${var.cluster_name}-${pool.name}-${lb_index + 1}-${protocol}" 284 | value = { 285 | lb_name = "${var.cluster_name}-${pool.name}-${lb_index + 1}" 286 | listen_port = protocol == "http" ? 80 : 443 287 | destination_port = ( 288 | protocol == "http" ? 
289 | local.ingress_nginx_service_node_port_http : 290 | local.ingress_nginx_service_node_port_https 291 | ) 292 | } 293 | } 294 | ] 295 | ] 296 | ]) : entry.key => entry.value 297 | } 298 | 299 | load_balancer_id = hcloud_load_balancer.ingress_pool[each.value.lb_name].id 300 | listen_port = each.value.listen_port 301 | destination_port = each.value.destination_port 302 | protocol = "tcp" 303 | proxyprotocol = true 304 | 305 | health_check { 306 | protocol = "tcp" 307 | port = each.value.destination_port 308 | interval = var.ingress_load_balancer_health_check_interval 309 | timeout = var.ingress_load_balancer_health_check_timeout 310 | retries = var.ingress_load_balancer_health_check_retries 311 | } 312 | 313 | depends_on = [hcloud_load_balancer_target.ingress_pool] 314 | } 315 | -------------------------------------------------------------------------------- /talos_config.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | talos_allow_scheduling_on_control_planes = coalesce(var.cluster_allow_scheduling_on_control_planes, (local.worker_sum + local.cluster_autoscaler_max_sum) == 0) 3 | 4 | kube_oidc_configuration = var.oidc_enabled ? { 5 | "oidc-issuer-url" = var.oidc_issuer_url 6 | "oidc-client-id" = var.oidc_client_id 7 | "oidc-username-claim" = var.oidc_username_claim 8 | "oidc-groups-claim" = var.oidc_groups_claim 9 | "oidc-groups-prefix" = var.oidc_groups_prefix 10 | } : {} 11 | 12 | # Kubernetes Manifests for Talos 13 | talos_inline_manifests = concat( 14 | [local.hcloud_secret_manifest], 15 | local.cilium_manifest != null ? [local.cilium_manifest] : [], 16 | local.hcloud_ccm_manifest != null ? [local.hcloud_ccm_manifest] : [], 17 | local.hcloud_csi_manifest != null ? [local.hcloud_csi_manifest] : [], 18 | local.talos_backup_manifest != null ? [local.talos_backup_manifest] : [], 19 | local.longhorn_manifest != null ? [local.longhorn_manifest] : [], 20 | local.metrics_server_manifest != null ? [local.metrics_server_manifest] : [], 21 | local.cert_manager_manifest != null ? [local.cert_manager_manifest] : [], 22 | local.ingress_nginx_manifest != null ? [local.ingress_nginx_manifest] : [], 23 | local.cluster_autoscaler_manifest != null ? [local.cluster_autoscaler_manifest] : [], 24 | var.talos_extra_inline_manifests != null ? var.talos_extra_inline_manifests : [], 25 | local.rbac_manifest != null ? [local.rbac_manifest] : [], 26 | local.oidc_manifest != null ? [local.oidc_manifest] : [] 27 | ) 28 | talos_manifests = concat( 29 | var.talos_ccm_enabled ? ["https://raw.githubusercontent.com/siderolabs/talos-cloud-controller-manager/${var.talos_ccm_version}/docs/deploy/cloud-controller-manager-daemonset.yml"] : [], 30 | var.prometheus_operator_crds_enabled ? ["https://github.com/prometheus-operator/prometheus-operator/releases/download/${var.prometheus_operator_crds_version}/stripped-down-crds.yaml"] : [], 31 | var.talos_extra_remote_manifests != null ? var.talos_extra_remote_manifests : [] 32 | ) 33 | 34 | # Talos and Kubernetes Certificates 35 | certificate_san = sort( 36 | distinct( 37 | compact( 38 | concat( 39 | # Virtual IPs 40 | var.control_plane_public_vip_ipv4_enabled ? 
[local.control_plane_public_vip_ipv4] : [], 41 | [local.control_plane_private_vip_ipv4], 42 | # Load Balancer IPs 43 | [ 44 | local.kube_api_load_balancer_private_ipv4, 45 | local.kube_api_load_balancer_public_ipv4, 46 | local.kube_api_load_balancer_public_ipv6 47 | ], 48 | # Control Plane Node IPs 49 | local.control_plane_private_ipv4_list, 50 | local.control_plane_public_ipv4_list, 51 | local.control_plane_public_ipv6_list, 52 | # Other Addresses 53 | [var.kube_api_hostname], 54 | ["127.0.0.1", "::1", "localhost"], 55 | ) 56 | ) 57 | ) 58 | ) 59 | 60 | # DNS Configuration 61 | talos_host_dns = { 62 | enabled = true 63 | forwardKubeDNSToHost = false 64 | resolveMemberNames = true 65 | } 66 | 67 | talos_nameservers = [ 68 | for ns in var.talos_nameservers : ns 69 | if var.talos_ipv6_enabled || !strcontains(ns, ":") 70 | ] 71 | 72 | # Routes 73 | talos_extra_routes = [for cidr in var.talos_extra_routes : { 74 | network = cidr 75 | gateway = local.network_ipv4_gateway 76 | metric = 512 77 | }] 78 | 79 | # Interface Configuration 80 | talos_public_interface_enabled = var.talos_public_ipv4_enabled || var.talos_public_ipv6_enabled 81 | 82 | # Extra Host Entries 83 | talos_extra_host_entries = concat( 84 | var.kube_api_hostname != null ? [ 85 | { 86 | ip = local.kube_api_private_ipv4 87 | aliases = [var.kube_api_hostname] 88 | } 89 | ] : [], 90 | var.talos_extra_host_entries 91 | ) 92 | 93 | # Disk Encryption Configuration 94 | talos_system_disk_encryption = merge( 95 | var.talos_state_partition_encryption_enabled ? { 96 | state = { 97 | provider = "luks2" 98 | options = ["no_read_workqueue", "no_write_workqueue"] 99 | keys = [{ 100 | nodeID = {} 101 | slot = 0 102 | }] 103 | } 104 | } : {}, 105 | var.talos_ephemeral_partition_encryption_enabled ? { 106 | ephemeral = { 107 | provider = "luks2" 108 | options = ["no_read_workqueue", "no_write_workqueue"] 109 | keys = [{ 110 | nodeID = {} 111 | slot = 0 112 | }] 113 | } 114 | } : {} 115 | ) 116 | 117 | # Kubelet extra mounts 118 | talos_kubelet_extra_mounts = concat( 119 | var.longhorn_enabled ? [ 120 | { 121 | source = "/var/lib/longhorn" 122 | destination = "/var/lib/longhorn" 123 | type = "bind" 124 | options = ["bind", "rshared", "rw"] 125 | } 126 | ] : [], 127 | [ 128 | for mount in var.talos_kubelet_extra_mounts : { 129 | source = mount.source 130 | destination = coalesce(mount.destination, mount.source) 131 | type = mount.type 132 | options = mount.options 133 | } 134 | ] 135 | ) 136 | 137 | # Talos Discovery 138 | talos_discovery_enabled = var.talos_discovery_kubernetes_enabled || var.talos_discovery_service_enabled 139 | 140 | talos_discovery = { 141 | enabled = local.talos_discovery_enabled 142 | registries = { 143 | kubernetes = { disabled = !var.talos_discovery_kubernetes_enabled } 144 | service = { disabled = !var.talos_discovery_service_enabled } 145 | } 146 | } 147 | 148 | # Control Plane Config 149 | control_plane_talos_config_patch = { 150 | for node in hcloud_server.control_plane : node.name => { 151 | machine = { 152 | install = { 153 | image = local.talos_installer_image_url 154 | extraKernelArgs = var.talos_extra_kernel_args 155 | } 156 | nodeLabels = merge( 157 | local.talos_allow_scheduling_on_control_planes ? 
{ "node.kubernetes.io/exclude-from-external-load-balancers" = { "$patch" = "delete" } } : {}, 158 | local.control_plane_nodepools_map[node.labels.nodepool].labels 159 | ) 160 | nodeAnnotations = local.control_plane_nodepools_map[node.labels.nodepool].annotations 161 | nodeTaints = { 162 | for taint in local.control_plane_nodepools_map[node.labels.nodepool].taints : taint.key => "${taint.value}:${taint.effect}" 163 | } 164 | certSANs = local.certificate_san 165 | network = { 166 | hostname = node.name 167 | interfaces = concat( 168 | local.talos_public_interface_enabled ? [{ 169 | interface = "eth0" 170 | dhcp = true 171 | dhcpOptions = { 172 | ipv4 = var.talos_public_ipv4_enabled 173 | ipv6 = false 174 | } 175 | vip = local.control_plane_public_vip_ipv4_enabled ? { 176 | ip = local.control_plane_public_vip_ipv4 177 | hcloud = { 178 | apiToken = var.hcloud_token 179 | } 180 | } : null 181 | }] : [], 182 | [{ 183 | interface = local.talos_public_interface_enabled ? "eth1" : "eth0" 184 | dhcp = true 185 | routes = local.talos_extra_routes 186 | vip = var.control_plane_private_vip_ipv4_enabled ? { 187 | ip = local.control_plane_private_vip_ipv4 188 | hcloud = { 189 | apiToken = var.hcloud_token 190 | } 191 | } : null 192 | }] 193 | ) 194 | nameservers = local.talos_nameservers 195 | extraHostEntries = local.talos_extra_host_entries 196 | } 197 | kubelet = { 198 | extraArgs = merge( 199 | { 200 | "cloud-provider" = "external" 201 | "rotate-server-certificates" = true 202 | }, 203 | var.kubernetes_kubelet_extra_args 204 | ) 205 | extraConfig = merge( 206 | { 207 | shutdownGracePeriod = "90s" 208 | shutdownGracePeriodCriticalPods = "15s" 209 | registerWithTaints = local.control_plane_nodepools_map[node.labels.nodepool].taints 210 | systemReserved = { 211 | cpu = "250m" 212 | memory = "300Mi" 213 | ephemeral-storage = "1Gi" 214 | } 215 | kubeReserved = { 216 | cpu = "250m" 217 | memory = "350Mi" 218 | ephemeral-storage = "1Gi" 219 | } 220 | }, 221 | var.kubernetes_kubelet_extra_config 222 | ) 223 | extraMounts = local.talos_kubelet_extra_mounts 224 | nodeIP = { 225 | validSubnets = [local.network_node_ipv4_cidr] 226 | } 227 | } 228 | kernel = { 229 | modules = var.talos_kernel_modules 230 | } 231 | sysctls = merge( 232 | { 233 | "net.core.somaxconn" = "65535", 234 | "net.core.netdev_max_backlog" = "4096", 235 | "net.ipv6.conf.default.disable_ipv6" = "${var.talos_ipv6_enabled ? 0 : 1}", 236 | "net.ipv6.conf.all.disable_ipv6" = "${var.talos_ipv6_enabled ? 
0 : 1}" 237 | }, 238 | var.talos_sysctls_extra_args 239 | ) 240 | registries = var.talos_registries 241 | systemDiskEncryption = local.talos_system_disk_encryption 242 | features = { 243 | kubernetesTalosAPIAccess = { 244 | enabled = true, 245 | allowedRoles = [ 246 | "os:reader", 247 | "os:etcd:backup" 248 | ], 249 | allowedKubernetesNamespaces = ["kube-system"] 250 | }, 251 | hostDNS = local.talos_host_dns 252 | } 253 | time = { 254 | servers = var.talos_time_servers 255 | } 256 | logging = { 257 | destinations = var.talos_logging_destinations 258 | } 259 | } 260 | cluster = { 261 | allowSchedulingOnControlPlanes = local.talos_allow_scheduling_on_control_planes 262 | network = { 263 | dnsDomain = var.cluster_domain 264 | podSubnets = [local.network_pod_ipv4_cidr] 265 | serviceSubnets = [local.network_service_ipv4_cidr] 266 | cni = { name = "none" } 267 | } 268 | coreDNS = { 269 | disabled = !var.talos_coredns_enabled 270 | } 271 | proxy = { 272 | disabled = var.cilium_kube_proxy_replacement_enabled 273 | } 274 | apiServer = { 275 | admissionControl = var.kube_api_admission_control 276 | certSANs = local.certificate_san, 277 | extraArgs = merge( 278 | { "enable-aggregator-routing" = true }, 279 | local.kube_oidc_configuration, 280 | var.kube_api_extra_args 281 | ) 282 | } 283 | controllerManager = { 284 | extraArgs = { 285 | "cloud-provider" = "external" 286 | "bind-address" = "0.0.0.0" 287 | } 288 | } 289 | discovery = local.talos_discovery 290 | etcd = { 291 | advertisedSubnets = [hcloud_network_subnet.control_plane.ip_range] 292 | extraArgs = { 293 | "listen-metrics-urls" = "http://0.0.0.0:2381" 294 | } 295 | } 296 | scheduler = { 297 | extraArgs = { 298 | "bind-address" = "0.0.0.0" 299 | } 300 | } 301 | adminKubeconfig = { 302 | certLifetime = "87600h" 303 | } 304 | inlineManifests = local.talos_inline_manifests 305 | externalCloudProvider = { 306 | enabled = true, 307 | manifests = local.talos_manifests 308 | } 309 | } 310 | } 311 | } 312 | 313 | # Worker Config 314 | worker_talos_config_patch = { 315 | for node in hcloud_server.worker : node.name => { 316 | machine = { 317 | install = { 318 | image = local.talos_installer_image_url 319 | extraKernelArgs = var.talos_extra_kernel_args 320 | } 321 | nodeLabels = local.worker_nodepools_map[node.labels.nodepool].labels 322 | nodeAnnotations = local.worker_nodepools_map[node.labels.nodepool].annotations 323 | certSANs = local.certificate_san 324 | network = { 325 | hostname = node.name 326 | interfaces = concat( 327 | local.talos_public_interface_enabled ? [{ 328 | interface = "eth0" 329 | dhcp = true 330 | dhcpOptions = { 331 | ipv4 = var.talos_public_ipv4_enabled 332 | ipv6 = false 333 | } 334 | }] : [], 335 | [{ 336 | interface = local.talos_public_interface_enabled ? 
"eth1" : "eth0" 337 | dhcp = true 338 | routes = local.talos_extra_routes 339 | }] 340 | ) 341 | nameservers = local.talos_nameservers 342 | extraHostEntries = local.talos_extra_host_entries 343 | } 344 | kubelet = { 345 | extraArgs = merge( 346 | { 347 | "cloud-provider" = "external", 348 | "rotate-server-certificates" = true 349 | }, 350 | var.kubernetes_kubelet_extra_args 351 | ) 352 | extraConfig = merge( 353 | { 354 | shutdownGracePeriod = "90s" 355 | shutdownGracePeriodCriticalPods = "15s" 356 | registerWithTaints = local.worker_nodepools_map[node.labels.nodepool].taints 357 | systemReserved = { 358 | cpu = "100m" 359 | memory = "300Mi" 360 | ephemeral-storage = "1Gi" 361 | } 362 | kubeReserved = { 363 | cpu = "100m" 364 | memory = "350Mi" 365 | ephemeral-storage = "1Gi" 366 | } 367 | }, 368 | var.kubernetes_kubelet_extra_config 369 | ) 370 | extraMounts = local.talos_kubelet_extra_mounts 371 | nodeIP = { 372 | validSubnets = [local.network_node_ipv4_cidr] 373 | } 374 | } 375 | kernel = { 376 | modules = var.talos_kernel_modules 377 | } 378 | sysctls = merge( 379 | { 380 | "net.core.somaxconn" = "65535" 381 | "net.core.netdev_max_backlog" = "4096" 382 | "net.ipv6.conf.default.disable_ipv6" = "${var.talos_ipv6_enabled ? 0 : 1}" 383 | "net.ipv6.conf.all.disable_ipv6" = "${var.talos_ipv6_enabled ? 0 : 1}" 384 | }, 385 | var.talos_sysctls_extra_args 386 | ) 387 | registries = var.talos_registries 388 | systemDiskEncryption = local.talos_system_disk_encryption 389 | features = { 390 | hostDNS = local.talos_host_dns 391 | } 392 | time = { 393 | servers = var.talos_time_servers 394 | } 395 | logging = { 396 | destinations = var.talos_logging_destinations 397 | } 398 | } 399 | cluster = { 400 | network = { 401 | dnsDomain = var.cluster_domain 402 | podSubnets = [local.network_pod_ipv4_cidr] 403 | serviceSubnets = [local.network_service_ipv4_cidr] 404 | cni = { name = "none" } 405 | } 406 | proxy = { 407 | disabled = var.cilium_kube_proxy_replacement_enabled 408 | } 409 | discovery = local.talos_discovery 410 | } 411 | } 412 | } 413 | 414 | # Autoscaler Config 415 | autoscaler_nodepool_talos_config_patch = { 416 | for nodepool in local.cluster_autoscaler_nodepools : nodepool.name => { 417 | machine = { 418 | install = { 419 | image = local.talos_installer_image_url 420 | extraKernelArgs = var.talos_extra_kernel_args 421 | } 422 | nodeLabels = nodepool.labels 423 | nodeAnnotations = nodepool.annotations 424 | certSANs = local.certificate_san 425 | network = { 426 | interfaces = concat( 427 | local.talos_public_interface_enabled ? [{ 428 | interface = "eth0" 429 | dhcp = true 430 | dhcpOptions = { 431 | ipv4 = var.talos_public_ipv4_enabled 432 | ipv6 = false 433 | } 434 | }] : [], 435 | [{ 436 | interface = local.talos_public_interface_enabled ? 
"eth1" : "eth0" 437 | dhcp = true 438 | routes = local.talos_extra_routes 439 | }] 440 | ) 441 | nameservers = local.talos_nameservers 442 | extraHostEntries = local.talos_extra_host_entries 443 | } 444 | kubelet = { 445 | extraArgs = merge( 446 | { 447 | "cloud-provider" = "external" 448 | "rotate-server-certificates" = true 449 | }, 450 | var.kubernetes_kubelet_extra_args 451 | ) 452 | extraConfig = merge( 453 | { 454 | shutdownGracePeriod = "90s" 455 | shutdownGracePeriodCriticalPods = "15s" 456 | registerWithTaints = nodepool.taints 457 | systemReserved = { 458 | cpu = "100m" 459 | memory = "300Mi" 460 | ephemeral-storage = "1Gi" 461 | } 462 | kubeReserved = { 463 | cpu = "100m" 464 | memory = "350Mi" 465 | ephemeral-storage = "1Gi" 466 | } 467 | }, 468 | var.kubernetes_kubelet_extra_config 469 | ) 470 | extraMounts = local.talos_kubelet_extra_mounts 471 | nodeIP = { 472 | validSubnets = [local.network_node_ipv4_cidr] 473 | } 474 | } 475 | kernel = { 476 | modules = var.talos_kernel_modules 477 | } 478 | sysctls = merge( 479 | { 480 | "net.core.somaxconn" = "65535" 481 | "net.core.netdev_max_backlog" = "4096" 482 | "net.ipv6.conf.default.disable_ipv6" = "${var.talos_ipv6_enabled ? 0 : 1}" 483 | "net.ipv6.conf.all.disable_ipv6" = "${var.talos_ipv6_enabled ? 0 : 1}" 484 | }, 485 | var.talos_sysctls_extra_args 486 | ) 487 | registries = var.talos_registries 488 | systemDiskEncryption = local.talos_system_disk_encryption 489 | features = { 490 | hostDNS = local.talos_host_dns 491 | } 492 | time = { 493 | servers = var.talos_time_servers 494 | } 495 | logging = { 496 | destinations = var.talos_logging_destinations 497 | } 498 | } 499 | cluster = { 500 | network = { 501 | dnsDomain = var.cluster_domain 502 | podSubnets = [local.network_pod_ipv4_cidr] 503 | serviceSubnets = [local.network_service_ipv4_cidr] 504 | cni = { name = "none" } 505 | } 506 | proxy = { 507 | disabled = var.cilium_kube_proxy_replacement_enabled 508 | } 509 | discovery = local.talos_discovery 510 | } 511 | } 512 | } 513 | } 514 | 515 | data "talos_machine_configuration" "control_plane" { 516 | for_each = { for node in hcloud_server.control_plane : node.name => node } 517 | 518 | talos_version = var.talos_version 519 | cluster_name = var.cluster_name 520 | cluster_endpoint = local.kube_api_url_internal 521 | kubernetes_version = var.kubernetes_version 522 | machine_type = "controlplane" 523 | machine_secrets = talos_machine_secrets.this.machine_secrets 524 | docs = false 525 | examples = false 526 | 527 | config_patches = concat( 528 | [yamlencode(local.control_plane_talos_config_patch[each.key])], 529 | [for patch in var.control_plane_config_patches : yamlencode(patch)] 530 | ) 531 | } 532 | 533 | data "talos_machine_configuration" "worker" { 534 | for_each = { for node in hcloud_server.worker : node.name => node } 535 | 536 | talos_version = var.talos_version 537 | cluster_name = var.cluster_name 538 | cluster_endpoint = local.kube_api_url_internal 539 | kubernetes_version = var.kubernetes_version 540 | machine_type = "worker" 541 | machine_secrets = talos_machine_secrets.this.machine_secrets 542 | docs = false 543 | examples = false 544 | 545 | config_patches = concat( 546 | [yamlencode(local.worker_talos_config_patch[each.key])], 547 | [for patch in var.worker_config_patches : yamlencode(patch)] 548 | ) 549 | } 550 | 551 | data "talos_machine_configuration" "cluster_autoscaler" { 552 | for_each = { for nodepool in local.cluster_autoscaler_nodepools : nodepool.name => nodepool } 553 | 554 | talos_version = 
var.talos_version 555 | cluster_name = var.cluster_name 556 | cluster_endpoint = local.kube_api_url_internal 557 | kubernetes_version = var.kubernetes_version 558 | machine_type = "worker" 559 | machine_secrets = talos_machine_secrets.this.machine_secrets 560 | docs = false 561 | examples = false 562 | 563 | config_patches = concat( 564 | [yamlencode(local.autoscaler_nodepool_talos_config_patch[each.key])], 565 | [for patch in var.cluster_autoscaler_config_patches : yamlencode(patch)] 566 | ) 567 | } 568 | -------------------------------------------------------------------------------- /talos.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | # Talos Version 3 | talos_version_parts = regex("^v?(?P[0-9]+)\\.(?P[0-9]+)\\.(?P[0-9]+)", var.talos_version) 4 | talos_version_major = local.talos_version_parts.major 5 | talos_version_minor = local.talos_version_parts.minor 6 | talos_version_patch = local.talos_version_parts.patch 7 | 8 | # Talos Nodes 9 | talos_primary_node_name = sort(keys(hcloud_server.control_plane))[0] 10 | talos_primary_node_private_ipv4 = tolist(hcloud_server.control_plane[local.talos_primary_node_name].network)[0].ip 11 | talos_primary_node_public_ipv4 = hcloud_server.control_plane[local.talos_primary_node_name].ipv4_address 12 | talos_primary_node_public_ipv6 = hcloud_server.control_plane[local.talos_primary_node_name].ipv6_address 13 | 14 | # Talos API 15 | talos_api_port = 50000 16 | talos_primary_endpoint = var.cluster_access == "private" ? local.talos_primary_node_private_ipv4 : coalesce( 17 | local.talos_primary_node_public_ipv4, local.talos_primary_node_public_ipv6 18 | ) 19 | talos_endpoints = compact( 20 | var.cluster_access == "private" ? local.control_plane_private_ipv4_list : concat( 21 | local.network_public_ipv4_enabled ? local.control_plane_public_ipv4_list : [], 22 | local.network_public_ipv6_enabled ? local.control_plane_public_ipv6_list : [] 23 | ) 24 | ) 25 | 26 | # Kubernetes API 27 | kube_api_private_ipv4 = ( 28 | var.kube_api_load_balancer_enabled ? local.kube_api_load_balancer_private_ipv4 : 29 | var.control_plane_private_vip_ipv4_enabled ? local.control_plane_private_vip_ipv4 : 30 | local.talos_primary_node_private_ipv4 31 | ) 32 | 33 | kube_api_port = 6443 34 | kube_api_host = coalesce( 35 | var.kube_api_hostname, 36 | var.cluster_access == "private" ? local.kube_api_private_ipv4 : null, 37 | ( 38 | var.kube_api_load_balancer_enabled && local.kube_api_load_balancer_public_network_enabled ? 39 | coalesce(local.kube_api_load_balancer_public_ipv4, local.kube_api_load_balancer_public_ipv6) : null 40 | ), 41 | var.control_plane_public_vip_ipv4_enabled ? 
local.control_plane_public_vip_ipv4 : null, 42 | local.talos_primary_node_public_ipv4, 43 | local.talos_primary_node_public_ipv6 44 | ) 45 | 46 | kube_api_url_internal = "https://${local.kube_api_private_ipv4}:${local.kube_api_port}" 47 | kube_api_url_external = "https://${local.kube_api_host}:${local.kube_api_port}" 48 | 49 | # KubePrism 50 | kube_prism_host = "127.0.0.1" 51 | kube_prism_port = 7445 52 | 53 | # Talos Control 54 | talosctl_upgrade_command = join(" ", 55 | [ 56 | "talosctl upgrade", 57 | "--talosconfig \"$talosconfig\"", 58 | "--nodes \"$host\"", 59 | "--image '${local.talos_installer_image_url}'" 60 | ] 61 | ) 62 | talosctl_upgrade_k8s_command = join(" ", 63 | [ 64 | "talosctl upgrade-k8s", 65 | "--talosconfig \"$talosconfig\"", 66 | "--nodes '${local.talos_primary_node_private_ipv4}'", 67 | "--endpoint '${local.kube_api_url_external}'", 68 | "--to '${var.kubernetes_version}'", 69 | "--with-docs=false", 70 | "--with-examples=false" 71 | ] 72 | ) 73 | talosctl_apply_config_command = join(" ", 74 | [ 75 | "talosctl apply-config", 76 | "--talosconfig \"$talosconfig\"", 77 | "--nodes \"$host\"", 78 | "--file \"$machine_config\"" 79 | ] 80 | ) 81 | talosctl_health_check_command = join(" ", 82 | [ 83 | "talosctl health", 84 | "--talosconfig \"$talosconfig\"", 85 | "--server=true", 86 | "--control-plane-nodes '${join(",", local.control_plane_private_ipv4_list)}'", 87 | "--worker-nodes '${join(",", concat(local.worker_private_ipv4_list, local.cluster_autoscaler_private_ipv4_list))}'" 88 | ] 89 | ) 90 | talosctl_retry_snippet = join(" ", 91 | [ 92 | "[ \"$retry\" -gt ${var.talosctl_retry_count} ] && exit 1 ||", 93 | "{ printf '%s\n' \"Retry $retry/${var.talosctl_retry_count} ...\"; retry=$((retry + 1)); sleep 10; }" 94 | ] 95 | ) 96 | talosctl_get_version_command = join(" ", 97 | [ 98 | "talosctl get version", 99 | "--talosconfig \"$talosconfig\"", 100 | "--nodes \"$host\"", 101 | "--output jsonpath='{.spec.version}'", 102 | "2>/dev/null || true" 103 | ] 104 | ) 105 | talosctl_get_schematic_command = join(" ", 106 | [ 107 | "talosctl get extensions", 108 | "--talosconfig \"$talosconfig\"", 109 | "--nodes \"$host\"", 110 | "--output json", 111 | "| jq -r 'select(.spec.metadata.name==\"schematic\") | .spec.metadata.version'", 112 | "2>/dev/null || true" 113 | ] 114 | ) 115 | # Cluster Status 116 | cluster_initialized = length(data.hcloud_certificates.state.certificates) > 0 117 | } 118 | 119 | data "hcloud_certificates" "state" { 120 | with_selector = join(",", 121 | [ 122 | "cluster=${var.cluster_name}", 123 | "state=initialized" 124 | ] 125 | ) 126 | } 127 | 128 | resource "talos_machine_secrets" "this" { 129 | talos_version = var.talos_version 130 | 131 | lifecycle { 132 | prevent_destroy = true 133 | } 134 | } 135 | 136 | resource "terraform_data" "upgrade_control_plane" { 137 | triggers_replace = [ 138 | var.talos_version, 139 | local.talos_schematic_id 140 | ] 141 | 142 | provisioner "local-exec" { 143 | when = create 144 | quiet = true 145 | command = <<-EOT 146 | set -eu 147 | 148 | talosconfig=$(mktemp) 149 | trap 'rm -f "$talosconfig"' EXIT HUP INT TERM QUIT PIPE 150 | printf '%s' "$TALOSCONFIG" > "$talosconfig" 151 | 152 | if ${local.cluster_initialized}; then 153 | printf '%s\n' "Start upgrading Control Plane Nodes" 154 | 155 | retry=1 156 | while ${var.cluster_healthcheck_enabled} && ! 
${local.talosctl_health_check_command} -n '${local.control_plane_private_ipv4_list[0]}'; do 157 | ${local.talosctl_retry_snippet} 158 | done 159 | 160 | set -- ${join(" ", local.control_plane_private_ipv4_list)} 161 | for host in "$@"; do 162 | printf '%s\n' "Checking node $host ..." 163 | 164 | retry=1 165 | while true; do 166 | current_version=$(${local.talosctl_get_version_command}) 167 | current_schematic=$(${local.talosctl_get_schematic_command}) 168 | if [ "$current_version" = "${var.talos_version}" ] && [ "$current_schematic" = "${local.talos_schematic_id}" ]; then 169 | if [ "$retry" -gt 1 ]; then 170 | printf '%s\n' "Node $host is already at Talos $current_version ($current_schematic). Waiting for cluster to stabilize ..." 171 | sleep 5 172 | 173 | retry=1 174 | while ${var.cluster_healthcheck_enabled} && ! ${local.talosctl_health_check_command} -n '${local.talos_primary_node_private_ipv4}'; do 175 | ${local.talosctl_retry_snippet} 176 | done 177 | printf '%s\n' "Node $host upgraded successfully" 178 | else 179 | printf '%s\n' "Node $host is already at Talos $current_version ($current_schematic). Skipping upgrade ..." 180 | fi 181 | break 182 | elif [ -n "$current_version" ] && [ -n "$current_schematic" ]; then 183 | printf '%s\n' "Node $host is currently at Talos $current_version ($current_schematic)" 184 | else 185 | printf '%s\n' "Could not determine current Talos version or schematic for node $host" 186 | fi 187 | 188 | printf '%s\n' "Upgrading node $host to Talos ${var.talos_version} (${local.talos_schematic_id}) ..." 189 | if ${local.talosctl_upgrade_command}; then 190 | printf '%s\n' "Upgrade successfully completed for node $host" 191 | sleep 5 192 | 193 | retry=1 194 | while ${var.cluster_healthcheck_enabled} && ! ${local.talosctl_health_check_command} -n '${local.talos_primary_node_private_ipv4}'; do 195 | ${local.talosctl_retry_snippet} 196 | done 197 | 198 | printf '%s\n' "Node $host upgraded successfully" 199 | break 200 | fi 201 | ${local.talosctl_retry_snippet} 202 | done 203 | done 204 | printf '%s\n' "Control Plane Nodes upgraded successfully" 205 | else 206 | printf '%s\n' "Cluster not initialized, skipping Control Plane Node upgrade" 207 | fi 208 | EOT 209 | 210 | environment = { 211 | TALOSCONFIG = nonsensitive(data.talos_client_configuration.this.talos_config) 212 | } 213 | } 214 | 215 | depends_on = [ 216 | data.external.talosctl_version_check, 217 | data.talos_machine_configuration.control_plane, 218 | data.talos_client_configuration.this 219 | ] 220 | } 221 | 222 | resource "terraform_data" "upgrade_worker" { 223 | triggers_replace = [ 224 | var.talos_version, 225 | local.talos_schematic_id 226 | ] 227 | 228 | provisioner "local-exec" { 229 | when = create 230 | quiet = true 231 | command = <<-EOT 232 | set -eu 233 | 234 | talosconfig=$(mktemp) 235 | trap 'rm -f "$talosconfig"' EXIT HUP INT TERM QUIT PIPE 236 | printf '%s' "$TALOSCONFIG" > "$talosconfig" 237 | 238 | if ${local.cluster_initialized}; then 239 | printf '%s\n' "Start upgrading Worker Nodes" 240 | 241 | retry=1 242 | while ${var.cluster_healthcheck_enabled} && ! ${local.talosctl_health_check_command} -n '${local.talos_primary_node_private_ipv4}'; do 243 | ${local.talosctl_retry_snippet} 244 | done 245 | 246 | set -- ${join(" ", local.worker_private_ipv4_list)} 247 | for host in "$@"; do 248 | printf '%s\n' "Checking node $host ..." 
249 | 250 | retry=1 251 | while true; do 252 | current_version=$(${local.talosctl_get_version_command}) 253 | current_schematic=$(${local.talosctl_get_schematic_command}) 254 | if [ "$current_version" = "${var.talos_version}" ] && [ "$current_schematic" = "${local.talos_schematic_id}" ]; then 255 | if [ "$retry" -gt 1 ]; then 256 | printf '%s\n' "Node $host is already at Talos $current_version ($current_schematic). Waiting for cluster to stabilize ..." 257 | sleep 5 258 | 259 | retry=1 260 | while ${var.cluster_healthcheck_enabled} && ! ${local.talosctl_health_check_command} -n '${local.talos_primary_node_private_ipv4}'; do 261 | ${local.talosctl_retry_snippet} 262 | done 263 | printf '%s\n' "Node $host upgraded successfully" 264 | else 265 | printf '%s\n' "Node $host is already at Talos $current_version ($current_schematic). Skipping upgrade ..." 266 | fi 267 | break 268 | elif [ -n "$current_version" ] && [ -n "$current_schematic" ]; then 269 | printf '%s\n' "Node $host is currently at Talos $current_version ($current_schematic)" 270 | else 271 | printf '%s\n' "Could not determine current Talos version or schematic for node $host" 272 | fi 273 | 274 | printf '%s\n' "Upgrading node $host to Talos ${var.talos_version} (${local.talos_schematic_id}) ..." 275 | if ${local.talosctl_upgrade_command}; then 276 | printf '%s\n' "Upgrade successfully completed for node $host" 277 | sleep 5 278 | 279 | retry=1 280 | while ${var.cluster_healthcheck_enabled} && ! ${local.talosctl_health_check_command} -n '${local.talos_primary_node_private_ipv4}'; do 281 | ${local.talosctl_retry_snippet} 282 | done 283 | 284 | printf '%s\n' "Node $host upgraded successfully" 285 | break 286 | fi 287 | ${local.talosctl_retry_snippet} 288 | done 289 | done 290 | printf '%s\n' "Worker Nodes upgraded successfully" 291 | else 292 | printf '%s\n' "Cluster not initialized, skipping Worker Node upgrade" 293 | fi 294 | EOT 295 | 296 | environment = { 297 | TALOSCONFIG = nonsensitive(data.talos_client_configuration.this.talos_config) 298 | } 299 | } 300 | 301 | depends_on = [ 302 | data.external.talosctl_version_check, 303 | data.talos_machine_configuration.worker, 304 | terraform_data.upgrade_control_plane 305 | ] 306 | } 307 | 308 | resource "terraform_data" "upgrade_cluster_autoscaler" { 309 | count = var.cluster_autoscaler_discovery_enabled ? 1 : 0 310 | 311 | triggers_replace = [ 312 | var.talos_version, 313 | local.talos_schematic_id 314 | ] 315 | 316 | provisioner "local-exec" { 317 | when = create 318 | quiet = true 319 | command = <<-EOT 320 | set -eu 321 | 322 | talosconfig=$(mktemp) 323 | trap 'rm -f "$talosconfig"' EXIT HUP INT TERM QUIT PIPE 324 | printf '%s' "$TALOSCONFIG" > "$talosconfig" 325 | 326 | if ${local.cluster_initialized}; then 327 | printf '%s\n' "Start upgrading Cluster Autoscaler Nodes" 328 | 329 | retry=1 330 | while ${var.cluster_healthcheck_enabled} && ! ${local.talosctl_health_check_command} -n '${local.talos_primary_node_private_ipv4}'; do 331 | ${local.talosctl_retry_snippet} 332 | done 333 | 334 | set -- ${join(" ", local.cluster_autoscaler_private_ipv4_list)} 335 | for host in "$@"; do 336 | printf '%s\n' "Checking node $host ..." 
337 | 338 | retry=1 339 | while true; do 340 | current_version=$(${local.talosctl_get_version_command}) 341 | current_schematic=$(${local.talosctl_get_schematic_command}) 342 | if [ "$current_version" = "${var.talos_version}" ] && [ "$current_schematic" = "${local.talos_schematic_id}" ]; then 343 | if [ "$retry" -gt 1 ]; then 344 | printf '%s\n' "Node $host is already at Talos $current_version ($current_schematic). Waiting for cluster to stabilize ..." 345 | sleep 5 346 | 347 | retry=1 348 | while ${var.cluster_healthcheck_enabled} && ! ${local.talosctl_health_check_command} -n '${local.talos_primary_node_private_ipv4}'; do 349 | ${local.talosctl_retry_snippet} 350 | done 351 | printf '%s\n' "Node $host upgraded successfully" 352 | else 353 | printf '%s\n' "Node $host is already at Talos $current_version ($current_schematic). Skipping upgrade ..." 354 | fi 355 | break 356 | elif [ -n "$current_version" ] && [ -n "$current_schematic" ]; then 357 | printf '%s\n' "Node $host is currently at Talos $current_version ($current_schematic)" 358 | else 359 | printf '%s\n' "Could not determine current Talos version or schematic for node $host" 360 | fi 361 | 362 | printf '%s\n' "Upgrading node $host to Talos ${var.talos_version} (${local.talos_schematic_id}) ..." 363 | if ${local.talosctl_upgrade_command}; then 364 | printf '%s\n' "Upgrade successfully completed for node $host" 365 | sleep 5 366 | 367 | retry=1 368 | while ${var.cluster_healthcheck_enabled} && ! ${local.talosctl_health_check_command} -n '${local.talos_primary_node_private_ipv4}'; do 369 | ${local.talosctl_retry_snippet} 370 | done 371 | 372 | printf '%s\n' "Node $host upgraded successfully" 373 | break 374 | fi 375 | ${local.talosctl_retry_snippet} 376 | done 377 | done 378 | printf '%s\n' "Cluster Autoscaler Nodes upgraded successfully" 379 | else 380 | printf '%s\n' "Cluster not initialized, skipping Cluster Autoscaler Node upgrade" 381 | fi 382 | EOT 383 | 384 | environment = { 385 | TALOSCONFIG = nonsensitive(data.talos_client_configuration.this.talos_config) 386 | } 387 | } 388 | 389 | depends_on = [ 390 | data.external.talosctl_version_check, 391 | data.talos_machine_configuration.cluster_autoscaler, 392 | terraform_data.upgrade_control_plane, 393 | terraform_data.upgrade_worker 394 | ] 395 | } 396 | 397 | resource "terraform_data" "upgrade_kubernetes" { 398 | triggers_replace = [var.kubernetes_version] 399 | 400 | provisioner "local-exec" { 401 | when = create 402 | quiet = true 403 | command = <<-EOT 404 | set -eu 405 | 406 | talosconfig=$(mktemp) 407 | trap 'rm -f "$talosconfig"' EXIT HUP INT TERM QUIT PIPE 408 | printf '%s' "$TALOSCONFIG" > "$talosconfig" 409 | 410 | if ${local.cluster_initialized}; then 411 | printf '%s\n' "Start upgrading Kubernetes" 412 | 413 | retry=1 414 | while ${var.cluster_healthcheck_enabled} && ! ${local.talosctl_health_check_command} -n '${local.talos_primary_node_private_ipv4}'; do 415 | ${local.talosctl_retry_snippet} 416 | done 417 | 418 | retry=1 419 | while ! ${local.talosctl_upgrade_k8s_command}; do 420 | ${local.talosctl_retry_snippet} 421 | done 422 | sleep 5 423 | 424 | retry=1 425 | while ${var.cluster_healthcheck_enabled} && ! 
${local.talosctl_health_check_command} -n '${local.talos_primary_node_private_ipv4}'; do 426 | ${local.talosctl_retry_snippet} 427 | done 428 | 429 | printf '%s\n' "Kubernetes upgraded successfully" 430 | else 431 | printf '%s\n' "Cluster not initialized, skipping Kubernetes upgrade" 432 | fi 433 | EOT 434 | 435 | environment = { 436 | TALOSCONFIG = nonsensitive(data.talos_client_configuration.this.talos_config) 437 | } 438 | } 439 | 440 | depends_on = [ 441 | data.external.talosctl_version_check, 442 | terraform_data.upgrade_control_plane, 443 | terraform_data.upgrade_worker, 444 | terraform_data.upgrade_cluster_autoscaler 445 | ] 446 | } 447 | 448 | resource "talos_machine_configuration_apply" "control_plane" { 449 | for_each = { for control_plane in hcloud_server.control_plane : control_plane.name => control_plane } 450 | 451 | client_configuration = talos_machine_secrets.this.client_configuration 452 | machine_configuration_input = data.talos_machine_configuration.control_plane[each.key].machine_configuration 453 | endpoint = var.cluster_access == "private" ? tolist(each.value.network)[0].ip : coalesce(each.value.ipv4_address, each.value.ipv6_address) 454 | node = tolist(each.value.network)[0].ip 455 | apply_mode = var.talos_machine_configuration_apply_mode 456 | 457 | on_destroy = { 458 | graceful = var.cluster_graceful_destroy 459 | reset = true 460 | reboot = false 461 | } 462 | 463 | depends_on = [ 464 | hcloud_load_balancer_service.kube_api, 465 | terraform_data.upgrade_kubernetes 466 | ] 467 | } 468 | 469 | resource "talos_machine_configuration_apply" "worker" { 470 | for_each = { for worker in hcloud_server.worker : worker.name => worker } 471 | 472 | client_configuration = talos_machine_secrets.this.client_configuration 473 | machine_configuration_input = data.talos_machine_configuration.worker[each.key].machine_configuration 474 | endpoint = var.cluster_access == "private" ? tolist(each.value.network)[0].ip : coalesce(each.value.ipv4_address, each.value.ipv6_address) 475 | node = tolist(each.value.network)[0].ip 476 | apply_mode = var.talos_machine_configuration_apply_mode 477 | 478 | on_destroy = { 479 | graceful = var.cluster_graceful_destroy 480 | reset = true 481 | reboot = false 482 | } 483 | 484 | depends_on = [ 485 | terraform_data.upgrade_kubernetes, 486 | talos_machine_configuration_apply.control_plane 487 | ] 488 | } 489 | 490 | resource "terraform_data" "talos_machine_configuration_apply_cluster_autoscaler" { 491 | count = var.cluster_autoscaler_discovery_enabled ? 1 : 0 492 | 493 | triggers_replace = [ 494 | nonsensitive(sha1(jsonencode({ 495 | for k, r in data.talos_machine_configuration.cluster_autoscaler : 496 | k => r.machine_configuration 497 | }))) 498 | ] 499 | 500 | provisioner "local-exec" { 501 | when = create 502 | quiet = true 503 | command = <<-EOT 504 | set -eu 505 | 506 | talosconfig=$(mktemp) 507 | trap 'rm -f "$talosconfig"' EXIT HUP INT TERM QUIT PIPE 508 | printf '%s' "$TALOSCONFIG" > "$talosconfig" 509 | 510 | set -- ${join(" ", local.cluster_autoscaler_private_ipv4_list)} 511 | for host in "$@"; do 512 | ( 513 | set -eu 514 | 515 | machine_config=$(mktemp) 516 | trap 'rm -f "$machine_config"' EXIT HUP INT TERM QUIT PIPE 517 | 518 | printf '%s\n' "Applying machine configuration to Cluster Autoscaler Node: $host" 519 | envname="TALOS_MC_$(printf '%s' "$host" | tr . _)" 520 | eval "machine_config_value=\$${$envname}" 521 | printf '%s' "$machine_config_value" > "$machine_config" 522 | 523 | retry=1 524 | while ! 
${local.talosctl_apply_config_command}; do 525 | ${local.talosctl_retry_snippet} 526 | done 527 | ) 528 | done 529 | EOT 530 | 531 | environment = merge( 532 | { TALOSCONFIG = nonsensitive(data.talos_client_configuration.this.talos_config) }, 533 | { 534 | for server in local.talos_discovery_cluster_autoscaler : 535 | "TALOS_MC_${replace(server.private_ipv4_address, ".", "_")}" => 536 | nonsensitive(data.talos_machine_configuration.cluster_autoscaler[server.nodepool].machine_configuration) 537 | } 538 | ) 539 | } 540 | 541 | depends_on = [ 542 | data.external.talosctl_version_check, 543 | terraform_data.upgrade_kubernetes, 544 | talos_machine_configuration_apply.control_plane, 545 | talos_machine_configuration_apply.worker 546 | ] 547 | } 548 | 549 | resource "talos_machine_bootstrap" "this" { 550 | client_configuration = talos_machine_secrets.this.client_configuration 551 | endpoint = local.talos_primary_endpoint 552 | node = local.talos_primary_node_private_ipv4 553 | 554 | depends_on = [ 555 | talos_machine_configuration_apply.control_plane, 556 | talos_machine_configuration_apply.worker, 557 | terraform_data.talos_machine_configuration_apply_cluster_autoscaler 558 | ] 559 | } 560 | 561 | resource "terraform_data" "synchronize_manifests" { 562 | triggers_replace = [ 563 | nonsensitive(sha1(jsonencode(local.talos_inline_manifests))), 564 | var.talos_ccm_version, 565 | var.prometheus_operator_crds_version 566 | ] 567 | 568 | provisioner "local-exec" { 569 | when = create 570 | quiet = true 571 | command = <<-EOT 572 | set -eu 573 | 574 | talosconfig=$(mktemp) 575 | trap 'rm -f "$talosconfig"' EXIT HUP INT TERM QUIT PIPE 576 | printf '%s' "$TALOSCONFIG" > "$talosconfig" 577 | 578 | if ${local.cluster_initialized}; then 579 | printf '%s\n' "Start synchronizing manifests" 580 | retry=1 581 | while ${var.cluster_healthcheck_enabled} && ! ${local.talosctl_health_check_command} -n '${local.talos_primary_node_private_ipv4}'; do 582 | ${local.talosctl_retry_snippet} 583 | done 584 | 585 | retry=1 586 | while ! ${local.talosctl_upgrade_k8s_command}; do 587 | ${local.talosctl_retry_snippet} 588 | done 589 | sleep 5 590 | 591 | retry=1 592 | while ${var.cluster_healthcheck_enabled} && ! 
${local.talosctl_health_check_command} -n '${local.talos_primary_node_private_ipv4}'; do 593 | ${local.talosctl_retry_snippet} 594 | done 595 | 596 | printf '%s\n' "Manifests synchronized successfully" 597 | else 598 | printf '%s\n' "Cluster not initialized, skipping manifest synchronization" 599 | fi 600 | EOT 601 | 602 | environment = { 603 | TALOSCONFIG = nonsensitive(data.talos_client_configuration.this.talos_config) 604 | } 605 | } 606 | 607 | depends_on = [ 608 | data.external.talosctl_version_check, 609 | talos_machine_bootstrap.this, 610 | talos_machine_configuration_apply.control_plane, 611 | talos_machine_configuration_apply.worker, 612 | terraform_data.talos_machine_configuration_apply_cluster_autoscaler 613 | ] 614 | } 615 | 616 | resource "tls_private_key" "state" { 617 | algorithm = "RSA" 618 | rsa_bits = 2048 619 | } 620 | 621 | resource "tls_self_signed_cert" "state" { 622 | private_key_pem = tls_private_key.state.private_key_pem 623 | 624 | subject { common_name = var.cluster_name } 625 | allowed_uses = ["server_auth"] 626 | validity_period_hours = 876600 627 | } 628 | 629 | resource "hcloud_uploaded_certificate" "state" { 630 | name = "${var.cluster_name}-state" 631 | 632 | private_key = tls_private_key.state.private_key_pem 633 | certificate = tls_self_signed_cert.state.cert_pem 634 | 635 | labels = { 636 | cluster = var.cluster_name 637 | state = "initialized" 638 | } 639 | 640 | depends_on = [terraform_data.synchronize_manifests] 641 | } 642 | 643 | resource "terraform_data" "talos_access_data" { 644 | input = { 645 | kube_api_source = local.firewall_kube_api_sources 646 | talos_api_source = local.firewall_talos_api_sources 647 | talos_primary_node = local.talos_primary_node_private_ipv4 648 | endpoints = local.talos_endpoints 649 | control_plane_nodes = local.control_plane_private_ipv4_list 650 | worker_nodes = local.worker_private_ipv4_list 651 | kube_api_url = local.kube_api_url_external 652 | } 653 | } 654 | 655 | data "http" "kube_api_health" { 656 | count = var.cluster_healthcheck_enabled ? 1 : 0 657 | 658 | url = "${terraform_data.talos_access_data.output.kube_api_url}/version" 659 | insecure = true 660 | 661 | retry { 662 | attempts = 60 663 | min_delay_ms = 5000 664 | max_delay_ms = 5000 665 | } 666 | 667 | lifecycle { 668 | postcondition { 669 | condition = self.status_code == 401 670 | error_message = "Status code invalid" 671 | } 672 | } 673 | 674 | depends_on = [terraform_data.synchronize_manifests] 675 | } 676 | 677 | data "talos_cluster_health" "this" { 678 | count = var.cluster_healthcheck_enabled && (var.cluster_access == "private") ? 1 : 0 679 | 680 | client_configuration = talos_machine_secrets.this.client_configuration 681 | endpoints = terraform_data.talos_access_data.output.endpoints 682 | control_plane_nodes = terraform_data.talos_access_data.output.control_plane_nodes 683 | worker_nodes = terraform_data.talos_access_data.output.worker_nodes 684 | skip_kubernetes_checks = false 685 | 686 | depends_on = [data.http.kube_api_health] 687 | } 688 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
# Hcloud Kubernetes

Terraform Module to deploy Kubernetes on Hetzner Cloud!
50 | 51 | 52 | # 📔 Overview 53 | - [🌟 About the Project](#-about-the-project) 54 | - [🚀 Getting Started](#-getting-started) 55 | - [⚙️ Advanced Configuration](#%EF%B8%8F-advanced-configuration) 56 | - [♻️ Lifecycle](#%EF%B8%8F-lifecycle) 57 | - [👋 Community](#-community) 58 | - [❤️ Support this Project](#%EF%B8%8F-support-this-project) 59 | - [📎 Project Info](#-project-info) 60 | 61 | 62 | ## 🌟 About the Project 63 | Hcloud Kubernetes is a Terraform module for deploying a fully declarative, managed Kubernetes cluster on Hetzner Cloud. It utilizes Talos, a secure, immutable, and minimal operating system specifically designed for Kubernetes, featuring a streamlined architecture with only a handful of binaries and shared libraries. Just enough to run containerd and a small set of system services. 64 | 65 | This project is committed to production-grade configuration and lifecycle management, ensuring all components are set up for high availability. It includes a curated selection of widely used and officially recognized Kubernetes components. If you encounter any issues, suboptimal settings, or missing elements, please file an [issue](https://github.com/hcloud-k8s/terraform-hcloud-kubernetes/issues) to help us improve this project. 66 | 67 | > [!TIP] 68 | > If you don’t have a Hetzner account yet, you can use this [Hetzner Cloud Referral Link](https://hetzner.cloud/?ref=GMylKeDmqtsD) to claim a €20 credit and support this project at the same time. 69 | 70 | 71 | ### ✨ Features 72 | 73 | Provision a highly available and secure Kubernetes cluster on Hetzner Cloud, defined by these key features: 74 | 75 | * **Immutable Infrastructure:** Utilizes Talos Linux to provide a fully declarative, completely immutable Kubernetes cluster. 76 | * **Architecture Flexibility:** Supports deployment on both **AMD64** and **ARM64** instances with automated image synchronization. 77 | * **Maximized Uptime:** Delivers high availability across all control plane and worker components for consistent, reliable performance. 78 | * **Elastic Scaling:** Supports automatic scaling of both **nodes** and **pods** to effortlessly accommodate dynamic workloads. 79 | * **Quick Start Addons:** Optional, pre-integrated Ingress Controller and Cert Manager simplifies the rapid deployment of applications. 80 | * **Dual-Stack Networking:** Load Balancers offer native support for both **IPv4** and **IPv6** for modern, efficient traffic management. 81 | * **Isolated Network Fabric:** Ensures all internal cluster traffic is confined to an isolated, private Hetzner Cloud Network. 82 | * **Comprehensive Security:** Enforces a security-first design with perimeter firewalls and encryption applied to data both in transit and at rest. 83 | 84 | 85 | ### 📦 Components 86 | This project bundles essential Kubernetes components, preconfigured for seamless operation on Hetzner Cloud: 87 | - 88 | Easy 89 | Talos Cloud Controller Manager (CCM) 90 | 91 | Manages node resources by updating with cloud metadata, handling lifecycle deletions, and automatically approving node CSRs. 92 | - 93 | Easy 94 | Talos Backup 95 | 96 | Automates etcd snapshots and S3 storage for backup in Talos Linux-based Kubernetes clusters. 97 | - 98 | Easy 99 | Hcloud Cloud Controller Manager (CCM) 100 | 101 | Manages the integration of Kubernetes clusters with Hetzner Cloud services, ensuring the update of node data, private network traffic control, and load balancer setup. 
102 | - 103 | Easy 104 | Hcloud Container Storage Interface (CSI) 105 | 106 | Provides persistent storage for Kubernetes using Hetzner Cloud Volumes, supporting encryption and dynamic provisioning. 107 | - 108 | Easy 109 | Longhorn 110 | 111 | Distributed block storage for Kubernetes, providing high availability, snapshots, and automatic replica rebuilding for easy persistent volume management. 112 | - 113 | Easy 114 | Cilium Container Network Interface (CNI) 115 | 116 | A high performance CNI plugin that enhances and secures network connectivity and observability for container workloads through the use of eBPF technology in Linux kernels. 117 | - 118 | Easy 119 | Ingress NGINX Controller 120 | 121 | Provides a robust web routing and load balancing solution for Kubernetes, utilizing NGINX as a reverse proxy to manage traffic and enhance network performance. 122 | - 123 | Easy 124 | Cert Manager 125 | 126 | Automates the management of certificates in Kubernetes, handling the issuance and renewal of certificates from various sources like Let's Encrypt, and ensures certificates are valid and updated. 127 | - 128 | Easy 129 | Cluster Autoscaler 130 | 131 | Dynamically adjusts Kubernetes cluster size based on resource demands and node utilization, scaling nodes in or out to optimize cost and performance. 132 | - 133 | Easy 134 | Metrics Server 135 | 136 | Collects and provides container resource metrics for Kubernetes, enabling features like autoscaling by interacting with Horizontal and Vertical Pod Autoscalers. 137 | 138 | 139 | ### 🛡️ Security 140 | Talos Linux is a secure, minimal, and immutable OS for Kubernetes, removing SSH and shell access to reduce attack surfaces. Managed through a secure API with mTLS, Talos prevents configuration drift, enhancing both security and predictability. It follows [NIST](https://www.nist.gov/publications/application-container-security-guide) and [CIS](https://www.cisecurity.org/benchmark/kubernetes) hardening standards, operates in memory, and is built to support modern, production-grade Kubernetes environments. 141 | 142 | **Perimeter Security:** External access to cluster nodes is controlled and restricted using [Hetzner Cloud Firewall](https://docs.hetzner.com/cloud/firewalls/). 143 | 144 | **Network Policy:** Internal cluster traffic can be governed by Kubernetes Network Policies using [Cilium CNI](https://docs.cilium.io/en/stable/network/kubernetes/policy/). 145 | 146 | **Encryption in Transit:** Pod network traffic is transparently encrypted by Cilium using [WireGuard](https://docs.cilium.io/en/latest/security/network/encryption-wireguard/) by default, with optional support for [IPsec](https://docs.cilium.io/en/latest/security/network/encryption-ipsec/). 147 | 148 | **Encryption at Rest:** The [STATE](https://www.talos.dev/latest/learn-more/architecture/#file-system-partitions) and [EPHEMERAL](https://www.talos.dev/latest/learn-more/architecture/#file-system-partitions) partitions are encrypted by default using [Talos Disk Encryption](https://www.talos.dev/latest/talos-guides/configuration/disk-encryption/) with LUKS2. Each node is secured with an individual encryption key derived from its unique `nodeID`. 
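Both partition toggles are exposed as module variables (the same ones referenced in `talos_config.tf` above); a minimal sketch that sets them explicitly, matching the encrypted-by-default behavior described here:

```hcl
# Sketch: explicit toggles for Talos system disk encryption.
# Both partitions are described above as encrypted by default, so these values
# simply make that default explicit in kubernetes.tf.
talos_state_partition_encryption_enabled     = true
talos_ephemeral_partition_encryption_enabled = true
```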
149 | 150 | 151 | ## 🚀 Getting Started 152 | 153 | 154 | ### ✅ Prerequisites 155 | 156 | - [terraform](https://developer.hashicorp.com/terraform/install) or [tofu](https://opentofu.org/docs/intro/install/) to deploy the Cluster 157 | - [packer](https://developer.hashicorp.com/packer/install) to upload Talos Images 158 | - [jq](https://jqlang.org/download/) for internal API Communication 159 | - [talosctl](https://www.talos.dev/latest/talos-guides/install/talosctl) to control the Talos Cluster 160 | - [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl) to control Kubernetes (optional) 161 | 162 | > [!IMPORTANT] 163 | > Keep the CLI tools up to date. Ensure that `talosctl` matches your Talos version for compatibility, especially before a Talos upgrade. 164 | 165 | 166 | ### 🎯 Installation 167 | 168 | Create `kubernetes.tf` file with the module configuration: 169 | ```hcl 170 | module "kubernetes" { 171 | source = "hcloud-k8s/kubernetes/hcloud" 172 | version = "" 173 | 174 | cluster_name = "k8s" 175 | hcloud_token = "" 176 | 177 | # Export configs for talosctl and kubectl (optional) 178 | cluster_kubeconfig_path = "kubeconfig" 179 | cluster_talosconfig_path = "talosconfig" 180 | 181 | # Enable Ingress NGINX Controller and Cert Manager (optional) 182 | cert_manager_enabled = true 183 | ingress_nginx_enabled = true 184 | 185 | control_plane_nodepools = [ 186 | { name = "control", type = "cpx22", location = "fsn1", count = 3 } 187 | ] 188 | worker_nodepools = [ 189 | { name = "worker", type = "cpx22", location = "fsn1", count = 3 } 190 | ] 191 | } 192 | ``` 193 | > [!NOTE] 194 | > Each Control Plane node requires at least 4GB of memory and each Worker node at least 2GB. For High-Availability (HA), at least 3 Control Plane nodes and 3 Worker nodes are required. 195 | 196 | Initialize and deploy the cluster: 197 | 198 | **Terraform:** 199 | ```sh 200 | terraform init -upgrade 201 | terraform apply 202 | ``` 203 | **OpenTofu:** 204 | ```sh 205 | tofu init -upgrade 206 | tofu apply 207 | ``` 208 | 209 | 210 | ### 🔑 Cluster Access 211 | 212 | Set config file locations: 213 | ```sh 214 | export TALOSCONFIG=talosconfig 215 | export KUBECONFIG=kubeconfig 216 | ``` 217 | 218 | Display cluster nodes: 219 | ```sh 220 | talosctl get member 221 | kubectl get nodes -o wide 222 | ``` 223 | 224 | Display all pods: 225 | ```sh 226 | kubectl get pods -A 227 | ``` 228 | 229 | For more detailed information and examples, please visit: 230 | - [Talos CLI Documentation](https://www.talos.dev/latest/reference/cli/) 231 | - [Kubernetes CLI Documentation](https://kubernetes.io/docs/reference/kubectl/introduction/) 232 | 233 | ### 💥 Teardown 234 | To destroy the cluster, first disable the delete protection by setting: 235 | ```hcl 236 | cluster_delete_protection = false 237 | ``` 238 | 239 | Apply this change before proceeding. Once the delete protection is disabled, you can teardown the cluster. 
240 | 241 | **Terraform:** 242 | ```sh 243 | terraform state rm 'module.kubernetes.talos_machine_configuration_apply.worker' 244 | terraform state rm 'module.kubernetes.talos_machine_configuration_apply.control_plane' 245 | terraform state rm 'module.kubernetes.talos_machine_secrets.this' 246 | terraform destroy 247 | ``` 248 | **OpenTofu:** 249 | ```sh 250 | tofu state rm 'module.kubernetes.talos_machine_configuration_apply.worker' 251 | tofu state rm 'module.kubernetes.talos_machine_configuration_apply.control_plane' 252 | tofu state rm 'module.kubernetes.talos_machine_secrets.this' 253 | tofu destroy 254 | ``` 255 | 256 | 257 | ## ⚙️ Advanced Configuration 258 | 259 | 260 |
261 | Cluster Access 262 | 263 | #### Public Cluster Access 264 | By default, the cluster is accessible over the public internet. The firewall is automatically configured to use the IPv4 address and /64 IPv6 CIDR of the machine running this module. To disable this automatic configuration, set the following variables to `false`: 265 | 266 | ```hcl 267 | firewall_use_current_ipv4 = false 268 | firewall_use_current_ipv6 = false 269 | ``` 270 | 271 | To manually specify source networks for the Talos API and Kube API, configure the `firewall_api_source` variable as follows: 272 | ```hcl 273 | firewall_api_source = [ 274 | "1.2.3.0/32", 275 | "1:2:3::/64" 276 | ] 277 | ``` 278 | This allows explicit control over which networks can access your APIs, overriding the default behavior when set. 279 | 280 | #### Internal Cluster Access 281 | If your internal network is routed and accessible, you can directly access the cluster using internal IPs by setting: 282 | ```hcl 283 | cluster_access = "private" 284 | ``` 285 | 286 | For integrating Talos nodes with an internal network, configure a default route (`0.0.0.0/0`) in the Hetzner Network to point to your router or gateway. Additionally, add specific routes on the Talos nodes to encompass your entire network CIDR: 287 | ```hcl 288 | talos_extra_routes = ["10.0.0.0/8"] 289 | 290 | # Optionally, disable NAT for your globally routed CIDR 291 | network_native_routing_ipv4_cidr = "10.0.0.0/8" 292 | 293 | # Optionally, use an existing Network 294 | hcloud_network_id = 123456789 295 | ``` 296 | This setup ensures that the Talos nodes can route traffic appropriately across your internal network. 297 | 298 | 299 | #### Access to Kubernetes API 300 | 301 | Optionally, a hostname can be configured to direct access to the Kubernetes API through a node IP, load balancer, or Virtual IP (VIP): 302 | ```hcl 303 | kube_api_hostname = "kube-api.example.com" 304 | ``` 305 | 306 | ##### Access from Public Internet 307 | For accessing the Kubernetes API from the public internet, choose one of the following options based on your needs: 308 | 1. **Use single Control Plane IP (default):**
309 | By default, the IP address of a single Control Plane node is used to access the Kube API. 310 | 2. **Use a Load Balancer:**
311 | Deploy a load balancer to manage API traffic, enhancing availability and load distribution. 312 | ```hcl 313 | kube_api_load_balancer_enabled = true 314 | ``` 315 | 3. **Use a Virtual IP (Floating IP):**
316 | A Floating IP is configured to automatically move between control plane nodes in case of an outage, ensuring continuous access to the Kubernetes API. 317 | ```hcl 318 | control_plane_public_vip_ipv4_enabled = true 319 | 320 | # Optionally, specify an existing Floating IP 321 | control_plane_public_vip_ipv4_id = 123456789 322 | ``` 323 | 324 | ##### Access from Internal Network 325 | When accessing the Kubernetes API via an internal network, an internal Virtual IP (Alias IP) is utilized by default to route API requests within the network. This feature can be disabled with the following configuration: 326 | ```hcl 327 | control_plane_private_vip_ipv4_enabled = false 328 | ``` 329 | 330 | To enhance internal availability, a load balancer can be used: 331 | ```hcl 332 | kube_api_load_balancer_enabled = true 333 | ``` 334 | 335 | This setup ensures secure and flexible access to the Kubernetes API, accommodating different networking environments. 336 |
337 | 338 | 339 |
340 | Cluster Autoscaler 341 | The Cluster Autoscaler dynamically adjusts the number of nodes in a Kubernetes cluster based on the demand, ensuring that there are enough nodes to run all pods and no unneeded nodes when the workload decreases. 342 | 343 | Example `kubernetes.tf` snippet: 344 | ```hcl 345 | # Configuration for cluster autoscaler node pools 346 | cluster_autoscaler_nodepools = [ 347 | { 348 | name = "autoscaler" 349 | type = "cpx22" 350 | location = "fsn1" 351 | min = 0 352 | max = 6 353 | labels = { "autoscaler-node" = "true" } 354 | taints = [ "autoscaler-node=true:NoExecute" ] 355 | } 356 | ] 357 | ``` 358 | 359 | Optionally, pass additional [Helm values](https://github.com/kubernetes/autoscaler/blob/master/charts/cluster-autoscaler/values.yaml) to the cluster autoscaler configuration: 360 | ```hcl 361 | cluster_autoscaler_helm_values = { 362 | extraArgs = { 363 | enforce-node-group-min-size = true 364 | scale-down-delay-after-add = "45m" 365 | scale-down-delay-after-delete = "4m" 366 | scale-down-unneeded-time = "5m" 367 | } 368 | } 369 | ``` 370 | 371 | ##### Talos Upgrades and Configuration Changes 372 | Cluster Autoscaler does not support upgrading nodes or changing their configuration, as its primary purpose is to manage short-lived nodes that handle load peaks. If you require long-lived autoscaled nodes, you can upgrade them manually using `talosctl` or use this Terraform module, which supports discovery of autoscaled nodes and manages their upgrades and configuration changes. 373 | 374 | To enable this feature, add the following to your configuration: 375 | ```hcl 376 | cluster_autoscaler_discovery_enabled = true 377 | ``` 378 | 379 | Please note that errors may occur if a node pool has been scaled down recently, as Talos caches absent nodes for up to [30 minutes](https://www.talos.dev/latest/introduction/troubleshooting/#removed-members-are-still-present). You can pause automatic scaling by stopping the Cluster Autoscaler pods: 380 | ```sh 381 | kubectl -n kube-system scale deployment cluster-autoscaler-hetzner-cluster-autoscaler --replicas=0 382 | ``` 383 | 384 |
385 | 386 | 387 | 388 |
389 | Cilium Advanced Configuration 390 | 391 | #### Cilium Transparent Encryption 392 | 393 | This module enables the [Cilium Transparent Encryption](https://cilium.io/use-cases/transparent-encryption/) feature by default. 394 | 395 | All pod network traffic is encrypted using WireGuard (default) or IPSec, including automatic key rotation and efficient in-kernel encryption, covering all traffic types. 396 | 397 | 💡 Although WireGuard is the default option, Hetzner Cloud VMs support the AES-NI instruction set, making IPSec encryption more CPU-efficient compared to WireGuard. Consider enabling IPSec for CPU savings through hardware acceleration. 398 | 399 | IPSec mode supports RFC4106 AES-GCM encryption with 128-, 192-, and 256-bit key sizes. 400 | 401 | 402 | **⚠️ IPSec encryption has the following limitations:** 403 | 404 | - No transparent encryption when chaining Cilium with other CNI plugins 405 | - Host Policies not supported with IPSec 406 | - Incompatible with BPF Host Routing (automatically disabled on switch) 407 | - IPv6-only clusters not supported 408 | - Maximum 65,535 nodes per cluster/clustermesh 409 | - Single CPU core limitation per IPSec tunnel may affect high-throughput scenarios 410 | 411 | *Source: [Cilium Documentation](https://docs.cilium.io/en/stable/security/network/encryption-ipsec/#limitations)* 412 | 413 | Example `kubernetes.tf` configuration: 414 | 415 | ```hcl 416 | cilium_encryption_enabled = true # Default true 417 | cilium_encryption_type = "wireguard" # wireguard (Default) | ipsec 418 | cilium_ipsec_algorithm = "rfc4106(gcm(aes))" # IPSec AES key algorithm (Default rfc4106(gcm(aes))) 419 | cilium_ipsec_key_size = 256 # IPSec AES key size (Default 256) 420 | cilium_ipsec_key_id = 1 # IPSec key ID (Default 1) 421 | ``` 422 | 423 | ##### IPSec Key Rotation 424 | 425 | Keys automatically rotate when `cilium_ipsec_key_id` is incremented (1-15 range, resets to 1 after 15). 426 | 427 |
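A rotation can therefore be triggered by bumping the key ID in `kubernetes.tf` and re-applying; a sketch (the value is illustrative):

```hcl
# Sketch: increment the key ID (previously 1) to rotate the IPsec key.
# Subsequent rotations keep incrementing and wrap back to 1 after 15.
cilium_ipsec_key_id = 2
```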
428 | 429 | 430 |
431 | Egress Gateway 432 | 433 | Cilium offers an Egress Gateway to ensure network compatibility with legacy systems and firewalls requiring fixed IPs. The use of Cilium Egress Gateway does not provide high availability and increases latency due to extra network hops and tunneling. Consider this configuration only as a last resort. 434 | 435 | Example `kubernetes.tf` snippet: 436 | ```hcl 437 | # Enable Cilium Egress Gateway 438 | cilium_egress_gateway_enabled = true 439 | 440 | # Define worker nodepools including an egress-specific node pool 441 | worker_nodepools = [ 442 | # ... (other node pool configurations) 443 | { 444 | name = "egress" 445 | type = "cpx22" 446 | location = "fsn1" 447 | labels = { "egress-node" = "true" } 448 | taints = [ "egress-node=true:NoSchedule" ] 449 | } 450 | ] 451 | ``` 452 | 453 | Example Egress Gateway Policy: 454 | ```yml 455 | apiVersion: cilium.io/v2 456 | kind: CiliumEgressGatewayPolicy 457 | metadata: 458 | name: sample-egress-policy 459 | spec: 460 | selectors: 461 | - podSelector: 462 | matchLabels: 463 | io.kubernetes.pod.namespace: sample-namespace 464 | app: sample-app 465 | 466 | destinationCIDRs: 467 | - "0.0.0.0/0" 468 | 469 | egressGateway: 470 | nodeSelector: 471 | matchLabels: 472 | egress-node: "true" 473 | ``` 474 | 475 | Please visit the Cilium [documentation](https://docs.cilium.io/en/stable/network/egress-gateway) for more details. 476 |
477 | 478 | 479 |
480 | Firewall Configuration 481 | By default, a firewall is configured that can be extended with custom rules. If no egress rules are configured, outbound traffic remains unrestricted. However, inbound traffic is always restricted to mitigate the risk of exposing Talos nodes to the public internet, which could pose a serious security vulnerability. 482 | 483 | Each rule is defined with the following properties: 484 | - `description`: A brief description of the rule. 485 | - `direction`: The direction of traffic (`in` for inbound, `out` for outbound). 486 | - `source_ips`: A list of source IP addresses for inbound rules. 487 | - `destination_ips`: A list of destination IP addresses for outbound rules. 488 | - `protocol`: The protocol used (valid options: `tcp`, `udp`, `icmp`, `gre`, `esp`). 489 | - `port`: The port number (required for `tcp` and `udp` protocols, must not be specified for `icmp`, `gre`, and `esp`). 490 | 491 | Example `kubernetes.tf` snippet: 492 | ```hcl 493 | firewall_extra_rules = [ 494 | { 495 | description = "Custom UDP Rule" 496 | direction = "in" 497 | source_ips = ["0.0.0.0/0", "::/0"] 498 | protocol = "udp" 499 | port = "12345" 500 | }, 501 | { 502 | description = "Custom TCP Rule" 503 | direction = "in" 504 | source_ips = ["1.2.3.4", "1:2:3:4::"] 505 | protocol = "tcp" 506 | port = "8080-9000" 507 | }, 508 | { 509 | description = "Allow ICMP" 510 | direction = "in" 511 | source_ips = ["0.0.0.0/0", "::/0"] 512 | protocol = "icmp" 513 | } 514 | ] 515 | ``` 516 | 517 | For access to Talos and the Kubernetes API, please refer to the [Cluster Access](#public-cluster-access) configuration section. 518 | 519 |
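Returning to the rule schema above: outbound rules (direction `out`) follow the same structure but use `destination_ips` instead of `source_ips`. A sketch of an egress rule (illustrative only; as noted above, outbound traffic is only unrestricted while no egress rules are configured):

```hcl
firewall_extra_rules = [
  {
    description     = "Allow outbound DNS"
    direction       = "out"
    destination_ips = ["0.0.0.0/0", "::/0"]
    protocol        = "udp"
    port            = "53"
  }
]
```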
520 | 521 | 522 |
523 | Ingress Load Balancer 524 | 525 | The ingress controller uses a default load balancer service to manage external traffic. For geo-redundancy and high availability, `ingress_load_balancer_pools` can be configured as an alternative, replacing the default load balancer with the specified pool of load balancers. 526 | 527 | ##### Configuring Load Balancer Pools 528 | To replace the default load balancer, use `ingress_load_balancer_pools` in the Terraform configuration. This setup ensures high availability and geo-redundancy by distributing traffic from various locations across all targets in all regions. 529 | 530 | Example `kubernetes.tf` configuration: 531 | ```hcl 532 | ingress_load_balancer_pools = [ 533 | { 534 | name = "lb-nbg" 535 | location = "nbg1" 536 | type = "lb11" 537 | }, 538 | { 539 | name = "lb-fsn" 540 | location = "fsn1" 541 | type = "lb11" 542 | } 543 | ] 544 | ``` 545 | 546 | ##### Local Traffic Optimization 547 | Configuring local traffic handling enhances network efficiency by reducing latency. Processing traffic closer to its source eliminates unnecessary routing delays, ensuring consistent performance for low-latency or region-sensitive applications. 548 | 549 | Example `kubernetes.tf` configuration: 550 | ```hcl 551 | ingress_nginx_kind = "DaemonSet" 552 | ingress_nginx_service_external_traffic_policy = "Local" 553 | 554 | ingress_load_balancer_pools = [ 555 | { 556 | name = "regional-lb-nbg" 557 | location = "nbg1" 558 | local_traffic = true 559 | }, 560 | { 561 | name = "regional-lb-fsn" 562 | location = "fsn1" 563 | local_traffic = true 564 | } 565 | ] 566 | ``` 567 | 568 | Key settings in this configuration: 569 | - `local_traffic`: Limits load balancer targets to nodes in the same geographic location as the load balancer, reducing data travel distances and keeping traffic within the region. 570 | - `ingress_nginx_service_external_traffic_policy` set to `Local`: Ensures external traffic is handled directly on the local node, avoiding extra network hops. 571 | - `ingress_nginx_kind` set to `DaemonSet`: Deploys an ingress controller instance on every node, enabling requests to be handled locally for faster response times. 572 | 573 | Topology-aware routing in ingress-nginx can optionally be enabled by setting the `ingress_nginx_topology_aware_routing` variable to `true`. This functionality routes traffic to the nearest upstream endpoints, enhancing efficiency for supported services. Note that this feature is only applicable to services that support topology-aware routing. For more information, refer to the [Kubernetes documentation](https://kubernetes.io/docs/concepts/services-networking/topology-aware-routing/). 574 | 575 |
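Enabling topology-aware routing is a single switch; a minimal sketch using the `ingress_nginx_topology_aware_routing` variable mentioned in the paragraph above:

```hcl
# Prefer topologically close endpoints for Services that support topology-aware routing
ingress_nginx_topology_aware_routing = true
```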
576 | 577 | 578 |
579 | Network Segmentation 580 | 581 | By default, this module calculates optimal subnets based on the provided network CIDR (`network_ipv4_cidr`). The network is segmented automatically as follows: 582 | 583 | - **1st Quarter**: Reserved for other uses such as classic VMs. 584 | - **2nd Quarter**: 585 | - **1st Half**: Allocated for Node Subnets (`network_node_ipv4_cidr`) 586 | - **2nd Half**: Allocated for Service IPs (`network_service_ipv4_cidr`) 587 | - **3rd and 4th Quarters**: 588 | - **Full Span**: Allocated for Pod Subnets (`network_pod_ipv4_cidr`) 589 | 590 | Each Kubernetes node requires a `/24` subnet within `network_pod_ipv4_cidr`. To support this configuration, the optimal node subnet size (`network_node_ipv4_subnet_mask_size`) is calculated using the formula:
591 | 32 - (24 - subnet_mask_size(`network_pod_ipv4_cidr`)). 592 | 593 | With the default `10.0.0.0/16` network CIDR (`network_ipv4_cidr`), the following values are calculated: 594 | - **Node Subnet Size**: `/25` (Max. 128 Nodes per Subnet) 595 | - **Node Subnets**: `10.0.64.0/19` (Max. 64 Subnets, each with `/25`) 596 | - **Service IPs**: `10.0.96.0/19` (Max. 8192 Services) 597 | - **Pod Subnet Size**: `/24` (Max. 256 Pods per Node) 598 | - **Pod Subnets**: `10.0.128.0/17` (Max. 128 Nodes, each with `/24`) 599 | 600 | Please consider the following Hetzner Cloud limits: 601 | - Up to **100 servers** can be attached to a network. 602 | - Up to **100 routes** can be created per network. 603 | - Up to **50 subnets** can be created per network. 604 | - A project can have up to **50 placement groups**. 605 | 606 | A `/16` Network CIDR is sufficient to fully utilize Hetzner Cloud's scaling capabilities. It supports: 607 | - Up to 100 nodes, each with its own `/24` Pod subnet route. 608 | - Configuration of up to 50 nodepools, one nodepool per subnet, each with at least one placement group. 609 | 610 | 611 | Here is a table with more example calculations: 612 | | Network CIDR | Node Subnet Size | Node Subnets | Service IPs | Pod Subnets | 613 | | --------------- | ---------------- | ----------------- | ------------------- | ------------------- | 614 | | **10.0.0.0/16** | /25 (128 IPs) | 10.0.64.0/19 (64) | 10.0.96.0/19 (8192) | 10.0.128.0/17 (128) | 615 | | **10.0.0.0/17** | /26 (64 IPs) | 10.0.32.0/20 (64) | 10.0.48.0/20 (4096) | 10.0.64.0/18 (64) | 616 | | **10.0.0.0/18** | /27 (32 IPs) | 10.0.16.0/21 (64) | 10.0.24.0/21 (2048) | 10.0.32.0/19 (32) | 617 | | **10.0.0.0/19** | /28 (16 IPs) | 10.0.8.0/22 (64) | 10.0.12.0/22 (1024) | 10.0.16.0/20 (16) | 618 | | **10.0.0.0/20** | /29 (8 IPs) | 10.0.4.0/23 (64) | 10.0.6.0/23 (512) | 10.0.8.0/21 (8) | 619 | | **10.0.0.0/21** | /30 (4 IPs) | 10.0.2.0/24 (64) | 10.0.3.0/24 (256) | 10.0.4.0/22 (4) | 620 | 621 |
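To illustrate how these values are derived, here is a small, self-contained sketch using Terraform's built-in `cidrsubnet` function. It mirrors the quartering scheme described above; the local names and output are only for this example and are not module variables:

```hcl
locals {
  network_ipv4_cidr = "10.0.0.0/16"

  # 2nd quarter of the network, split in half for nodes and services
  second_quarter = cidrsubnet(local.network_ipv4_cidr, 2, 1) # 10.0.64.0/18
  node_cidr      = cidrsubnet(local.second_quarter, 1, 0)    # 10.0.64.0/19  (node subnets)
  service_cidr   = cidrsubnet(local.second_quarter, 1, 1)    # 10.0.96.0/19  (service IPs)

  # 3rd and 4th quarters together form the pod range
  pod_cidr = cidrsubnet(local.network_ipv4_cidr, 1, 1) # 10.0.128.0/17 (pod subnets)

  # Node subnet mask size: 32 - (24 - mask of pod CIDR) = 32 - (24 - 17) = 25
  node_subnet_mask_size = 32 - (24 - tonumber(split("/", local.pod_cidr)[1]))
}

output "calculated_subnets" {
  value = {
    nodes                 = local.node_cidr
    services              = local.service_cidr
    pods                  = local.pod_cidr
    node_subnet_mask_size = local.node_subnet_mask_size
  }
}
```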
622 | 623 | 624 | 625 |
626 | Storage Configuration 627 | 628 | #### Hetzner Cloud CSI 629 | 630 | The Hetzner Cloud Container Storage Interface (CSI) driver can be flexibly configured through the `hcloud_csi_storage_classes` variable. You can define multiple storage classes for your cluster: 631 | 632 | * **name:** The name of the StorageClass (string, required). 633 | * **encrypted:** Enable LUKS encryption for volumes (bool, required). 634 | * **defaultStorageClass:** Set this class as the default (optional, bool, defaults to `false`). 635 | * **reclaimPolicy:** The Kubernetes reclaim policy (`Delete` or `Retain`, optional, defaults to `Delete`). 636 | * **extraParameters:** Additional parameters for the StorageClass (optional map). 637 | 638 | **Example:** 639 | 640 | ```hcl 641 | hcloud_csi_storage_classes = [ 642 | { 643 | name = "hcloud-volumes" 644 | encrypted = false 645 | defaultStorageClass = true 646 | }, 647 | { 648 | name = "hcloud-volumes-encrypted-xfs" 649 | encrypted = true 650 | reclaimPolicy = "Retain" 651 | extraParameters = { 652 | "csi.storage.k8s.io/fstype" = "xfs" 653 | "fsFormatOption" = "-i nrext64=1" 654 | } 655 | } 656 | ] 657 | ``` 658 | 659 | **Other settings:** 660 | 661 | * **hcloud\_csi\_encryption\_passphrase:** 662 | Optionally provide a custom encryption passphrase for LUKS-encrypted storage classes. 663 | 664 | ```hcl 665 | hcloud_csi_encryption_passphrase = "" 666 | ``` 667 | 668 | **Storage Class Immutability:** 669 | StorageClasses created by the Hcloud CSI driver are immutable. To change parameters after creation, you must either edit the StorageClass directly with `kubectl`, or delete it from both Terraform state and Kubernetes, then let this module recreate it. 670 | 671 | For more details, see the [HCloud CSI Driver documentation](https://github.com/hetznercloud/csi-driver/tree/main/docs/kubernetes). 672 | 673 | 674 | #### Longhorn 675 | 676 | Longhorn is a lightweight, reliable, and easy-to-use distributed block storage system for Kubernetes. 677 | It is fully independent from the Hetzner Cloud CSI driver. 678 | 679 | You can enable Longhorn and configure it as the default StorageClass for your cluster via module variables: 680 | 681 | * **Enable Longhorn:** 682 | Set `longhorn_enabled` to `true` to deploy Longhorn in your cluster. 683 | 684 | * **Default StorageClass:** 685 | Set `longhorn_default_storage_class` to `true` if you want Longhorn to be the default StorageClass. 686 | 687 | **Example:** 688 | 689 | ```hcl 690 | longhorn_enabled = true 691 | longhorn_default_storage_class = true 692 | ``` 693 | 694 | For more information about Longhorn, see the [Longhorn documentation](https://longhorn.io/docs/). 695 | 696 |
697 | 698 | 699 | 700 |
701 | Talos Backup 702 | 703 | This module natively supports Hcloud Object Storage. Below is an example of how to configure backups with [MinIO Client](https://github.com/minio/mc?tab=readme-ov-file#homebrew) (`mc`) and Hcloud Object Storage. While it's possible to create the bucket through the [Hcloud Console](https://console.hetzner.cloud), this method does not allow configuring automatic retention policies. 704 | 705 | Create an alias for the endpoint using the following command: 706 | ```sh 707 | mc alias set <alias> \ 708 | https://<location>.your-objectstorage.com \ 709 | <access-key> <secret-key> \ 710 | --api "s3v4" \ 711 | --path "off" 712 | ``` 713 | 714 | Create a bucket with automatic retention policies to protect your backups: 715 | ```sh 716 | mc mb --with-lock --region <location> <alias>/<bucket> 717 | mc retention set GOVERNANCE 14d --default <alias>/<bucket> 718 | ``` 719 | 720 | Configure your `kubernetes.tf` file: 721 | ```hcl 722 | talos_backup_s3_hcloud_url = "https://<bucket>.<location>.your-objectstorage.com" 723 | talos_backup_s3_access_key = "" 724 | talos_backup_s3_secret_key = "" 725 | 726 | # Optional: AGE X25519 Public Key for encryption 727 | talos_backup_age_x25519_public_key = "" 728 | 729 | # Optional: Change schedule (cron syntax) 730 | talos_backup_schedule = "0 * * * *" 731 | ``` 732 | 733 | For users of other object storage providers, configure `kubernetes.tf` as follows: 734 | ```hcl 735 | talos_backup_s3_region = "" 736 | talos_backup_s3_endpoint = "" 737 | talos_backup_s3_bucket = "" 738 | talos_backup_s3_prefix = "" 739 | 740 | # Use path-style URLs (set true if required by your provider) 741 | talos_backup_s3_path_style = true 742 | 743 | # Access credentials 744 | talos_backup_s3_access_key = "" 745 | talos_backup_s3_secret_key = "" 746 | 747 | # Optional: AGE X25519 Public Key for encryption 748 | talos_backup_age_x25519_public_key = "" 749 | 750 | # Optional: Change schedule (cron syntax) 751 | talos_backup_schedule = "0 * * * *" 752 | ``` 753 | 754 | To recover from a snapshot, please refer to the Talos Disaster Recovery section in the [Documentation](https://www.talos.dev/latest/advanced/disaster-recovery/#recovery). 755 |
756 | 757 | 758 | 759 |
760 | Talos Bootstrap Manifests 761 | 762 | ### Component Deployment Control 763 | 764 | During cluster provisioning, each component manifest is applied using Talos’s bootstrap manifests feature. Components are upgraded as part of the normal lifecycle of this module. 765 | You can enable or disable component deployment using the variables below: 766 | 767 | ```hcl 768 | # Core Components (enabled by default) 769 | cilium_enabled = true 770 | talos_backup_s3_enabled = true 771 | talos_ccm_enabled = true 772 | talos_coredns_enabled = true 773 | hcloud_ccm_enabled = true 774 | hcloud_csi_enabled = true 775 | metrics_server_enabled = true 776 | prometheus_operator_crds_enabled = true 777 | 778 | # Additional Components (disabled by default) 779 | cert_manager_enabled = true 780 | ingress_nginx_enabled = true 781 | longhorn_enabled = true 782 | 783 | # Enable etcd backup by defining one of these variables: 784 | talos_backup_s3_endpoint = "https://..." 785 | talos_backup_s3_hcloud_url = "https://<bucket>.<location>.your-objectstorage.com" 786 | 787 | # Cluster Autoscaler: Enabled when node pools are defined 788 | cluster_autoscaler_nodepools = [ 789 | { 790 | name = "autoscaler" 791 | type = "cpx22" 792 | location = "fsn1" 793 | min = 0 794 | max = 6 795 | labels = { 796 | "autoscaler-node" = "true" 797 | } 798 | taints = [ 799 | "autoscaler-node=true:NoExecute" 800 | ] 801 | } 802 | ] 803 | ``` 804 | 805 | > **Note:** Disabling a component **does not delete** its existing resources. 806 | > This behavior is documented in the [Talos documentation](https://www.talos.dev/latest/kubernetes-guides/upgrading-kubernetes/#automated-kubernetes-upgrade). 807 | > After disabling a component, you must remove its deployed resources manually. 808 | 809 | --- 810 | 811 | ### Adding Additional Manifests 812 | 813 | Besides the default components, you can add extra bootstrap manifests as follows: 814 | 815 | ```hcl 816 | # Extra remote manifests (URLs fetched at apply time) 817 | talos_extra_remote_manifests = [ 818 | "https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.2.0/standard-install.yaml" 819 | ] 820 | 821 | # Extra inline manifests (defined directly) 822 | talos_extra_inline_manifests = [ 823 | { 824 | name = "test-manifest" 825 | contents = <<-EOF 826 | --- 827 | apiVersion: v1 828 | kind: Secret 829 | metadata: 830 | name: test-secret 831 | data: 832 | secret: dGVzdA== 833 | EOF 834 | } 835 | ] 836 | ``` 837 | 838 |
839 | 840 | 841 | 842 |
843 | Talos Discovery Service 844 | 845 | Talos supports two node discovery mechanisms: 846 | 847 | - **Discovery Service Registry** (default): A public, external registry operated by Sidero Labs that works even when Kubernetes is unavailable. Nodes must have outbound access to TCP port 443 to communicate with it. 848 | - **Kubernetes Registry**: Relies on Kubernetes Node metadata stored in etcd. 849 | 850 | This module uses the discovery service to perform additional health checks during Talos upgrades, Kubernetes upgrades, and Kubernetes manifest synchronization. If no discovery mechanism is enabled, these additional checks will be skipped. 851 | 852 | > ⚠️ **Important:** Kubernetes-based discovery is **incompatible by default** with Kubernetes **v1.32+** due to the `AuthorizeNodeWithSelectors` feature gate, which restricts access to Node metadata. This can cause broken discovery behavior, such as failing or incomplete results from `talosctl health` or `talosctl get members`. 853 | 854 | ##### Example Configuration 855 | 856 | ```hcl 857 | # Disable Kubernetes-based discovery (incompatible by default with Kubernetes >= 1.32) 858 | talos_kubernetes_discovery_service_enabled = false 859 | 860 | # Enable the external Sidero Labs discovery service (default) 861 | talos_siderolabs_discovery_service_enabled = true 862 | ``` 863 | 864 | For more details, refer to the [official Talos discovery guide](https://www.talos.dev/latest/talos-guides/discovery/). 865 |
866 | 867 | 868 |
869 | Kubernetes RBAC 870 | 871 | This module allows you to create custom Kubernetes RBAC (Role-Based Access Control) roles and cluster roles that define specific permissions for users and groups. RBAC controls what actions users can perform on which Kubernetes resources. 872 | These custom roles can be used independently or combined with OIDC group mappings to automatically assign permissions based on user group membership from your identity provider (a combined example follows the role definitions below). 873 | 874 | #### Example Configuration 875 | 876 | ##### Cluster Roles (`rbac_cluster_roles`) 877 | 878 | ```hcl 879 | rbac_cluster_roles = [ 880 | { 881 | name = "my-cluster-role" # ClusterRole name 882 | rules = [ 883 | { 884 | api_groups = [""] # Core API group (empty string for core resources) 885 | resources = ["nodes"] # Cluster-wide resources this role can access 886 | verbs = ["get", "list", "watch"] # Actions allowed on these resources 887 | } 888 | ] 889 | } 890 | ] 891 | ``` 892 | 893 | ##### Namespaced Roles (`rbac_roles`) 894 | 895 | ```hcl 896 | rbac_roles = [ 897 | { 898 | name = "my-role" # Role name 899 | namespace = "target-namespace" # Namespace where the role will be created 900 | rules = [ 901 | { 902 | api_groups = [""] # Core API group (empty string for core resources) 903 | resources = ["pods", "services"] # Resources this role can access 904 | verbs = ["get", "list", "watch"] # Actions allowed on these resources 905 | } 906 | ] 907 | } 908 | ] 909 | ``` 910 | 911 |
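As a hedged sketch of combining the two, the custom roles defined above could be granted to an identity-provider group via the `oidc_group_mappings` variable described in the next section (the group name here is illustrative):

```hcl
oidc_group_mappings = [
  {
    group         = "platform-team"     # group name from your identity provider (illustrative)
    cluster_roles = ["my-cluster-role"] # custom ClusterRole defined above
    roles = [
      {
        name      = "my-role"           # custom namespaced Role defined above
        namespace = "target-namespace"
      }
    ]
  }
]
```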
912 | 913 | 914 |
915 | OIDC Cluster Authentication 916 | 917 | The Kubernetes API server supports OIDC (OpenID Connect) authentication, allowing integration with external identity providers like Keycloak, Auth0, Authentik, Zitadel, etc. 918 | When enabled, users can authenticate using their existing organizational credentials instead of managing separate Kubernetes certificates or tokens. 919 | 920 | OIDC authentication works by validating JWT tokens issued by your identity provider, extracting user information and group memberships, and mapping them to Kubernetes RBAC roles. 921 | 922 | #### Example Configuration 923 | 924 | ```hcl 925 | # OIDC Configuration 926 | oidc_enabled = true # Enable OIDC authentication 927 | oidc_issuer_url = "https://your-oidc-provider.com" # Your OIDC provider issuer URL 928 | oidc_client_id = "your-client-id" # Client ID registered in your OIDC provider 929 | oidc_username_claim = "preferred_username" # OIDC JWT claim to extract username from 930 | oidc_groups_claim = "groups" # OIDC JWT claim to extract user groups from 931 | oidc_groups_prefix = "oidc:" # Prefix added to group names in K8s to avoid conflicts 932 | 933 | # Map OIDC groups to Kubernetes roles and cluster roles 934 | oidc_group_mappings = [ # List of OIDC group mappings 935 | { 936 | group = "cluster-admins-group" # OIDC provider group name 937 | cluster_roles = ["cluster-admin"] # Grant cluster-admin access 938 | }, 939 | { 940 | group = "developers-group" # OIDC provider group name 941 | cluster_roles = ["view"] # Grant cluster-wide view access 942 | roles = [ # Grant namespace scoped roles 943 | { 944 | name = "developer-role" # Custom role name 945 | namespace = "development" # Namespace where role applies 946 | } 947 | ] 948 | } 949 | ] 950 | ``` 951 | 952 | #### Client Configuration with kubelogin 953 | 954 | Once OIDC is configured in your cluster, you'll need to configure your local kubectl to authenticate using OIDC tokens. This requires the [kubelogin](https://github.com/int128/kubelogin) plugin. 955 | 956 | ##### Install kubelogin 957 | 958 | ```bash 959 | # Homebrew (macOS and Linux) 960 | brew install kubelogin 961 | 962 | # Krew (macOS, Linux, Windows and ARM) 963 | kubectl krew install oidc-login 964 | 965 | # Chocolatey (Windows) 966 | choco install kubelogin 967 | ``` 968 | 969 | #### Test OIDC Authentication 970 | 971 | First, verify that your OIDC provider is returning proper JWT tokens. Replace the placeholder values with your actual OIDC configuration: 972 | 973 | ```bash 974 | kubectl oidc-login setup \ 975 | --oidc-issuer-url=https://your-oidc-provider.com \ 976 | --oidc-client-id=your-client-id \ 977 | --oidc-client-secret=your-client-secret \ 978 | --oidc-extra-scope=openid,email,profile # Add or change the scopes according to your IDP 979 | ``` 980 | 981 | This will open your browser for authentication. 
After successful login, you should see a JWT token in your terminal that looks like: 982 | 983 | ```json 984 | { 985 | "aud": "your-client-id", 986 | "email": "user@example.com", 987 | "email_verified": true, 988 | "exp": 1749867571, 989 | "groups": [ 990 | "developers", 991 | "kubernetes-users" 992 | ], 993 | "iat": 1749863971, 994 | "iss": "https://your-oidc-provider.com", 995 | "nonce": "random-nonce-string", 996 | "sub": "user-unique-identifier" 997 | } 998 | ``` 999 | 1000 | Verify that: 1001 | 1002 | - The `groups` array contains your expected groups 1003 | - The `email` field matches your user email 1004 | - `email_verified` is `true` (required by K8s) 1005 | 1006 | #### Configure kubectl 1007 | 1008 | Add a new user to your `~/.kube/config` file: 1009 | 1010 | ```yaml 1011 | users: 1012 | - name: oidc-user 1013 | user: 1014 | exec: 1015 | apiVersion: client.authentication.k8s.io/v1beta1 1016 | command: kubectl 1017 | args: 1018 | - oidc-login 1019 | - get-token 1020 | - --oidc-issuer-url=https://your-oidc-provider.com 1021 | - --oidc-client-id=your-client-id 1022 | - --oidc-client-secret=your-client-secret 1023 | - --oidc-extra-scope=groups 1024 | - --oidc-extra-scope=email 1025 | - --oidc-extra-scope=name # Add or change the scopes according to your IDP 1026 | ``` 1027 | 1028 | Update your context to use the new OIDC user: 1029 | 1030 | ```yaml 1031 | contexts: 1032 | - context: 1033 | cluster: your-cluster 1034 | namespace: default 1035 | user: oidc-user # Changed from certificate-based user 1036 | name: oidc@your-cluster # Updated context name 1037 | ``` 1038 | 1039 | Now you can switch to the OIDC context and authenticate using your identity provider: 1040 | 1041 | ```bash 1042 | kubectl config use-context oidc@your-cluster 1043 | kubectl get pods # This will trigger OIDC authentication 1044 | ``` 1045 | 1046 |
1047 | 1048 | 1049 | ## ♻️ Lifecycle 1050 | The [Talos Terraform Provider](https://registry.terraform.io/providers/siderolabs/talos) does not support declarative upgrades of Talos or Kubernetes versions. This module compensates for these limitations using `talosctl` to implement the required functionalities. Any minor or major upgrades to Talos and Kubernetes will result in a major version change of this module. Please be aware that downgrades are typically neither supported nor tested. 1051 | 1052 | > [!IMPORTANT] 1053 | > Before upgrading to the next major version of this module, ensure you are on the latest release of the current major version. Do not skip any major release upgrades. 1054 | 1055 | ### ☑️ Version Compatibility Matrix 1056 | | Hcloud K8s | Kubernetes | Talos | Hcloud CCM | Hcloud CSI | Long-horn | Cilium | Ingress NGINX | Cert Manager | Auto-scaler | 1057 | | :--------: | :--------: | :---: | :--------: | :--------: | :-------: | :----: | :-----------: | :----------: | :---------: | 1058 | | **(4)** | 1.34 | 1.11 | 1.27 | 2.18 | ? | (1.19) | 4.14 | 1.19 | 9.51 | 1059 | | **3** | 1.33 | 1.10 | 1.26 | 2.14 | 1.8.2 | 1.18 | 4.13 | 1.18 | 9.47 | 1060 | | **2** | 1.32 | 1.9 | 1.23 | 2.12 | 1.8.1 | 1.17 | 4.12 | 1.17 | 9.45 | 1061 | 1065 | 1066 | In this module, upgrades are conducted with care. You will consistently receive the most tested and compatible releases of all components, avoiding the latest untested or incompatible releases that could disrupt your cluster. 1067 | 1068 | > [!WARNING] 1069 | > It is not recommended to change any software versions in this project on your own. Each component is specifically configured for compatibility with new Kubernetes releases. The specified versions are supported and have been tested to work together. 1070 | 1071 | 1081 | 1082 | 1083 | 1084 | ### 🧭 Roadmap 1085 | * [ ] **Ingress NGINX Retirement in March 2026**
1086 | Replace Ingress NGINX, which is being retired as announced in this [blog post](https://kubernetes.io/blog/2025/11/11/ingress-nginx-retirement/), with Cilium Gateway API. 1087 | * [ ] **Upgrade to the latest Talos 1.11 and Kubernetes 1.34**<br>
1088 | Once all components have compatible versions, the upgrade can be performed. 1089 | 1090 | 1091 | ## 👋 Community 1092 | We welcome everyone to join the discussion, report issues, and help improve this project. 1093 | 1094 | 1095 | ### 🤝 Contributing 1096 | 1097 | 1098 | 1099 | 1100 | 1101 | 1102 | Contributions are always welcome! 1103 | 1104 | 1105 | ## ❤️ Support this Project 1106 | If you'd like to support this project, please consider leaving a ⭐ on GitHub!
1107 | 1108 | Stars 1113 | 1114 | 1115 | 1116 | 1117 | 1118 | 1119 | Star History Chart 1120 | 1121 | 1122 | > [!TIP] 1123 | > If you don’t have a Hetzner account yet, you can use this [Hetzner Cloud Referral Link](https://hetzner.cloud/?ref=GMylKeDmqtsD) to claim a €20 credit and support this project at the same time. 1124 | 1125 | ### Sponsor this Project 1126 | Your contributions support development, maintenance, documentation, support, and operating costs 🙏 1127 | 1128 | **Become a Sponsor:** 1129 | - [![GitHub Sponsors](https://img.shields.io/static/v1?label=GitHub%20Sponsors&message=%E2%9D%A4&logo=GitHub&color=%23fe8e86)](https://github.com/sponsors/hcloud-k8s) 1130 | - [![Liberapay](https://img.shields.io/static/v1?label=Liberapay&message=Donate&logo=liberapay&color=F6C915&labelColor=555555)](https://liberapay.com/hcloud-k8s/donate) 1131 | 1132 | 1133 | 1134 | 1135 | ## 📎 Project Info 1136 | This project is built for the public and will always remain fully [open source](https://opensource.org). 1137 | 1138 | 1139 | ### ⚖️ License 1140 | 1141 | Distributed under the MIT License. See [LICENSE](https://github.com/hcloud-k8s/terraform-hcloud-kubernetes/blob/main/LICENSE) for more information. 1142 | 1143 | 1144 | ### 💎 Acknowledgements 1145 | 1146 | - [Talos Linux](https://www.talos.dev) for its impressively secure, immutable, and minimalistic Kubernetes distribution. 1147 | - [Hetzner Cloud](https://www.hetzner.com/cloud) for offering excellent cloud infrastructure with robust Kubernetes integrations. 1148 | - [Kube-Hetzner](https://github.com/kube-hetzner/terraform-hcloud-kube-hetzner) and [Terraform - Hcloud - Talos](https://github.com/hcloud-talos/terraform-hcloud-talos) for providing practical examples and inspiration for this module. 1149 | --------------------------------------------------------------------------------