├── .dockerignore ├── .github └── workflows │ ├── cla.yml │ ├── docker.yml │ └── release.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── charts └── restate-operator-helm │ ├── .helmignore │ ├── Chart.yaml │ ├── crds │ └── restateclusters.yaml │ ├── templates │ ├── _helpers.tpl │ ├── deployment.yaml │ ├── rbac.yaml │ ├── service.yaml │ └── servicemonitor.yaml │ └── values.yaml ├── crd ├── RestateCluster.pkl ├── crd.yaml ├── instance.pkl └── pklgen │ └── generate.pkl ├── docker └── Dockerfile ├── justfile └── src ├── controller.rs ├── crdgen.rs ├── lib.rs ├── main.rs ├── metrics.rs ├── podidentityassociations.rs ├── reconcilers ├── compute.rs ├── mod.rs ├── network_policies.rs ├── quantity_parser.rs └── signing_key.rs ├── schemagen.rs ├── secretproviderclasses.rs ├── securitygrouppolicies.rs └── telemetry.rs /.dockerignore: -------------------------------------------------------------------------------- 1 | target/ 2 | .github/ 3 | deploy/ 4 | crd/ 5 | tests/ 6 | -------------------------------------------------------------------------------- /.github/workflows/cla.yml: -------------------------------------------------------------------------------- 1 | name: "CLA Assistant" 2 | on: 3 | issue_comment: 4 | types: [created] 5 | pull_request_target: 6 | types: [opened, closed, synchronize] 7 | 8 | jobs: 9 | CLAAssistant: 10 | uses: restatedev/restate/.github/workflows/cla.yml@main 11 | secrets: inherit 12 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: Build docker image 2 | 3 | on: 4 | workflow_dispatch: 5 | workflow_call: 6 | 7 | jobs: 8 | build-docker-image: 9 | name: Build docker image 10 | uses: restatedev/restate/.github/workflows/docker.yml@main 11 | secrets: inherit 12 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Create new release 2 | 3 | on: 4 | push: 5 | tags: 6 | - v** 7 | 8 | jobs: 9 | build-docker-image: 10 | name: Build release Docker image 11 | uses: ./.github/workflows/docker.yml 12 | secrets: inherit 13 | 14 | build-helm-chart: 15 | runs-on: ubuntu-latest 16 | needs: [build-docker-image] # otherwise you might install a latest helm chart and get image pull error 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v3 20 | - name: Write release version 21 | id: version 22 | run: | 23 | VERSION=${GITHUB_REF_NAME#v} 24 | echo Version: $VERSION 25 | echo "VERSION=$VERSION" >> "$GITHUB_OUTPUT" 26 | - name: Push helm chart 27 | uses: appany/helm-oci-chart-releaser@v0.3.0 28 | with: 29 | name: restate-operator-helm 30 | repository: restatedev 31 | tag: ${{ steps.version.outputs.VERSION }} 32 | registry: ghcr.io 33 | registry_username: ${{ github.actor }} 34 | registry_password: ${{ secrets.GITHUB_TOKEN }} 35 | 36 | publish-release: 37 | name: Publish release 38 | runs-on: ubuntu-latest 39 | needs: [build-docker-image, build-helm-chart] 40 | 41 | steps: 42 | - name: Checkout 43 | uses: actions/checkout@v3 44 | - name: Create release 45 | uses: softprops/action-gh-release@v1 46 | with: 47 | # create a draft release which needs manual approval 48 | draft: true 49 | files: | 50 | crd/crd.yaml 51 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | target 2 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "restate-operator" 3 | version = "1.6.0" 4 | authors = ["restate.dev"] 5 | edition = "2021" 6 | rust-version = "1.75" 7 | license = "MIT" 8 | repository = "https://github.com/restatedev/restate-operator" 9 | description = "Restate makes distributed applications easy!" 10 | 11 | [[bin]] 12 | doc = false 13 | name = "restate-operator" 14 | path = "src/main.rs" 15 | 16 | [[bin]] 17 | doc = false 18 | name = "crdgen" 19 | path = "src/crdgen.rs" 20 | 21 | [[bin]] 22 | doc = false 23 | name = "schemagen" 24 | path = "src/schemagen.rs" 25 | 26 | [lib] 27 | name = "restate_operator" 28 | path = "src/lib.rs" 29 | 30 | [features] 31 | default = [] 32 | telemetry = ["tonic", "opentelemetry-otlp"] 33 | 34 | [dependencies] 35 | actix-web = "4.4.0" 36 | futures = "0.3.28" 37 | tokio = { version = "1.32.0", features = ["macros", "rt-multi-thread"] } 38 | k8s-openapi = { version = "0.21.0", features = ["latest", "schemars"] } 39 | kube = { version = "0.88.1", features = [ 40 | "runtime", 41 | "client", 42 | "derive", 43 | "unstable-runtime", 44 | ] } 45 | schemars = { version = "0.8.12", features = ["chrono"] } 46 | serde = { version = "1.0.185", features = ["derive"] } 47 | serde-hashkey = "0.4.5" 48 | serde_json = "1.0.105" 49 | serde_yaml = "0.9.25" 50 | prometheus = "0.13.3" 51 | chrono = { version = "0.4.26", features = ["serde"] } 52 | tracing = "0.1.37" 53 | tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] } 54 | tracing-opentelemetry = "0.20.0" 55 | opentelemetry = { version = "0.20.0", features = ["trace", "rt-tokio"] } 56 | opentelemetry-otlp = { version = "0.13.0", features = [ 57 | "tokio", 58 | ], optional = true } 59 | tonic = { version = "0.9", optional = true } 60 | thiserror = "1.0.47" 61 | anyhow = "1.0.75" 62 | clap = { version = "4.1", features = ["derive", "env"] } 63 | regex = "1.10.4" 64 | sha2 = "0.10.8" 65 | 66 | [dev-dependencies] 67 | assert-json-diff = "2.0.2" 68 | http = "0.2.9" 69 | hyper = "0.14.27" 70 | tower-test = "0.4.0" 71 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 - Restate Software, Inc., Restate GmbH 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Restate Operator 2 | 3 | A Kubernetes operator that creates [Restate](https://restate.dev/) clusters. Supported features: 4 | 5 | - Online volume expansion 6 | - Network security via `NetworkPolicy` 7 | - Manage credentials using [EKS Pod Identity](https://docs.aws.amazon.com/eks/latest/userguide/pod-identities.html) 8 | - Manage security groups using [Security Groups for Pods](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) 9 | - Sign requests using private keys from Secrets or CSI Secret Store 10 | 11 | ## Usage 12 | 13 | ### Installing 14 | 15 | ```bash 16 | helm install restate-operator oci://ghcr.io/restatedev/restate-operator-helm --namespace restate-operator --create-namespace 17 | ``` 18 | 19 | ### Creating a cluster 20 | 21 | The operator watches `RestateCluster` objects, which are not namespaced. A Namespace with the same name as the 22 | `RestateCluster` will be created, in which a StatefulSet, Service, and NetworkPolicies are created. 23 | 24 | An example `RestateCluster` with one node: 25 | 26 | ```yaml 27 | apiVersion: restate.dev/v1 28 | kind: RestateCluster 29 | metadata: 30 | name: restate-test 31 | spec: 32 | compute: 33 | image: restatedev/restate:1.3.2 34 | storage: 35 | storageRequestBytes: 2147483648 # 2 GiB 36 | ``` 37 | 38 | An example `RestateCluster` with 3 nodes using the Raft metastore: 39 | 40 | ```yaml 41 | apiVersion: restate.dev/v1 42 | kind: RestateCluster 43 | metadata: 44 | name: restate-test 45 | spec: 46 | compute: 47 | replicas: 3 48 | image: restatedev/restate:1.3.2 49 | storage: 50 | storageRequestBytes: 2147483648 # 2 GiB 51 | config: | 52 | auto-provision = false 53 | 54 | [metadata-server] 55 | type = "replicated" 56 | 57 | [metadata-client] 58 | addresses = ["http://restate:5122/"] 59 | ``` 60 | 61 | Or with the S3 metastore (recommended if you're running in a single region): 62 | 63 | ```yaml 64 | apiVersion: restate.dev/v1 65 | kind: RestateCluster 66 | metadata: 67 | name: restate-test 68 | spec: 69 | compute: 70 | replicas: 3 71 | image: restatedev/restate:1.3.2 72 | storage: 73 | storageRequestBytes: 2147483648 # 2 GiB 74 | security: 75 | serviceAccountAnnotations: 76 | eks.amazonaws.com/role-arn: arn:aws:iam::111122223333:role/my-role-that-can-read-write-to-the-bucket 77 | config: | 78 | auto-provision = false 79 | roles = [ 80 | "worker", 81 | "admin", 82 | "log-server", 83 | ] 84 | 85 | [metadata-client] 86 | type = "object-store" 87 | path = "s3://some-bucket/metadata" 88 | ``` 89 | 90 | In either case, you would then need to provision the cluster eg with: 91 | 92 | ```bash 93 | kubectl -n restate-test exec -it restate-0 -- restatectl provision --log-provider replicated --log-replication 2 --partition-replication 2 --num-partitions 128 94 | ``` 95 | 96 | For the full schema as a [Pkl](https://pkl-lang.org/) template see [`crd/RestateCluster.pkl`](./crd/RestateCluster.pkl). 
97 | 98 | ### EKS Pod Identity 99 | 100 | [EKS Pod Identity](https://docs.aws.amazon.com/eks/latest/userguide/pod-identities.html) is a convenient way to have a 101 | single AWS role shared amongst many Restate clusters, where the AWS identities will contain tags detailing their 102 | Kubernetes identity. This can be useful for access control eg 'Restate clusters in namespace `my-cluster` may call this 103 | Lambda'. 104 | 105 | This operator can create objects for the 106 | [AWS ACK EKS controller](https://github.com/aws-controllers-k8s/eks-controller) such that pod identity associations are 107 | created for each `RestateCluster`. To enable this functionality the operator must be started with knowledge of the EKS 108 | cluster name, by setting `awsPodIdentityAssociationCluster` in the helm chart. If this option is set, the ACK CRDs must 109 | be installed or the operator will fail to start. Then, you may provide `awsPodIdentityAssociationRoleArn` in 110 | the `RestateCluster` spec. 111 | 112 | ### EKS Security Groups for Pods 113 | 114 | [EKS Security Groups for Pods](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) allows 115 | you to isolate pods into separate AWS Security Groups, which is a powerful security primitive which can help you limit 116 | Restate to public IP access, as well as to obtain VPC flow logs. 117 | 118 | The operator can create `SecurityGroupPolicy` objects which put Restate pods into a set of Security Groups. If this CRD 119 | is installed, you may provide `awsPodSecurityGroups` in the `RestateCluster` spec. 120 | 121 | ## Releasing 122 | 123 | 1. Update the version in charts/restate-operator/Chart.yaml and the version in Cargo.{toml,lock} eg to `0.0.2` 124 | 2. Push a new tag `v0.0.2` 125 | 3. Accept the draft release once the workflow finishes 126 | -------------------------------------------------------------------------------- /charts/restate-operator-helm/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /charts/restate-operator-helm/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: restate-operator-helm 3 | description: An operator for Restate clusters 4 | type: application 5 | version: "1.6.0" 6 | -------------------------------------------------------------------------------- /charts/restate-operator-helm/crds/restateclusters.yaml: -------------------------------------------------------------------------------- 1 | ../../../crd/crd.yaml -------------------------------------------------------------------------------- /charts/restate-operator-helm/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{- define "controller.name" -}} 2 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 3 | {{- end }} 4 | 5 | {{- define "controller.fullname" -}} 6 | {{- $name := default .Chart.Name .Values.nameOverride }} 7 | {{- $name | trunc 63 | trimSuffix "-" }} 8 | {{- end }} 9 | 10 | {{- define "controller.labels" -}} 11 | {{- include "controller.selectorLabels" . }} 12 | app.kubernetes.io/name: {{ include "controller.name" . }} 13 | app.kubernetes.io/version: {{ .Values.version | default .Chart.Version | quote }} 14 | {{- end }} 15 | 16 | {{- define "controller.selectorLabels" -}} 17 | app: {{ include "controller.name" . }} 18 | {{- end }} 19 | 20 | {{- define "controller.tag" -}} 21 | {{- .Values.version | default .Chart.Version }} 22 | {{- end }} 23 | -------------------------------------------------------------------------------- /charts/restate-operator-helm/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: {{ include "controller.fullname" . }} 6 | labels: 7 | {{- include "controller.labels" . | nindent 4 }} 8 | spec: 9 | replicas: {{ .Values.replicaCount }} 10 | selector: 11 | matchLabels: 12 | {{- include "controller.selectorLabels" . | nindent 6 }} 13 | template: 14 | metadata: 15 | labels: 16 | {{- include "controller.selectorLabels" . | nindent 8 }} 17 | annotations: 18 | kubectl.kubernetes.io/default-container: {{ include "controller.fullname" . }} 19 | {{- if .Values.podAnnotations }} 20 | {{- toYaml .Values.podAnnotations | nindent 8 }} 21 | {{- end }} 22 | spec: 23 | {{- if .Values.affinity }} 24 | affinity: 25 | {{- toYaml .Values.affinity | nindent 8 }} 26 | {{- end }} 27 | serviceAccountName: {{ include "controller.fullname" . }} 28 | {{- with .Values.imagePullSecrets }} 29 | imagePullSecrets: 30 | {{- toYaml . | nindent 8 }} 31 | {{- end }} 32 | securityContext: 33 | {{- toYaml .Values.podSecurityContext | nindent 8 }} 34 | containers: 35 | - name: {{ include "controller.fullname" . }} 36 | image: {{ .Values.image.repository }}:{{ include "controller.tag" . 
}} 37 | imagePullPolicy: {{ .Values.image.pullPolicy }} 38 | securityContext: 39 | {{- toYaml .Values.securityContext | nindent 12 }} 40 | resources: 41 | {{- toYaml .Values.resources | nindent 12 }} 42 | ports: 43 | - name: http 44 | containerPort: 8080 45 | protocol: TCP 46 | env: 47 | - name: RUST_LOG 48 | value: {{ .Values.logging.env_filter }} 49 | {{- if .Values.awsPodIdentityAssociationCluster }} 50 | - name: AWS_POD_IDENTITY_ASSOCIATION_CLUSTER 51 | value: {{ .Values.awsPodIdentityAssociationCluster }} 52 | {{- end }} 53 | {{- with .Values.env }} 54 | {{- toYaml . | nindent 12 }} 55 | {{- end }} 56 | readinessProbe: 57 | httpGet: 58 | path: /health 59 | port: http 60 | initialDelaySeconds: 5 61 | periodSeconds: 5 62 | {{- if .Values.tolerations }} 63 | tolerations: 64 | {{- toYaml .Values.tolerations | nindent 8 }} 65 | {{- end }} 66 | -------------------------------------------------------------------------------- /charts/restate-operator-helm/templates/rbac.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create }} 2 | --- 3 | # Scoped service account 4 | apiVersion: v1 5 | kind: ServiceAccount 6 | metadata: 7 | name: {{ include "controller.fullname" . }} 8 | labels: 9 | {{- include "controller.labels" . | nindent 4 }} 10 | {{- with .Values.serviceAccount.annotations }} 11 | annotations: 12 | {{- toYaml . | nindent 4 }} 13 | {{- end }} 14 | namespace: {{ .Release.Namespace }} 15 | automountServiceAccountToken: true 16 | {{- end }} 17 | 18 | --- 19 | # Access for the service account 20 | kind: ClusterRole 21 | apiVersion: rbac.authorization.k8s.io/v1 22 | metadata: 23 | name: {{ include "controller.fullname" . }} 24 | rules: 25 | - resources: 26 | - restateclusters 27 | - restateclusters/status 28 | - restateclusters/finalizers 29 | verbs: 30 | - get 31 | - list 32 | - watch 33 | - patch 34 | apiGroups: 35 | - restate.dev 36 | - resources: 37 | - events 38 | verbs: 39 | - create 40 | apiGroups: 41 | - events.k8s.io 42 | - resources: 43 | - namespaces 44 | - services 45 | - configmaps 46 | - serviceaccounts 47 | - networkpolicies 48 | - statefulsets 49 | - persistentvolumeclaims 50 | - pods 51 | - jobs 52 | - securitygrouppolicies 53 | - secretproviderclasses 54 | verbs: 55 | - get 56 | - list 57 | - watch 58 | - create 59 | - patch 60 | apiGroups: 61 | - '' 62 | - batch 63 | - apps 64 | - networking.k8s.io 65 | - vpcresources.k8s.aws 66 | - secrets-store.csi.x-k8s.io 67 | - resources: 68 | - statefulsets 69 | - networkpolicies 70 | - jobs 71 | - securitygrouppolicies 72 | - secretproviderclasses 73 | verbs: 74 | - delete 75 | apiGroups: 76 | - batch 77 | - apps 78 | - networking.k8s.io 79 | - vpcresources.k8s.aws 80 | - secrets-store.csi.x-k8s.io 81 | {{- if .Values.awsPodIdentityAssociationCluster }} 82 | - resources: 83 | - podidentityassociations 84 | verbs: 85 | - get 86 | - list 87 | - watch 88 | - create 89 | - patch 90 | - delete 91 | apiGroups: 92 | - eks.services.k8s.aws 93 | {{- end }} 94 | --- 95 | # Binding the role to the account 96 | kind: ClusterRoleBinding 97 | apiVersion: rbac.authorization.k8s.io/v1 98 | metadata: 99 | name: {{ include "controller.fullname" . }} 100 | subjects: 101 | - kind: ServiceAccount 102 | namespace: {{ .Release.Namespace }} 103 | name: {{ include "controller.fullname" . }} 104 | roleRef: 105 | kind: ClusterRole 106 | name: {{ include "controller.fullname" . 
}} 107 | apiGroup: rbac.authorization.k8s.io 108 | -------------------------------------------------------------------------------- /charts/restate-operator-helm/templates/service.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Expose the http port of the service 3 | apiVersion: v1 4 | kind: Service 5 | metadata: 6 | name: {{ include "controller.fullname" . }} 7 | labels: 8 | {{- include "controller.labels" . | nindent 4 }} 9 | {{- with .Values.service.annotations }} 10 | annotations: 11 | {{- toYaml . | nindent 4 }} 12 | {{- end }} 13 | spec: 14 | type: {{ .Values.service.type }} 15 | ports: 16 | - port: {{ .Values.service.port }} 17 | targetPort: 8080 18 | protocol: TCP 19 | name: http 20 | selector: 21 | app: {{ include "controller.fullname" . }} 22 | -------------------------------------------------------------------------------- /charts/restate-operator-helm/templates/servicemonitor.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceMonitor.enabled }} 2 | --- 3 | apiVersion: monitoring.coreos.com/v1 4 | kind: ServiceMonitor 5 | metadata: 6 | name: {{ include "controller.fullname" . }} 7 | labels: 8 | {{- include "controller.labels" . | nindent 4 }} 9 | {{- with .Values.service.annotations }} 10 | annotations: 11 | {{- toYaml . | nindent 4 }} 12 | {{- end }} 13 | spec: 14 | endpoints: 15 | - port: http 16 | {{- with .Values.serviceMonitor.interval }} 17 | interval: {{ . }} 18 | {{- end }} 19 | {{- with .Values.serviceMonitor.scrapeTimeout }} 20 | scrapeTimeout: {{ . }} 21 | {{- end }} 22 | honorLabels: true 23 | path: {{ .Values.serviceMonitor.path }} 24 | scheme: {{ .Values.serviceMonitor.scheme }} 25 | {{- with .Values.serviceMonitor.relabelings }} 26 | relabelings: 27 | {{- toYaml . | nindent 6 }} 28 | {{- end }} 29 | {{- with .Values.serviceMonitor.metricRelabelings }} 30 | metricRelabelings: 31 | {{- toYaml . | nindent 6 }} 32 | {{- end }} 33 | jobLabel: {{ include "controller.fullname" . }} 34 | selector: 35 | matchLabels: 36 | {{- include "controller.selectorLabels" . | nindent 6 }} 37 | {{- with .Values.serviceMonitor.targetLabels }} 38 | targetLabels: 39 | {{- toYaml . 
| nindent 4 }} 40 | {{- end }} 41 | {{- end }} 42 | -------------------------------------------------------------------------------- /charts/restate-operator-helm/values.yaml: -------------------------------------------------------------------------------- 1 | replicaCount: 1 2 | nameOverride: "restate-operator" 3 | version: "local" # pin a specific version 4 | 5 | image: 6 | repository: ghcr.io/restatedev/restate-operator 7 | pullPolicy: IfNotPresent 8 | 9 | imagePullSecrets: [] 10 | 11 | serviceAccount: 12 | create: true 13 | annotations: {} 14 | podAnnotations: {} 15 | 16 | awsPodIdentityAssociationCluster: null 17 | 18 | podSecurityContext: 19 | fsGroup: 2000 20 | fsGroupChangePolicy: "OnRootMismatch" 21 | securityContext: 22 | capabilities: 23 | drop: 24 | - ALL 25 | readOnlyRootFilesystem: true 26 | allowPrivilegeEscalation: false 27 | runAsNonRoot: true 28 | runAsUser: 1000 29 | runAsGroup: 3000 30 | 31 | logging: 32 | env_filter: info,restate=debug 33 | 34 | env: [] 35 | 36 | affinity: {} 37 | 38 | service: 39 | type: ClusterIP 40 | port: 80 41 | 42 | resources: 43 | limits: 44 | cpu: 200m 45 | memory: 256Mi 46 | requests: 47 | cpu: 50m 48 | memory: 100Mi 49 | 50 | serviceMonitor: 51 | enabled: false 52 | path: /metrics 53 | scheme: http 54 | -------------------------------------------------------------------------------- /crd/RestateCluster.pkl: -------------------------------------------------------------------------------- 1 | /// RestateCluster describes the configuration and status of a Restate cluster. 2 | /// 3 | /// This module was generated from the CustomResourceDefinition at . 4 | module dev.restate.v1.RestateCluster 5 | 6 | extends "package://pkg.pkl-lang.org/pkl-k8s/k8s@1.0.1#/K8sResource.pkl" 7 | 8 | import "package://pkg.pkl-lang.org/pkl-k8s/k8s@1.0.1#/apimachinery/pkg/apis/meta/v1/ObjectMeta.pkl" 9 | import "package://pkg.pkl-lang.org/pkl-k8s/k8s@1.0.1#/api/core/v1/PodSpec.pkl" 10 | import "package://pkg.pkl-lang.org/pkl-k8s/k8s@1.0.1#/api/core/v1/EnvVar.pkl" 11 | import "package://pkg.pkl-lang.org/pkl-k8s/k8s@1.0.1#/api/core/v1/ResourceRequirements.pkl" 12 | import "package://pkg.pkl-lang.org/pkl-k8s/k8s@1.0.1#/api/core/v1/Toleration.pkl" 13 | import "package://pkg.pkl-lang.org/pkl-k8s/k8s@1.0.1#/api/networking/v1/NetworkPolicy.pkl" 14 | 15 | fixed apiVersion: "restate.dev/v1" 16 | 17 | fixed kind: "RestateCluster" 18 | 19 | /// Standard object's metadata. 20 | /// 21 | /// More info: . 22 | metadata: ObjectMeta? 23 | 24 | /// Represents the configuration of a Restate Cluster 25 | spec: Spec 26 | 27 | /// Status of the RestateCluster. This is set and managed automatically. Read-only. More info: 28 | /// https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status 29 | status: Status? 30 | 31 | /// Represents the configuration of a Restate Cluster 32 | class Spec { 33 | /// clusterName sets the RESTATE_CLUSTER_NAME environment variable. Defaults to the object name. 34 | clusterName: String? 35 | 36 | /// Compute configuration 37 | compute: Compute 38 | 39 | /// TOML-encoded Restate config file 40 | config: String? 41 | 42 | /// Security configuration 43 | security: Security? 44 | 45 | /// Storage configuration 46 | storage: Storage 47 | } 48 | 49 | /// Compute configuration 50 | class Compute { 51 | /// Affinity is a group of affinity scheduling rules. 52 | affinity: PodSpec.Affinity? 53 | 54 | /// Specifies the DNS parameters of the Restate pod. 
Parameters specified here will be merged to the 55 | /// generated DNS configuration based on DNSPolicy. 56 | dnsConfig: PodSpec.PodDNSConfig? 57 | 58 | /// Set DNS policy for the pod. Defaults to "ClusterFirst". Valid values are 'ClusterFirstWithHostNet', 59 | /// 'ClusterFirst', 'Default' or 'None'. DNS parameters given in DNSConfig will be merged with the 60 | /// policy selected with DNSPolicy. 61 | dnsPolicy: String? 62 | 63 | /// List of environment variables to set in the container; these may override defaults 64 | env: Listing? 65 | 66 | /// Container image name. More info: https://kubernetes.io/docs/concepts/containers/images. 67 | image: String 68 | 69 | /// Image pull policy. One of Always, Never, IfNotPresent. Defaults to Always if :latest tag is 70 | /// specified, or IfNotPresent otherwise. More info: 71 | /// https://kubernetes.io/docs/concepts/containers/images#updating-images 72 | imagePullPolicy: String? 73 | 74 | /// If specified, a node selector for the pod 75 | nodeSelector: Mapping? 76 | 77 | /// replicas is the desired number of Restate nodes. If unspecified, defaults to 1. 78 | replicas: Int? 79 | 80 | /// Compute Resources for the Restate container. More info: 81 | /// https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ 82 | resources: ResourceRequirements? 83 | 84 | /// If specified, the pod's tolerations. 85 | tolerations: Listing? 86 | } 87 | 88 | /// Security configuration 89 | class Security { 90 | /// If set, create an AWS PodIdentityAssociation using the ACK CRD in order to give the Restate pod 91 | /// access to this role and allow the cluster to reach the Pod Identity agent. 92 | awsPodIdentityAssociationRoleArn: String? 93 | 94 | /// If set, create an AWS SecurityGroupPolicy CRD object to place the Restate pod into these security 95 | /// groups 96 | awsPodSecurityGroups: Listing? 97 | 98 | /// Egress rules to allow the cluster to make outbound requests; this is in addition to the default of 99 | /// allowing public internet access and cluster DNS access. Providing a single empty rule will allow 100 | /// all outbound traffic - not recommended 101 | networkEgressRules: Listing? 102 | 103 | /// Network peers to allow inbound access to restate ports If unset, will not allow any new traffic. 104 | /// Set any of these to [] to allow all traffic - not recommended. 105 | networkPeers: NetworkPeers? 106 | 107 | /// If set, configure the use of a private key to sign outbound requests from this cluster 108 | requestSigningPrivateKey: RequestSigningPrivateKey? 109 | 110 | /// Annotations to set on the ServiceAccount created for Restate 111 | serviceAccountAnnotations: Mapping? 112 | 113 | /// Annotations to set on the Service created for Restate 114 | serviceAnnotations: Mapping? 115 | } 116 | 117 | /// Network peers to allow inbound access to restate ports If unset, will not allow any new traffic. Set 118 | /// any of these to [] to allow all traffic - not recommended. 119 | class NetworkPeers { 120 | admin: Listing? 121 | 122 | ingress: Listing? 123 | 124 | metrics: Listing? 125 | } 126 | 127 | /// If set, configure the use of a private key to sign outbound requests from this cluster 128 | class RequestSigningPrivateKey { 129 | /// A Kubernetes Secret source for the private key 130 | secret: Secret? 131 | 132 | /// A CSI secret provider source for the private key; will create a SecretProviderClass. 133 | secretProvider: SecretProvider? 
134 | 135 | /// The version of Restate request signing that the key is for; currently only "v1" accepted. 136 | version: String 137 | } 138 | 139 | /// A Kubernetes Secret source for the private key 140 | class Secret { 141 | /// The key of the secret to select from. Must be a valid secret key. 142 | key: String 143 | 144 | /// Name of the secret. 145 | secretName: String 146 | } 147 | 148 | /// A CSI secret provider source for the private key; will create a SecretProviderClass. 149 | class SecretProvider { 150 | /// Configuration for specific provider 151 | parameters: Mapping? 152 | 153 | /// The path of the private key relative to the root of the mounted volume 154 | path: String 155 | 156 | /// Configuration for provider name 157 | provider: String? 158 | } 159 | 160 | /// Storage configuration 161 | class Storage { 162 | /// storageClassName is the name of the StorageClass required by the claim. More info: 163 | /// https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 this field is immutable 164 | storageClassName: String? 165 | 166 | /// storageRequestBytes is the amount of storage to request in volume claims. It is allowed to increase 167 | /// but not decrease. 168 | storageRequestBytes: Int(this >= 1.0) 169 | } 170 | 171 | /// Status of the RestateCluster. This is set and managed automatically. Read-only. More info: 172 | /// https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status 173 | class Status { 174 | conditions: Listing? 175 | } 176 | 177 | class Condition { 178 | /// Last time the condition transitioned from one status to another. 179 | lastTransitionTime: String? 180 | 181 | /// Human-readable message indicating details about last transition. 182 | message: String? 183 | 184 | /// Unique, one-word, CamelCase reason for the condition's last transition. 185 | reason: String? 186 | 187 | /// Status is the status of the condition. Can be True, False, Unknown. 188 | status: String 189 | 190 | /// Type of the condition, known values are (`Ready`). 
191 | type: String 192 | } 193 | -------------------------------------------------------------------------------- /crd/instance.pkl: -------------------------------------------------------------------------------- 1 | import "RestateCluster.pkl" 2 | 3 | cluster = (RestateCluster) { 4 | metadata { 5 | name = "restate-test" 6 | } 7 | spec { 8 | compute { 9 | image = "restatedev/restate:1.3.2" 10 | } 11 | storage { 12 | storageRequestBytes = 2.gib.toUnit("b").value as Int 13 | } 14 | } 15 | } 16 | 17 | output { 18 | renderer = new YamlRenderer { 19 | isStream = true 20 | } 21 | value = new Listing { 22 | cluster 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /crd/pklgen/generate.pkl: -------------------------------------------------------------------------------- 1 | amends "package://pkg.pkl-lang.org/pkl-pantry/k8s.contrib.crd@1.0.13#/generate.pkl" 2 | 3 | import "package://pkg.pkl-lang.org/pkl-k8s/k8s@1.0.1#/api/core/v1/ResourceRequirements.pkl" 4 | import "package://pkg.pkl-lang.org/pkl-k8s/k8s@1.0.1#/api/core/v1/PodSpec.pkl" 5 | import "package://pkg.pkl-lang.org/pkl-k8s/k8s@1.0.1#/api/networking/v1/NetworkPolicy.pkl" 6 | import "package://pkg.pkl-lang.org/pkl-k8s/k8s@1.0.1#/api/core/v1/EnvVar.pkl" 7 | import "package://pkg.pkl-lang.org/pkl-k8s/k8s@1.0.1#/api/core/v1/Toleration.pkl" 8 | 9 | source = "file:./crd/crd.yaml" 10 | 11 | converters { 12 | ["restateclusters.restate.dev"] { 13 | [List("spec", "compute", "env", "env")] = EnvVar 14 | [List("spec", "compute", "resources")] = ResourceRequirements 15 | [List("spec", "compute", "dnsConfig")] = PodSpec.PodDNSConfig 16 | [List("spec", "compute", "affinity")] = PodSpec.Affinity 17 | [List("spec", "compute", "tolerations", "toleration")] = Toleration 18 | [List("spec", "security", "networkEgressRules", "networkEgressRule")] = NetworkPolicy.NetworkPolicyEgressRule 19 | [List("spec", "security", "networkPeers", "admin", "admin")] = NetworkPolicy.NetworkPolicyPeer 20 | [List("spec", "security", "networkPeers", "ingress", "ingres")] = NetworkPolicy.NetworkPolicyPeer 21 | [List("spec", "security", "networkPeers", "metrics", "metric")] = NetworkPolicy.NetworkPolicyPeer 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM --platform=$BUILDPLATFORM ghcr.io/restatedev/dev-tools:latest AS planner 2 | COPY .. . 3 | RUN just chef-prepare 4 | 5 | FROM --platform=$BUILDPLATFORM ghcr.io/restatedev/dev-tools:latest AS base 6 | COPY --from=planner /restate/recipe.json recipe.json 7 | COPY ../justfile justfile 8 | 9 | # avoid sharing sccache port between multiplatform builds - they share a network but not a filesystem, so it won't work 10 | FROM base AS base-amd64 11 | ARG SCCACHE_SERVER_PORT=4226 12 | 13 | FROM base AS base-arm64 14 | ARG SCCACHE_SERVER_PORT=4227 15 | 16 | FROM base-$TARGETARCH AS builder 17 | ARG SCCACHE_SERVER_PORT 18 | ARG TARGETARCH 19 | 20 | ENV RUSTC_WRAPPER=/usr/bin/sccache 21 | ENV SCCACHE_DIR=/var/cache/sccache 22 | # Overrides the behaviour of the release profile re including debug symbols, which in our repo is not to include them. 23 | # Should be set to 'false' or 'true'. See https://doc.rust-lang.org/cargo/reference/environment-variables.html 24 | ARG CARGO_PROFILE_RELEASE_DEBUG=false 25 | RUN just arch=$TARGETARCH libc=musl chef-cook --release --bin restate-operator 26 | COPY .. . 
27 | RUN just arch=$TARGETARCH libc=musl build --release --bin restate-operator && mv target/$(just arch=$TARGETARCH libc=musl print-target)/release/restate-operator target/restate-operator 28 | 29 | FROM gcr.io/distroless/static:nonroot 30 | COPY --from=builder --chown=nonroot:nonroot /restate/target/restate-operator /app/restate-operator 31 | ENTRYPOINT ["/app/restate-operator"] 32 | -------------------------------------------------------------------------------- /justfile: -------------------------------------------------------------------------------- 1 | export DOCKER_PROGRESS := env_var_or_default('DOCKER_PROGRESS', 'auto') 2 | 3 | features := "" 4 | libc := "libc" 5 | arch := "" # use the default architecture 6 | os := "" # use the default os 7 | image := "ghcr.io/restatedev/restate-operator:local" 8 | 9 | _features := if features == "all" { 10 | "--all-features" 11 | } else if features != "" { 12 | "--features=" + features 13 | } else { "" } 14 | 15 | _arch := if arch == "" { 16 | arch() 17 | } else if arch == "amd64" { 18 | "x86_64" 19 | } else if arch == "x86_64" { 20 | "x86_64" 21 | } else if arch == "arm64" { 22 | "aarch64" 23 | } else if arch == "aarch64" { 24 | "aarch64" 25 | } else { 26 | error("unsupported arch=" + arch) 27 | } 28 | 29 | _os := if os == "" { 30 | os() 31 | } else { 32 | os 33 | } 34 | 35 | _os_target := if _os == "macos" { 36 | "apple-darwin" 37 | } else if _os == "linux" { 38 | "unknown-linux" 39 | } else { 40 | error("unsupported os=" + _os) 41 | } 42 | 43 | _default_target := `rustc -vV | sed -n 's|host: ||p'` 44 | target := _arch + "-" + _os_target + if _os == "linux" { "-" + libc } else { "" } 45 | _resolved_target := if target != _default_target { target } else { "" } 46 | _target-option := if _resolved_target != "" { "--target " + _resolved_target } else { "" } 47 | 48 | generate: 49 | cargo run --bin crdgen | grep -vF 'categories: []' > crd/crd.yaml 50 | 51 | generate-pkl: 52 | cargo run --bin schemagen | pkl eval crd/pklgen/generate.pkl -m crd 53 | 54 | install-crd: generate 55 | kubectl apply -f crd/crd.yaml 56 | 57 | # Extract dependencies 58 | chef-prepare: 59 | cargo chef prepare --recipe-path recipe.json 60 | 61 | # Compile dependencies 62 | chef-cook *flags: 63 | cargo chef cook --recipe-path recipe.json {{ _target-option }} {{ _features }} {{ flags }} 64 | 65 | print-target: 66 | @echo {{ _resolved_target }} 67 | 68 | build *flags: 69 | cargo build {{ _target-option }} {{ _features }} {{ flags }} 70 | 71 | docker: 72 | docker build . 
-f docker/Dockerfile --tag={{ image }} --progress='{{ DOCKER_PROGRESS }}' --load 73 | -------------------------------------------------------------------------------- /src/controller.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | use std::collections::hash_map::DefaultHasher; 3 | use std::collections::BTreeMap; 4 | use std::hash::{Hash, Hasher}; 5 | use std::path::PathBuf; 6 | use std::sync::Arc; 7 | 8 | use chrono::{DateTime, Utc}; 9 | use futures::StreamExt; 10 | use k8s_openapi::api::apps::v1::{StatefulSet, StatefulSetStatus}; 11 | use k8s_openapi::api::batch::v1::Job; 12 | use k8s_openapi::api::core::v1::{ 13 | Affinity, ConfigMap, EnvVar, Namespace, PersistentVolumeClaim, PodDNSConfig, 14 | ResourceRequirements, Service, ServiceAccount, ServiceSpec, Toleration, 15 | }; 16 | use k8s_openapi::api::networking::v1; 17 | use k8s_openapi::api::networking::v1::{NetworkPolicy, NetworkPolicyPeer, NetworkPolicyPort}; 18 | use k8s_openapi::apimachinery::pkg::apis::meta::v1::{APIGroup, ObjectMeta}; 19 | 20 | use kube::core::object::HasStatus; 21 | use kube::core::PartialObjectMeta; 22 | use kube::runtime::reflector::{ObjectRef, Store}; 23 | use kube::runtime::{metadata_watcher, reflector, watcher, Predicate, WatchStreamExt}; 24 | use kube::{ 25 | api::{Api, ListParams, Patch, PatchParams, ResourceExt}, 26 | client::Client, 27 | runtime::{ 28 | controller::{Action, Controller}, 29 | events::{Event, EventType, Recorder, Reporter}, 30 | finalizer::{finalizer, Event as Finalizer}, 31 | watcher::Config, 32 | }, 33 | CustomResource, Resource, 34 | }; 35 | use schemars::schema::{Schema, SchemaObject}; 36 | use schemars::JsonSchema; 37 | use serde::{Deserialize, Serialize}; 38 | use serde_json::json; 39 | use tokio::{sync::RwLock, time::Duration}; 40 | use tracing::*; 41 | 42 | use crate::podidentityassociations::PodIdentityAssociation; 43 | use crate::reconcilers::compute::reconcile_compute; 44 | use crate::reconcilers::network_policies::reconcile_network_policies; 45 | use crate::reconcilers::object_meta; 46 | use crate::reconcilers::signing_key::reconcile_signing_key; 47 | use crate::secretproviderclasses::SecretProviderClass; 48 | use crate::securitygrouppolicies::SecurityGroupPolicy; 49 | use crate::{telemetry, Error, Metrics, Result}; 50 | 51 | pub static RESTATE_CLUSTER_FINALIZER: &str = "clusters.restate.dev"; 52 | 53 | /// Represents the configuration of a Restate Cluster 54 | #[derive(CustomResource, Deserialize, Serialize, Clone, Debug, JsonSchema)] 55 | #[cfg_attr(test, derive(Default))] 56 | #[kube( 57 | kind = "RestateCluster", 58 | group = "restate.dev", 59 | version = "v1", 60 | schema = "manual", 61 | printcolumn = r#"{"name":"Ready", "type":"string", "jsonPath":".status.conditions[?(@.type==\"Ready\")].status"}"#, 62 | printcolumn = r#"{"name":"Status", "priority": 1, "type":"string", "jsonPath":".status.conditions[?(@.type==\"Ready\")].message"}"#, 63 | printcolumn = r#"{"name":"Age", "description": "CreationTimestamp is a timestamp representing the server time when this object was created. It is not guaranteed to be set in happens-before order across separate operations. Clients may not set this value. 
It is represented in RFC3339 form and is in UTC", "type":"date", "jsonPath":".metadata.creationTimestamp"}"# 64 | )] 65 | #[kube(status = "RestateClusterStatus", shortname = "rsc")] 66 | #[serde(rename_all = "camelCase")] 67 | pub struct RestateClusterSpec { 68 | /// clusterName sets the RESTATE_CLUSTER_NAME environment variable. Defaults to the object name. 69 | pub cluster_name: Option, 70 | pub storage: RestateClusterStorage, 71 | pub compute: RestateClusterCompute, 72 | pub security: Option, 73 | /// TOML-encoded Restate config file 74 | pub config: Option, 75 | } 76 | 77 | // Hoisted from the derived implementation so that we can restrict names to be valid namespace names 78 | impl schemars::JsonSchema for RestateCluster { 79 | fn schema_name() -> String { 80 | "RestateCluster".to_owned() 81 | } 82 | fn schema_id() -> Cow<'static, str> { 83 | "restate_operator::controller::RestateCluster".into() 84 | } 85 | fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> Schema { 86 | { 87 | let mut schema_object = SchemaObject { 88 | instance_type: Some( 89 | schemars::schema::InstanceType::Object.into(), 90 | ), 91 | metadata: Some(Box::new(schemars::schema::Metadata { 92 | description: Some( 93 | "RestateCluster describes the configuration and status of a Restate cluster." 94 | .to_owned(), 95 | ), 96 | ..Default::default() 97 | })), 98 | ..Default::default() 99 | }; 100 | let object_validation = schema_object.object(); 101 | 102 | object_validation 103 | .properties 104 | .insert( 105 | "metadata".to_owned(), 106 | serde_json::from_value(json!({ 107 | "type": "object", 108 | "properties": { 109 | "name": { 110 | "type": "string", 111 | "minLength": 1, 112 | "maxLength": 63, 113 | "pattern": "^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$", 114 | } 115 | } 116 | })).unwrap(), 117 | ); 118 | object_validation.required.insert("metadata".to_owned()); 119 | 120 | object_validation 121 | .properties 122 | .insert("spec".to_owned(), gen.subschema_for::()); 123 | object_validation.required.insert("spec".to_owned()); 124 | 125 | object_validation.properties.insert( 126 | "status".to_owned(), 127 | gen.subschema_for::>(), 128 | ); 129 | Schema::Object(schema_object) 130 | } 131 | } 132 | } 133 | 134 | /// Storage configuration 135 | #[derive(Deserialize, Serialize, Clone, Default, Debug, JsonSchema)] 136 | #[serde(rename_all = "camelCase")] 137 | pub struct RestateClusterStorage { 138 | /// storageClassName is the name of the StorageClass required by the claim. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 139 | /// this field is immutable 140 | #[schemars(default, schema_with = "immutable_storage_class_name")] 141 | pub storage_class_name: Option, 142 | /// storageRequestBytes is the amount of storage to request in volume claims. It is allowed to increase but not decrease. 
143 | #[schemars(schema_with = "expanding_volume_request", range(min = 1))] 144 | pub storage_request_bytes: i64, 145 | } 146 | 147 | fn immutable_storage_class_name( 148 | _: &mut schemars::gen::SchemaGenerator, 149 | ) -> schemars::schema::Schema { 150 | serde_json::from_value(json!({ 151 | "nullable": true, 152 | "type": "string", 153 | "x-kubernetes-validations": [{ 154 | "rule": "self == oldSelf", 155 | "message": "storageClassName is immutable" 156 | }] 157 | })) 158 | .unwrap() 159 | } 160 | 161 | fn expanding_volume_request(_: &mut schemars::gen::SchemaGenerator) -> Schema { 162 | serde_json::from_value(json!({ 163 | "format": "int64", 164 | "type": "integer", 165 | "x-kubernetes-validations": [ 166 | { 167 | "rule": "self >= oldSelf", 168 | "message": "storageRequestBytes cannot be decreased" 169 | } 170 | ] 171 | })) 172 | .unwrap() 173 | } 174 | 175 | /// Compute configuration 176 | #[derive(Deserialize, Serialize, Clone, Default, Debug, JsonSchema)] 177 | #[serde(rename_all = "camelCase")] 178 | pub struct RestateClusterCompute { 179 | /// replicas is the desired number of Restate nodes. If unspecified, defaults to 1. 180 | pub replicas: Option, 181 | /// Container image name. More info: https://kubernetes.io/docs/concepts/containers/images. 182 | pub image: String, 183 | /// Image pull policy. One of Always, Never, IfNotPresent. Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. More info: https://kubernetes.io/docs/concepts/containers/images#updating-images 184 | pub image_pull_policy: Option, 185 | /// List of environment variables to set in the container; these may override defaults 186 | #[schemars(default, schema_with = "env_schema")] 187 | pub env: Option>, 188 | /// Compute Resources for the Restate container. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ 189 | pub resources: Option, 190 | /// Specifies the DNS parameters of the Restate pod. Parameters specified here will be merged to the generated DNS configuration based on DNSPolicy. 191 | pub dns_config: Option, 192 | /// Set DNS policy for the pod. Defaults to "ClusterFirst". Valid values are 'ClusterFirstWithHostNet', 'ClusterFirst', 'Default' or 'None'. DNS parameters given in DNSConfig will be merged with the policy selected with DNSPolicy. 193 | pub dns_policy: Option, 194 | /// If specified, the pod's tolerations. 
195 | pub tolerations: Option>, 196 | // If specified, a node selector for the pod 197 | #[schemars(default, schema_with = "node_selector_schema")] 198 | pub node_selector: Option>, 199 | // If specified, pod affinity 200 | pub affinity: Option, 201 | } 202 | 203 | fn env_schema(g: &mut schemars::gen::SchemaGenerator) -> Schema { 204 | serde_json::from_value(json!({ 205 | "items": EnvVar::json_schema(g), 206 | "nullable": true, 207 | "type": "array", 208 | "x-kubernetes-list-map-keys": ["name"], 209 | "x-kubernetes-list-type": "map" 210 | })) 211 | .unwrap() 212 | } 213 | 214 | fn node_selector_schema(_g: &mut schemars::gen::SchemaGenerator) -> Schema { 215 | serde_json::from_value(json!({ 216 | "description": "If specified, a node selector for the pod", 217 | "additionalProperties": { 218 | "type": "string" 219 | }, 220 | "type": "object", 221 | "x-kubernetes-map-type": "atomic" 222 | })) 223 | .unwrap() 224 | } 225 | 226 | /// Security configuration 227 | #[derive(Deserialize, Serialize, Clone, Default, Debug, JsonSchema)] 228 | #[serde(rename_all = "camelCase")] 229 | pub struct RestateClusterSecurity { 230 | /// Annotations to set on the Service created for Restate 231 | pub service_annotations: Option>, 232 | /// Annotations to set on the ServiceAccount created for Restate 233 | pub service_account_annotations: Option>, 234 | /// If set, create an AWS PodIdentityAssociation using the ACK CRD in order to give the Restate pod access to this role and 235 | /// allow the cluster to reach the Pod Identity agent. 236 | pub aws_pod_identity_association_role_arn: Option, 237 | /// If set, create an AWS SecurityGroupPolicy CRD object to place the Restate pod into these security groups 238 | pub aws_pod_security_groups: Option>, 239 | /// Network peers to allow inbound access to restate ports 240 | /// If unset, will not allow any new traffic. Set any of these to [] to allow all traffic - not recommended. 241 | pub network_peers: Option, 242 | /// Egress rules to allow the cluster to make outbound requests; this is in addition to the default 243 | /// of allowing public internet access and cluster DNS access. Providing a single empty rule will allow 244 | /// all outbound traffic - not recommended 245 | pub network_egress_rules: Option>, 246 | /// If set, configure the use of a private key to sign outbound requests from this cluster 247 | pub request_signing_private_key: Option, 248 | } 249 | 250 | #[derive(Deserialize, Serialize, Clone, Default, Debug, JsonSchema)] 251 | pub struct RestateClusterNetworkPeers { 252 | #[schemars(default, schema_with = "network_peers_schema")] 253 | pub ingress: Option>, 254 | #[schemars(default, schema_with = "network_peers_schema")] 255 | pub admin: Option>, 256 | #[schemars(default, schema_with = "network_peers_schema")] 257 | pub metrics: Option>, 258 | } 259 | 260 | fn network_peers_schema(g: &mut schemars::gen::SchemaGenerator) -> Schema { 261 | serde_json::from_value(json!({ 262 | "items": NetworkPolicyPeer::json_schema(g), 263 | "nullable": true, 264 | "type": "array", 265 | "x-kubernetes-list-type": "atomic" 266 | })) 267 | .unwrap() 268 | } 269 | 270 | /// NetworkPolicyEgressRule describes a particular set of traffic that is allowed out of pods matched by a NetworkPolicySpec's podSelector. The traffic must match both ports and to. This type is beta-level in 1.8 271 | #[derive(Deserialize, Serialize, Clone, Default, Debug, JsonSchema)] 272 | pub struct NetworkPolicyEgressRule { 273 | /// ports is a list of destination ports for outgoing traffic. 
Each item in this list is combined using a logical OR. If this field is empty or missing, this rule matches all ports (traffic not restricted by port). If this field is present and contains at least one item, then this rule allows traffic only if the traffic matches at least one port in the list. 274 | #[schemars(default, schema_with = "network_ports_schema")] 275 | pub ports: Option>, 276 | 277 | /// to is a list of destinations for outgoing traffic of pods selected for this rule. Items in this list are combined using a logical OR operation. If this field is empty or missing, this rule matches all destinations (traffic not restricted by destination). If this field is present and contains at least one item, this rule allows traffic only if the traffic matches at least one item in the to list. 278 | #[schemars(default, schema_with = "network_peers_schema")] 279 | pub to: Option>, 280 | } 281 | 282 | impl From for v1::NetworkPolicyEgressRule { 283 | fn from(value: NetworkPolicyEgressRule) -> Self { 284 | Self { 285 | ports: value.ports, 286 | to: value.to, 287 | } 288 | } 289 | } 290 | 291 | fn network_ports_schema(_: &mut schemars::gen::SchemaGenerator) -> Schema { 292 | serde_json::from_value(json!({ 293 | "items": { 294 | "description": "NetworkPolicyPort describes a port to allow traffic on", 295 | "properties": { 296 | "endPort": { 297 | "description": "endPort indicates that the range of ports from port to endPort if set, inclusive, should be allowed by the policy. This field cannot be defined if the port field is not defined or if the port field is defined as a named (string) port. The endPort must be equal or greater than port.", 298 | "format": "int32", 299 | "type": "integer" 300 | }, 301 | "port": { 302 | "x-kubernetes-int-or-string": true, 303 | "anyOf": [{"type": "integer"}, {"type": "string"}], 304 | "description": "port represents the port on the given protocol. This can either be a numerical or named port on a pod. If this field is not provided, this matches all port names and numbers. If present, only traffic on the specified protocol AND port will be matched." 305 | }, 306 | "protocol": { 307 | "description": "protocol represents the protocol (TCP, UDP, or SCTP) which traffic must match. If not specified, this field defaults to TCP.", 308 | "type": "string" 309 | } 310 | }, 311 | "type": "object", 312 | }, 313 | "nullable": true, 314 | "type": "array", 315 | "x-kubernetes-list-type": "atomic" 316 | })) 317 | .unwrap() 318 | } 319 | 320 | /// Configuration for request signing private keys. Exactly one source of 'secret', 'secretProvider' 321 | /// must be provided. 322 | #[derive(Deserialize, Serialize, Clone, Default, Debug, JsonSchema)] 323 | #[serde(rename_all = "camelCase")] 324 | pub struct RequestSigningPrivateKey { 325 | /// The version of Restate request signing that the key is for; currently only "v1" accepted. 326 | pub version: String, 327 | /// A Kubernetes Secret source for the private key 328 | pub secret: Option, 329 | /// A CSI secret provider source for the private key; will create a SecretProviderClass. 330 | pub secret_provider: Option, 331 | } 332 | 333 | #[derive(Deserialize, Serialize, Clone, Default, Debug, JsonSchema)] 334 | #[serde(rename_all = "camelCase")] 335 | pub struct SecretSigningKeySource { 336 | /// The key of the secret to select from. Must be a valid secret key. 337 | pub key: String, 338 | /// Name of the secret. 
339 | pub secret_name: String, 340 | } 341 | 342 | #[derive(Deserialize, Serialize, Clone, Default, Debug, JsonSchema)] 343 | pub struct SecretProviderSigningKeySource { 344 | /// Configuration for specific provider 345 | pub parameters: Option>, 346 | /// Configuration for provider name 347 | pub provider: Option, 348 | /// The path of the private key relative to the root of the mounted volume 349 | pub path: PathBuf, 350 | } 351 | 352 | /// Status of the RestateCluster. 353 | /// This is set and managed automatically. 354 | /// Read-only. 355 | /// More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status 356 | #[derive(Deserialize, Serialize, Clone, Default, Debug, JsonSchema)] 357 | pub struct RestateClusterStatus { 358 | conditions: Option>, 359 | } 360 | 361 | #[derive(Deserialize, Serialize, Clone, Default, Debug, JsonSchema, PartialEq, Eq)] 362 | #[serde(rename_all = "camelCase")] 363 | pub struct RestateClusterCondition { 364 | /// Last time the condition transitioned from one status to another. 365 | pub last_transition_time: Option, 366 | 367 | /// Human-readable message indicating details about last transition. 368 | pub message: Option, 369 | 370 | /// Unique, one-word, CamelCase reason for the condition's last transition. 371 | pub reason: Option, 372 | 373 | /// Status is the status of the condition. Can be True, False, Unknown. 374 | pub status: String, 375 | 376 | /// Type of the condition, known values are (`Ready`). 377 | pub r#type: String, 378 | } 379 | 380 | // Context for our reconciler 381 | #[derive(Clone)] 382 | pub struct Context { 383 | /// Kubernetes client 384 | // Store for pvc metadata 385 | pub client: Client, 386 | // Store for pvc metadata 387 | pub pvc_meta_store: Store>, 388 | // Store for statefulsets 389 | pub ss_store: Store, 390 | // If set, watch PodIdentityAssociation resources, and if requested create them against this cluster 391 | pub aws_pod_identity_association_cluster: Option, 392 | // Whether the EKS SecurityGroupPolicy CRD is installed 393 | pub security_group_policy_installed: bool, 394 | // Whether the SecretProviderClass CRD is installed 395 | pub secret_provider_class_installed: bool, 396 | /// Diagnostics read by the web server 397 | pub diagnostics: Arc>, 398 | /// Prometheus metrics 399 | pub metrics: Metrics, 400 | } 401 | 402 | #[instrument(skip(ctx, rc), fields(trace_id))] 403 | async fn reconcile(rc: Arc, ctx: Arc) -> Result { 404 | if let Some(trace_id) = telemetry::get_trace_id() { 405 | Span::current().record("trace_id", field::display(&trace_id)); 406 | } 407 | let recorder = ctx 408 | .diagnostics 409 | .read() 410 | .await 411 | .recorder(ctx.client.clone(), &rc); 412 | let _timer = ctx.metrics.count_and_measure(); 413 | ctx.diagnostics.write().await.last_event = Utc::now(); 414 | let rcs: Api = Api::all(ctx.client.clone()); 415 | 416 | info!("Reconciling RestateCluster \"{}\"", rc.name_any()); 417 | match finalizer(&rcs, RESTATE_CLUSTER_FINALIZER, rc.clone(), |event| async { 418 | match event { 419 | Finalizer::Apply(rc) => rc.reconcile_status(ctx.clone()).await, 420 | Finalizer::Cleanup(rc) => rc.cleanup(ctx.clone()).await, 421 | } 422 | }) 423 | .await 424 | { 425 | Ok(action) => Ok(action), 426 | Err(err) => { 427 | warn!("reconcile failed: {:?}", err); 428 | 429 | recorder 430 | .publish(Event { 431 | type_: EventType::Warning, 432 | reason: "FailedReconcile".into(), 433 | note: Some(err.to_string()), 434 | action: "Reconcile".into(), 435 | secondary: None, 436 | }) 
437 | .await?; 438 | 439 | let err = Error::FinalizerError(Box::new(err)); 440 | ctx.metrics.reconcile_failure(&rc, &err); 441 | Err(err) 442 | } 443 | } 444 | } 445 | 446 | fn error_policy(_rc: Arc, _error: &Error, _ctx: C) -> Action { 447 | Action::requeue(Duration::from_secs(30)) 448 | } 449 | 450 | impl RestateCluster { 451 | // Reconcile (for non-finalizer related changes) 452 | async fn reconcile(&self, ctx: Arc, name: &str) -> Result<()> { 453 | let client = ctx.client.clone(); 454 | let nss: Api = Api::all(client.clone()); 455 | 456 | let oref = self.controller_owner_ref(&()).unwrap(); 457 | 458 | let base_metadata = ObjectMeta { 459 | name: Some(name.into()), 460 | labels: Some(self.labels().clone()), 461 | annotations: Some(self.annotations().clone()), 462 | owner_references: Some(vec![oref.clone()]), 463 | ..Default::default() 464 | }; 465 | 466 | if let Some(ns) = nss.get_metadata_opt(name).await? { 467 | // check to see if extant namespace is managed by us 468 | if !ns 469 | .metadata 470 | .owner_references 471 | .map(|orefs| orefs.contains(&oref)) 472 | .unwrap_or(false) 473 | { 474 | return Err(Error::NameConflict); 475 | } 476 | } 477 | 478 | apply_namespace( 479 | &nss, 480 | Namespace { 481 | metadata: object_meta(&base_metadata, name), 482 | ..Default::default() 483 | }, 484 | ) 485 | .await?; 486 | 487 | reconcile_network_policies( 488 | ctx.client.clone(), 489 | name, 490 | &base_metadata, 491 | self.spec 492 | .security 493 | .as_ref() 494 | .and_then(|s| s.network_peers.as_ref()), 495 | self.spec 496 | .security 497 | .as_ref() 498 | .and_then(|s| s.network_egress_rules.as_deref()), 499 | self.spec 500 | .security 501 | .as_ref() 502 | .is_some_and(|s| s.aws_pod_identity_association_role_arn.is_some()), 503 | ) 504 | .await?; 505 | 506 | let signing_key = reconcile_signing_key( 507 | &ctx, 508 | name, 509 | &base_metadata, 510 | self.spec 511 | .security 512 | .as_ref() 513 | .and_then(|s| s.request_signing_private_key.as_ref()), 514 | ) 515 | .await?; 516 | 517 | reconcile_compute(&ctx, name, &base_metadata, &self.spec, signing_key).await?; 518 | 519 | Ok(()) 520 | } 521 | 522 | async fn reconcile_status(&self, ctx: Arc) -> Result { 523 | let rcs: Api = Api::all(ctx.client.clone()); 524 | 525 | let name = self.name_any(); 526 | 527 | let (result, message, reason, status) = match self.reconcile(ctx, &name).await { 528 | Ok(()) => { 529 | // If no events were received, check back every 5 minutes 530 | let action = Action::requeue(Duration::from_secs(5 * 60)); 531 | 532 | ( 533 | Ok(action), 534 | "Restate Cluster provisioned successfully".into(), 535 | "Provisioned".into(), 536 | "True".into(), 537 | ) 538 | } 539 | Err(Error::NotReady { 540 | message, 541 | reason, 542 | requeue_after, 543 | }) => { 544 | // default 1 minute in the NotReady case 545 | let requeue_after = requeue_after.unwrap_or(Duration::from_secs(60)); 546 | 547 | info!("RestateCluster is not yet ready: {message}"); 548 | 549 | ( 550 | Ok(Action::requeue(requeue_after)), 551 | message, 552 | reason, 553 | "False".into(), 554 | ) 555 | } 556 | Err(err) => { 557 | let message = err.to_string(); 558 | ( 559 | Err(err), 560 | message, 561 | "FailedReconcile".into(), 562 | "Unknown".into(), 563 | ) 564 | } 565 | }; 566 | 567 | let existing_ready = self 568 | .status 569 | .as_ref() 570 | .and_then(|s| s.conditions.as_ref()) 571 | .and_then(|c| c.iter().find(|cond| cond.r#type == "Ready")); 572 | let now = k8s_openapi::apimachinery::pkg::apis::meta::v1::Time(Utc::now()); 573 | 574 | let mut ready = 
521 | 
522 |     async fn reconcile_status(&self, ctx: Arc<Context>) -> Result<Action> {
523 |         let rcs: Api<RestateCluster> = Api::all(ctx.client.clone());
524 | 
525 |         let name = self.name_any();
526 | 
527 |         let (result, message, reason, status) = match self.reconcile(ctx, &name).await {
528 |             Ok(()) => {
529 |                 // If no events were received, check back every 5 minutes
530 |                 let action = Action::requeue(Duration::from_secs(5 * 60));
531 | 
532 |                 (
533 |                     Ok(action),
534 |                     "Restate Cluster provisioned successfully".into(),
535 |                     "Provisioned".into(),
536 |                     "True".into(),
537 |                 )
538 |             }
539 |             Err(Error::NotReady {
540 |                 message,
541 |                 reason,
542 |                 requeue_after,
543 |             }) => {
544 |                 // default to 1 minute in the NotReady case
545 |                 let requeue_after = requeue_after.unwrap_or(Duration::from_secs(60));
546 | 
547 |                 info!("RestateCluster is not yet ready: {message}");
548 | 
549 |                 (
550 |                     Ok(Action::requeue(requeue_after)),
551 |                     message,
552 |                     reason,
553 |                     "False".into(),
554 |                 )
555 |             }
556 |             Err(err) => {
557 |                 let message = err.to_string();
558 |                 (
559 |                     Err(err),
560 |                     message,
561 |                     "FailedReconcile".into(),
562 |                     "Unknown".into(),
563 |                 )
564 |             }
565 |         };
566 | 
567 |         let existing_ready = self
568 |             .status
569 |             .as_ref()
570 |             .and_then(|s| s.conditions.as_ref())
571 |             .and_then(|c| c.iter().find(|cond| cond.r#type == "Ready"));
572 |         let now = k8s_openapi::apimachinery::pkg::apis::meta::v1::Time(Utc::now());
573 | 
574 |         let mut ready = RestateClusterCondition {
575 |             last_transition_time: Some(
576 |                 existing_ready
577 |                     .and_then(|r| r.last_transition_time.clone())
578 |                     .unwrap_or_else(|| now.clone()),
579 |             ),
580 |             message: Some(message),
581 |             reason: Some(reason),
582 |             status,
583 |             r#type: "Ready".into(),
584 |         };
585 | 
586 |         if existing_ready.map(|r| &r.status) != Some(&ready.status) {
587 |             // update the transition time if the status has changed at all
588 |             ready.last_transition_time = Some(now)
589 |         }
590 | 
591 |         // always overwrite the status object with what we saw
592 |         let new_status = Patch::Apply(json!({
593 |             "apiVersion": "restate.dev/v1",
594 |             "kind": "RestateCluster",
595 |             "status": RestateClusterStatus {
596 |                 conditions: Some(vec![ready]),
597 |             }
598 |         }));
599 |         let ps = PatchParams::apply("restate-operator").force();
600 |         let _o = rcs.patch_status(&name, &ps, &new_status).await?;
601 | 
602 |         result
603 |     }
604 | 
605 |     // Finalizer cleanup (the object was deleted, ensure nothing is orphaned)
606 |     async fn cleanup(&self, ctx: Arc<Context>) -> Result<Action> {
607 |         let recorder = ctx
608 |             .diagnostics
609 |             .read()
610 |             .await
611 |             .recorder(ctx.client.clone(), self);
612 |         // RestateCluster doesn't have any real cleanup, so we just publish an event
613 |         recorder
614 |             .publish(Event {
615 |                 type_: EventType::Normal,
616 |                 reason: "DeleteRequested".into(),
617 |                 note: Some(format!("Delete `{}`", self.name_any())),
618 |                 action: "Deleting".into(),
619 |                 secondary: None,
620 |             })
621 |             .await?;
622 |         Ok(Action::await_change())
623 |     }
624 | }
625 | 
626 | async fn apply_namespace(nss: &Api<Namespace>, ns: Namespace) -> std::result::Result<(), Error> {
627 |     let name = ns.metadata.name.as_ref().unwrap();
628 |     let params = PatchParams::apply("restate-operator").force();
629 |     debug!("Applying Namespace {}", name);
630 |     nss.patch(name, &params, &Patch::Apply(&ns)).await?;
631 |     Ok(())
632 | }
633 | 
634 | /// Diagnostics to be exposed by the web server
635 | #[derive(Clone, Serialize)]
636 | pub struct Diagnostics {
637 |     #[serde(deserialize_with = "from_ts")]
638 |     pub last_event: DateTime<Utc>,
639 |     #[serde(skip)]
640 |     pub reporter: Reporter,
641 | }
642 | 
643 | impl Default for Diagnostics {
644 |     fn default() -> Self {
645 |         Self {
646 |             last_event: Utc::now(),
647 |             reporter: "restate-operator".into(),
648 |         }
649 |     }
650 | }
651 | 
652 | impl Diagnostics {
653 |     fn recorder(&self, client: Client, rc: &RestateCluster) -> Recorder {
654 |         Recorder::new(client, self.reporter.clone(), rc.object_ref(&()))
655 |     }
656 | }
657 | 
658 | /// State shared between the controller and the web server
659 | #[derive(Clone, Default)]
660 | pub struct State {
661 |     /// Diagnostics populated by the reconciler
662 |     diagnostics: Arc<RwLock<Diagnostics>>,
663 |     /// Metrics registry
664 |     registry: prometheus::Registry,
665 |     /// If set, watch AWS PodIdentityAssociation resources, and if requested create them against this cluster
666 |     aws_pod_identity_association_cluster: Option<String>,
667 | }
668 | 
669 | /// State wrapper around the controller outputs for the web server
670 | impl State {
671 |     /// Metrics getter
672 |     pub fn metrics(&self) -> Vec<prometheus::proto::MetricFamily> {
673 |         self.registry.gather()
674 |     }
675 | 
676 |     /// State getter
677 |     pub async fn diagnostics(&self) -> Diagnostics {
678 |         self.diagnostics.read().await.clone()
679 |     }
680 | 
681 |     pub fn with_aws_pod_identity_association_cluster(
682 |         self,
683 |         aws_pod_identity_association_cluster: Option<String>,
684 |     ) -> Self {
685 |         Self {
686 |             aws_pod_identity_association_cluster,
687 |             ..self
688 |         }
689 |     }
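    // The Diagnostics held here are what the index route in main.rs serialises;
    // a response body would look roughly like (timestamp illustrative):
    //
    //   {"last_event":"2024-01-01T00:00:00Z"}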
690 | 
691 |     // Create a Controller Context that can update State
692 |     pub fn to_context(
693 |         &self,
694 |         client: Client,
695 |         pvc_meta_store: Store<PartialObjectMeta<PersistentVolumeClaim>>,
696 |         ss_store: Store<StatefulSet>,
697 |         security_group_policy_installed: bool,
698 |         secret_provider_class_installed: bool,
699 |     ) -> Arc<Context> {
700 |         Arc::new(Context {
701 |             client,
702 |             pvc_meta_store,
703 |             ss_store,
704 |             aws_pod_identity_association_cluster: self.aws_pod_identity_association_cluster.clone(),
705 |             security_group_policy_installed,
706 |             secret_provider_class_installed,
707 |             metrics: Metrics::default().register(&self.registry).unwrap(),
708 |             diagnostics: self.diagnostics.clone(),
709 |         })
710 |     }
711 | }
712 | 
713 | /// Initialize the controller and shared state (given the crd is installed)
714 | pub async fn run(state: State) {
715 |     let client = Client::try_default()
716 |         .await
717 |         .expect("failed to create kube Client");
718 | 
719 |     let api_groups = match client.list_api_groups().await {
720 |         Ok(list) => list,
721 |         Err(e) => {
722 |             error!("Could not list api groups: {e:?}");
723 |             std::process::exit(1);
724 |         }
725 |     };
726 | 
727 |     let (
728 |         security_group_policy_installed,
729 |         pod_identity_association_installed,
730 |         secret_provider_class_installed,
731 |     ) = api_groups
732 |         .groups
733 |         .iter()
734 |         .fold((false, false, false), |(sgp, pia, spc), group| {
735 |             fn group_matches<R: Resource<DynamicType = ()>>(group: &APIGroup) -> bool {
736 |                 group.name == R::group(&())
737 |                     && group.versions.iter().any(|v| v.version == R::version(&()))
738 |             }
739 |             (
740 |                 sgp || group_matches::<SecurityGroupPolicy>(group),
741 |                 pia || group_matches::<PodIdentityAssociation>(group),
742 |                 spc || group_matches::<SecretProviderClass>(group),
743 |             )
744 |         });
745 | 
746 |     let rc_api = Api::<RestateCluster>::all(client.clone());
747 |     let ns_api = Api::<Namespace>::all(client.clone());
748 |     let ss_api = Api::<StatefulSet>::all(client.clone());
749 |     let pvc_api = Api::<PersistentVolumeClaim>::all(client.clone());
750 |     let svc_api = Api::<Service>::all(client.clone());
751 |     let svcacc_api = Api::<ServiceAccount>::all(client.clone());
752 |     let cm_api = Api::<ConfigMap>::all(client.clone());
753 |     let np_api = Api::<NetworkPolicy>::all(client.clone());
754 |     let pia_api = Api::<PodIdentityAssociation>::all(client.clone());
755 |     let sgp_api = Api::<SecurityGroupPolicy>::all(client.clone());
756 |     let spc_api = Api::<SecretProviderClass>::all(client.clone());
757 | 
758 |     if state.aws_pod_identity_association_cluster.is_some() && !pod_identity_association_installed {
759 |         error!("PodIdentityAssociation is not available on the apiserver, but a pod identity association cluster was provided. Is the CRD installed?");
760 |         std::process::exit(1);
761 |     }
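    // Both this check and the one below fail fast rather than letting the
    // controller run without its inputs. The RestateCluster definition ships
    // with this repository and can be installed with, for example:
    //
    //   kubectl apply -f crd/crd.yaml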
762 | 
763 |     if let Err(e) = rc_api.list(&ListParams::default().limit(1)).await {
764 |         error!("RestateCluster is not queryable; {e:?}. Is the CRD installed?");
765 |         std::process::exit(1);
766 |     }
767 | 
768 |     // all resources we create have this label
769 |     let cfg = Config::default().labels("app.kubernetes.io/name=restate");
770 |     // but restateclusters themselves don't
771 |     let rc_cfg = Config::default();
772 | 
773 |     let (pvc_meta_store, pvc_meta_writer) = reflector::store();
774 |     let pvc_meta_reflector = reflector(pvc_meta_writer, metadata_watcher(pvc_api, cfg.clone()))
775 |         .touched_objects()
776 |         .default_backoff();
777 | 
778 |     let (ss_store, ss_writer) = reflector::store();
779 |     let ss_reflector = reflector(ss_writer, watcher(ss_api, cfg.clone()))
780 |         .touched_objects()
781 |         .default_backoff()
782 |         .predicate_filter(changed_predicate.combine(status_predicate_serde));
783 | 
784 |     let np_watcher = metadata_watcher(np_api, cfg.clone())
785 |         .touched_objects()
786 |         .predicate_filter(changed_predicate);
787 | 
788 |     let ns_watcher = metadata_watcher(ns_api, cfg.clone())
789 |         .touched_objects()
790 |         .predicate_filter(changed_predicate);
791 | 
792 |     let svcacc_watcher = metadata_watcher(svcacc_api, cfg.clone())
793 |         .touched_objects()
794 |         .predicate_filter(changed_predicate);
795 | 
796 |     let svc_watcher = watcher(svc_api, cfg.clone())
797 |         .touched_objects()
798 |         // svc has no generation so we hash the spec to check for changes
799 |         .predicate_filter(changed_predicate.combine(spec_predicate_serde));
800 | 
801 |     let cm_watcher = watcher(cm_api, cfg.clone())
802 |         .touched_objects()
803 |         // cm has no generation so we hash the data to check for changes
804 |         .predicate_filter(changed_predicate.combine(spec_predicate));
805 | 
806 |     let controller = Controller::new(rc_api, rc_cfg.clone())
807 |         .shutdown_on_signal()
808 |         .owns_stream(svc_watcher)
809 |         .owns_stream(cm_watcher)
810 |         .owns_stream(ns_watcher)
811 |         .owns_stream(svcacc_watcher)
812 |         .owns_stream(np_watcher)
813 |         .owns_stream(ss_reflector)
814 |         .watches_stream(
815 |             pvc_meta_reflector,
816 |             |pvc| -> Option<ObjectRef<RestateCluster>> {
817 |                 let name = pvc.labels().get("app.kubernetes.io/name")?.as_str();
818 |                 if name != "restate" {
819 |                     // should have been caught by the label selector
820 |                     return None;
821 |                 }
822 | 
823 |                 let instance = pvc.labels().get("app.kubernetes.io/instance")?.as_str();
824 | 
825 |                 Some(ObjectRef::new(instance))
826 |             },
827 |         );
828 |     let controller = if pod_identity_association_installed {
829 |         let pia_watcher = watcher(pia_api, cfg.clone())
830 |             .touched_objects()
831 |             // avoid apply loops that seem to happen with crds
832 |             .predicate_filter(changed_predicate.combine(status_predicate));
833 | 
834 |         let job_api = Api::<Job>::all(client.clone());
835 | 
836 |         let job_watcher = metadata_watcher(
837 |             job_api,
838 |             Config::default().labels("app.kubernetes.io/name=restate-pia-canary"),
839 |         )
840 |         .touched_objects()
841 |         .predicate_filter(changed_predicate);
842 | 
843 |         controller.owns_stream(pia_watcher).owns_stream(job_watcher)
844 |     } else {
845 |         controller
846 |     };
847 |     let controller = if security_group_policy_installed {
848 |         let sgp_watcher = metadata_watcher(sgp_api, cfg.clone())
849 |             .touched_objects()
850 |             // avoid apply loops that seem to happen with crds
851 |             .predicate_filter(changed_predicate);
852 | 
853 |         controller.owns_stream(sgp_watcher)
854 |     } else {
855 |         controller
856 |     };
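    // To summarise the trigger graph assembled here: owned resources (Service,
    // ConfigMap, Namespace, ServiceAccount, NetworkPolicy, StatefulSet) requeue
    // their owning RestateCluster via owner references; PVCs are mapped back to
    // a cluster through their app.kubernetes.io/instance label; and the
    // PodIdentityAssociation, canary Job, SecurityGroupPolicy and
    // SecretProviderClass watchers (above and below) are only attached when the
    // corresponding CRDs were detected at startup.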
857 |     let controller = if secret_provider_class_installed {
858 |         let spc_watcher = metadata_watcher(spc_api, cfg.clone())
859 |             .touched_objects()
860 |             // avoid apply loops that seem to happen with crds
861 |             .predicate_filter(changed_predicate);
862 | 
863 |         controller.owns_stream(spc_watcher)
864 |     } else {
865 |         controller
866 |     };
867 |     controller
868 |         .run(
869 |             reconcile,
870 |             error_policy,
871 |             state.to_context(
872 |                 client,
873 |                 pvc_meta_store,
874 |                 ss_store,
875 |                 security_group_policy_installed,
876 |                 secret_provider_class_installed,
877 |             ),
878 |         )
879 |         .filter_map(|x| async move { Result::ok(x) })
880 |         .for_each(|_| futures::future::ready(()))
881 |         .await;
882 | }
883 | 
884 | fn changed_predicate<K: Resource>(obj: &K) -> Option<u64> {
885 |     let mut hasher = DefaultHasher::new();
886 |     if let Some(g) = obj.meta().generation {
887 |         // covers spec but not metadata or status
888 |         g.hash(&mut hasher)
889 |     }
890 |     obj.labels().hash(&mut hasher);
891 |     obj.annotations().hash(&mut hasher);
892 |     // ignore status
893 |     Some(hasher.finish())
894 | }
895 | 
896 | fn status_predicate<K: HasStatus>(obj: &K) -> Option<u64>
897 | where
898 |     K::Status: Hash,
899 | {
900 |     let mut hasher = DefaultHasher::new();
901 |     if let Some(s) = obj.status() {
902 |         s.hash(&mut hasher)
903 |     }
904 |     Some(hasher.finish())
905 | }
906 | 
907 | trait MyHasStatus {
908 |     type Status;
909 | 
910 |     fn status(&self) -> Option<&Self::Status>;
911 | }
912 | 
913 | impl MyHasStatus for StatefulSet {
914 |     type Status = StatefulSetStatus;
915 | 
916 |     fn status(&self) -> Option<&Self::Status> {
917 |         self.status.as_ref()
918 |     }
919 | }
920 | 
921 | fn status_predicate_serde<K: MyHasStatus>(obj: &K) -> Option<u64>
922 | where
923 |     K::Status: Serialize,
924 | {
925 |     let mut hasher = DefaultHasher::new();
926 |     if let Some(s) = obj.status() {
927 |         serde_hashkey::to_key(s)
928 |             .expect("serde_hashkey never to return an error")
929 |             .hash(&mut hasher);
930 |     }
931 |     Some(hasher.finish())
932 | }
933 | 
934 | pub trait MyHasSpec {
935 |     type Spec;
936 | 
937 |     fn spec(&self) -> &Self::Spec;
938 | }
939 | 
940 | impl MyHasSpec for Service {
941 |     type Spec = Option<ServiceSpec>;
942 | 
943 |     fn spec(&self) -> &Self::Spec {
944 |         &self.spec
945 |     }
946 | }
947 | 
948 | impl MyHasSpec for ConfigMap {
949 |     type Spec = Option<BTreeMap<String, String>>;
950 | 
951 |     fn spec(&self) -> &Self::Spec {
952 |         &self.data
953 |     }
954 | }
955 | 
956 | fn spec_predicate<K: MyHasSpec>(obj: &K) -> Option<u64>
957 | where
958 |     K::Spec: Hash,
959 | {
960 |     let mut hasher = DefaultHasher::new();
961 |     obj.spec().hash(&mut hasher);
962 |     Some(hasher.finish())
963 | }
964 | 
965 | fn spec_predicate_serde<K: MyHasSpec>(obj: &K) -> Option<u64>
966 | where
967 |     K::Spec: Serialize,
968 | {
969 |     let mut hasher = DefaultHasher::new();
970 |     serde_hashkey::to_key(obj.spec())
971 |         .expect("serde_hashkey never to return an error")
972 |         .hash(&mut hasher);
973 |     Some(hasher.finish())
974 | }
975 | 
--------------------------------------------------------------------------------
/src/crdgen.rs:
--------------------------------------------------------------------------------
1 | use kube::CustomResourceExt;
2 | fn main() {
3 |     print!(
4 |         "{}",
5 |         serde_yaml::to_string(&restate_operator::RestateCluster::crd()).unwrap()
6 |     )
7 | }
8 | 
--------------------------------------------------------------------------------
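The crdgen binary prints the RestateCluster CRD as YAML to stdout; the checked-in crd/crd.yaml is presumably regenerated with something like:

    cargo run --bin crdgen > crd/crd.yaml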
/src/lib.rs:
--------------------------------------------------------------------------------
1 | use reconcilers::signing_key::InvalidSigningKeyError;
2 | use std::time::Duration;
3 | use thiserror::Error;
4 | 
5 | #[derive(Error, Debug)]
6 | pub enum Error {
7 |     #[error("SerializationError: {0}")]
8 |     SerializationError(#[from] serde_json::Error),
9 | 
10 |     #[error("Kube Error: {0}")]
11 |     KubeError(#[from] kube::Error),
12 | 
13 |     #[error("Finalizer Error: {0}")]
14 |     // NB: awkward type because finalizer::Error embeds the reconciler error (which is this),
15 |     // so we box this error to break the cycle
16 |     FinalizerError(#[from] Box<kube::runtime::finalizer::Error<Error>>),
17 | 
18 |     #[error("A namespace cannot be created for this name as one already exists")]
19 |     NameConflict,
20 | 
21 |     #[error("Cluster is not yet Ready: {message}")]
22 |     NotReady {
23 |         message: String,
24 |         reason: String,
25 |         requeue_after: Option<Duration>,
26 |     },
27 | 
28 |     #[error(transparent)]
29 |     InvalidSigningKeyError(#[from] InvalidSigningKeyError),
30 | }
31 | 
32 | pub type Result<T, E = Error> = std::result::Result<T, E>;
33 | 
34 | impl Error {
35 |     pub fn metric_label(&self) -> &'static str {
36 |         match self {
37 |             Error::SerializationError(_) => "SerializationError",
38 |             Error::KubeError(_) => "KubeError",
39 |             Error::FinalizerError(_) => "FinalizerError",
40 |             Error::NameConflict => "NameConflict",
41 |             Error::NotReady { .. } => "NotReady",
42 |             Error::InvalidSigningKeyError(_) => "InvalidSigningKeyError",
43 |         }
44 |     }
45 | }
46 | 
47 | /// Expose all controller components used by main
48 | pub mod controller;
49 | 
50 | pub use crate::controller::*;
51 | 
52 | /// Log and trace integrations
53 | pub mod telemetry;
54 | 
55 | /// Metrics
56 | mod metrics;
57 | 
58 | pub use metrics::Metrics;
59 | 
60 | /// Reconcilers
61 | mod reconcilers;
62 | 
63 | /// External CRDs
64 | mod podidentityassociations;
65 | mod secretproviderclasses;
66 | mod securitygrouppolicies;
67 | 
--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
1 | use std::ffi::OsString;
2 | 
3 | use actix_web::{
4 |     get, middleware, web::Data, App, HttpRequest, HttpResponse, HttpServer, Responder,
5 | };
6 | use clap::Parser;
7 | use prometheus::{Encoder, TextEncoder};
8 | 
9 | pub use restate_operator::{self, telemetry, State};
10 | 
11 | #[derive(Debug, clap::Parser)]
12 | struct Arguments {
13 |     #[arg(
14 |         long = "aws-pod-identity-association-cluster",
15 |         env = "AWS_POD_IDENTITY_ASSOCIATION_CLUSTER",
16 |         value_name = "CLUSTERNAME"
17 |     )]
18 |     aws_pod_identity_association_cluster: Option<OsString>,
19 | }
20 | 
21 | #[get("/metrics")]
22 | async fn metrics(c: Data<State>, _req: HttpRequest) -> impl Responder {
23 |     let metrics = c.metrics();
24 |     let encoder = TextEncoder::new();
25 |     let mut buffer = vec![];
26 |     encoder.encode(&metrics, &mut buffer).unwrap();
27 |     HttpResponse::Ok().body(buffer)
28 | }
29 | 
30 | #[get("/health")]
31 | async fn health(_: HttpRequest) -> impl Responder {
32 |     HttpResponse::Ok().json("healthy")
33 | }
34 | 
35 | #[get("/")]
36 | async fn index(c: Data<State>, _req: HttpRequest) -> impl Responder {
37 |     let d = c.diagnostics().await;
38 |     HttpResponse::Ok().json(&d)
39 | }
40 | 
41 | #[tokio::main]
42 | async fn main() -> anyhow::Result<()> {
43 |     telemetry::init().await;
44 | 
45 |     let args: Arguments = Arguments::parse();
46 | 
47 |     // Initialize Kubernetes controller state
48 |     let state = State::default().with_aws_pod_identity_association_cluster(
49 |         args.aws_pod_identity_association_cluster
50 |             .and_then(|s| s.to_str().map(|s| s.to_string())),
51 |     );
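    // The operator can be run locally against the current kubeconfig; the flag
    // may equally be supplied via its environment variable, e.g. (cluster name
    // illustrative):
    //
    //   AWS_POD_IDENTITY_ASSOCIATION_CLUSTER=my-eks-cluster cargo run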
52 |     let controller = restate_operator::run(state.clone());
53 |     tokio::pin!(controller);
54 | 
55 |     // Start web server
56 |     let server = HttpServer::new(move || {
57 |         App::new()
58 |             .app_data(Data::new(state.clone()))
59 |             .wrap(middleware::Logger::default().exclude("/health"))
60 |             .service(index)
61 |             .service(health)
62 |             .service(metrics)
63 |     })
64 |     .bind("0.0.0.0:8080")?
65 |     .shutdown_timeout(5)
66 |     .run();
67 | 
68 |     tokio::pin!(server);
69 | 
70 |     // Both runtimes implement graceful shutdown, so poll until both are done
71 |     tokio::join!(controller, server).1?;
72 |     Ok(())
73 | }
74 | 
--------------------------------------------------------------------------------
/src/metrics.rs:
--------------------------------------------------------------------------------
1 | use crate::{Error, RestateCluster};
2 | use kube::ResourceExt;
3 | use prometheus::{histogram_opts, opts, HistogramVec, IntCounter, IntCounterVec, Registry};
4 | use tokio::time::Instant;
5 | 
6 | #[derive(Clone)]
7 | pub struct Metrics {
8 |     pub reconciliations: IntCounter,
9 |     pub failures: IntCounterVec,
10 |     pub reconcile_duration: HistogramVec,
11 | }
12 | 
13 | impl Default for Metrics {
14 |     fn default() -> Self {
15 |         let reconcile_duration = HistogramVec::new(
16 |             histogram_opts!(
17 |                 "restate_operator_reconcile_duration_seconds",
18 |                 "The duration of reconcile to complete in seconds"
19 |             )
20 |             .buckets(vec![0.01, 0.1, 0.25, 0.5, 1., 5., 15., 60.]),
21 |             &[],
22 |         )
23 |         .unwrap();
24 |         let failures = IntCounterVec::new(
25 |             opts!(
26 |                 "restate_operator_reconciliation_errors_total",
27 |                 "reconciliation errors",
28 |             ),
29 |             &["instance", "error"],
30 |         )
31 |         .unwrap();
32 |         let reconciliations =
33 |             IntCounter::new("restate_operator_reconciliations_total", "reconciliations").unwrap();
34 |         Metrics {
35 |             reconciliations,
36 |             failures,
37 |             reconcile_duration,
38 |         }
39 |     }
40 | }
41 | 
42 | impl Metrics {
43 |     /// Register API metrics to start tracking them.
44 |     pub fn register(self, registry: &Registry) -> Result<Self, prometheus::Error> {
45 |         registry.register(Box::new(self.reconcile_duration.clone()))?;
46 |         registry.register(Box::new(self.failures.clone()))?;
47 |         registry.register(Box::new(self.reconciliations.clone()))?;
48 |         Ok(self)
49 |     }
50 | 
51 |     pub fn reconcile_failure(&self, rc: &RestateCluster, e: &Error) {
52 |         self.failures
53 |             .with_label_values(&[rc.name_any().as_ref(), e.metric_label()])
54 |             .inc()
55 |     }
56 | 
57 |     pub fn count_and_measure(&self) -> ReconcileMeasurer {
58 |         self.reconciliations.inc();
59 |         ReconcileMeasurer {
60 |             start: Instant::now(),
61 |             metric: self.reconcile_duration.clone(),
62 |         }
63 |     }
64 | }
65 | 
66 | /// Smart function duration measurer
67 | ///
68 | /// Relies on Drop to calculate duration and register the observation in the histogram
69 | pub struct ReconcileMeasurer {
70 |     start: Instant,
71 |     metric: HistogramVec,
72 | }
73 | 
74 | impl Drop for ReconcileMeasurer {
75 |     fn drop(&mut self) {
76 |         #[allow(clippy::cast_precision_loss)]
77 |         let duration = self.start.elapsed().as_millis() as f64 / 1000.0;
78 |         self.metric.with_label_values(&[]).observe(duration);
79 |     }
80 | }
81 | 
--------------------------------------------------------------------------------
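These metrics are served on /metrics in the Prometheus text format; scraped output looks along the lines of (values illustrative):

    restate_operator_reconciliations_total 42
    restate_operator_reconciliation_errors_total{instance="my-cluster",error="NotReady"} 3
    restate_operator_reconcile_duration_seconds_bucket{le="0.25"} 40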
/src/podidentityassociations.rs:
--------------------------------------------------------------------------------
1 | // WARNING: generated by kopium - manual changes will be overwritten
2 | // kopium command: kopium podidentityassociations.eks.services.k8s.aws -A
3 | // kopium version: 0.16.5
4 | 
5 | use kube::CustomResource;
6 | use schemars::JsonSchema;
7 | use serde::{Deserialize, Serialize};
8 | use std::collections::BTreeMap;
9 | use std::hash::Hash;
10 | 
11 | /// PodIdentityAssociationSpec defines the desired state of PodIdentityAssociation.
12 | ///
13 | ///
14 | /// Amazon EKS Pod Identity associations provide the ability to manage credentials
15 | /// for your applications, similar to the way that Amazon EC2 instance profiles
16 | /// provide credentials to Amazon EC2 instances.
17 | #[derive(CustomResource, Serialize, Deserialize, Clone, Debug, JsonSchema)]
18 | #[kube(
19 |     group = "eks.services.k8s.aws",
20 |     version = "v1alpha1",
21 |     kind = "PodIdentityAssociation",
22 |     plural = "podidentityassociations"
23 | )]
24 | #[kube(namespaced)]
25 | #[kube(status = "PodIdentityAssociationStatus")]
26 | pub struct PodIdentityAssociationSpec {
27 |     /// A unique, case-sensitive identifier that you provide to ensure the idempotency
28 |     /// of the request.
29 |     #[serde(
30 |         default,
31 |         skip_serializing_if = "Option::is_none",
32 |         rename = "clientRequestToken"
33 |     )]
34 |     pub client_request_token: Option<String>,
35 |     /// The name of the cluster to create the association in.
36 |     #[serde(
37 |         default,
38 |         skip_serializing_if = "Option::is_none",
39 |         rename = "clusterName"
40 |     )]
41 |     pub cluster_name: Option<String>,
42 |     /// AWSResourceReferenceWrapper provides a wrapper around *AWSResourceReference
43 |     /// type to provide more user friendly syntax for references using 'from' field
44 |     /// Ex:
45 |     /// APIIDRef:
46 |     ///
47 |     ///
48 |     /// from:
49 |     ///   name: my-api
50 |     #[serde(
51 |         default,
52 |         skip_serializing_if = "Option::is_none",
53 |         rename = "clusterRef"
54 |     )]
55 |     pub cluster_ref: Option<PodIdentityAssociationClusterRef>,
56 |     /// The name of the Kubernetes namespace inside the cluster to create the association
57 |     /// in. The service account and the pods that use the service account must be
58 |     /// in this namespace.
59 |     pub namespace: String,
60 |     /// The Amazon Resource Name (ARN) of the IAM role to associate with the service
61 |     /// account. The EKS Pod Identity agent manages credentials to assume this role
62 |     /// for applications in the containers in the pods that use this service account.
63 |     #[serde(default, skip_serializing_if = "Option::is_none", rename = "roleARN")]
64 |     pub role_arn: Option<String>,
65 |     /// AWSResourceReferenceWrapper provides a wrapper around *AWSResourceReference
66 |     /// type to provide more user friendly syntax for references using 'from' field
67 |     /// Ex:
68 |     /// APIIDRef:
69 |     ///
70 |     ///
71 |     /// from:
72 |     ///   name: my-api
73 |     #[serde(default, skip_serializing_if = "Option::is_none", rename = "roleRef")]
74 |     pub role_ref: Option<PodIdentityAssociationRoleRef>,
75 |     /// The name of the Kubernetes service account inside the cluster to associate
76 |     /// the IAM credentials with.
77 |     #[serde(rename = "serviceAccount")]
78 |     pub service_account: String,
79 |     /// Metadata that assists with categorization and organization. Each tag consists
80 |     /// of a key and an optional value. You define both. Tags don't propagate to
81 |     /// any other cluster or Amazon Web Services resources.
82 |     ///
83 |     ///
84 |     /// The following basic restrictions apply to tags:
85 |     ///
86 |     ///
87 |     ///    - Maximum number of tags per resource – 50
88 |     ///
89 |     ///
90 |     ///    - For each resource, each tag key must be unique, and each tag key can
91 |     ///    have only one value.
92 |     ///
93 |     ///
94 |     ///    - Maximum key length – 128 Unicode characters in UTF-8
95 |     ///
96 |     ///
97 |     ///    - Maximum value length – 256 Unicode characters in UTF-8
98 |     ///
99 |     ///
100 |     ///    - If your tagging schema is used across multiple services and resources,
101 |     ///    remember that other services may have restrictions on allowed characters.
102 |     ///    Generally allowed characters are: letters, numbers, and spaces representable
103 |     ///    in UTF-8, and the following characters: + - = . _ : / @.
104 |     ///
105 |     ///
106 |     ///    - Tag keys and values are case-sensitive.
107 |     ///
108 |     ///
109 |     ///    - Do not use aws:, AWS:, or any upper or lowercase combination of such
110 |     ///    as a prefix for either keys or values as it is reserved for Amazon Web
111 |     ///    Services use. You cannot edit or delete tag keys or values with this prefix.
112 |     ///    Tags with this prefix do not count against your tags per resource limit.
113 |     #[serde(default, skip_serializing_if = "Option::is_none")]
114 |     pub tags: Option<BTreeMap<String, String>>,
115 | }
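// For reference, the PodIdentityAssociation the operator applies (see
// reconcilers/compute.rs) serialises to roughly the following manifest
// (cluster name, namespace and ARN are examples):
//
//   apiVersion: eks.services.k8s.aws/v1alpha1
//   kind: PodIdentityAssociation
//   metadata:
//     name: restate
//   spec:
//     clusterName: my-eks-cluster
//     namespace: my-cluster
//     serviceAccount: restate
//     roleARN: arn:aws:iam::111122223333:role/restate-role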
116 | 
117 | /// AWSResourceReferenceWrapper provides a wrapper around *AWSResourceReference
118 | /// type to provide more user friendly syntax for references using 'from' field
119 | /// Ex:
120 | /// APIIDRef:
121 | ///
122 | ///
123 | /// from:
124 | ///   name: my-api
125 | #[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)]
126 | pub struct PodIdentityAssociationClusterRef {
127 |     /// AWSResourceReference provides all the values necessary to reference another
128 |     /// k8s resource for finding the identifier(Id/ARN/Name)
129 |     #[serde(default, skip_serializing_if = "Option::is_none")]
130 |     pub from: Option<PodIdentityAssociationClusterRefFrom>,
131 | }
132 | 
133 | /// AWSResourceReference provides all the values necessary to reference another
134 | /// k8s resource for finding the identifier(Id/ARN/Name)
135 | #[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)]
136 | pub struct PodIdentityAssociationClusterRefFrom {
137 |     #[serde(default, skip_serializing_if = "Option::is_none")]
138 |     pub name: Option<String>,
139 | }
140 | 
141 | /// AWSResourceReferenceWrapper provides a wrapper around *AWSResourceReference
142 | /// type to provide more user friendly syntax for references using 'from' field
143 | /// Ex:
144 | /// APIIDRef:
145 | ///
146 | ///
147 | /// from:
148 | ///   name: my-api
149 | #[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)]
150 | pub struct PodIdentityAssociationRoleRef {
151 |     /// AWSResourceReference provides all the values necessary to reference another
152 |     /// k8s resource for finding the identifier(Id/ARN/Name)
153 |     #[serde(default, skip_serializing_if = "Option::is_none")]
154 |     pub from: Option<PodIdentityAssociationRoleRefFrom>,
155 | }
156 | 
157 | /// AWSResourceReference provides all the values necessary to reference another
158 | /// k8s resource for finding the identifier(Id/ARN/Name)
159 | #[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)]
160 | pub struct PodIdentityAssociationRoleRefFrom {
161 |     #[serde(default, skip_serializing_if = "Option::is_none")]
162 |     pub name: Option<String>,
163 | }
164 | 
165 | /// PodIdentityAssociationStatus defines the observed state of PodIdentityAssociation
166 | #[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Hash)]
167 | pub struct PodIdentityAssociationStatus {
168 |     /// All CRs managed by ACK have a common `Status.ACKResourceMetadata` member
169 |     /// that is used to contain resource sync state, account ownership,
170 |     /// constructed ARN for the resource
171 |     #[serde(
172 |         default,
173 |         skip_serializing_if = "Option::is_none",
174 |         rename = "ackResourceMetadata"
175 |     )]
176 |     pub ack_resource_metadata: Option<PodIdentityAssociationStatusAckResourceMetadata>,
177 |     /// The Amazon Resource Name (ARN) of the association.
178 |     #[serde(
179 |         default,
180 |         skip_serializing_if = "Option::is_none",
181 |         rename = "associationARN"
182 |     )]
183 |     pub association_arn: Option<String>,
184 |     /// The ID of the association.
185 |     #[serde(
186 |         default,
187 |         skip_serializing_if = "Option::is_none",
188 |         rename = "associationID"
189 |     )]
190 |     pub association_id: Option<String>,
191 |     /// All CRs managed by ACK have a common `Status.Conditions` member that
192 |     /// contains a collection of `ackv1alpha1.Condition` objects that describe
193 |     /// the various terminal states of the CR and its backend AWS service API
194 |     /// resource
195 |     #[serde(default, skip_serializing_if = "Option::is_none")]
196 |     pub conditions: Option<Vec<PodIdentityAssociationStatusConditions>>,
197 |     /// The timestamp that the association was created at.
198 |     #[serde(default, skip_serializing_if = "Option::is_none", rename = "createdAt")]
199 |     pub created_at: Option<String>,
200 |     /// The most recent timestamp that the association was modified at
201 |     #[serde(
202 |         default,
203 |         skip_serializing_if = "Option::is_none",
204 |         rename = "modifiedAt"
205 |     )]
206 |     pub modified_at: Option<String>,
207 | }
208 | 
209 | /// All CRs managed by ACK have a common `Status.ACKResourceMetadata` member
210 | /// that is used to contain resource sync state, account ownership,
211 | /// constructed ARN for the resource
212 | #[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Hash)]
213 | pub struct PodIdentityAssociationStatusAckResourceMetadata {
214 |     /// ARN is the Amazon Resource Name for the resource. This is a
215 |     /// globally-unique identifier and is set only by the ACK service controller
216 |     /// once the controller has orchestrated the creation of the resource OR
217 |     /// when it has verified that an "adopted" resource (a resource where the
218 |     /// ARN annotation was set by the Kubernetes user on the CR) exists and
219 |     /// matches the supplied CR's Spec field values.
220 |     /// TODO(vijat@): Find a better strategy for resources that do not have ARN in CreateOutputResponse
221 |     /// https://github.com/aws/aws-controllers-k8s/issues/270
222 |     #[serde(default, skip_serializing_if = "Option::is_none")]
223 |     pub arn: Option<String>,
224 |     /// OwnerAccountID is the AWS Account ID of the account that owns the
225 |     /// backend AWS service API resource.
226 |     #[serde(rename = "ownerAccountID")]
227 |     pub owner_account_id: String,
228 |     /// Region is the AWS region in which the resource exists or will exist.
229 |     pub region: String,
230 | }
231 | 
232 | /// Condition is the common struct used by all CRDs managed by ACK service
233 | /// controllers to indicate terminal states of the CR and its backend AWS
234 | /// service API resource
235 | #[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Hash)]
236 | pub struct PodIdentityAssociationStatusConditions {
237 |     /// Last time the condition transitioned from one status to another.
238 |     #[serde(
239 |         default,
240 |         skip_serializing_if = "Option::is_none",
241 |         rename = "lastTransitionTime"
242 |     )]
243 |     pub last_transition_time: Option<String>,
244 |     /// A human readable message indicating details about the transition.
245 |     #[serde(default, skip_serializing_if = "Option::is_none")]
246 |     pub message: Option<String>,
247 |     /// The reason for the condition's last transition.
248 |     #[serde(default, skip_serializing_if = "Option::is_none")]
249 |     pub reason: Option<String>,
250 |     /// Status of the condition, one of True, False, Unknown.
251 |     pub status: String,
252 |     /// Type is the type of the Condition
253 |     #[serde(rename = "type")]
254 |     pub r#type: String,
255 | }
256 | 
--------------------------------------------------------------------------------
/src/reconcilers/compute.rs:
--------------------------------------------------------------------------------
1 | use std::collections::{BTreeMap, HashSet};
2 | use std::convert::Into;
3 | use std::path::PathBuf;
4 | 
5 | use k8s_openapi::api::apps::v1::{StatefulSet, StatefulSetSpec, StatefulSetStatus};
6 | use k8s_openapi::api::batch::v1::{Job, JobSpec};
7 | use k8s_openapi::api::core::v1::{
8 |     ConfigMap, ConfigMapVolumeSource, Container, ContainerPort, EnvVar, EnvVarSource,
9 |     HTTPGetAction, ObjectFieldSelector, PersistentVolumeClaim, PersistentVolumeClaimSpec, Pod,
10 |     PodSecurityContext, PodSpec, PodTemplateSpec, Probe, SeccompProfile, SecurityContext, Service,
11 |     ServiceAccount, ServicePort, ServiceSpec, Toleration, Volume, VolumeMount,
12 |     VolumeResourceRequirements,
13 | };
14 | use k8s_openapi::apimachinery::pkg::api::resource::Quantity;
15 | use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
16 | use k8s_openapi::apimachinery::pkg::util::intstr::IntOrString;
17 | use kube::api::{DeleteParams, ListParams, Preconditions, PropagationPolicy};
18 | use kube::core::PartialObjectMeta;
19 | use kube::runtime::reflector::{ObjectRef, Store};
20 | use kube::{
21 |     api::{Patch, PatchParams},
22 |     Api, ResourceExt,
23 | };
24 | use sha2::Digest;
25 | use tracing::{debug, error, warn};
26 | 
27 | use crate::podidentityassociations::{PodIdentityAssociation, PodIdentityAssociationSpec};
28 | use crate::reconcilers::{label_selector, mandatory_labels, object_meta};
29 | use crate::securitygrouppolicies::{
30 |     SecurityGroupPolicy, SecurityGroupPolicySecurityGroups, SecurityGroupPolicySpec,
31 | };
32 | use crate::{Context, Error, RestateClusterSpec, RestateClusterStorage};
33 | 
34 | use super::quantity_parser::QuantityParser;
35 | 
36 | fn restate_service_account(
37 |     base_metadata: &ObjectMeta,
38 |     annotations: Option<&BTreeMap<String, String>>,
39 | ) -> ServiceAccount {
40 |     let mut metadata = object_meta(base_metadata, "restate");
41 |     if let Some(annotations) = annotations {
42 |         match &mut metadata.annotations {
43 |             Some(existing_annotations) => {
44 |                 existing_annotations.extend(annotations.iter().map(|(k, v)| (k.clone(), v.clone())))
45 |             }
46 |             None => metadata.annotations = Some(annotations.clone()),
47 |         }
48 |     }
49 | 
50 |     ServiceAccount {
51 |         metadata,
52 |         ..Default::default()
53 |     }
54 | }
55 | 
56 | fn restate_configmap(base_metadata: &ObjectMeta, config: Option<&str>) -> ConfigMap {
57 |     let config: String = config.unwrap_or_default().into();
58 | 
59 |     let mut hasher = sha2::Sha256::new();
60 |     hasher.update(config.as_bytes());
61 |     let result = u32::from_le_bytes(hasher.finalize()[..4].try_into().unwrap());
62 | 
63 |     let metadata = object_meta(base_metadata, format!("restate-config-{result:x}"));
64 | 
65 |     ConfigMap {
66 |         metadata,
67 |         data: Some(BTreeMap::from([("config.toml".into(), config)])),
68 |         ..Default::default()
69 |     }
70 | }
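// Because the first four little-endian bytes of the config's SHA-256 are baked
// into the ConfigMap name (e.g. `restate-config-1a2b3c4d`, suffix
// illustrative), any config change produces a differently named ConfigMap, and
// the new name flowing into the pod template forces a rolling restart.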
71 | 
72 | fn restate_pod_identity_association(
73 |     ns: &str,
74 |     base_metadata: &ObjectMeta,
75 |     pod_identity_association_cluster: &str,
76 |     pod_identity_association_role_arn: &str,
77 | ) -> PodIdentityAssociation {
78 |     PodIdentityAssociation {
79 |         metadata: object_meta(base_metadata, "restate"),
80 |         spec: PodIdentityAssociationSpec {
81 |             cluster_name: Some(pod_identity_association_cluster.into()),
82 |             namespace: ns.into(),
83 |             service_account: "restate".into(),
84 |             role_arn: Some(pod_identity_association_role_arn.into()),
85 |             client_request_token: None,
86 |             cluster_ref: None,
87 |             role_ref: None,
88 |             tags: None,
89 |         },
90 |         status: None,
91 |     }
92 | }
93 | 
94 | fn restate_security_group_policy(
95 |     base_metadata: &ObjectMeta,
96 |     aws_security_groups: &[String],
97 | ) -> SecurityGroupPolicy {
98 |     SecurityGroupPolicy {
99 |         metadata: object_meta(base_metadata, "restate"),
100 |         spec: SecurityGroupPolicySpec {
101 |             security_groups: Some(SecurityGroupPolicySecurityGroups {
102 |                 group_ids: Some(aws_security_groups.into()),
103 |             }),
104 |             pod_selector: Some(label_selector(base_metadata)),
105 |             service_account_selector: None,
106 |         },
107 |     }
108 | }
109 | 
110 | fn restate_service(
111 |     base_metadata: &ObjectMeta,
112 |     annotations: Option<&BTreeMap<String, String>>,
113 | ) -> Service {
114 |     let mut metadata = object_meta(base_metadata, "restate");
115 |     if let Some(annotations) = annotations {
116 |         match &mut metadata.annotations {
117 |             Some(existing_annotations) => {
118 |                 existing_annotations.extend(annotations.iter().map(|(k, v)| (k.clone(), v.clone())))
119 |             }
120 |             None => metadata.annotations = Some(annotations.clone()),
121 |         }
122 |     }
123 | 
124 |     Service {
125 |         metadata,
126 |         spec: Some(ServiceSpec {
127 |             selector: label_selector(base_metadata).match_labels,
128 |             ports: Some(vec![
129 |                 ServicePort {
130 |                     app_protocol: Some("kubernetes.io/h2c".into()),
131 |                     port: 8080,
132 |                     name: Some("ingress".into()),
133 |                     ..Default::default()
134 |                 },
135 |                 ServicePort {
136 |                     app_protocol: Some("kubernetes.io/h2c".into()),
137 |                     port: 9070,
138 |                     name: Some("admin".into()),
139 |                     ..Default::default()
140 |                 },
141 |                 ServicePort {
142 |                     app_protocol: Some("kubernetes.io/h2c".into()),
143 |                     port: 5122,
144 |                     name: Some("metrics".into()),
145 |                     ..Default::default()
146 |                 },
147 |             ]),
148 |             ..Default::default()
149 |         }),
150 |         status: None,
151 |     }
152 | }
153 | 
154 | fn restate_cluster_service(base_metadata: &ObjectMeta) -> Service {
155 |     Service {
156 |         metadata: object_meta(base_metadata, "restate-cluster"),
157 |         spec: Some(ServiceSpec {
158 |             selector: label_selector(base_metadata).match_labels,
159 |             ports: Some(vec![ServicePort {
160 |                 app_protocol: Some("kubernetes.io/h2c".into()),
161 |                 port: 5122,
162 |                 name: Some("node".into()),
163 |                 ..Default::default()
164 |             }]),
165 |             // We want all pods in the StatefulSet to have their addresses published for
166 |             // the sake of the other Restate pods even before they're ready, since they
167 |             // have to be able to talk to each other in order to become ready.
168 |             publish_not_ready_addresses: Some(true),
169 |             cluster_ip: Some("None".into()), // headless service
170 |             ..Default::default()
171 |         }),
172 |         status: None,
173 |     }
174 | }
175 | 
176 | fn env(cluster_name: &str, custom: Option<&[EnvVar]>) -> Vec<EnvVar> {
177 |     let defaults = [
178 |         ("RESTATE_LOG_FORMAT", "json"),
179 |         ("RESTATE_CLUSTER_NAME", cluster_name),
180 |         ("RESTATE_BASE_DIR", "/restate-data"),
181 |         ("RUST_BACKTRACE", "1"),
182 |         ("RUST_LIB_BACKTRACE", "0"),
183 |         ("RESTATE_CONFIG", "/config/config.toml"),
184 |         (
185 |             "RESTATE_ADVERTISED_ADDRESS",
186 |             // POD_NAME comes from the downward api, below
187 |             "http://$(POD_NAME).restate-cluster:5122",
188 |         ),
189 |         (
190 |             "RESTATE_NODE_NAME",
191 |             // POD_NAME comes from the downward api, below
192 |             "$(POD_NAME)",
193 |         ),
194 |     ];
195 | 
196 |     // allow the crd to override our defaults
197 |     let custom_names: HashSet<&str> = custom
198 |         .map(|custom| custom.iter().map(|e| e.name.as_ref()).collect())
199 |         .unwrap_or_default();
200 | 
201 |     let defaults = defaults
202 |         .into_iter()
203 |         .filter(|(k, _)| !custom_names.contains(k))
204 |         .map(|(k, v)| EnvVar {
205 |             name: k.into(),
206 |             value: Some(v.into()),
207 |             value_from: None,
208 |         });
209 | 
210 |     let defaults = Some(EnvVar {
211 |         name: "POD_NAME".into(),
212 |         value: None,
213 |         value_from: Some(EnvVarSource {
214 |             config_map_key_ref: None,
215 |             field_ref: Some(ObjectFieldSelector {
216 |                 api_version: None,
217 |                 field_path: "metadata.name".into(),
218 |             }),
219 |             resource_field_ref: None,
220 |             secret_key_ref: None,
221 |         }),
222 |     })
223 |     .into_iter()
224 |     .chain(defaults);
225 | 
226 |     if let Some(custom) = custom {
227 |         defaults.chain(custom.iter().cloned()).collect()
228 |     } else {
229 |         defaults.collect()
230 |     }
231 | }
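// For a cluster named `my-cluster` (illustrative) with no custom env, the
// defaults above render as, among others:
//
//   POD_NAME                   -> downward API fieldRef metadata.name
//   RESTATE_CLUSTER_NAME       -> my-cluster
//   RESTATE_NODE_NAME          -> $(POD_NAME)
//   RESTATE_ADVERTISED_ADDRESS -> http://$(POD_NAME).restate-cluster:5122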
232 | 
233 | const RESTATE_STATEFULSET_NAME: &str = "restate";
234 | 
235 | fn restate_statefulset(
236 |     base_metadata: &ObjectMeta,
237 |     spec: &RestateClusterSpec,
238 |     pod_annotations: Option<BTreeMap<String, String>>,
239 |     signing_key: Option<(Volume, PathBuf)>,
240 |     cm_name: String,
241 | ) -> StatefulSet {
242 |     let metadata = object_meta(base_metadata, RESTATE_STATEFULSET_NAME);
243 |     let labels = metadata.labels.clone();
244 |     let pod_annotations = match (pod_annotations, metadata.annotations.clone()) {
245 |         (Some(pod_annotations), Some(mut base_annotations)) => {
246 |             base_annotations.extend(pod_annotations);
247 |             Some(base_annotations)
248 |         }
249 |         (Some(annotations), None) | (None, Some(annotations)) => Some(annotations),
250 |         (None, None) => None,
251 |     };
252 | 
253 |     let mut volume_mounts = vec![
254 |         VolumeMount {
255 |             name: "storage".into(),
256 |             mount_path: "/restate-data".into(),
257 |             ..Default::default()
258 |         },
259 |         VolumeMount {
260 |             name: "tmp".into(),
261 |             mount_path: "/tmp".into(),
262 |             ..Default::default()
263 |         },
264 |         VolumeMount {
265 |             name: "config".into(),
266 |             mount_path: "/config".into(),
267 |             read_only: Some(true),
268 |             ..Default::default()
269 |         },
270 |     ];
271 | 
272 |     let mut volumes = vec![
273 |         Volume {
274 |             name: "tmp".into(),
275 |             empty_dir: Some(Default::default()),
276 |             ..Default::default()
277 |         },
278 |         Volume {
279 |             name: "config".into(),
280 |             config_map: Some(ConfigMapVolumeSource {
281 |                 name: Some(cm_name),
282 |                 ..Default::default()
283 |             }),
284 |             ..Default::default()
285 |         },
286 |     ];
287 | 
288 |     let mut env = env(
289 |         spec.cluster_name
290 |             .as_ref()
291 |             .or(base_metadata.name.as_ref())
292 |             .unwrap(),
293 |         spec.compute.env.as_deref(),
294 |     );
295 | 
296 |     if let Some((volume, relative_path)) = signing_key {
297 |         let mut absolute_path = PathBuf::from("/signing-key");
298 | 
299 |         volume_mounts.push(VolumeMount {
300 |             mount_path: absolute_path.to_str().unwrap().into(),
301 |             name: volume.name.clone(),
302 |             read_only: Some(true),
303 |             ..Default::default()
304 |         });
305 |         volumes.push(volume);
306 |         absolute_path.push(relative_path);
307 |         env.push(EnvVar {
308 |             name: "RESTATE_REQUEST_IDENTITY_PRIVATE_KEY_PEM_FILE".into(),
309 |             value: Some(absolute_path.to_str().unwrap().into()),
310 |             value_from: None,
311 |         })
312 |     }
313 | 
314 |     StatefulSet {
315 |         metadata,
316 |         spec: Some(StatefulSetSpec {
317 |             replicas: spec.compute.replicas,
318 |             selector: label_selector(base_metadata),
319 |             service_name: "restate-cluster".into(),
320 |             template: PodTemplateSpec {
321 |                 metadata: Some(ObjectMeta {
322 |                     labels,
323 |                     annotations: pod_annotations,
324 |                     ..Default::default()
325 |                 }),
326 |                 spec: Some(PodSpec {
327 |                     affinity: spec.compute.affinity.clone(),
328 |                     automount_service_account_token: Some(false),
329 |                     dns_policy: spec.compute.dns_policy.clone(),
330 |                     dns_config: spec.compute.dns_config.clone(),
331 |                     containers: vec![Container {
332 |                         name: "restate".into(),
333 |                         image: Some(spec.compute.image.clone()),
334 |                         image_pull_policy: spec.compute.image_pull_policy.clone(),
335 |                         env: Some(env),
336 |                         ports: Some(vec![
337 |                             ContainerPort {
338 |                                 name: Some("ingress".into()),
339 |                                 container_port: 8080,
340 |                                 ..Default::default()
341 |                             },
342 |                             ContainerPort {
343 |                                 name: Some("admin".into()),
344 |                                 container_port: 9070,
345 |                                 ..Default::default()
346 |                             },
347 |                             ContainerPort {
348 |                                 name: Some("metrics".into()),
349 |                                 container_port: 5122,
350 |                                 ..Default::default()
351 |                             },
352 |                         ]),
353 |                         readiness_probe: Some(Probe {
354 |                             http_get: Some(HTTPGetAction {
355 |                                 port: IntOrString::Int(9070),
356 |                                 path: Some("/health".into()),
357 |                                 ..Default::default()
358 |                             }),
359 |                             ..Default::default()
360 |                         }),
361 |                         resources: spec.compute.resources.clone(),
362 |                         security_context: Some(SecurityContext {
363 |                             read_only_root_filesystem: Some(true),
364 |                             allow_privilege_escalation: Some(false),
365 |                             ..Default::default()
366 |                         }),
367 |                         volume_mounts: Some(volume_mounts),
368 |                         ..Default::default()
369 |                     }],
370 |                     security_context: Some(PodSecurityContext {
371 |                         run_as_user: Some(1000),
372 |                         run_as_group: Some(3000),
373 |                         fs_group: Some(2000),
374 |                         fs_group_change_policy: Some("OnRootMismatch".into()),
375 |                         seccomp_profile: Some(SeccompProfile {
376 |                             type_: "RuntimeDefault".into(),
377 |                             localhost_profile: None,
378 |                         }),
379 |                         ..Default::default()
380 |                     }),
381 |                     service_account_name: Some("restate".into()),
382 |                     termination_grace_period_seconds: Some(60),
383 |                     volumes: Some(volumes),
384 |                     tolerations: spec.compute.tolerations.clone(),
385 |                     node_selector: spec.compute.node_selector.clone(),
386 |                     ..Default::default()
387 |                 }),
388 |             },
389 |             // It's important to start multiple pods at the same time in case multiple pods died.
390 |             // Otherwise, we risk unavailability of an already configured metadata cluster
391 |             pod_management_policy: Some("Parallel".to_owned()),
392 |             volume_claim_templates: Some(vec![PersistentVolumeClaim {
393 |                 metadata: ObjectMeta {
394 |                     name: Some("storage".into()),
395 |                     labels: Some(mandatory_labels(base_metadata)), // caution needed; these cannot be changed
396 |                     ..Default::default()
397 |                 },
398 |                 spec: Some(PersistentVolumeClaimSpec {
399 |                     storage_class_name: spec.storage.storage_class_name.clone(),
400 |                     access_modes: Some(vec!["ReadWriteOnce".into()]),
401 |                     resources: Some(restate_pvc_resources(&spec.storage)),
402 |                     ..Default::default()
403 |                 }),
404 |                 status: None,
405 |             }]),
406 |             ..Default::default()
407 |         }),
408 |         status: None,
409 |     }
410 | }
411 | 
412 | fn restate_pvc_resources(storage: &RestateClusterStorage) -> VolumeResourceRequirements {
413 |     VolumeResourceRequirements {
414 |         requests: Some(BTreeMap::from([(
415 |             "storage".to_string(),
416 |             Quantity(format!("{}", storage.storage_request_bytes)),
417 |         )])),
418 |         limits: None,
419 |     }
420 | }
421 | 
422 | pub async fn reconcile_compute(
423 |     ctx: &Context,
424 |     namespace: &str,
425 |     base_metadata: &ObjectMeta,
426 |     spec: &RestateClusterSpec,
427 |     signing_key: Option<(Volume, PathBuf)>,
428 | ) -> Result<(), Error> {
429 |     let ss_api: Api<StatefulSet> = Api::namespaced(ctx.client.clone(), namespace);
430 |     let cm_api: Api<ConfigMap> = Api::namespaced(ctx.client.clone(), namespace);
431 |     let pvc_api: Api<PersistentVolumeClaim> = Api::namespaced(ctx.client.clone(), namespace);
432 |     let svc_api: Api<Service> = Api::namespaced(ctx.client.clone(), namespace);
433 |     let svcacc_api: Api<ServiceAccount> = Api::namespaced(ctx.client.clone(), namespace);
434 |     let pia_api: Api<PodIdentityAssociation> = Api::namespaced(ctx.client.clone(), namespace);
435 |     let job_api: Api<Job> = Api::namespaced(ctx.client.clone(), namespace);
436 |     let pod_api: Api<Pod> = Api::namespaced(ctx.client.clone(), namespace);
437 |     let sgp_api: Api<SecurityGroupPolicy> = Api::namespaced(ctx.client.clone(), namespace);
438 | 
439 |     apply_service_account(
440 |         namespace,
441 |         &svcacc_api,
442 |         restate_service_account(
443 |             base_metadata,
444 |             spec.security
445 |                 .as_ref()
446 |                 .and_then(|s| s.service_account_annotations.as_ref()),
447 |         ),
448 |     )
449 |     .await?;
450 | 
451 |     let cm = restate_configmap(base_metadata, spec.config.as_deref());
452 |     let cm_name: String = cm.metadata.name.as_ref().unwrap().into();
453 |     apply_configmap(namespace, &cm_api, cm).await?;
454 | 
455 |     let mut pod_annotations: Option<BTreeMap<String, String>> = None;
456 | 
457 |     match (
458 |         ctx.aws_pod_identity_association_cluster.as_ref(),
459 |         spec.security
460 |             .as_ref()
461 |             .and_then(|s| s.aws_pod_identity_association_role_arn.as_ref()),
462 |     ) {
463 |         (
464 |             Some(aws_pod_identity_association_cluster),
465 |             Some(aws_pod_identity_association_role_arn),
466 |         ) => {
467 |             let pia = apply_pod_identity_association(
468 |                 namespace,
469 |                 &pia_api,
470 |                 restate_pod_identity_association(
471 |                     namespace,
472 |                     base_metadata,
473 |                     aws_pod_identity_association_cluster,
474 |                     aws_pod_identity_association_role_arn,
475 |                 ),
476 |             )
477 |             .await?;
478 | 
479 |             if !is_pod_identity_association_synced(pia) {
480 |                 return Err(Error::NotReady { reason: "PodIdentityAssociationNotSynced".into(), message: "Waiting for the AWS ACK controller to provision the Pod Identity Association with IAM".into(), requeue_after: None });
481 |             }
482 | 
483 |             check_pia(
484 |                 namespace,
485 |                 base_metadata,
486 |                 spec.compute.tolerations.as_ref(),
487 |                 &job_api,
488 |                 &pod_api,
489 |             )
490 |             .await?;
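            // To recap the flow above: the PodIdentityAssociation must report
            // ACK.ResourceSynced before a short-lived canary Job (see check_pia
            // below) verifies that newly scheduled pods actually receive EKS
            // Pod Identity credentials; only once both pass do the annotations
            // below roll the Restate pods onto the new identity.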
491 | 
492 |             // Pods MUST roll when these change, so we apply these parameters as annotations on the pod meta
493 |             let pod_annotations = pod_annotations.get_or_insert_with(Default::default);
494 |             pod_annotations.insert(
495 |                 "restate.dev/aws-pod-identity-association-cluster".into(),
496 |                 aws_pod_identity_association_cluster.clone(),
497 |             );
498 |             pod_annotations.insert(
499 |                 "restate.dev/aws-pod-identity-association-role-arn".into(),
500 |                 aws_pod_identity_association_role_arn.clone(),
501 |             );
502 |         }
503 |         (Some(_), None) => {
504 |             delete_pod_identity_association(namespace, &pia_api, "restate").await?;
505 |             delete_job(namespace, &job_api, "restate-pia-canary").await?;
506 |         }
507 |         (None, Some(aws_pod_identity_association_role_arn)) => {
508 |             warn!("Ignoring AWS pod identity association role ARN {aws_pod_identity_association_role_arn} as the operator is not configured with --aws-pod-identity-association-cluster");
509 |         }
510 |         (None, None) => {}
511 |     };
512 | 
513 |     match spec
514 |         .security
515 |         .as_ref()
516 |         .and_then(|s| s.aws_pod_security_groups.as_deref())
517 |     {
518 |         Some(aws_pod_security_groups)
519 |             if ctx.security_group_policy_installed && !aws_pod_security_groups.is_empty() =>
520 |         {
521 |             apply_security_group_policy(
522 |                 namespace,
523 |                 &sgp_api,
524 |                 restate_security_group_policy(base_metadata, aws_pod_security_groups),
525 |             )
526 |             .await?;
527 | 
528 |             let pod_annotations = pod_annotations.get_or_insert_with(Default::default);
529 |             // Pods MUST roll when these change, so we apply the groups as annotations on the pod meta
530 |             pod_annotations.insert(
531 |                 "restate.dev/aws-security-groups".into(),
532 |                 aws_pod_security_groups.join(","),
533 |             );
534 |         }
535 |         None | Some(_) if ctx.security_group_policy_installed => {
536 |             delete_security_group_policy(namespace, &sgp_api, "restate").await?;
537 |         }
538 |         Some(aws_pod_security_groups) if !aws_pod_security_groups.is_empty() => {
539 |             warn!("Ignoring AWS pod security groups {} as the SecurityGroupPolicy CRD is not installed", aws_pod_security_groups.join(","));
540 |         }
541 |         None | Some(_) => {}
542 |     }
543 | 
544 |     let restate_service = restate_service(
545 |         base_metadata,
546 |         spec.security
547 |             .as_ref()
548 |             .and_then(|s| s.service_annotations.as_ref()),
549 |     );
550 |     apply_service(namespace, &svc_api, restate_service).await?;
551 | 
552 |     let restate_cluster_service = restate_cluster_service(base_metadata);
553 |     apply_service(namespace, &svc_api, restate_cluster_service).await?;
554 | 
555 |     resize_statefulset_storage(
556 |         namespace,
557 |         base_metadata,
558 |         &ss_api,
559 |         &ctx.ss_store,
560 |         &pvc_api,
561 |         &ctx.pvc_meta_store,
562 |         &spec.storage,
563 |     )
564 |     .await?;
565 | 
566 |     let ss = apply_stateful_set(
567 |         namespace,
568 |         &ss_api,
569 |         restate_statefulset(base_metadata, spec, pod_annotations, signing_key, cm_name),
570 |     )
571 |     .await?;
572 | 
573 |     validate_stateful_set_status(ss.status, spec.compute.replicas.unwrap_or(1))?;
574 | 
575 |     Ok(())
576 | }
577 | 
578 | async fn apply_service(namespace: &str, ss_api: &Api<Service>, ss: Service) -> Result<(), Error> {
579 |     let name = ss.metadata.name.as_ref().unwrap();
580 |     let params: PatchParams = PatchParams::apply("restate-operator").force();
581 |     debug!("Applying Service {} in namespace {}", name, namespace);
582 |     ss_api.patch(name, &params, &Patch::Apply(&ss)).await?;
583 |     Ok(())
584 | }
585 | 
586 | async fn apply_service_account(
587 |     namespace: &str,
588 |     svcacc_api: &Api<ServiceAccount>,
589 |     svcacc: ServiceAccount,
590 | ) -> Result<(), Error> {
591 |     let name = svcacc.metadata.name.as_ref().unwrap();
592 |     let params: PatchParams = PatchParams::apply("restate-operator").force();
593 |     debug!(
594 |         "Applying ServiceAccount {} in namespace {}",
595 |         name, namespace
596 |     );
597 |     svcacc_api
598 |         .patch(name, &params, &Patch::Apply(&svcacc))
599 |         .await?;
600 |     Ok(())
601 | }
602 | 
603 | async fn apply_configmap(
604 |     namespace: &str,
605 |     cm_api: &Api<ConfigMap>,
606 |     cm: ConfigMap,
607 | ) -> Result<(), Error> {
608 |     let name = cm.metadata.name.as_ref().unwrap();
609 |     let params: PatchParams = PatchParams::apply("restate-operator").force();
610 |     debug!("Applying ConfigMap {} in namespace {}", name, namespace);
611 |     cm_api.patch(name, &params, &Patch::Apply(&cm)).await?;
612 |     Ok(())
613 | }
614 | 
615 | async fn apply_pod_identity_association(
616 |     namespace: &str,
617 |     pia_api: &Api<PodIdentityAssociation>,
618 |     pia: PodIdentityAssociation,
619 | ) -> Result<PodIdentityAssociation, Error> {
620 |     let name = pia.metadata.name.as_ref().unwrap();
621 |     let params: PatchParams = PatchParams::apply("restate-operator").force();
622 |     debug!(
623 |         "Applying PodIdentityAssociation {} in namespace {}",
624 |         name, namespace
625 |     );
626 |     Ok(pia_api.patch(name, &params, &Patch::Apply(&pia)).await?)
627 | }
628 | 
629 | async fn check_pia(
630 |     namespace: &str,
631 |     base_metadata: &ObjectMeta,
632 |     tolerations: Option<&Vec<Toleration>>,
633 |     job_api: &Api<Job>,
634 |     pod_api: &Api<Pod>,
635 | ) -> Result<(), Error> {
636 |     let name = "restate-pia-canary";
637 |     let params: PatchParams = PatchParams::apply("restate-operator").force();
638 | 
639 |     let mut metadata = object_meta(base_metadata, name);
640 |     let labels = metadata.labels.get_or_insert(Default::default());
641 |     if let Some(existing) = labels.get_mut("app.kubernetes.io/name") {
642 |         *existing = name.into()
643 |     } else {
644 |         labels.insert("app.kubernetes.io/name".into(), name.into());
645 |     }
646 | 
647 |     debug!(
648 |         "Applying PodIdentityAssociation canary Job in namespace {}",
649 |         namespace
650 |     );
651 | 
652 |     let created = job_api
653 |         .patch(
654 |             name,
655 |             &params,
656 |             &Patch::Apply(&Job {
657 |                 metadata,
658 |                 spec: Some(JobSpec {
659 |                     // single-use job that we delete on failure; we don't want to wait 10 seconds for retries
660 |                     backoff_limit: Some(1),
661 |                     template: PodTemplateSpec {
662 |                         metadata: None,
663 |                         spec: Some(PodSpec {
664 |                             service_account_name: Some("restate".into()),
665 |                             containers: vec![Container {
666 |                                 name: "canary".into(),
667 |                                 image: Some("busybox:uclibc".into()),
668 |                                 command: Some(vec![
669 |                                     "grep".into(),
670 |                                     "-q".into(),
671 |                                     "AWS_CONTAINER_AUTHORIZATION_TOKEN_FILE".into(),
672 |                                     "/proc/self/environ".into(),
673 |                                 ]),
674 |                                 ..Default::default()
675 |                             }],
676 |                             tolerations: tolerations.cloned(),
677 |                             restart_policy: Some("Never".into()),
678 |                             ..Default::default()
679 |                         }),
680 |                     },
681 |                     ..Default::default()
682 |                 }),
683 |                 status: None,
684 |             }),
685 |         )
686 |         .await?;
687 | 
688 |     if let Some(conditions) = created.status.and_then(|s| s.conditions) {
689 |         for condition in conditions {
690 |             if condition.status != "True" {
691 |                 continue;
692 |             }
693 |             match condition.type_.as_str() {
694 |                 "Complete" => {
695 |                     debug!(
696 |                         "PodIdentityAssociation canary check succeeded in namespace {}",
697 |                         namespace
698 |                     );
699 |                     return Ok(());
700 |                 }
701 |                 "Failed" => {
702 |                     error!(
703 |                         "PodIdentityAssociation canary check failed in namespace {}, deleting Job",
704 |                         namespace
705 |                     );
706 | 
707 |                     delete_job(namespace, job_api, name).await?;
"PodIdentityAssociationCanaryFailed".into(), 711 | message: "Canary pod did not receive Pod Identity credentials; PIA webhook may need to catch up".into(), 712 | // job watch will cover this 713 | requeue_after: None, 714 | }); 715 | } 716 | _ => {} 717 | } 718 | } 719 | } 720 | 721 | // if we are here then the job hasn't succeeded or failed yet; lets try and figure things out a bit quicker 722 | // because it takes times for pods to schedule etc 723 | 724 | let pods = pod_api 725 | .list(&ListParams::default().labels(&format!( 726 | "batch.kubernetes.io/job-name={name},batch.kubernetes.io/controller-uid={}", 727 | created.metadata.uid.unwrap() 728 | ))) 729 | .await?; 730 | 731 | if let Some(pod) = pods.items.first() { 732 | if pod 733 | .spec 734 | .as_ref() 735 | .and_then(|s| s.volumes.as_ref()) 736 | .map(|vs| vs.iter().any(|v| v.name == "eks-pod-identity-token")) 737 | .unwrap_or(false) 738 | { 739 | debug!( 740 | "PodIdentityAssociation canary check succeeded via pod lookup in namespace {}", 741 | namespace 742 | ); 743 | return Ok(()); 744 | } 745 | 746 | debug!( 747 | "PodIdentityAssociation canary check failed via pod lookup in namespace {}, deleting Job", 748 | namespace 749 | ); 750 | delete_job(namespace, job_api, name).await?; 751 | 752 | return Err(Error::NotReady { 753 | reason: "PodIdentityAssociationCanaryFailed".into(), 754 | message: "Canary pod did not receive Pod Identity credentials; PIA webhook may need to catch up".into(), 755 | // job watch will cover this 756 | requeue_after: None, 757 | }); 758 | } 759 | 760 | // no pods; we generally expect this immediately after creating the job 761 | debug!( 762 | "PodIdentityAssociation canary Job not yet succeeded in namespace {}", 763 | namespace 764 | ); 765 | 766 | Err(Error::NotReady { 767 | reason: "PodIdentityAssociationCanaryPending".into(), 768 | message: "Canary Job has not yet succeeded; PIA webhook may need to catch up".into(), 769 | // job watch will cover this 770 | requeue_after: None, 771 | }) 772 | } 773 | 774 | fn is_pod_identity_association_synced(pia: PodIdentityAssociation) -> bool { 775 | if let Some(status) = pia.status { 776 | if let Some(conditions) = status.conditions { 777 | if let Some(synced) = conditions 778 | .iter() 779 | .find(|cond| cond.r#type == "ACK.ResourceSynced") 780 | { 781 | if synced.status == "True" { 782 | return true; 783 | } 784 | } 785 | } 786 | } 787 | false 788 | } 789 | 790 | async fn delete_pod_identity_association( 791 | namespace: &str, 792 | pia_api: &Api, 793 | name: &str, 794 | ) -> Result<(), Error> { 795 | debug!( 796 | "Ensuring PodIdentityAssociation {} in namespace {} does not exist", 797 | name, namespace 798 | ); 799 | match pia_api.delete(name, &DeleteParams::default()).await { 800 | Err(kube::Error::Api(kube::error::ErrorResponse { code: 404, .. })) => Ok(()), 801 | Err(err) => Err(err.into()), 802 | Ok(_) => Ok(()), 803 | } 804 | } 805 | 806 | async fn delete_job(namespace: &str, job_api: &Api, name: &str) -> Result<(), Error> { 807 | debug!( 808 | "Ensuring Job {} in namespace {} does not exist", 809 | name, namespace 810 | ); 811 | match job_api.delete(name, &DeleteParams::default()).await { 812 | Err(kube::Error::Api(kube::error::ErrorResponse { code: 404, .. 
})) => Ok(()), 813 | Err(err) => Err(err.into()), 814 | Ok(_) => Ok(()), 815 | } 816 | } 817 | 818 | async fn apply_security_group_policy( 819 | namespace: &str, 820 | pia_api: &Api, 821 | pia: SecurityGroupPolicy, 822 | ) -> Result<(), Error> { 823 | let name = pia.metadata.name.as_ref().unwrap(); 824 | let params: PatchParams = PatchParams::apply("restate-operator").force(); 825 | debug!( 826 | "Applying SecurityGroupPolicy {} in namespace {}", 827 | name, namespace 828 | ); 829 | pia_api.patch(name, ¶ms, &Patch::Apply(&pia)).await?; 830 | Ok(()) 831 | } 832 | 833 | async fn delete_security_group_policy( 834 | namespace: &str, 835 | sgp_api: &Api, 836 | name: &str, 837 | ) -> Result<(), Error> { 838 | debug!( 839 | "Ensuring SecurityGroupPolicy {} in namespace {} does not exist", 840 | name, namespace 841 | ); 842 | match sgp_api.delete(name, &DeleteParams::default()).await { 843 | Err(kube::Error::Api(kube::error::ErrorResponse { code: 404, .. })) => Ok(()), 844 | Err(err) => Err(err.into()), 845 | Ok(_) => Ok(()), 846 | } 847 | } 848 | 849 | async fn resize_statefulset_storage( 850 | namespace: &str, 851 | base_metadata: &ObjectMeta, 852 | ss_api: &Api, 853 | ss_store: &Store, 854 | pvc_api: &Api, 855 | pvc_meta_store: &Store>, 856 | storage: &RestateClusterStorage, 857 | ) -> Result<(), Error> { 858 | let params: PatchParams = PatchParams::apply("restate-operator").force(); 859 | let resources = Some(restate_pvc_resources(storage)); 860 | 861 | // ensure all existing pvcs have the right size set 862 | // first, filter the pvc meta store for our label selector 863 | let labels = mandatory_labels(base_metadata); 864 | let pvcs = pvc_meta_store.state().into_iter().filter(|pvc_meta| { 865 | for (k, v) in &labels { 866 | if pvc_meta.labels().get(k) != Some(v) { 867 | return false; 868 | } 869 | } 870 | true 871 | }); 872 | 873 | for pvc in pvcs { 874 | let name = pvc.name_any(); 875 | debug!( 876 | "Applying PersistentVolumeClaim {} in namespace {}", 877 | name, namespace 878 | ); 879 | 880 | let pvc = pvc_api 881 | .patch( 882 | &name, 883 | ¶ms, 884 | &Patch::Apply(PersistentVolumeClaim { 885 | metadata: ObjectMeta { 886 | name: Some(name.clone()), 887 | ..Default::default() 888 | }, 889 | spec: Some(PersistentVolumeClaimSpec { 890 | resources: resources.clone(), 891 | ..Default::default() 892 | }), 893 | status: None, 894 | }), 895 | ) 896 | .await?; 897 | 898 | if pvc.status.and_then(|s| s.phase).as_deref() != Some("Bound") { 899 | return Err(Error::NotReady { 900 | reason: "PersistentVolumeClaimNotBound".into(), 901 | message: format!( 902 | "PersistentVolumeClaim {} is not yet bound to a volume", 903 | name 904 | ), 905 | requeue_after: None, 906 | }); 907 | } 908 | } 909 | 910 | let existing = match ss_store.get(&ObjectRef::new(RESTATE_STATEFULSET_NAME).within(namespace)) { 911 | Some(existing) => existing, 912 | // no statefulset in cache; possibilities: 913 | // 1. first run and it hasn't ever been created => do nothing 914 | // 3. we deleted it in a previous reconciliation, and the cache reflects this => do nothing 915 | // 2. it has just been created, but cache doesn't have it yet. 
916 |         None => return Ok(()),
917 |     };
918 |
919 |     let existing_storage_request = existing
920 |         .spec
921 |         .as_ref()
922 |         .and_then(|spec| spec.volume_claim_templates.as_ref())
923 |         .and_then(|templates| templates.first())
924 |         .and_then(|storage| storage.spec.as_ref())
925 |         .and_then(|spec| spec.resources.as_ref())
926 |         .and_then(|resources| resources.requests.as_ref())
927 |         .and_then(|requests| requests.get("storage").map(|storage| storage.to_bytes()));
928 |
929 |     match existing_storage_request {
930 |         // check if we can interpret the statefulset as having the same storage request
931 |         Some(Ok(Some(bytes))) if bytes == storage.storage_request_bytes => return Ok(()),
932 |         _ => {}
933 |     }
934 |
935 |     // expansion case - we would have failed when updating the pvcs if this was a contraction.
936 |     // we have already updated the pvcs; we just need to delete and recreate the statefulset.
937 |     // we *must* delete with an orphan propagation policy; this means the deletion will *not* cascade down
938 |     // to the pods that this statefulset owns.
939 |     // recreation will happen later in the reconcile loop
940 |     ss_api
941 |         .delete(
942 |             RESTATE_STATEFULSET_NAME,
943 |             &DeleteParams {
944 |                 propagation_policy: Some(PropagationPolicy::Orphan),
945 |                 preconditions: Some(Preconditions {
946 |                     // resources are immutable; but if someone deleted and recreated it with different resources,
947 |                     // we don't want to delete it, hence the uid precondition
948 |                     uid: existing.uid(),
949 |                     resource_version: None,
950 |                 }),
951 |                 ..Default::default()
952 |             },
953 |         )
954 |         .await?;
955 |
956 |     Ok(())
957 | }
958 |
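The storage comparison above normalizes the StatefulSet's existing request via the QuantityParser trait from src/reconcilers/quantity_parser.rs (further down in this tree), so that e.g. a `64Gi` volume template matches a spec asking for 68719476736 bytes. A minimal sketch of that equivalence (illustrative, not code from this repository; assumes the crate-private trait is in scope within the reconcilers module):

use k8s_openapi::apimachinery::pkg::api::resource::Quantity;

// Assumes the QuantityParser trait defined in quantity_parser.rs is in scope.
fn storage_requests_match() {
    // 64Gi = 64 * 1024 * 1024 * 1024 = 68719476736 bytes
    let existing = Quantity("64Gi".into());
    assert_eq!(existing.to_bytes().unwrap(), Some(68719476736));
}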
959 | async fn apply_stateful_set(
960 |     namespace: &str,
961 |     ss_api: &Api<StatefulSet>,
962 |     ss: StatefulSet,
963 | ) -> Result<StatefulSet, Error> {
964 |     let name = ss.metadata.name.as_ref().unwrap();
965 |     let params: PatchParams = PatchParams::apply("restate-operator").force();
966 |     debug!("Applying StatefulSet {} in namespace {}", name, namespace);
967 |     Ok(ss_api.patch(name, &params, &Patch::Apply(&ss)).await?)
968 | }
969 |
970 | fn validate_stateful_set_status(
971 |     status: Option<StatefulSetStatus>,
972 |     expected_replicas: i32,
973 | ) -> Result<(), Error> {
974 |     let status = if let Some(status) = status {
975 |         status
976 |     } else {
977 |         return Err(Error::NotReady {
978 |             reason: "StatefulSetNoStatus".into(),
979 |             message: "StatefulSet has no status set; it may have just been created".into(),
980 |             requeue_after: None,
981 |         });
982 |     };
983 |
984 |     let StatefulSetStatus {
985 |         replicas,
986 |         ready_replicas,
987 |         ..
988 |     } = status;
989 |     if replicas != expected_replicas {
990 |         return Err(Error::NotReady {
991 |             reason: "StatefulSetScaling".into(),
992 |             message: format!("StatefulSet has {replicas} replicas instead of the expected {expected_replicas}; it may be scaling up or down"),
993 |             requeue_after: None,
994 |         });
995 |     }
996 |
997 |     let ready_replicas = ready_replicas.unwrap_or(0);
998 |
999 |     if ready_replicas != expected_replicas {
1000 |         return Err(Error::NotReady {
1001 |             reason: "StatefulSetPodNotReady".into(),
1002 |             message: format!("StatefulSet has {ready_replicas} ready replicas instead of the expected {expected_replicas}; a pod may not be ready"),
1003 |             requeue_after: None,
1004 |         });
1005 |     }
1006 |
1007 |     Ok(())
1008 | }
--------------------------------------------------------------------------------
/src/reconcilers/mod.rs:
--------------------------------------------------------------------------------
1 | use std::collections::BTreeMap;
2 |
3 | use k8s_openapi::apimachinery::pkg::apis::meta::v1::{LabelSelector, ObjectMeta};
4 |
5 | pub mod compute;
6 | pub mod network_policies;
7 | mod quantity_parser;
8 | pub mod signing_key;
9 |
10 | // mandatory_labels returns labels to apply to all created resources on top of the RestateCluster labels.
11 | // it is not safe to change these; statefulset volume template labels are immutable
12 | pub fn mandatory_labels(base_metadata: &ObjectMeta) -> BTreeMap<String, String> {
13 |     BTreeMap::from_iter([
14 |         ("app.kubernetes.io/name".into(), "restate".into()),
15 |         (
16 |             "app.kubernetes.io/instance".into(),
17 |             base_metadata.name.clone().unwrap(),
18 |         ),
19 |     ])
20 | }
21 |
22 | pub fn label_selector(base_metadata: &ObjectMeta) -> LabelSelector {
23 |     LabelSelector {
24 |         match_labels: Some(mandatory_labels(base_metadata)),
25 |         match_expressions: None,
26 |     }
27 | }
28 |
29 | pub fn object_meta(base_metadata: &ObjectMeta, name: impl Into<String>) -> ObjectMeta {
30 |     let mut meta = base_metadata.clone();
31 |     meta.name = Some(name.into());
32 |     meta.labels
33 |         .get_or_insert_with(Default::default)
34 |         .extend(mandatory_labels(base_metadata));
35 |     meta
36 | }
37 |
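For a RestateCluster named `example`, these helpers stamp every child resource with the selector labels and the chosen resource name; roughly as follows (a sketch exercising the helpers above, not a test from this repository):

use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;

fn object_meta_example() {
    let base = ObjectMeta {
        name: Some("example".into()), // the RestateCluster name
        ..Default::default()
    };
    let meta = object_meta(&base, "deny-all");
    assert_eq!(meta.name.as_deref(), Some("deny-all"));
    let labels = meta.labels.unwrap();
    assert_eq!(labels["app.kubernetes.io/name"], "restate");
    assert_eq!(labels["app.kubernetes.io/instance"], "example");
}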
--------------------------------------------------------------------------------
/src/reconcilers/network_policies.rs:
--------------------------------------------------------------------------------
1 | use std::convert::Into;
2 | use std::string::ToString;
3 |
4 | use k8s_openapi::api::networking::v1::NetworkPolicySpec;
5 | use k8s_openapi::api::networking::v1::{
6 |     IPBlock, NetworkPolicy, NetworkPolicyEgressRule, NetworkPolicyIngressRule, NetworkPolicyPeer,
7 |     NetworkPolicyPort,
8 | };
9 | use k8s_openapi::apimachinery::pkg::apis::meta::v1::{LabelSelector, ObjectMeta};
10 | use k8s_openapi::apimachinery::pkg::util::intstr::IntOrString;
11 | use kube::api::DeleteParams;
12 | use kube::{
13 |     api::{Patch, PatchParams},
14 |     Api, Client,
15 | };
16 | use tracing::debug;
17 |
18 | use crate::reconcilers::{label_selector, object_meta};
19 | use crate::{Error, RestateClusterNetworkPeers};
20 |
21 | fn deny_all(base_metadata: &ObjectMeta) -> NetworkPolicy {
22 |     NetworkPolicy {
23 |         metadata: object_meta(base_metadata, "deny-all"),
24 |         spec: Some(NetworkPolicySpec {
25 |             policy_types: Some(vec!["Egress".into(), "Ingress".into()]),
26 |             ..Default::default()
27 |         }),
28 |     }
29 | }
30 |
31 | fn allow_dns(base_metadata: &ObjectMeta) -> NetworkPolicy {
32 |     NetworkPolicy {
33 |         metadata: object_meta(base_metadata, "allow-egress-to-kube-dns"),
34 |         spec: Some(NetworkPolicySpec {
35 |             policy_types: Some(vec!["Egress".into()]),
36 |             egress: Some(vec![NetworkPolicyEgressRule {
37 |                 to: Some(vec![NetworkPolicyPeer {
38 |                     pod_selector: Some(LabelSelector {
39 |                         match_labels: Some(
40 |                             [("k8s-app".to_string(), "kube-dns".to_string())].into(),
41 |                         ),
42 |                         ..Default::default()
43 |                     }),
44 |                     namespace_selector: Some(LabelSelector {
45 |                         match_labels: Some(
46 |                             [(
47 |                                 "kubernetes.io/metadata.name".to_string(),
48 |                                 "kube-system".to_string(),
49 |                             )]
50 |                             .into(),
51 |                         ),
52 |                         ..Default::default()
53 |                     }),
54 |                     ..Default::default()
55 |                 }]),
56 |                 ports: Some(vec![
57 |                     NetworkPolicyPort {
58 |                         protocol: Some("UDP".into()),
59 |                         port: Some(IntOrString::Int(53)),
60 |                         end_port: None,
61 |                     },
62 |                     NetworkPolicyPort {
63 |                         protocol: Some("TCP".into()),
64 |                         port: Some(IntOrString::Int(53)),
65 |                         end_port: None,
66 |                     },
67 |                 ]),
68 |             }]),
69 |             ..Default::default()
70 |         }),
71 |     }
72 | }
73 |
74 | fn allow_public(base_metadata: &ObjectMeta) -> NetworkPolicy {
75 |     NetworkPolicy {
76 |         metadata: object_meta(base_metadata, "allow-restate-egress-to-public-internet"),
77 |         spec: Some(NetworkPolicySpec {
78 |             pod_selector: label_selector(base_metadata),
79 |             policy_types: Some(vec!["Egress".into()]),
80 |             egress: Some(vec![NetworkPolicyEgressRule {
81 |                 to: Some(vec![
82 |                     // we split the ipv4 space into two because there is a known issue with AWS VPC CNI
83 |                     // that makes using 0.0.0.0/0 as `cidr` very dangerous.
84 |                     // https://github.com/aws/aws-network-policy-agent/pull/58
85 |                     NetworkPolicyPeer {
86 |                         ip_block: Some(IPBlock {
87 |                             // 0.0.0.0 to 127.255.255.255
88 |                             cidr: "0.0.0.0/1".into(),
89 |                             except: Some(vec![
90 |                                 // private IP ranges: https://en.wikipedia.org/wiki/Private_network
91 |                                 "10.0.0.0/8".into(),
92 |                             ]),
93 |                         }),
94 |                         ..Default::default()
95 |                     },
96 |                     NetworkPolicyPeer {
97 |                         ip_block: Some(IPBlock {
98 |                             // 128.0.0.0 to 255.255.255.255
99 |                             cidr: "128.0.0.0/1".into(),
100 |                             except: Some(vec![
101 |                                 // private IP ranges: https://en.wikipedia.org/wiki/Private_network
102 |                                 "192.168.0.0/16".into(),
103 |                                 "172.16.0.0/12".into(),
104 |                                 // and the link-local IP range, as this is used by AWS instance metadata
105 |                                 "169.254.0.0/16".into(),
106 |                             ]),
107 |                         }),
108 |                         ..Default::default()
109 |                     },
110 |                 ]),
111 |                 ports: None, // all ports
112 |             }]),
113 |             ..Default::default()
114 |         }),
115 |     }
116 | }
117 |
118 | const AWS_POD_IDENTITY_POLICY_NAME: &str = "allow-restate-egress-to-aws-pod-identity";
119 |
120 | fn allow_aws_pod_identity(base_metadata: &ObjectMeta) -> NetworkPolicy {
121 |     // https://docs.aws.amazon.com/eks/latest/userguide/pod-id-how-it-works.html
122 |     NetworkPolicy {
123 |         metadata: object_meta(base_metadata, AWS_POD_IDENTITY_POLICY_NAME),
124 |         spec: Some(NetworkPolicySpec {
125 |             pod_selector: label_selector(base_metadata),
126 |             policy_types: Some(vec!["Egress".into()]),
127 |             egress: Some(vec![NetworkPolicyEgressRule {
128 |                 to: Some(vec![NetworkPolicyPeer {
129 |                     ip_block: Some(IPBlock {
130 |                         cidr: "169.254.170.23/32".into(),
131 |                         except: None,
132 |                     }),
133 |                     ..Default::default()
134 |                 }]),
135 |                 ports: Some(vec![NetworkPolicyPort {
136 |                     port: Some(IntOrString::Int(80)),
137 |                     protocol: Some("TCP".into()),
138 |                     end_port: None,
139 |                 }]), // port 80 only
140 |             }]),
141 |             ..Default::default()
142 |         }),
143 |     }
144 | }
145 |
146 | fn allow_access(
147 |     port_name: &str,
148 |     port: i32,
149 |     base_metadata: &ObjectMeta,
150 |     peers: Option<&[NetworkPolicyPeer]>,
151 | ) -> NetworkPolicy {
152 |     NetworkPolicy {
153 |         metadata: object_meta(base_metadata, format!("allow-{port_name}-access")),
154 |         spec: Some(NetworkPolicySpec {
155 |             pod_selector: label_selector(base_metadata),
156 |             policy_types: Some(vec!["Ingress".into()]),
157 |             ingress: peers.map(|peers| {
158 |                 vec![NetworkPolicyIngressRule {
159 |                     from: Some(peers.into()),
160 |                     ports: Some(vec![NetworkPolicyPort {
161 |                         protocol: Some("TCP".into()),
162 |                         port: Some(IntOrString::Int(port)),
163 |                         end_port: None,
164 |                     }]),
165 |                 }]
166 |             }),
167 |             ..Default::default()
168 |         }),
169 |     }
170 | }
171 |
172 | fn allow_egress(
173 |     base_metadata: &ObjectMeta,
174 |     egress: Option<&[crate::NetworkPolicyEgressRule]>,
175 | ) -> NetworkPolicy {
176 |     NetworkPolicy {
177 |         metadata: object_meta(base_metadata, "allow-restate-egress"),
178 |         spec: Some(NetworkPolicySpec {
179 |             pod_selector: label_selector(base_metadata),
180 |             policy_types: Some(vec!["Egress".into()]),
181 |             egress: egress.map(|e| e.iter().map(|r| r.clone().into()).collect()), // if none, this policy will do nothing
182 |             ..Default::default()
183 |         }),
184 |     }
185 | }
186 |
187 | pub async fn reconcile_network_policies(
188 |     client: Client,
189 |     namespace: &str,
190 |     base_metadata: &ObjectMeta,
191 |     network_peers: Option<&RestateClusterNetworkPeers>,
192 |     network_egress_rules: Option<&[crate::NetworkPolicyEgressRule]>,
193 |     aws_pod_identity_enabled: bool,
194 | ) -> Result<(), Error> {
195 |     let np_api: Api<NetworkPolicy> = Api::namespaced(client, namespace);
196 |
197 |     apply_network_policy(namespace, &np_api, deny_all(base_metadata)).await?;
198 |     apply_network_policy(namespace, &np_api, allow_dns(base_metadata)).await?;
199 |     apply_network_policy(namespace, &np_api, allow_public(base_metadata)).await?;
200 |
201 |     if aws_pod_identity_enabled {
202 |         apply_network_policy(namespace, &np_api, allow_aws_pod_identity(base_metadata)).await?
203 |     } else {
204 |         delete_network_policy(namespace, &np_api, AWS_POD_IDENTITY_POLICY_NAME).await?
205 |     }
206 |
207 |     apply_network_policy(
208 |         namespace,
209 |         &np_api,
210 |         allow_egress(base_metadata, network_egress_rules),
211 |     )
212 |     .await?;
213 |
214 |     apply_network_policy(
215 |         namespace,
216 |         &np_api,
217 |         allow_access(
218 |             "ingress",
219 |             8080,
220 |             base_metadata,
221 |             network_peers.and_then(|peers| peers.ingress.as_deref()),
222 |         ),
223 |     )
224 |     .await?;
225 |
226 |     apply_network_policy(
227 |         namespace,
228 |         &np_api,
229 |         allow_access(
230 |             "admin",
231 |             9070,
232 |             base_metadata,
233 |             network_peers.and_then(|peers| peers.admin.as_deref()),
234 |         ),
235 |     )
236 |     .await?;
237 |
238 |     apply_network_policy(
239 |         namespace,
240 |         &np_api,
241 |         allow_access(
242 |             "metrics",
243 |             5122,
244 |             base_metadata,
245 |             network_peers.and_then(|peers| peers.metrics.as_deref()),
246 |         ),
247 |     )
248 |     .await?;
249 |
250 |     Ok(())
251 | }
252 |
253 | async fn apply_network_policy(
254 |     namespace: &str,
255 |     np_api: &Api<NetworkPolicy>,
256 |     np: NetworkPolicy,
257 | ) -> Result<(), Error> {
258 |     let name = np.metadata.name.as_ref().unwrap();
259 |     let params: PatchParams = PatchParams::apply("restate-operator").force();
260 |     debug!(
261 |         "Applying NetworkPolicy {} in namespace {}",
262 |         name, namespace
263 |     );
264 |     np_api.patch(name, &params, &Patch::Apply(&np)).await?;
265 |     Ok(())
266 | }
267 |
268 | async fn delete_network_policy(
269 |     namespace: &str,
270 |     np_api: &Api<NetworkPolicy>,
271 |     name: &str,
272 | ) -> Result<(), Error> {
273 |     debug!(
274 |         "Ensuring NetworkPolicy {} in namespace {} does not exist",
275 |         name, namespace
276 |     );
277 |     match np_api.delete(name, &DeleteParams::default()).await {
278 |         Err(kube::Error::Api(kube::error::ErrorResponse { code: 404, .. })) => Ok(()),
279 |         Err(err) => Err(err.into()),
280 |         Ok(_) => Ok(()),
281 |     }
282 | }
283 |
--------------------------------------------------------------------------------
/src/reconcilers/quantity_parser.rs:
--------------------------------------------------------------------------------
1 | /// Adapted from https://github.com/sombralibre/k8s-quantity-parser to resolve a dependency conflict.
2 | /// MIT licensed, Copyright (c) 2022 Alejandro Llanes
3 | use k8s_openapi::apimachinery::pkg::api::resource::Quantity;
4 | use regex::Regex;
5 | use std::{num::ParseIntError, sync::OnceLock};
6 |
7 | #[allow(non_camel_case_types)]
8 | enum QuantityMemoryUnits {
9 |     Ki,
10 |     Mi,
11 |     Gi,
12 |     Ti,
13 |     Pi,
14 |     Ei,
15 |     k,
16 |     M,
17 |     G,
18 |     T,
19 |     P,
20 |     E,
21 |     m,
22 |     Invalid,
23 | }
24 |
25 | impl QuantityMemoryUnits {
26 |     fn new(unit: &str) -> Self {
27 |         match unit {
28 |             "Ki" => Self::Ki,
29 |             "Mi" => Self::Mi,
30 |             "Gi" => Self::Gi,
31 |             "Ti" => Self::Ti,
32 |             "Pi" => Self::Pi,
33 |             "Ei" => Self::Ei,
34 |             "k" => Self::k,
35 |             "M" => Self::M,
36 |             "G" => Self::G,
37 |             "T" => Self::T,
38 |             "P" => Self::P,
39 |             "E" => Self::E,
40 |             "m" => Self::m,
41 |             _ => Self::Invalid,
42 |         }
43 |     }
44 | }
45 |
46 | /// This trait works as a parser for the values retrieved from BTreeMap collections
47 | /// in `k8s_openapi::api::core::v1::Pod` and `k8s_openapi::api::core::v1::Node`
48 | ///
49 | /// # Errors
50 | /// The parser will fail if it encounters an invalid unit or fails to parse the value as i64
51 | pub trait QuantityParser {
52 |     /// This method will parse the memory resource values returned by the Kubernetes API
53 |     ///
54 |     /// ```rust
55 |     /// # use k8s_openapi::apimachinery::pkg::api::resource::Quantity;
56 |     /// # use k8s_quantity_parser::QuantityParser;
57 |     /// #
58 |     /// let mib = Quantity("1Mi".into());
59 |     /// let ret: i64 = 1048576;
60 |     /// assert_eq!(mib.to_bytes().ok().flatten().unwrap(), ret);
61 |     /// ```
62 |     ///
63 |     /// # Errors
64 |     ///
65 |     /// The parser will fail if it encounters an invalid unit or fails to parse the value as i64
66 |     ///
67 |     fn to_bytes(&self) -> Result<Option<i64>, ParseError>;
68 | }
69 |
70 | #[derive(Debug, thiserror::Error)]
71 | pub enum ParseError {
72 |     #[error(transparent)]
73 |     ParseIntError(#[from] ParseIntError),
74 |     #[error("Invalid memory unit")]
75 |     InvalidMemoryUnit,
76 | }
77 |
78 | impl QuantityParser for Quantity {
79 |     fn to_bytes(&self) -> Result<Option<i64>, ParseError> {
80 |         let unit_str = &self.0;
81 |         static REGEX: OnceLock<Regex> = OnceLock::new();
82 |         let cap = REGEX
83 |             .get_or_init(|| Regex::new(r"([[:alpha:]]{1,2}$)").unwrap())
84 |             .captures(unit_str);
85 |
86 |         if cap.is_none() {
87 |             return Ok(Some(unit_str.parse::<i64>()?));
88 |         };
89 |
90 |         // It is safe to use unwrap here, as the value is already checked.
91 |         match cap.unwrap().get(0) {
92 |             Some(m) => match QuantityMemoryUnits::new(m.as_str()) {
93 |                 QuantityMemoryUnits::Ki => {
94 |                     let unit_str = unit_str.replace(m.as_str(), "");
95 |                     let amount = unit_str.parse::<i64>()?;
96 |                     Ok(Some(amount * 1024))
97 |                 }
98 |                 QuantityMemoryUnits::Mi => {
99 |                     let unit_str = unit_str.replace(m.as_str(), "");
100 |                     let amount = unit_str.parse::<i64>()?;
101 |                     Ok(Some((amount * 1024) * 1024))
102 |                 }
103 |                 QuantityMemoryUnits::Gi => {
104 |                     let unit_str = unit_str.replace(m.as_str(), "");
105 |                     let amount = unit_str.parse::<i64>()?;
106 |                     Ok(Some(((amount * 1024) * 1024) * 1024))
107 |                 }
108 |                 QuantityMemoryUnits::Ti => {
109 |                     let unit_str = unit_str.replace(m.as_str(), "");
110 |                     let amount = unit_str.parse::<i64>()?;
111 |                     Ok(Some((((amount * 1024) * 1024) * 1024) * 1024))
112 |                 }
113 |                 QuantityMemoryUnits::Pi => {
114 |                     let unit_str = unit_str.replace(m.as_str(), "");
115 |                     let amount = unit_str.parse::<i64>()?;
116 |                     Ok(Some(((((amount * 1024) * 1024) * 1024) * 1024) * 1024))
117 |                 }
118 |                 QuantityMemoryUnits::Ei => {
119 |                     let unit_str = unit_str.replace(m.as_str(), "");
120 |                     let amount = unit_str.parse::<i64>()?;
121 |                     Ok(Some(
122 |                         (((((amount * 1024) * 1024) * 1024) * 1024) * 1024) * 1024,
123 |                     ))
124 |                 }
125 |                 QuantityMemoryUnits::k => {
126 |                     let unit_str = unit_str.replace(m.as_str(), "");
127 |                     let amount = unit_str.parse::<i64>()?;
128 |                     Ok(Some(amount * 1000))
129 |                 }
130 |                 QuantityMemoryUnits::M => {
131 |                     let unit_str = unit_str.replace(m.as_str(), "");
132 |                     let amount = unit_str.parse::<i64>()?;
133 |                     Ok(Some((amount * 1000) * 1000))
134 |                 }
135 |                 QuantityMemoryUnits::G => {
136 |                     let unit_str = unit_str.replace(m.as_str(), "");
137 |                     let amount = unit_str.parse::<i64>()?;
138 |                     Ok(Some(((amount * 1000) * 1000) * 1000))
139 |                 }
140 |                 QuantityMemoryUnits::T => {
141 |                     let unit_str = unit_str.replace(m.as_str(), "");
142 |                     let amount = unit_str.parse::<i64>()?;
143 |                     Ok(Some((((amount * 1000) * 1000) * 1000) * 1000))
144 |                 }
145 |                 QuantityMemoryUnits::P => {
146 |                     let unit_str = unit_str.replace(m.as_str(), "");
147 |                     let amount = unit_str.parse::<i64>()?;
148 |                     Ok(Some(((((amount * 1000) * 1000) * 1000) * 1000) * 1000))
149 |                 }
150 |                 QuantityMemoryUnits::E => {
151 |                     let unit_str = unit_str.replace(m.as_str(), "");
152 |                     let amount = unit_str.parse::<i64>()?;
153 |                     Ok(Some(
154 |                         (((((amount * 1000) * 1000) * 1000) * 1000) * 1000) * 1000,
155 |                     ))
156 |                 }
157 |                 QuantityMemoryUnits::m => {
158 |                     let unit_str = unit_str.replace(m.as_str(), "");
159 |                     let amount = unit_str.parse::<i64>()?;
160 |                     Ok(Some(amount / 1000))
161 |                 }
162 |                 QuantityMemoryUnits::Invalid => Err(ParseError::InvalidMemoryUnit),
163 |             },
164 |             None => Ok(None),
165 |         }
166 |     }
167 | }
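Note that this adaptation is integer-only: Kubernetes itself accepts fractional quantities such as `1.5Gi`, but here the suffix is stripped and the remainder parsed as i64, so fractional values surface as a ParseIntError, as the `parse_i64_fails` test below shows for `123.123`. A quick illustration (a sketch, not a test from this file):

fn fractional_quantities_are_rejected() {
    // "1.5" does not parse as i64, so the Gi multiplier is never applied
    assert!(Quantity("1.5Gi".into()).to_bytes().is_err());
}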
=> Err(ParseError::InvalidMemoryUnit), 163 | }, 164 | None => Ok(None), 165 | } 166 | } 167 | } 168 | 169 | #[cfg(test)] 170 | mod tests { 171 | use super::*; 172 | 173 | #[test] 174 | fn to_bytes_works() { 175 | assert!(Quantity("12345".into()).to_bytes().is_ok()) 176 | } 177 | 178 | #[test] 179 | fn to_bytes_is_some() { 180 | assert!(Quantity("12345".into()).to_bytes().unwrap().is_some()) 181 | } 182 | 183 | #[test] 184 | fn invalid_unit_fails() { 185 | assert!(Quantity("12345r".into()).to_bytes().is_err()) 186 | } 187 | 188 | #[test] 189 | fn parse_i64_fails() { 190 | assert!(Quantity("123.123".into()).to_bytes().is_err()) 191 | } 192 | 193 | #[test] 194 | fn is_none_value() { 195 | assert!(Quantity("0Mi".into()).to_bytes().unwrap().is_some()) 196 | } 197 | 198 | #[test] 199 | fn pow2_mb_to_bytes() { 200 | let mib = Quantity("1Mi".into()); 201 | let ret: i64 = 1048576; 202 | assert_eq!(mib.to_bytes().ok().flatten().unwrap(), ret); 203 | } 204 | 205 | #[test] 206 | fn pow10_gb_to_bytes() { 207 | let mib = Quantity("1G".into()); 208 | let ret: i64 = 1000000000; 209 | assert_eq!(mib.to_bytes().ok().flatten().unwrap(), ret); 210 | } 211 | } 212 | -------------------------------------------------------------------------------- /src/reconcilers/signing_key.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::BTreeMap, path::PathBuf}; 2 | 3 | use k8s_openapi::api::core::v1::{CSIVolumeSource, KeyToPath, SecretVolumeSource, Volume}; 4 | use kube::{ 5 | api::{DeleteParams, ObjectMeta, Patch, PatchParams}, 6 | Api, 7 | }; 8 | use tracing::{debug, warn}; 9 | 10 | use crate::{ 11 | secretproviderclasses::{SecretProviderClass, SecretProviderClassSpec}, 12 | Context, Error, RequestSigningPrivateKey, SecretProviderSigningKeySource, 13 | SecretSigningKeySource, 14 | }; 15 | 16 | use super::object_meta; 17 | 18 | const SECRET_PROVIDER_CLASS_NAME: &str = "request-signing-key-v1"; 19 | 20 | #[derive(thiserror::Error, Debug)] 21 | pub enum InvalidSigningKeyError { 22 | #[error("Invalid signing protocol version; only 'v1' is supported")] 23 | InvalidVersion, 24 | #[error("Multiple sources provided for signing private key; only one of 'secret', 'secretProvider' can be provided")] 25 | MultipleSourcesProvided, 26 | } 27 | 28 | pub async fn reconcile_signing_key( 29 | ctx: &Context, 30 | namespace: &str, 31 | base_metadata: &ObjectMeta, 32 | private_key: Option<&RequestSigningPrivateKey>, 33 | ) -> Result, Error> { 34 | let spc_api: Api = Api::namespaced(ctx.client.clone(), namespace); 35 | 36 | let private_key = if let Some(private_key) = private_key { 37 | private_key 38 | } else { 39 | // No private key configuration, clean up 40 | remove_secret_provider_class(ctx, namespace, &spc_api).await?; 41 | return Ok(None); 42 | }; 43 | 44 | match private_key.version.as_str() { 45 | "v1" => {} 46 | _ => return Err(InvalidSigningKeyError::InvalidVersion.into()), 47 | } 48 | 49 | match ( 50 | private_key.secret.as_ref(), 51 | private_key.secret_provider.as_ref(), 52 | ) { 53 | (Some(secret), None) => { 54 | remove_secret_provider_class(ctx, namespace, &spc_api).await?; 55 | 56 | Ok(Some(reconcile_signing_key_secret(secret))) 57 | } 58 | (None, Some(secret_provider)) => { 59 | if ctx.secret_provider_class_installed { 60 | Ok(Some( 61 | reconcile_signing_key_secret_provider( 62 | namespace, 63 | base_metadata, 64 | secret_provider, 65 | &spc_api, 66 | ) 67 | .await?, 68 | )) 69 | } else { 70 | warn!("Ignoring secret provider signing key source as the 
SecretProviderClass CRD is not installed"); 71 | Ok(None) 72 | } 73 | } 74 | (Some(_), Some(_)) => Err(InvalidSigningKeyError::MultipleSourcesProvided.into()), 75 | (None, None) => { 76 | // No private key configuration, clean up 77 | remove_secret_provider_class(ctx, namespace, &spc_api).await?; 78 | Ok(None) 79 | } 80 | } 81 | } 82 | 83 | pub fn reconcile_signing_key_secret(secret: &SecretSigningKeySource) -> (Volume, PathBuf) { 84 | let path = "private.pem"; 85 | ( 86 | Volume { 87 | name: "request-signing-private-key-secret".into(), 88 | secret: Some(SecretVolumeSource { 89 | secret_name: Some(secret.secret_name.clone()), 90 | items: Some(vec![KeyToPath { 91 | key: secret.key.clone(), 92 | path: path.into(), 93 | mode: Some(0o400), 94 | }]), 95 | ..Default::default() 96 | }), 97 | ..Default::default() 98 | }, 99 | path.into(), 100 | ) 101 | } 102 | 103 | pub async fn reconcile_signing_key_secret_provider( 104 | namespace: &str, 105 | base_metadata: &ObjectMeta, 106 | secret_provider: &SecretProviderSigningKeySource, 107 | spc_api: &Api, 108 | ) -> Result<(Volume, PathBuf), Error> { 109 | let spc = SecretProviderClass { 110 | metadata: object_meta(base_metadata, SECRET_PROVIDER_CLASS_NAME), 111 | spec: SecretProviderClassSpec { 112 | parameters: secret_provider.parameters.clone(), 113 | provider: secret_provider.provider.clone(), 114 | secret_objects: None, 115 | }, 116 | }; 117 | 118 | let params: PatchParams = PatchParams::apply("restate-operator").force(); 119 | debug!( 120 | "Applying SecretProviderClass {} in namespace {}", 121 | SECRET_PROVIDER_CLASS_NAME, namespace 122 | ); 123 | spc_api 124 | .patch(SECRET_PROVIDER_CLASS_NAME, ¶ms, &Patch::Apply(&spc)) 125 | .await?; 126 | 127 | Ok(( 128 | Volume { 129 | name: "request-signing-private-key-secret-provider".into(), 130 | csi: Some(CSIVolumeSource { 131 | driver: "secrets-store.csi.k8s.io".into(), 132 | read_only: Some(true), 133 | volume_attributes: Some(BTreeMap::from([( 134 | "secretProviderClass".into(), 135 | SECRET_PROVIDER_CLASS_NAME.into(), 136 | )])), 137 | ..Default::default() 138 | }), 139 | ..Default::default() 140 | }, 141 | secret_provider.path.clone(), 142 | )) 143 | } 144 | 145 | pub async fn remove_secret_provider_class( 146 | ctx: &Context, 147 | namespace: &str, 148 | spc_api: &Api, 149 | ) -> Result<(), Error> { 150 | if !ctx.secret_provider_class_installed { 151 | return Ok(()); 152 | } 153 | debug!( 154 | "Ensuring SecretProviderClass {} in namespace {} does not exist", 155 | SECRET_PROVIDER_CLASS_NAME, namespace 156 | ); 157 | match spc_api 158 | .delete(SECRET_PROVIDER_CLASS_NAME, &DeleteParams::default()) 159 | .await 160 | { 161 | Err(kube::Error::Api(kube::error::ErrorResponse { code: 404, .. 
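Both branches hand back a (Volume, PathBuf) pair, which the compute reconciler presumably adds to the Restate pod spec. Wiring the volume into a container would look roughly like this (a sketch; the function name and mount path are illustrative, not the operator's actual code):

use k8s_openapi::api::core::v1::{Container, VolumeMount};

// Hypothetical: mount the signing-key volume returned above into a container.
fn mount_signing_key(container: &mut Container, volume_name: &str) {
    container
        .volume_mounts
        .get_or_insert_with(Vec::new)
        .push(VolumeMount {
            name: volume_name.into(),
            mount_path: "/private-key".into(), // illustrative path
            read_only: Some(true),
            ..Default::default()
        });
}

Mounting read-only mirrors the intent of the 0o400 mode on the Secret-based source and the read_only CSI attribute on the provider-based one.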
})) => Ok(()), 162 | Err(err) => Err(err.into()), 163 | Ok(_) => Ok(()), 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /src/schemagen.rs: -------------------------------------------------------------------------------- 1 | use schemars::JsonSchema; 2 | fn main() { 3 | let mut gen = schemars::gen::SchemaSettings::openapi3() 4 | .with(|s| { 5 | s.inline_subschemas = true; 6 | s.meta_schema = None; 7 | }) 8 | .with_visitor(kube::core::schema::StructuralSchemaRewriter) 9 | .into_generator(); 10 | print!( 11 | "{}", 12 | serde_json::to_string_pretty(&restate_operator::RestateCluster::json_schema(&mut gen)) 13 | .unwrap() 14 | ) 15 | } 16 | -------------------------------------------------------------------------------- /src/secretproviderclasses.rs: -------------------------------------------------------------------------------- 1 | // WARNING: generated by kopium - manual changes will be overwritten 2 | // kopium command: kopium secretproviderclasses.secrets-store.csi.x-k8s.io -A 3 | // kopium version: 0.16.5 4 | 5 | use kube::CustomResource; 6 | use schemars::JsonSchema; 7 | use serde::{Deserialize, Serialize}; 8 | use std::collections::BTreeMap; 9 | 10 | /// SecretProviderClassSpec defines the desired state of SecretProviderClass 11 | #[derive(CustomResource, Serialize, Deserialize, Clone, Debug, JsonSchema)] 12 | #[kube( 13 | group = "secrets-store.csi.x-k8s.io", 14 | version = "v1", 15 | kind = "SecretProviderClass", 16 | plural = "secretproviderclasses" 17 | )] 18 | #[kube(namespaced)] 19 | pub struct SecretProviderClassSpec { 20 | /// Configuration for specific provider 21 | #[serde(default, skip_serializing_if = "Option::is_none")] 22 | pub parameters: Option>, 23 | /// Configuration for provider name 24 | #[serde(default, skip_serializing_if = "Option::is_none")] 25 | pub provider: Option, 26 | #[serde( 27 | default, 28 | skip_serializing_if = "Option::is_none", 29 | rename = "secretObjects" 30 | )] 31 | pub secret_objects: Option>, 32 | } 33 | 34 | /// SecretObject defines the desired state of synced K8s secret objects 35 | #[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] 36 | pub struct SecretProviderClassSecretObjects { 37 | /// annotations of k8s secret object 38 | #[serde(default, skip_serializing_if = "Option::is_none")] 39 | pub annotations: Option>, 40 | #[serde(default, skip_serializing_if = "Option::is_none")] 41 | pub data: Option>, 42 | /// labels of K8s secret object 43 | #[serde(default, skip_serializing_if = "Option::is_none")] 44 | pub labels: Option>, 45 | /// name of the K8s secret object 46 | #[serde( 47 | default, 48 | skip_serializing_if = "Option::is_none", 49 | rename = "secretName" 50 | )] 51 | pub secret_name: Option, 52 | /// type of K8s secret object 53 | #[serde(default, skip_serializing_if = "Option::is_none", rename = "type")] 54 | pub r#type: Option, 55 | } 56 | 57 | /// SecretObjectData defines the desired state of synced K8s secret object data 58 | #[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] 59 | pub struct SecretProviderClassSecretObjectsData { 60 | /// data field to populate 61 | #[serde(default, skip_serializing_if = "Option::is_none")] 62 | pub key: Option, 63 | /// name of the object to sync 64 | #[serde( 65 | default, 66 | skip_serializing_if = "Option::is_none", 67 | rename = "objectName" 68 | )] 69 | pub object_name: Option, 70 | } 71 | 72 | /// SecretProviderClassStatus defines the observed state of SecretProviderClass 73 | #[derive(Serialize, Deserialize, 
74 | pub struct SecretProviderClassStatus {}
75 |
--------------------------------------------------------------------------------
/src/securitygrouppolicies.rs:
--------------------------------------------------------------------------------
1 | // WARNING: generated by kopium - manual changes will be overwritten
2 | // kopium command: kopium securitygrouppolicies.vpcresources.k8s.aws -A
3 | // kopium version: 0.16.5
4 |
5 | use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector;
6 | use kube::CustomResource;
7 | use schemars::JsonSchema;
8 | use serde::{Deserialize, Serialize};
9 |
10 | /// SecurityGroupPolicySpec defines the desired state of SecurityGroupPolicy
11 | #[derive(CustomResource, Serialize, Deserialize, Clone, Debug, JsonSchema)]
12 | #[kube(
13 |     group = "vpcresources.k8s.aws",
14 |     version = "v1beta1",
15 |     kind = "SecurityGroupPolicy",
16 |     plural = "securitygrouppolicies"
17 | )]
18 | #[kube(namespaced)]
19 | pub struct SecurityGroupPolicySpec {
20 |     /// A label selector is a label query over a set of resources. The result of matchLabels and matchExpressions are ANDed. An empty label selector matches all objects. A null label selector matches no objects.
21 |     #[serde(
22 |         default,
23 |         skip_serializing_if = "Option::is_none",
24 |         rename = "podSelector"
25 |     )]
26 |     pub pod_selector: Option<LabelSelector>,
27 |     /// GroupIds contains the list of security groups that will be applied to the network interface of the pod matching the criteria.
28 |     #[serde(
29 |         default,
30 |         skip_serializing_if = "Option::is_none",
31 |         rename = "securityGroups"
32 |     )]
33 |     pub security_groups: Option<SecurityGroupPolicySecurityGroups>,
34 |     /// A label selector is a label query over a set of resources. The result of matchLabels and matchExpressions are ANDed. An empty label selector matches all objects. A null label selector matches no objects.
35 |     #[serde(
36 |         default,
37 |         skip_serializing_if = "Option::is_none",
38 |         rename = "serviceAccountSelector"
39 |     )]
40 |     pub service_account_selector: Option<LabelSelector>,
41 | }
42 |
43 | /// GroupIds contains the list of security groups that will be applied to the network interface of the pod matching the criteria.
44 | #[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)]
45 | pub struct SecurityGroupPolicySecurityGroups {
46 |     /// Groups is the list of EC2 Security Groups Ids that need to be applied to the ENI of a Pod.
47 |     #[serde(default, skip_serializing_if = "Option::is_none", rename = "groupIds")]
48 |     pub group_ids: Option<Vec<String>>,
49 | }
50 |
--------------------------------------------------------------------------------
/src/telemetry.rs:
--------------------------------------------------------------------------------
1 | use opentelemetry::trace::TraceId;
2 | use tracing_subscriber::{prelude::*, EnvFilter, Registry};
3 |
4 | /// Fetch an opentelemetry::trace::TraceId as hex through the full tracing stack
5 | pub fn get_trace_id() -> Option<TraceId> {
6 |     use opentelemetry::trace::TraceContextExt as _; // opentelemetry::Context -> opentelemetry::trace::Span
7 |     use tracing_opentelemetry::OpenTelemetrySpanExt as _; // tracing::Span -> opentelemetry::Context
8 |
9 |     match tracing::Span::current()
10 |         .context()
11 |         .span()
12 |         .span_context()
13 |         .trace_id()
14 |     {
15 |         TraceId::INVALID => None,
16 |         valid => Some(valid),
17 |     }
18 | }
19 |
20 | #[cfg(feature = "telemetry")]
21 | async fn init_tracer() -> opentelemetry::sdk::trace::Tracer {
22 |     let otlp_endpoint = std::env::var("OPENTELEMETRY_ENDPOINT_URL")
23 |         .expect("Need an otel tracing collector configured");
24 |
25 |     let channel = tonic::transport::Channel::from_shared(otlp_endpoint)
26 |         .unwrap()
27 |         .connect()
28 |         .await
29 |         .unwrap();
30 |
31 |     opentelemetry_otlp::new_pipeline()
32 |         .tracing()
33 |         .with_exporter(
34 |             opentelemetry_otlp::new_exporter()
35 |                 .tonic()
36 |                 .with_channel(channel),
37 |         )
38 |         .with_trace_config(opentelemetry::sdk::trace::config().with_resource(
39 |             opentelemetry::sdk::Resource::new(vec![opentelemetry::KeyValue::new(
40 |                 "service.name",
41 |                 "restate-operator",
42 |             )]),
43 |         ))
44 |         .install_batch(opentelemetry::runtime::Tokio)
45 |         .unwrap()
46 | }
47 |
48 | /// Initialize tracing
49 | pub async fn init() {
50 |     // Setup tracing layers
51 |     #[cfg(feature = "telemetry")]
52 |     let telemetry = tracing_opentelemetry::layer().with_tracer(init_tracer().await);
53 |     let logger = tracing_subscriber::fmt::layer().compact();
54 |     let env_filter = EnvFilter::try_from_default_env()
55 |         .or_else(|_| EnvFilter::try_new("info"))
56 |         .unwrap();
57 |
58 |     // Decide on layers
59 |     #[cfg(feature = "telemetry")]
60 |     let collector = Registry::default()
61 |         .with(telemetry)
62 |         .with(logger)
63 |         .with(env_filter);
64 |     #[cfg(not(feature = "telemetry"))]
65 |     let collector = Registry::default().with(logger).with(env_filter);
66 |
67 |     // Initialize tracing
68 |     tracing::subscriber::set_global_default(collector).unwrap();
69 | }
70 |
71 | #[cfg(test)]
72 | mod test {
73 |     // This test only works when telemetry is initialized fully
74 |     // and requires OPENTELEMETRY_ENDPOINT_URL pointing to a valid server
75 |     #[cfg(feature = "telemetry")]
76 |     #[tokio::test]
77 |     #[ignore = "requires a trace exporter"]
78 |     async fn get_trace_id_returns_valid_traces() {
79 |         use super::*;
80 |         super::init().await;
81 |         #[tracing::instrument(name = "test_span")] // need to be in an instrumented fn
82 |         fn test_trace_id() -> Option<TraceId> {
83 |             get_trace_id()
84 |         }
85 |         assert_ne!(test_trace_id(), None, "valid trace");
86 |         assert_ne!(test_trace_id(), Some(TraceId::INVALID), "valid trace");
87 |     }
88 | }
89 |
--------------------------------------------------------------------------------
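init() reads the standard RUST_LOG environment variable via EnvFilter and falls back to an `info` filter when the variable is unset or invalid, so the operator logs at info level by default. A minimal sketch of just that fallback behaviour (illustrative, assuming RUST_LOG is not set in the environment):

use tracing_subscriber::EnvFilter;

// Assumes RUST_LOG is unset: try_from_default_env() errs and the fallback applies.
fn default_filter_is_info() {
    let filter = EnvFilter::try_from_default_env()
        .or_else(|_| EnvFilter::try_new("info"))
        .unwrap();
    assert_eq!(filter.to_string(), "info");
}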