├── .gitignore ├── CODEOWNERS ├── pkg ├── api │ └── v1alpha1 │ │ ├── doc.go │ │ ├── register.go │ │ ├── types.go │ │ └── zz_generated.deepcopy.go ├── kubernetes │ ├── scheme_test.go │ ├── node.go │ ├── drain_test.go │ ├── client.go │ ├── labels.go │ ├── scheme.go │ ├── pod.go │ ├── watch_test.go │ ├── deployment.go │ ├── watch.go │ ├── drain.go │ ├── port_forward.go │ └── labels_test.go ├── logging │ └── logger.go ├── cloudprovider │ ├── cloud_provider_test.go │ ├── gcp │ │ ├── provider_test.go │ │ ├── compute_test.go │ │ ├── provider.go │ │ ├── compute.go │ │ ├── cloud_provider_test.go │ │ └── cloud_provider.go │ ├── fake │ │ └── cloud_provider.go │ └── cloud_provider.go ├── test │ ├── generate_test.go │ └── generate.go ├── controller │ ├── controller_test.go │ ├── controller.go │ ├── pod_safe_to_evict_annotator.go │ ├── pod_safe_to_evict_annotator_test.go │ ├── spot_migrator.go │ └── spot_migrator_test.go └── config │ ├── config.go │ └── config_test.go ├── charts └── cost-manager │ ├── Chart.yaml │ ├── templates │ ├── configmap.yaml │ ├── service-account.yaml │ ├── cluster-role-binding.yaml │ ├── vpa.yaml │ ├── pod-monitor.yaml │ ├── cluster-role.yaml │ ├── prometheus-rule.yaml │ └── deployment.yaml │ └── values.yaml ├── .dockerignore ├── e2e ├── errors.go ├── errors_test.go ├── config │ ├── kube-prometheus-stack-values.yaml │ ├── kind.yaml │ └── prometheus.yaml ├── pod_safe_to_evict_annotator_test.go ├── prometheus_alerts_test.go ├── main_test.go └── spot_migrator_test.go ├── hack ├── tools │ └── tools.go └── notes │ └── notes.go ├── .golangci.yml ├── CONTRIBUTING.md ├── Dockerfile ├── RELEASE.md ├── Makefile ├── .github └── workflows │ ├── verify.yaml │ └── ci.yaml ├── ROADMAP.md ├── main.go ├── README.md ├── go.mod ├── LICENSE └── go.sum /.gitignore: -------------------------------------------------------------------------------- 1 | /bin/ 2 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @dippynark 2 | -------------------------------------------------------------------------------- /pkg/api/v1alpha1/doc.go: -------------------------------------------------------------------------------- 1 | // +kubebuilder:object:generate=true 2 | package v1alpha1 3 | -------------------------------------------------------------------------------- /charts/cost-manager/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | type: application 3 | name: cost-manager 4 | version: 0.1.0 5 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .dockerignore 2 | Dockerfile 3 | 4 | Makefile 5 | 6 | .git/ 7 | .gitignore 8 | .github/ 9 | 10 | bin/ 11 | 12 | LICENSE 13 | README.md 14 | 15 | charts/ 16 | 17 | e2e/ 18 | -------------------------------------------------------------------------------- /charts/cost-manager/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: cost-manager 5 | namespace: {{ .Release.Namespace }} 6 | data: 7 | config.yaml: | 8 | {{ toYaml .Values.config | trim | indent 4 }} 9 | -------------------------------------------------------------------------------- /e2e/errors.go: -------------------------------------------------------------------------------- 
1 | package e2e 2 | 3 | // extractErrorMessage returns the error message or the empty string if nil 4 | func extractErrorMessage(err error) string { 5 | if err == nil { 6 | return "" 7 | } 8 | return err.Error() 9 | } 10 | -------------------------------------------------------------------------------- /e2e/errors_test.go: -------------------------------------------------------------------------------- 1 | package e2e 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestExtractNilErrorMessage(t *testing.T) { 10 | require.Equal(t, "", extractErrorMessage(nil)) 11 | } 12 | -------------------------------------------------------------------------------- /pkg/kubernetes/scheme_test.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestNewScheme(t *testing.T) { 10 | _, err := NewScheme() 11 | require.NoError(t, err) 12 | } 13 | -------------------------------------------------------------------------------- /pkg/api/v1alpha1/register.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "k8s.io/apimachinery/pkg/runtime/schema" 5 | ) 6 | 7 | const GroupName = "cost-manager.io" 8 | 9 | var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: "v1alpha1"} 10 | -------------------------------------------------------------------------------- /hack/tools/tools.go: -------------------------------------------------------------------------------- 1 | //go:build tools 2 | 3 | package tools 4 | 5 | // Import tools required by build scripts to force `go mod` to see them as dependencies 6 | import ( 7 | _ "sigs.k8s.io/controller-tools/cmd/controller-gen" 8 | _ "sigs.k8s.io/kubebuilder-release-tools/notes" 9 | ) 10 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | linters: 2 | disable-all: true 3 | enable: 4 | - errcheck 5 | - gofmt 6 | - goimports 7 | - gosec 8 | - gosimple 9 | - govet 10 | - ineffassign 11 | - misspell 12 | - staticcheck 13 | - stylecheck 14 | - testifylint 15 | - unused 16 | - whitespace 17 | -------------------------------------------------------------------------------- /pkg/logging/logger.go: -------------------------------------------------------------------------------- 1 | package logging 2 | 3 | import ( 4 | "github.com/go-logr/logr" 5 | "sigs.k8s.io/controller-runtime/pkg/log" 6 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 7 | ) 8 | 9 | var Logger logr.Logger 10 | 11 | func init() { 12 | log.SetLogger(zap.New()) 13 | Logger = log.Log 14 | } 15 | -------------------------------------------------------------------------------- /pkg/cloudprovider/cloud_provider_test.go: -------------------------------------------------------------------------------- 1 | package cloudprovider 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func TestNewCloudProvider(t *testing.T) { 11 | _, err := NewCloudProvider(context.Background(), "") 12 | require.Error(t, err) 13 | } 14 | -------------------------------------------------------------------------------- /charts/cost-manager/templates/service-account.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: 
ServiceAccount 3 | metadata: 4 | name: cost-manager 5 | namespace: {{ .Release.Namespace }} 6 | {{- if .Values.serviceAccount.annotations }} 7 | annotations: 8 | {{ .Values.serviceAccount.annotations | toYaml | indent 4 }} 9 | {{- end }} 10 | -------------------------------------------------------------------------------- /pkg/kubernetes/node.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | const ( 4 | // https://github.com/kubernetes/autoscaler/blob/5bf33b23f2bcf5f9c8ccaf99d445e25366ee7f40/cluster-autoscaler/utils/taints/taints.go#L39-L42 5 | ToBeDeletedTaint = "ToBeDeletedByClusterAutoscaler" 6 | DeletionCandidateTaint = "DeletionCandidateOfClusterAutoscaler" 7 | ) 8 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | ## Logging 4 | 5 | All contributions should use contextual logging as described in 6 | [KEP-3077](https://github.com/kubernetes/enhancements/tree/56b0982f6e0b71ec901779d58b041c6126066437/keps/sig-instrumentation/3077-contextual-logging). 7 | 8 | ## Testing Locally 9 | 10 | Run the command `make test` to test changes locally. 11 | -------------------------------------------------------------------------------- /charts/cost-manager/templates/cluster-role-binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: cost-manager 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: cost-manager 9 | subjects: 10 | - kind: ServiceAccount 11 | name: cost-manager 12 | namespace: {{ .Release.Namespace }} 13 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.23.4 as build 2 | 3 | WORKDIR /go/src/cost-manager 4 | 5 | COPY go.mod go.sum ./ 6 | RUN go mod download 7 | 8 | COPY . . 
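# go.mod and go.sum are copied and modules downloaded before the rest of the source so that the cached module download layer is reused when only source files change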
9 | 10 | # Build static cost-manager binary 11 | RUN CGO_ENABLED=0 go build -ldflags="-s -w" -o /go/bin/cost-manager 12 | 13 | FROM gcr.io/distroless/static-debian12:nonroot 14 | 15 | COPY --from=build /go/bin/cost-manager / 16 | ENTRYPOINT ["/cost-manager"] 17 | -------------------------------------------------------------------------------- /charts/cost-manager/templates/vpa.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.vpa.enabled }} 2 | apiVersion: autoscaling.k8s.io/v1 3 | kind: VerticalPodAutoscaler 4 | metadata: 5 | name: cost-manager 6 | namespace: {{ .Release.Namespace }} 7 | spec: 8 | targetRef: 9 | apiVersion: apps/v1 10 | kind: Deployment 11 | name: cost-manager 12 | updatePolicy: 13 | updateMode: Auto 14 | minReplicas: 1 15 | {{- end }} 16 | -------------------------------------------------------------------------------- /charts/cost-manager/templates/pod-monitor.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.podMonitor.enabled }} 2 | apiVersion: monitoring.coreos.com/v1 3 | kind: PodMonitor 4 | metadata: 5 | name: cost-manager 6 | namespace: {{ .Release.Namespace }} 7 | spec: 8 | jobLabel: app.kubernetes.io/name 9 | selector: 10 | matchLabels: 11 | app.kubernetes.io/name: cost-manager 12 | podMetricsEndpoints: 13 | - port: metrics 14 | {{- end }} 15 | -------------------------------------------------------------------------------- /pkg/kubernetes/drain_test.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/require" 8 | "k8s.io/client-go/kubernetes/fake" 9 | ) 10 | 11 | func TestWaitForNodeToBeDeletedWithMissingNode(t *testing.T) { 12 | ctx := context.Background() 13 | clientset := fake.NewSimpleClientset() 14 | err := WaitForNodeToBeDeleted(ctx, clientset, "test") 15 | require.NoError(t, err) 16 | } 17 | -------------------------------------------------------------------------------- /pkg/cloudprovider/gcp/provider_test.go: -------------------------------------------------------------------------------- 1 | package gcp 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestParseProviderID(t *testing.T) { 10 | providerID := "gce://my-project/my-zone/my-instance" 11 | project, zone, instanceName, err := parseProviderID(providerID) 12 | require.NoError(t, err) 13 | require.Equal(t, "my-project", project) 14 | require.Equal(t, "my-zone", zone) 15 | require.Equal(t, "my-instance", instanceName) 16 | } 17 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | # Release Process 2 | 3 | We take inspiration from the controller-runtime release process: 4 | https://github.com/kubernetes-sigs/controller-runtime/blob/main/RELEASE.md 5 | 6 | 1. Create a new release branch from main: `git checkout -b release-` 7 | 2. Push the new branch to the remote repository: ` git push --set-upstream origin release-` 8 | 3. Fetch all tags from the remote: `git fetch --all --tags` 9 | 4. Generate the release notes: `go run ./hack/notes` 10 | 5. 
Create a new release in GitHub from the release branch, pasting the generated release notes 11 | -------------------------------------------------------------------------------- /pkg/kubernetes/client.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "k8s.io/client-go/rest" 5 | "sigs.k8s.io/controller-runtime/pkg/client" 6 | "sigs.k8s.io/controller-runtime/pkg/client/config" 7 | ) 8 | 9 | func NewClient() (client.WithWatch, *rest.Config, error) { 10 | scheme, err := NewScheme() 11 | if err != nil { 12 | return nil, nil, err 13 | } 14 | 15 | restConfig := config.GetConfigOrDie() 16 | kubeClient, err := client.NewWithWatch(restConfig, client.Options{Scheme: scheme}) 17 | if err != nil { 18 | return nil, nil, err 19 | } 20 | 21 | return kubeClient, restConfig, nil 22 | } 23 | -------------------------------------------------------------------------------- /pkg/kubernetes/labels.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 5 | "k8s.io/apimachinery/pkg/labels" 6 | ) 7 | 8 | func SelectorMatchesLabels(labelSelector *metav1.LabelSelector, resourceLabels map[string]string) (bool, error) { 9 | // A nil selector matches everything (the same as an empty selector) to avoid surprises 10 | if labelSelector == nil { 11 | return true, nil 12 | } 13 | selector, err := metav1.LabelSelectorAsSelector(labelSelector) 14 | if err != nil { 15 | return false, err 16 | } 17 | return selector.Matches(labels.Set(resourceLabels)), nil 18 | } 19 | -------------------------------------------------------------------------------- /charts/cost-manager/values.yaml: -------------------------------------------------------------------------------- 1 | image: 2 | # TODO(dippynark): Use HSBC Docker Hub repository 3 | repository: docker.io/dippynark/cost-manager 4 | tag: latest 5 | pullPolicy: IfNotPresent 6 | 7 | config: 8 | apiVersion: cost-manager.io/v1alpha1 9 | kind: CostManagerConfiguration 10 | 11 | serviceAccount: 12 | annotations: {} 13 | 14 | # Create VPA to vertically autoscale cost-manager: 15 | # https://cloud.google.com/kubernetes-engine/docs/concepts/verticalpodautoscaler 16 | vpa: 17 | enabled: false 18 | 19 | prometheusRule: 20 | enabled: false 21 | groups: 22 | rules: 23 | labels: {} 24 | annotations: {} 25 | 26 | podMonitor: 27 | enabled: false 28 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | IMAGE = cost-manager 2 | BIN_DIR = ./bin 3 | 4 | test: 5 | go test $(shell go list ./... | grep -v github.com/hsbc/cost-manager/e2e) -race 6 | 7 | build: 8 | go build -o $(BIN_DIR)/cost-manager 9 | 10 | .PHONY: e2e 11 | e2e: 12 | go test ./e2e --test.image=$(IMAGE) --parallel=4 --shuffle=on -race -v 13 | 14 | image: 15 | docker build -t $(IMAGE) . 16 | 17 | generate: controller-gen 18 | $(BIN_DIR)/controller-gen object \ 19 | paths=./pkg/api/... 
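# verify regenerates code and then fails if the working tree contains uncommitted changes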
20 | 21 | verify: generate 22 | git diff --exit-code 23 | 24 | # https://book.kubebuilder.io/reference/controller-gen 25 | controller-gen: 26 | ls $(BIN_DIR)/controller-gen || go build -o $(BIN_DIR)/controller-gen sigs.k8s.io/controller-tools/cmd/controller-gen 27 | -------------------------------------------------------------------------------- /.github/workflows/verify.yaml: -------------------------------------------------------------------------------- 1 | # https://github.com/kubernetes-sigs/kubebuilder-release-tools#pr-verification-github-action 2 | name: pr 3 | on: 4 | # pull_request_target runs in the context of the base of the pull request so changes won't take 5 | # effect until this file is merged into the main branch: 6 | # https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request_target 7 | pull_request_target: 8 | types: [opened, edited, reopened, synchronize] 9 | jobs: 10 | verify: 11 | runs-on: ubuntu-latest 12 | name: verify 13 | steps: 14 | - name: Verifier action 15 | uses: kubernetes-sigs/kubebuilder-release-tools@v0.4.3 16 | with: 17 | github_token: ${{ secrets.GITHUB_TOKEN }} 18 | -------------------------------------------------------------------------------- /e2e/config/kube-prometheus-stack-values.yaml: -------------------------------------------------------------------------------- 1 | global: 2 | rbac: 3 | create: true 4 | pspEnabled: false 5 | fullnameOverride: prometheus 6 | defaultRules: 7 | create: false 8 | alertmanager: 9 | enabled: false 10 | grafana: 11 | enabled: false 12 | kubeApiServer: 13 | enabled: false 14 | kubelet: 15 | enabled: false 16 | kubeControllerManager: 17 | enabled: false 18 | coreDns: 19 | enabled: false 20 | kubeEtcd: 21 | enabled: false 22 | kubeScheduler: 23 | enabled: false 24 | kubeProxy: 25 | enabled: false 26 | kubeStateMetrics: 27 | enabled: false 28 | nodeExporter: 29 | enabled: false 30 | prometheus: 31 | enabled: false 32 | prometheusOperator: 33 | prometheusInstanceNamespaces: 34 | - monitoring 35 | serviceMonitor: 36 | selfMonitor: false 37 | -------------------------------------------------------------------------------- /e2e/config/kind.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kind.x-k8s.io/v1alpha4 2 | kind: Cluster 3 | nodes: 4 | - role: control-plane 5 | # Create worker so that there is always somewhere to schedule to 6 | - role: worker 7 | # Create worker to be drained by spot-migrator 8 | - role: worker 9 | labels: 10 | spot-migrator: "true" 11 | # Add taint to prevent other workloads from being scheduled to this Node unless we want them to: 12 | # https://kind.sigs.k8s.io/docs/user/configuration/#kubeadm-config-patches 13 | # https://kubernetes.io/docs/reference/config-api/kubeadm-config.v1beta3/#kubeadm-k8s-io-v1beta3-JoinConfiguration 14 | kubeadmConfigPatches: 15 | - | 16 | kind: JoinConfiguration 17 | nodeRegistration: 18 | taints: 19 | - key: spot-migrator 20 | value: "true" 21 | effect: NoSchedule 22 | -------------------------------------------------------------------------------- /pkg/cloudprovider/fake/cloud_provider.go: -------------------------------------------------------------------------------- 1 | package fake 2 | 3 | import ( 4 | "context" 5 | 6 | corev1 "k8s.io/api/core/v1" 7 | ) 8 | 9 | const ( 10 | SpotInstanceLabelKey = "is-spot-instance" 11 | SpotInstanceLabelValue = "true" 12 | ) 13 | 14 | // CloudProvider is a fake implementation of the cloudprovider.CloudProvider interface for testing 15 | type 
CloudProvider struct{} 16 | 17 | func (fake *CloudProvider) DeleteInstance(ctx context.Context, node *corev1.Node) error { 18 | return nil 19 | } 20 | 21 | func (fake *CloudProvider) IsSpotInstance(ctx context.Context, node *corev1.Node) (bool, error) { 22 | if node.Labels == nil { 23 | return false, nil 24 | } 25 | value, ok := node.Labels[SpotInstanceLabelKey] 26 | if !ok { 27 | return false, nil 28 | } 29 | return value == SpotInstanceLabelValue, nil 30 | } 31 | -------------------------------------------------------------------------------- /pkg/kubernetes/scheme.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "github.com/pkg/errors" 5 | monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" 6 | "k8s.io/apimachinery/pkg/runtime" 7 | "k8s.io/client-go/kubernetes/scheme" 8 | ) 9 | 10 | // NewScheme creates a new scheme: 11 | // https://book.kubebuilder.io/cronjob-tutorial/gvks.html#err-but-whats-that-scheme-thing 12 | func NewScheme() (*runtime.Scheme, error) { 13 | newScheme := runtime.NewScheme() 14 | 15 | err := scheme.AddToScheme(newScheme) 16 | if err != nil { 17 | return newScheme, errors.Wrap(err, "failed to add core kinds to scheme") 18 | } 19 | 20 | err = monitoringv1.AddToScheme(newScheme) 21 | if err != nil { 22 | return newScheme, errors.Wrap(err, "failed to add monitoring kinds to scheme") 23 | } 24 | 25 | return newScheme, nil 26 | } 27 | -------------------------------------------------------------------------------- /charts/cost-manager/templates/cluster-role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: cost-manager 5 | rules: 6 | # spot-migrator 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - nodes 11 | verbs: 12 | - get 13 | - list 14 | - watch 15 | - patch 16 | - update 17 | - apiGroups: 18 | - "" 19 | resources: 20 | - pods 21 | verbs: 22 | - get 23 | - list 24 | - apiGroups: 25 | - "" 26 | resources: 27 | - pods/eviction 28 | verbs: 29 | - create 30 | - apiGroups: 31 | - apps 32 | resources: 33 | - daemonsets 34 | verbs: 35 | - get 36 | - list 37 | # pod-safe-to-evict-annotator 38 | - apiGroups: 39 | - "" 40 | resources: 41 | - pods 42 | verbs: 43 | - get 44 | - list 45 | - watch 46 | - update 47 | - apiGroups: 48 | - "" 49 | resources: 50 | - namespaces 51 | verbs: 52 | - get 53 | - list 54 | - watch 55 | -------------------------------------------------------------------------------- /pkg/kubernetes/pod.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "context" 5 | 6 | corev1 "k8s.io/api/core/v1" 7 | apiwatch "k8s.io/apimachinery/pkg/watch" 8 | "k8s.io/client-go/tools/watch" 9 | "k8s.io/kubectl/pkg/util/podutils" 10 | "sigs.k8s.io/controller-runtime/pkg/client" 11 | ) 12 | 13 | func WaitForAnyReadyPod(ctx context.Context, kubeClient client.WithWatch, opts ...client.ListOption) (*corev1.Pod, error) { 14 | listerWatcher := NewListerWatcher(ctx, kubeClient, &corev1.PodList{}, opts...) 
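// Condition that is checked for every watch event until a ready Pod is observed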
15 | condition := func(event apiwatch.Event) (bool, error) { 16 | pod, err := ParseWatchEventObject[*corev1.Pod](event) 17 | if err != nil { 18 | return false, err 19 | } 20 | return podutils.IsPodReady(pod), nil 21 | } 22 | event, err := watch.UntilWithSync(ctx, listerWatcher, &corev1.Pod{}, nil, condition) 23 | if err != nil { 24 | return nil, err 25 | } 26 | return event.Object.(*corev1.Pod), nil 27 | } 28 | -------------------------------------------------------------------------------- /pkg/cloudprovider/gcp/compute_test.go: -------------------------------------------------------------------------------- 1 | package gcp 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | "google.golang.org/api/compute/v1" 8 | "knative.dev/pkg/ptr" 9 | ) 10 | 11 | func TestGetManagedInstanceGroupFromInstance(t *testing.T) { 12 | instance := &compute.Instance{ 13 | Metadata: &compute.Metadata{ 14 | Items: []*compute.MetadataItems{ 15 | { 16 | Key: "instance-template", 17 | Value: ptr.String("projects/my-project-number/global/instanceTemplates/my-instance-template"), 18 | }, 19 | { 20 | Key: "created-by", 21 | Value: ptr.String("projects/my-project-number/zones/my-zone/instanceGroupManagers/my-managed-instance-group"), 22 | }, 23 | }, 24 | }, 25 | } 26 | managedInstanceGroupName, err := getManagedInstanceGroupFromInstance(instance) 27 | require.NoError(t, err) 28 | require.Equal(t, "my-managed-instance-group", managedInstanceGroupName) 29 | } 30 | -------------------------------------------------------------------------------- /pkg/cloudprovider/gcp/provider.go: -------------------------------------------------------------------------------- 1 | package gcp 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "sigs.k8s.io/cluster-api-provider-gcp/cloud/providerid" 8 | ) 9 | 10 | // parseProviderID parses the node.Spec.ProviderID of a GKE Node. 
We assume the following format: 11 | // https://github.com/kubernetes-sigs/cluster-api-provider-gcp/blob/173d8a201d251cb78a76bf47ec613d0d10b3f2f7/cloud/providerid/providerid.go#L88 12 | func parseProviderID(providerID string) (string, string, string, error) { 13 | var project, zone, instanceName string 14 | if !strings.HasPrefix(providerID, providerid.Prefix) { 15 | return project, zone, instanceName, fmt.Errorf("provider ID does not have the expected prefix: %s", providerID) 16 | } 17 | tokens := strings.Split(strings.TrimPrefix(providerID, providerid.Prefix), "/") 18 | if len(tokens) != 3 { 19 | return project, zone, instanceName, fmt.Errorf("provider ID is not in the expected format: %s", providerID) 20 | } 21 | return tokens[0], tokens[1], tokens[2], nil 22 | } 23 | -------------------------------------------------------------------------------- /pkg/kubernetes/watch_test.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | corev1 "k8s.io/api/core/v1" 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | "k8s.io/apimachinery/pkg/watch" 10 | ) 11 | 12 | func TestParseWatchEventPodObject(t *testing.T) { 13 | event := watch.Event{ 14 | Object: &corev1.Pod{ 15 | ObjectMeta: metav1.ObjectMeta{ 16 | Name: "foo", 17 | Namespace: "bar", 18 | }, 19 | }, 20 | } 21 | pod, err := ParseWatchEventObject[*corev1.Pod](event) 22 | require.NoError(t, err) 23 | require.Equal(t, "foo", pod.ObjectMeta.Name) 24 | require.Equal(t, "bar", pod.ObjectMeta.Namespace) 25 | } 26 | 27 | func TestParseWatchEventErrorObject(t *testing.T) { 28 | event := watch.Event{ 29 | Type: watch.Error, 30 | Object: &metav1.Status{ 31 | Message: "message", 32 | }, 33 | } 34 | _, err := ParseWatchEventObject[*corev1.Pod](event) 35 | require.Error(t, err) 36 | require.Equal(t, "watch failed with error: message", err.Error()) 37 | } 38 | -------------------------------------------------------------------------------- /pkg/api/v1alpha1/types.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 5 | ) 6 | 7 | // +kubebuilder:object:root=true 8 | type CostManagerConfiguration struct { 9 | metav1.TypeMeta `json:",inline"` 10 | // TODO(dippynark): Support all generic controller fields: 11 | // https://github.com/kubernetes/controller-manager/blob/2a157ca0075be690e609881e5fdd3362cc62ecdc/config/v1alpha1/types.go#L24-L52 12 | Controllers []string `json:"controllers,omitempty"` 13 | CloudProvider CloudProvider `json:"cloudProvider"` 14 | SpotMigrator *SpotMigrator `json:"spotMigrator,omitempty"` 15 | PodSafeToEvictAnnotator *PodSafeToEvictAnnotator `json:"podSafeToEvictAnnotator,omitempty"` 16 | } 17 | 18 | type CloudProvider struct { 19 | Name string `json:"name"` 20 | } 21 | 22 | type SpotMigrator struct { 23 | MigrationSchedule *string `json:"migrationSchedule,omitempty"` 24 | } 25 | 26 | type PodSafeToEvictAnnotator struct { 27 | NamespaceSelector *metav1.LabelSelector `json:"namespaceSelector,omitempty"` 28 | } 29 | -------------------------------------------------------------------------------- /pkg/test/generate_test.go: -------------------------------------------------------------------------------- 1 | package test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestGenerateResourceName(t *testing.T) { 10 | require.Equal(t, 
"test-generate-resource-name", GenerateResourceName(t)) 11 | } 12 | 13 | func TestGenerateLongResourceNameTruncatedTo63CharactersXxxxx(t *testing.T) { 14 | actual := GenerateResourceName(t) 15 | require.Len(t, actual, 63) 16 | 17 | expected := "test-generate-long-resource-name-truncated-to63-characters-xxxx" 18 | require.Equal(t, expected, actual) 19 | } 20 | 21 | // The capital X at the end of the test name will cause the 63rd character of the resource name to 22 | // be a hyphen which gets truncated 23 | func TestGenerateLongResourceNameTruncatedTo62CharactersXxxX(t *testing.T) { 24 | actual := GenerateResourceName(t) 25 | require.Len(t, actual, 62) 26 | 27 | expected := "test-generate-long-resource-name-truncated-to62-characters-xxx" 28 | require.Equal(t, expected, actual) 29 | } 30 | 31 | func TestGenerateDeployment(t *testing.T) { 32 | _, err := GenerateDeployment("test", "test") 33 | require.NoError(t, err) 34 | } 35 | -------------------------------------------------------------------------------- /e2e/config/prometheus.yaml: -------------------------------------------------------------------------------- 1 | # https://github.com/prometheus-operator/prometheus-operator/tree/48d3604507e082f4187f39edda9bc22935881a14/example/rbac/prometheus 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: prometheus 6 | namespace: monitoring 7 | --- 8 | apiVersion: rbac.authorization.k8s.io/v1 9 | kind: ClusterRole 10 | metadata: 11 | name: prometheus 12 | rules: 13 | - apiGroups: 14 | - "" 15 | resources: 16 | - nodes 17 | - nodes/metrics 18 | - services 19 | - endpoints 20 | - pods 21 | verbs: 22 | - get 23 | - list 24 | - watch 25 | - apiGroups: 26 | - "" 27 | resources: 28 | - configmaps 29 | verbs: 30 | - get 31 | - apiGroups: 32 | - networking.k8s.io 33 | resources: 34 | - ingresses 35 | verbs: 36 | - get 37 | - list 38 | - watch 39 | - nonResourceURLs: 40 | - /metrics 41 | verbs: 42 | - get 43 | --- 44 | apiVersion: rbac.authorization.k8s.io/v1 45 | kind: ClusterRoleBinding 46 | metadata: 47 | name: prometheus 48 | roleRef: 49 | apiGroup: rbac.authorization.k8s.io 50 | kind: ClusterRole 51 | name: prometheus 52 | subjects: 53 | - kind: ServiceAccount 54 | name: prometheus 55 | namespace: monitoring 56 | --- 57 | apiVersion: monitoring.coreos.com/v1 58 | kind: Prometheus 59 | metadata: 60 | name: prometheus 61 | namespace: monitoring 62 | spec: 63 | serviceAccountName: prometheus 64 | scrapeInterval: 2s 65 | # Watch for all PrometheusRules and PodMonitors 66 | ruleSelector: {} 67 | ruleNamespaceSelector: {} 68 | podMonitorSelector: {} 69 | podMonitorNamespaceSelector: {} 70 | -------------------------------------------------------------------------------- /pkg/cloudprovider/cloud_provider.go: -------------------------------------------------------------------------------- 1 | package cloudprovider 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/hsbc/cost-manager/pkg/cloudprovider/fake" 8 | "github.com/hsbc/cost-manager/pkg/cloudprovider/gcp" 9 | corev1 "k8s.io/api/core/v1" 10 | ) 11 | 12 | const ( 13 | FakeCloudProviderName = "fake" 14 | GCPCloudProviderName = "gcp" 15 | ) 16 | 17 | // CloudProvider contains the functions for interacting with a cloud provider 18 | type CloudProvider interface { 19 | // IsSpotInstance determines whether the underlying instance of the Node is a spot instance 20 | IsSpotInstance(ctx context.Context, node *corev1.Node) (bool, error) 21 | // DeleteInstance should drain connections from external load balancers to the Node and then 22 | 
// delete the underlying instance. Implementations can assume that before this function is 23 | // called Pods have already been drained from the Node and it has been tainted with 24 | // ToBeDeletedByClusterAutoscaler to fail kube-proxy health checks as described in KEP-3836: 25 | // https://github.com/kubernetes/enhancements/tree/27ef0d9a740ae5058472aac4763483f0e7218c0e/keps/sig-network/3836-kube-proxy-improved-ingress-connectivity-reliability 26 | DeleteInstance(ctx context.Context, node *corev1.Node) error 27 | } 28 | 29 | // NewCloudProvider returns a new CloudProvider instance 30 | func NewCloudProvider(ctx context.Context, cloudProviderName string) (CloudProvider, error) { 31 | switch cloudProviderName { 32 | case FakeCloudProviderName: 33 | return &fake.CloudProvider{}, nil 34 | case GCPCloudProviderName: 35 | return gcp.NewCloudProvider(ctx) 36 | default: 37 | return nil, fmt.Errorf("unknown cloud provider: %s", cloudProviderName) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /ROADMAP.md: -------------------------------------------------------------------------------- 1 | # Roadmap 2 | 3 | cost-manager does not currently have a well-defined roadmap, however here we describe some 4 | Kubernetes cost optimisations that could be automated as cost-manager controllers in the future: 5 | 6 | - Generating VerticalPodAutoscalers: The [Vertical Pod Autoscaler 7 | (VPA)](https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler) automatically 8 | adjusts Kubernetes Pod resource requests based on actual usage and can help reduce 9 | over-provisioning. By automatically generating VPA resources for all cluster workloads (being 10 | careful to consider the [limitations](https://github.com/kubernetes/autoscaler/issues/6247) when 11 | using VPA together with HPA) operators can make sure that workloads are only requesting the 12 | resources that they need 13 | - Garbage collecting disks and load balancing infrastructure: PersistentVolumeClaims and Services 14 | can be used to automatically provision cloud resources, however if the cluster is deleted without 15 | first deleting these resources then the cloud resources can become orphaned. By using metadata on 16 | these resources, a controller can be used to automatically detect orphaned resources provisioned 17 | by Kubernetes and clean them up to save costs 18 | - Scheduled cluster scale down: There are many cases where cluster workloads do not need to be 19 | running all the time (e.g. CI infrastructure or development clusters). On a schedule, 20 | [ResourceQuotas](https://kubernetes.io/docs/concepts/policy/resource-quotas/) can be used to limit 21 | Pods in all Namespaces (except for the cost-manager Namespace) and then all Pods deleted to allow 22 | the cluster to scale down. 
To scale back up, the ResourceQuotas can simply be deleted 23 | -------------------------------------------------------------------------------- /pkg/kubernetes/deployment.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "context" 5 | 6 | appsv1 "k8s.io/api/apps/v1" 7 | apiwatch "k8s.io/apimachinery/pkg/watch" 8 | "k8s.io/client-go/tools/watch" 9 | "sigs.k8s.io/controller-runtime/pkg/client" 10 | ) 11 | 12 | func WaitUntilDeploymentAvailable(ctx context.Context, kubeClient client.WithWatch, deploymentNamespace, deploymentName string) error { 13 | listerWatcher := NewListerWatcher(ctx, kubeClient, &appsv1.DeploymentList{}, client.InNamespace(deploymentNamespace)) 14 | condition := func(event apiwatch.Event) (bool, error) { 15 | deployment, err := ParseWatchEventObject[*appsv1.Deployment](event) 16 | if err != nil { 17 | return false, err 18 | } 19 | return deployment.Name == deploymentName && 20 | deployment.Status.AvailableReplicas > 0 && 21 | deployment.Generation == deployment.Status.ObservedGeneration, nil 22 | } 23 | _, err := watch.UntilWithSync(ctx, listerWatcher, &appsv1.Deployment{}, nil, condition) 24 | return err 25 | } 26 | 27 | func WaitUntilDeploymentUnavailable(ctx context.Context, kubeClient client.WithWatch, deploymentNamespace, deploymentName string) error { 28 | listerWatcher := NewListerWatcher(ctx, kubeClient, &appsv1.DeploymentList{}, client.InNamespace(deploymentNamespace)) 29 | condition := func(event apiwatch.Event) (bool, error) { 30 | deployment, err := ParseWatchEventObject[*appsv1.Deployment](event) 31 | if err != nil { 32 | return false, err 33 | } 34 | return deployment.Name == deploymentName && 35 | deployment.Status.AvailableReplicas == 0 && 36 | deployment.Generation == deployment.Status.ObservedGeneration, nil 37 | } 38 | _, err := watch.UntilWithSync(ctx, listerWatcher, &appsv1.Deployment{}, nil, condition) 39 | return err 40 | } 41 | -------------------------------------------------------------------------------- /e2e/pod_safe_to_evict_annotator_test.go: -------------------------------------------------------------------------------- 1 | package e2e 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | "time" 7 | 8 | "github.com/hsbc/cost-manager/pkg/kubernetes" 9 | "github.com/stretchr/testify/require" 10 | corev1 "k8s.io/api/core/v1" 11 | "sigs.k8s.io/controller-runtime/pkg/client" 12 | ) 13 | 14 | // TestPodSafeToEvictAnnotator tests that pod-safe-to-evict-annotator correctly annotates all Pods 15 | func TestPodSafeToEvictAnnotator(t *testing.T) { 16 | t.Parallel() 17 | 18 | ctx := context.Background() 19 | 20 | kubeClient, _, err := kubernetes.NewClient() 21 | require.NoError(t, err) 22 | 23 | // Wait until all Pods have expected safe-to-evict annotation 24 | for { 25 | success, err := allPodsHaveExpectedSafeToEvictAnnotation(ctx, kubeClient) 26 | require.NoError(t, err) 27 | if success { 28 | // Make sure condition still holds after 2 seconds 29 | time.Sleep(2 * time.Second) 30 | stillSuccess, err := allPodsHaveExpectedSafeToEvictAnnotation(ctx, kubeClient) 31 | require.NoError(t, err) 32 | require.True(t, stillSuccess) 33 | break 34 | } 35 | time.Sleep(time.Second) 36 | } 37 | } 38 | 39 | func allPodsHaveExpectedSafeToEvictAnnotation(ctx context.Context, kubeClient client.WithWatch) (bool, error) { 40 | podList := &corev1.PodList{} 41 | err := kubeClient.List(ctx, podList) 42 | if err != nil { 43 | return false, err 44 | } 45 | for _, pod := range podList.Items { 46 | // 
kube-system Pods should have the annotation... 47 | if pod.Namespace == "kube-system" && !hasSafeToEvictAnnotation(&pod) { 48 | return false, nil 49 | } 50 | // ...all other Pods should not have the annotation 51 | if pod.Namespace != "kube-system" && hasSafeToEvictAnnotation(&pod) { 52 | return false, nil 53 | } 54 | } 55 | return true, nil 56 | } 57 | 58 | func hasSafeToEvictAnnotation(pod *corev1.Pod) bool { 59 | if pod.Annotations == nil { 60 | return false 61 | } 62 | value, ok := pod.Annotations["cluster-autoscaler.kubernetes.io/safe-to-evict"] 63 | if ok && value == "true" { 64 | return true 65 | } 66 | return false 67 | } 68 | -------------------------------------------------------------------------------- /pkg/test/generate.go: -------------------------------------------------------------------------------- 1 | package test 2 | 3 | import ( 4 | "fmt" 5 | "regexp" 6 | "strings" 7 | "testing" 8 | 9 | appsv1 "k8s.io/api/apps/v1" 10 | "k8s.io/kubectl/pkg/scheme" 11 | ) 12 | 13 | const ( 14 | maxResourceNameLength = 63 15 | ) 16 | 17 | // GenerateResourceName generates a Kubernetes resource name from the name of the test; in 18 | // particular this can be used to generate a unique Namespace name for each test that requires one 19 | func GenerateResourceName(t *testing.T) string { 20 | r := regexp.MustCompile("[A-Z]+") 21 | resourceName := r.ReplaceAllStringFunc(t.Name(), func(s string) string { 22 | if len(s) > 1 { 23 | s = s[:len(s)-1] + "-" + s[len(s)-1:] 24 | } 25 | return "-" + strings.ToLower(s) 26 | }) 27 | resourceName = strings.TrimPrefix(resourceName, "-") 28 | // Truncate string to avoid: metadata.name: Invalid value: must be no more than 63 characters 29 | if len(resourceName) > maxResourceNameLength { 30 | resourceName = resourceName[:maxResourceNameLength] 31 | resourceName = strings.TrimSuffix(resourceName, "-") 32 | } 33 | return resourceName 34 | } 35 | 36 | // GenerateDeployment generates a Deployment used for testing 37 | func GenerateDeployment(deploymentNamespaceName, deploymentName string) (*appsv1.Deployment, error) { 38 | deployment := &appsv1.Deployment{} 39 | deploymentManifest := fmt.Sprintf(` 40 | apiVersion: apps/v1 41 | kind: Deployment 42 | metadata: 43 | name: %s 44 | namespace: %s 45 | spec: 46 | replicas: 1 47 | selector: 48 | matchLabels: 49 | app.kubernetes.io/name: %s 50 | template: 51 | metadata: 52 | labels: 53 | app.kubernetes.io/name: %s 54 | spec: 55 | terminationGracePeriodSeconds: 1 56 | containers: 57 | - name: %s 58 | image: nginx 59 | command: 60 | - /usr/bin/tail 61 | - -f`, deploymentName, deploymentNamespaceName, deploymentName, deploymentName, deploymentName) 62 | _, _, err := scheme.Codecs.UniversalDeserializer().Decode([]byte(deploymentManifest), nil, deployment) 63 | if err != nil { 64 | return nil, err 65 | } 66 | return deployment, nil 67 | } 68 | -------------------------------------------------------------------------------- /charts/cost-manager/templates/prometheus-rule.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.prometheusRule.enabled }} 2 | apiVersion: monitoring.coreos.com/v1 3 | kind: PrometheusRule 4 | metadata: 5 | name: cost-manager 6 | namespace: {{ .Release.Namespace }} 7 | spec: 8 | groups: 9 | - name: CostManager 10 | rules: 11 | - alert: CostManagerDown 12 | expr: absent(up{job="cost-manager", namespace="{{ .Release.Namespace }}"} == 1) 13 | for: 10m 14 | {{- if .Values.prometheusRule.groups.rules.labels }} 15 | labels: 16 | {{ 
.Values.prometheusRule.groups.rules.labels | toYaml | indent 8 }} 17 | {{- end }} 18 | annotations: 19 | message: cost-manager is not running or has been missing metrics for more than 10 minutes 20 | {{- if .Values.prometheusRule.groups.rules.annotations }} 21 | {{ .Values.prometheusRule.groups.rules.annotations | toYaml | indent 8 }} 22 | {{- end }} 23 | - alert: CostManagerReconciliationErrors 24 | expr: sum(rate(controller_runtime_reconcile_errors_total{job="cost-manager", namespace="{{ .Release.Namespace }}"}[5m])) > 0 25 | for: 10m 26 | {{- if .Values.prometheusRule.groups.rules.labels }} 27 | labels: 28 | {{ .Values.prometheusRule.groups.rules.labels | toYaml | indent 8 }} 29 | {{- end }} 30 | annotations: 31 | message: cost-manager has had reconciliation errors for more than 10 minutes 32 | {{- if .Values.prometheusRule.groups.rules.annotations }} 33 | {{ .Values.prometheusRule.groups.rules.annotations | toYaml | indent 8 }} 34 | {{- end }} 35 | - alert: CostManagerSpotMigratorFailing 36 | expr: sum(rate(cost_manager_spot_migrator_operation_success_total{job="cost-manager", namespace="{{ .Release.Namespace }}"}[2h])) == 0 37 | for: 2h 38 | {{- if .Values.prometheusRule.groups.rules.labels }} 39 | labels: 40 | {{ .Values.prometheusRule.groups.rules.labels | toYaml | indent 8 }} 41 | {{- end }} 42 | annotations: 43 | message: cost-manager spot-migrator has been failing to run for more than 2 hours 44 | {{- if .Values.prometheusRule.groups.rules.annotations }} 45 | {{ .Values.prometheusRule.groups.rules.annotations | toYaml | indent 8 }} 46 | {{- end }} 47 | {{- end }} 48 | -------------------------------------------------------------------------------- /pkg/controller/controller_test.go: -------------------------------------------------------------------------------- 1 | package controller 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/hsbc/cost-manager/pkg/api/v1alpha1" 8 | "github.com/hsbc/cost-manager/pkg/cloudprovider" 9 | "github.com/hsbc/cost-manager/pkg/kubernetes" 10 | "github.com/stretchr/testify/require" 11 | "k8s.io/client-go/rest" 12 | "knative.dev/pkg/ptr" 13 | ctrl "sigs.k8s.io/controller-runtime" 14 | "sigs.k8s.io/controller-runtime/pkg/config" 15 | "sigs.k8s.io/controller-runtime/pkg/manager" 16 | ) 17 | 18 | func TestSetupWithManager(t *testing.T) { 19 | tests := map[string]struct { 20 | config *v1alpha1.CostManagerConfiguration 21 | shouldSucceed bool 22 | }{ 23 | "allControllers": { 24 | config: &v1alpha1.CostManagerConfiguration{ 25 | CloudProvider: v1alpha1.CloudProvider{ 26 | Name: cloudprovider.FakeCloudProviderName, 27 | }, 28 | Controllers: AllControllerNames, 29 | }, 30 | shouldSucceed: true, 31 | }, 32 | "allControllersWithoutCloudProvider": { 33 | config: &v1alpha1.CostManagerConfiguration{ 34 | Controllers: AllControllerNames, 35 | }, 36 | shouldSucceed: false, 37 | }, 38 | "withoutCloudProvider": { 39 | // Configure a controller that does not interact with the cloud provider 40 | config: &v1alpha1.CostManagerConfiguration{ 41 | Controllers: []string{podSafeToEvictAnnotatorControllerName}, 42 | }, 43 | shouldSucceed: true, 44 | }, 45 | } 46 | for name, test := range tests { 47 | t.Run(name, func(t *testing.T) { 48 | // Create manager 49 | scheme, err := kubernetes.NewScheme() 50 | require.NoError(t, err) 51 | mgr, err := ctrl.NewManager(&rest.Config{}, manager.Options{ 52 | Scheme: scheme, 53 | Controller: config.Controller{ 54 | // Required during testing to avoid validation errors: 55 | // 
https://github.com/kubernetes-sigs/controller-runtime/issues/2937#issuecomment-2325280073 56 | SkipNameValidation: ptr.Bool(true), 57 | }, 58 | }) 59 | require.NoError(t, err) 60 | 61 | // Setup manager... 62 | err = SetupWithManager(context.Background(), mgr, test.config) 63 | // ...and verify success 64 | if test.shouldSucceed { 65 | require.NoError(t, err) 66 | } else { 67 | require.Error(t, err) 68 | } 69 | }) 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /pkg/config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "os" 7 | "slices" 8 | 9 | "github.com/hsbc/cost-manager/pkg/api/v1alpha1" 10 | "github.com/hsbc/cost-manager/pkg/controller" 11 | "k8s.io/apimachinery/pkg/runtime" 12 | "k8s.io/apimachinery/pkg/runtime/serializer" 13 | ) 14 | 15 | func Load(configFilePath string) (*v1alpha1.CostManagerConfiguration, error) { 16 | if configFilePath == "" { 17 | return nil, errors.New("configuration file not specified") 18 | } 19 | 20 | configData, err := os.ReadFile(configFilePath) 21 | if err != nil { 22 | return nil, fmt.Errorf("failed to read configuration file: %s", err) 23 | } 24 | 25 | config, err := decode(configData) 26 | if err != nil { 27 | return config, fmt.Errorf("failed to decode configuration: %s", err) 28 | } 29 | 30 | err = validate(config) 31 | if err != nil { 32 | return config, fmt.Errorf("failed to validate configuration: %s", err) 33 | } 34 | 35 | return config, nil 36 | } 37 | 38 | func decode(configData []byte) (*v1alpha1.CostManagerConfiguration, error) { 39 | config := &v1alpha1.CostManagerConfiguration{} 40 | 41 | // We enable strict decoding to ensure that we do not accept unknown fields 42 | codecFactory := serializer.NewCodecFactory(runtime.NewScheme(), serializer.EnableStrict) 43 | 44 | decoder := codecFactory.UniversalDecoder(v1alpha1.SchemeGroupVersion) 45 | err := runtime.DecodeInto(decoder, configData, config) 46 | if err != nil { 47 | return nil, fmt.Errorf("failed to decode configuration: %s", err) 48 | } 49 | 50 | // Verify that the API version and kind are what we expect 51 | if config.APIVersion != v1alpha1.SchemeGroupVersion.String() { 52 | return nil, fmt.Errorf("invalid API version: %s", config.APIVersion) 53 | } 54 | if config.Kind != "CostManagerConfiguration" { 55 | return nil, fmt.Errorf("invalid kind: %s", config.Kind) 56 | } 57 | 58 | return config, nil 59 | } 60 | 61 | func validate(config *v1alpha1.CostManagerConfiguration) error { 62 | // Ensure that all enabled controllers are known 63 | for _, controllerName := range config.Controllers { 64 | if !slices.Contains(controller.AllControllerNames, controllerName) { 65 | return fmt.Errorf("unknown controller: %s", controllerName) 66 | } 67 | } 68 | 69 | return nil 70 | } 71 | -------------------------------------------------------------------------------- /charts/cost-manager/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: cost-manager 5 | namespace: {{ .Release.Namespace }} 6 | spec: 7 | replicas: 1 8 | strategy: 9 | type: Recreate 10 | selector: 11 | matchLabels: 12 | app.kubernetes.io/name: cost-manager 13 | template: 14 | metadata: 15 | labels: 16 | app.kubernetes.io/name: cost-manager 17 | annotations: 18 | prometheus.io/scrape: "true" 19 | prometheus.io/port: "8080" 20 | prometheus.io/path: "/metrics" 21 | 
checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} 22 | spec: 23 | serviceAccount: cost-manager 24 | securityContext: 25 | fsGroup: 1000 26 | containers: 27 | - name: cost-manager 28 | {{- if .Values.image.tag }} 29 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 30 | {{- else }} 31 | image: "{{ .Values.image.repository }}" 32 | {{- end }} 33 | imagePullPolicy: {{ .Values.image.pullPolicy }} 34 | args: 35 | - --config=/config/config.yaml 36 | env: 37 | # Add Node name environment variable to allow spot-migrator to identify the Node it is 38 | # running on and reduce the chance of draining itself: 39 | # https://kubernetes.io/docs/tasks/inject-data-application/environment-variable-expose-pod-information/#use-pod-fields-as-values-for-environment-variables 40 | - name: NODE_NAME 41 | valueFrom: 42 | fieldRef: 43 | fieldPath: spec.nodeName 44 | resources: 45 | requests: 46 | cpu: 10m 47 | memory: 50Mi 48 | ports: 49 | - name: metrics 50 | containerPort: 8080 51 | securityContext: 52 | seccompProfile: 53 | type: RuntimeDefault 54 | readOnlyRootFilesystem: true 55 | runAsUser: 1000 56 | allowPrivilegeEscalation: false 57 | capabilities: 58 | drop: 59 | - all 60 | volumeMounts: 61 | - name: config 62 | mountPath: /config 63 | volumes: 64 | - name: config 65 | configMap: 66 | name: cost-manager 67 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "os" 6 | 7 | costmanagerconfig "github.com/hsbc/cost-manager/pkg/config" 8 | "github.com/hsbc/cost-manager/pkg/controller" 9 | "github.com/hsbc/cost-manager/pkg/kubernetes" 10 | _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" 11 | ctrl "sigs.k8s.io/controller-runtime" 12 | "sigs.k8s.io/controller-runtime/pkg/client/config" 13 | "sigs.k8s.io/controller-runtime/pkg/log" 14 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 15 | "sigs.k8s.io/controller-runtime/pkg/manager" 16 | "sigs.k8s.io/controller-runtime/pkg/manager/signals" 17 | ) 18 | 19 | func init() { 20 | log.SetLogger(zap.New()) 21 | } 22 | 23 | const ( 24 | costManagerName = "cost-manager" 25 | ) 26 | 27 | func main() { 28 | // Parse flags 29 | costManagerConfigFilePath := flag.String("config", "", "Configuration file path") 30 | flag.Parse() 31 | 32 | // Create signal handling context and logger 33 | ctx := signals.SetupSignalHandler() 34 | logger := log.FromContext(ctx).WithName(costManagerName) 35 | ctx = log.IntoContext(ctx, logger) 36 | 37 | // Load configuration 38 | logger.Info("Loading configuration") 39 | costManagerConfig, err := costmanagerconfig.Load(*costManagerConfigFilePath) 40 | if err != nil { 41 | logger.Error(err, "failed to load configuration") 42 | os.Exit(1) 43 | } 44 | 45 | // Create new scheme 46 | scheme, err := kubernetes.NewScheme() 47 | if err != nil { 48 | logger.Error(err, "failed to create new scheme") 49 | os.Exit(1) 50 | } 51 | 52 | // Setup controller manager 53 | logger.Info("Setting up controller manager") 54 | restConfig := config.GetConfigOrDie() 55 | // Disable client-side rate-limiting: https://github.com/kubernetes/kubernetes/issues/111880 56 | restConfig.QPS = -1 57 | mgr, err := ctrl.NewManager(restConfig, manager.Options{Scheme: scheme}) 58 | if err != nil { 59 | logger.Error(err, "failed to setup controller manager") 60 | os.Exit(1) 61 | } 62 | 63 | // Setup controllers 64 | err = controller.SetupWithManager(ctx, mgr, 
costManagerConfig) 65 | if err != nil { 66 | logger.Error(err, "failed to setup controllers with manager") 67 | os.Exit(1) 68 | } 69 | 70 | // Start controller manager 71 | logger.Info("Starting controller manager") 72 | err = mgr.Start(ctx) 73 | if err != nil { 74 | logger.Error(err, "failed to start controller manager") 75 | os.Exit(1) 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /pkg/controller/controller.go: -------------------------------------------------------------------------------- 1 | package controller 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/hsbc/cost-manager/pkg/api/v1alpha1" 7 | "github.com/hsbc/cost-manager/pkg/cloudprovider" 8 | "github.com/pkg/errors" 9 | "k8s.io/apimachinery/pkg/util/sets" 10 | "k8s.io/client-go/kubernetes" 11 | "k8s.io/controller-manager/app" 12 | ctrl "sigs.k8s.io/controller-runtime" 13 | ) 14 | 15 | var ( 16 | // The following link describes how controller names should be treated: 17 | // https://github.com/kubernetes/cloud-provider/blob/30270693811ff7d3c4646509eed7efd659332e72/names/controller_names.go 18 | AllControllerNames = []string{ 19 | spotMigratorControllerName, 20 | podSafeToEvictAnnotatorControllerName, 21 | } 22 | // All controllers are disabled by default 23 | disabledByDefaultControllerNames = sets.NewString(AllControllerNames...) 24 | ) 25 | 26 | // SetupWithManager sets up the controllers with the manager 27 | func SetupWithManager(ctx context.Context, mgr ctrl.Manager, config *v1alpha1.CostManagerConfiguration) error { 28 | // Create clientset 29 | clientset, err := kubernetes.NewForConfig(mgr.GetConfig()) 30 | if err != nil { 31 | return errors.Wrapf(err, "failed to create clientset") 32 | } 33 | 34 | // Setup controllers 35 | for _, controllerName := range AllControllerNames { 36 | if app.IsControllerEnabled(controllerName, disabledByDefaultControllerNames, config.Controllers) { 37 | switch controllerName { 38 | case spotMigratorControllerName: 39 | // Instantiate cloud provider 40 | cloudProvider, err := cloudprovider.NewCloudProvider(ctx, config.CloudProvider.Name) 41 | if err != nil { 42 | return errors.Wrapf(err, "failed to instantiate cloud provider") 43 | } 44 | err = mgr.Add(&spotMigrator{ 45 | Config: config.SpotMigrator, 46 | Clientset: clientset, 47 | CloudProvider: cloudProvider, 48 | }) 49 | if err != nil { 50 | return errors.Wrapf(err, "failed to setup %s", spotMigratorControllerName) 51 | } 52 | case podSafeToEvictAnnotatorControllerName: 53 | err := (&podSafeToEvictAnnotator{ 54 | Config: config.PodSafeToEvictAnnotator, 55 | Client: mgr.GetClient(), 56 | }).SetupWithManager(mgr) 57 | if err != nil { 58 | return errors.Wrapf(err, "failed to setup %s", podSafeToEvictAnnotatorControllerName) 59 | } 60 | default: 61 | return errors.Errorf("unknown controller: %s", controllerName) 62 | } 63 | } 64 | } 65 | 66 | return nil 67 | } 68 | -------------------------------------------------------------------------------- /pkg/kubernetes/watch.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | "k8s.io/apimachinery/pkg/runtime" 10 | "k8s.io/apimachinery/pkg/watch" 11 | "k8s.io/client-go/tools/cache" 12 | "sigs.k8s.io/controller-runtime/pkg/client" 13 | ) 14 | 15 | func NewWatcher(ctx context.Context, kubeClient client.WithWatch, objectList client.ObjectList, opts ...client.ListOption) cache.Watcher { 16 
| watchFunc := func(options metav1.ListOptions) (watch.Interface, error) { 17 | listOptions := &client.ListOptions{Raw: &options} 18 | // Apply default options to caller options 19 | listOptions.ApplyOptions(opts) 20 | return kubeClient.Watch(ctx, objectList, listOptions) 21 | } 22 | return &cache.ListWatch{ 23 | WatchFunc: watchFunc, 24 | } 25 | } 26 | 27 | func NewListerWatcher(ctx context.Context, kubeClient client.WithWatch, objectList client.ObjectList, opts ...client.ListOption) cache.ListerWatcher { 28 | listFunc := func(options metav1.ListOptions) (runtime.Object, error) { 29 | listOptions := &client.ListOptions{Raw: &options} 30 | // Apply default options to caller options 31 | listOptions.ApplyOptions(opts) 32 | err := kubeClient.List(ctx, objectList, listOptions) 33 | if err != nil { 34 | return nil, err 35 | } 36 | return objectList, nil 37 | } 38 | watchFunc := func(options metav1.ListOptions) (watch.Interface, error) { 39 | listOptions := &client.ListOptions{Raw: &options} 40 | // Apply default options to caller options 41 | listOptions.ApplyOptions(opts) 42 | return kubeClient.Watch(ctx, objectList, listOptions) 43 | } 44 | return &cache.ListWatch{ 45 | ListFunc: listFunc, 46 | WatchFunc: watchFunc, 47 | } 48 | } 49 | 50 | // ParseWatchEventObject determines if the specified watch event is an error and if so returns an 51 | // error and otherwise asserts and returns an object of the expected type 52 | func ParseWatchEventObject[T runtime.Object](event watch.Event) (T, error) { 53 | var runtimeObject T 54 | if event.Type == watch.Error { 55 | if status, ok := event.Object.(*metav1.Status); ok { 56 | return runtimeObject, fmt.Errorf("watch failed with error: %s", status.Message) 57 | } 58 | return runtimeObject, fmt.Errorf("watch failed with error: %+v", event.Object) 59 | } 60 | var ok bool 61 | runtimeObject, ok = event.Object.(T) 62 | if !ok { 63 | return runtimeObject, errors.New("failed to type assert runtime object") 64 | } 65 | return runtimeObject, nil 66 | } 67 | -------------------------------------------------------------------------------- /pkg/cloudprovider/gcp/compute.go: -------------------------------------------------------------------------------- 1 | package gcp 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "time" 7 | 8 | "google.golang.org/api/compute/v1" 9 | ) 10 | 11 | const ( 12 | operationPollInterval = 10 * time.Second 13 | ) 14 | 15 | // getManagedInstanceGroupFromInstance determines the managed instance group that created the 16 | // instance; instances created by managed instance groups should have a metadata label with key 17 | // `created-by` and a value of the form: 18 | // projects/[PROJECT_ID]/zones/[ZONE]/instanceGroupManagers/[INSTANCE_GROUP_MANAGER_NAME]: 19 | // https://cloud.google.com/compute/docs/instance-groups/getting-info-about-migs#checking_if_a_vm_instance_is_part_of_a_mig 20 | func getManagedInstanceGroupFromInstance(instance *compute.Instance) (string, error) { 21 | if instance.Metadata != nil { 22 | for _, item := range instance.Metadata.Items { 23 | if item != nil && item.Key == "created-by" && item.Value != nil { 24 | createdBy := *item.Value 25 | tokens := strings.Split(createdBy, "/") 26 | if len(tokens) > 2 && tokens[len(tokens)-2] == "instanceGroupManagers" { 27 | return tokens[len(tokens)-1], nil 28 | } 29 | } 30 | } 31 | } 32 | return "", fmt.Errorf("failed to determine managed instance group for instance %s", instance.Name) 33 | } 34 | 35 | func (gcp *CloudProvider) waitForManagedInstanceGroupStability(project, zone, 
managedInstanceGroupName string) error { 36 | for { 37 | r, err := gcp.computeService.InstanceGroupManagers.Get(project, zone, managedInstanceGroupName).Do() 38 | if err != nil { 39 | return err 40 | } 41 | if r.Status != nil && r.Status.IsStable { 42 | return nil 43 | } 44 | time.Sleep(operationPollInterval) 45 | } 46 | } 47 | 48 | func (gcp *CloudProvider) waitForZonalComputeOperation(project, zone, operationName string) error { 49 | return waitForComputeOperation(func() (*compute.Operation, error) { 50 | return gcp.computeService.ZoneOperations.Get(project, zone, operationName).Do() 51 | }) 52 | } 53 | 54 | func waitForComputeOperation(getOperation func() (*compute.Operation, error)) error { 55 | for { 56 | operation, err := getOperation() 57 | if err != nil { 58 | return err 59 | } 60 | if operation.Status == "DONE" { 61 | if operation.Error != nil { 62 | var operationErrorErrors []string 63 | for _, operationErrorError := range operation.Error.Errors { 64 | operationErrorErrors = append(operationErrorErrors, operationErrorError.Message) 65 | } 66 | return fmt.Errorf("compute operation failed with errors: %s", strings.Join(operationErrorErrors, ", ")) 67 | } 68 | return nil 69 | } 70 | time.Sleep(operationPollInterval) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /pkg/kubernetes/drain.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "context" 5 | "io" 6 | "time" 7 | 8 | "github.com/pkg/errors" 9 | corev1 "k8s.io/api/core/v1" 10 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 11 | apiwatch "k8s.io/apimachinery/pkg/watch" 12 | kubernetes "k8s.io/client-go/kubernetes" 13 | "k8s.io/client-go/tools/cache" 14 | "k8s.io/client-go/tools/watch" 15 | "k8s.io/kubectl/pkg/drain" 16 | ) 17 | 18 | const ( 19 | // We match our Node drain timeout with GKE: 20 | // https://cloud.google.com/kubernetes-engine/docs/concepts/node-pools#drain 21 | nodeDrainTimeout = time.Hour 22 | ) 23 | 24 | // DrainNode uses the default drain implementation to drain the Node: 25 | // https://github.com/kubernetes/kubectl/blob/3ec401449e5821ad954942c7ecec9d2c90ecaaa1/pkg/drain/default.go 26 | func DrainNode(ctx context.Context, clientset kubernetes.Interface, node *corev1.Node) error { 27 | // https://github.com/kubernetes/kubectl/blob/3ec401449e5821ad954942c7ecec9d2c90ecaaa1/pkg/cmd/drain/drain.go#L147-L160 28 | drainer := &drain.Helper{ 29 | Ctx: ctx, 30 | Client: clientset, 31 | Force: true, 32 | GracePeriodSeconds: -1, 33 | IgnoreAllDaemonSets: true, 34 | Timeout: nodeDrainTimeout, 35 | DeleteEmptyDirData: true, 36 | Out: io.Discard, 37 | ErrOut: io.Discard, 38 | } 39 | 40 | err := drain.RunCordonOrUncordon(drainer, node, true) 41 | if err != nil { 42 | return errors.Wrapf(err, "failed to cordon Node %s", node.Name) 43 | } 44 | 45 | err = drain.RunNodeDrain(drainer, node.Name) 46 | if err != nil { 47 | return errors.Wrapf(err, "failed to drain Node %s", node.Name) 48 | } 49 | 50 | return nil 51 | } 52 | 53 | func WaitForNodeToBeDeleted(ctx context.Context, clientset kubernetes.Interface, nodeName string) error { 54 | nodeList, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) 55 | if err != nil { 56 | return err 57 | } 58 | 59 | // Determine whether the Node has already been deleted 60 | nodeIsNotFound := true 61 | for _, node := range nodeList.Items { 62 | if node.Name == nodeName { 63 | nodeIsNotFound = false 64 | break 65 | } 66 | } 67 | if nodeIsNotFound { 68 | 
return nil 69 | } 70 | 71 | // Wait for the Node to be deleted 72 | watcher := &cache.ListWatch{ 73 | WatchFunc: func(options metav1.ListOptions) (apiwatch.Interface, error) { 74 | return clientset.CoreV1().Nodes().Watch(ctx, options) 75 | }, 76 | } 77 | condition := func(event apiwatch.Event) (bool, error) { 78 | node, err := ParseWatchEventObject[*corev1.Node](event) 79 | if err != nil { 80 | return false, err 81 | } 82 | if event.Type == apiwatch.Deleted { 83 | if node.Name == nodeName { 84 | return true, nil 85 | } 86 | } 87 | return false, nil 88 | } 89 | 90 | _, err = watch.Until(ctx, nodeList.ResourceVersion, watcher, condition) 91 | return err 92 | } 93 | -------------------------------------------------------------------------------- /pkg/kubernetes/port_forward.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "net/http" 9 | "net/url" 10 | "strings" 11 | 12 | "github.com/hashicorp/go-multierror" 13 | "k8s.io/client-go/rest" 14 | "k8s.io/client-go/tools/portforward" 15 | "k8s.io/client-go/transport/spdy" 16 | ) 17 | 18 | // PortForward port forwards to the specified Pod. The forwarded port is a random available local 19 | // port which is returned as well as a function to stop the port forward when finished 20 | func PortForward(ctx context.Context, restConfig *rest.Config, podNamespace, podName string, port int) (uint16, func() error, error) { 21 | stopChan, readyChan, errChan := make(chan struct{}, 1), make(chan struct{}, 1), make(chan error, 1) 22 | forwarder, err := createForwarder(restConfig, stopChan, readyChan, podNamespace, podName, port) 23 | if err != nil { 24 | return 0, nil, err 25 | } 26 | go func() { 27 | errChan <- forwarder.ForwardPorts() 28 | }() 29 | // Wait for port forward to be ready or fail 30 | select { 31 | case <-readyChan: 32 | case err := <-errChan: 33 | if err != nil { 34 | return 0, nil, err 35 | } 36 | return 0, nil, errors.New("port forward finished") 37 | } 38 | // Create function for the caller to finish port forwarding 39 | stop := func() error { 40 | // Make sure any started listeners are stopped... 
41 | close(stopChan) 42 | // ...and wait for the port forward to finish 43 | return <-errChan 44 | } 45 | forwardedPorts, err := forwarder.GetPorts() 46 | if err != nil { 47 | return 0, nil, multierror.Append(err, stop()) 48 | } 49 | if len(forwardedPorts) != 1 { 50 | err := fmt.Errorf("unexpected number of forwarded ports: %d", len(forwardedPorts)) 51 | return 0, nil, multierror.Append(err, stop()) 52 | } 53 | return forwardedPorts[0].Local, stop, nil 54 | } 55 | 56 | func createForwarder(restConfig *rest.Config, stopChan, readyChan chan struct{}, podNamespace, podName string, port int) (*portforward.PortForwarder, error) { 57 | // Discard output to avoid race conditions 58 | out, errOut := io.Discard, io.Discard 59 | 60 | roundTripper, upgrader, err := spdy.RoundTripperFor(restConfig) 61 | if err != nil { 62 | return nil, err 63 | } 64 | 65 | path := fmt.Sprintf("/api/v1/namespaces/%s/pods/%s/portforward", podNamespace, podName) 66 | hostIP := strings.TrimLeft(restConfig.Host, "htps:/") 67 | serverURL := url.URL{Scheme: "https", Path: path, Host: hostIP} 68 | 69 | dialer := spdy.NewDialer(upgrader, &http.Client{Transport: roundTripper}, http.MethodPost, &serverURL) 70 | // Listen on a random available local port to avoid collisions: 71 | // https://github.com/kubernetes/client-go/blob/86d49e7265f07676cb39f342595a858b032112de/tools/portforward/portforward.go#L75 72 | forwarderPort := fmt.Sprintf(":%d", port) 73 | forwarder, err := portforward.New(dialer, []string{forwarderPort}, stopChan, readyChan, out, errOut) 74 | if err != nil { 75 | return nil, err 76 | } 77 | 78 | return forwarder, nil 79 | } 80 | -------------------------------------------------------------------------------- /hack/notes/notes.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | "os/exec" 8 | "strings" 9 | ) 10 | 11 | /* 12 | This tool generates release notes from the commits between two Git references 13 | */ 14 | 15 | var ( 16 | warnings = ":warning: Breaking Changes" 17 | features = ":sparkles: New Features" 18 | bugs = ":bug: Bug Fixes" 19 | documentation = ":book: Documentation" 20 | others = ":seedling: Others" 21 | unknown = ":question: Sort these by hand" 22 | 23 | outputOrder = []string{ 24 | warnings, 25 | features, 26 | bugs, 27 | documentation, 28 | others, 29 | unknown, 30 | } 31 | ) 32 | 33 | func main() { 34 | from := flag.String("from", "", "Include commits starting from this Git reference") 35 | to := flag.String("to", "", "Include commits up to and including this Git reference. 
Defaults to HEAD") 36 | flag.Parse() 37 | 38 | err := run(*from, *to) 39 | if err != nil { 40 | os.Exit(1) 41 | } 42 | } 43 | 44 | func run(from, to string) error { 45 | if to == "" { 46 | to = "HEAD" 47 | } 48 | if from == "" { 49 | var err error 50 | from, err = previousTag(to) 51 | if err != nil { 52 | return err 53 | } 54 | } 55 | 56 | //#nosec G204 57 | cmd := exec.Command("git", "rev-list", fmt.Sprintf("%s..%s", from, to), "--pretty=format:%B") 58 | out, err := cmd.CombinedOutput() 59 | if err != nil { 60 | return err 61 | } 62 | 63 | commits := map[string][]string{} 64 | for _, output := range outputOrder { 65 | commits[output] = []string{} 66 | } 67 | outLines := strings.Split(string(out), "\n") 68 | for i, line := range outLines { 69 | // If we have found a commit then we pick the next line 70 | if !strings.HasPrefix(line, "commit ") { 71 | continue 72 | } 73 | title := outLines[i+1] 74 | var key string 75 | switch { 76 | case strings.HasPrefix(title, "⚠️"): 77 | key = warnings 78 | title = strings.TrimPrefix(title, "⚠️") 79 | case strings.HasPrefix(title, "✨"): 80 | key = features 81 | title = strings.TrimPrefix(title, "✨") 82 | case strings.HasPrefix(title, "🐛"): 83 | key = bugs 84 | title = strings.TrimPrefix(title, "🐛") 85 | case strings.HasPrefix(title, "📖"): 86 | key = documentation 87 | title = strings.TrimPrefix(title, "📖") 88 | case strings.HasPrefix(title, "🌱"): 89 | key = others 90 | title = strings.TrimPrefix(title, "🌱") 91 | default: 92 | key = unknown 93 | } 94 | title = strings.TrimSpace(title) 95 | commits[key] = append(commits[key], title) 96 | } 97 | 98 | fmt.Printf("## Changes since [%s](https://github.com/hsbc/cost-manager/releases/%s)\n\n", from, from) 99 | for _, key := range outputOrder { 100 | commits := commits[key] 101 | if len(commits) == 0 { 102 | continue 103 | } 104 | 105 | fmt.Printf("### %s\n\n", key) 106 | for _, commit := range commits { 107 | fmt.Printf("- %s\n", commit) 108 | } 109 | fmt.Println() 110 | } 111 | 112 | return nil 113 | } 114 | 115 | func previousTag(to string) (string, error) { 116 | //#nosec G204 117 | cmd := exec.Command("git", "describe", "--abbrev=0", "--tags", fmt.Sprintf("%s^", to)) 118 | out, err := cmd.CombinedOutput() 119 | if err != nil { 120 | return "", err 121 | } 122 | return strings.TrimSpace(string(out)), nil 123 | } 124 | -------------------------------------------------------------------------------- /pkg/kubernetes/labels_test.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 8 | ) 9 | 10 | func TestMatchesLabels(t *testing.T) { 11 | tests := map[string]struct { 12 | selector *metav1.LabelSelector 13 | labels map[string]string 14 | shouldMatch bool 15 | }{ 16 | "nilSelectorNilLabels": { 17 | selector: nil, 18 | labels: nil, 19 | shouldMatch: true, 20 | }, 21 | "emptySelectorNilLabels": { 22 | selector: &metav1.LabelSelector{}, 23 | labels: nil, 24 | shouldMatch: true, 25 | }, 26 | "nilSelectorEmptyLabels": { 27 | selector: nil, 28 | labels: map[string]string{}, 29 | shouldMatch: true, 30 | }, 31 | "emptySelectorEmptyLabels": { 32 | selector: &metav1.LabelSelector{}, 33 | labels: map[string]string{}, 34 | shouldMatch: true, 35 | }, 36 | "nilSelectorNonEmptyLabels": { 37 | selector: nil, 38 | labels: map[string]string{ 39 | "kubernetes.io/metadata.name": "kube-system", 40 | }, 41 | shouldMatch: true, 42 | }, 43 | 
"emptySelectorNonEmptyLabels": { 44 | selector: &metav1.LabelSelector{}, 45 | labels: map[string]string{ 46 | "kubernetes.io/metadata.name": "kube-system", 47 | }, 48 | shouldMatch: true, 49 | }, 50 | "nameSelectorDoesMatchNameLabel": { 51 | selector: &metav1.LabelSelector{ 52 | MatchExpressions: []metav1.LabelSelectorRequirement{ 53 | { 54 | Key: "kubernetes.io/metadata.name", 55 | Operator: "In", 56 | Values: []string{ 57 | "kube-system", 58 | }, 59 | }, 60 | }, 61 | }, 62 | labels: map[string]string{ 63 | "kubernetes.io/metadata.name": "kube-system", 64 | }, 65 | shouldMatch: true, 66 | }, 67 | "nameSelectorDoesNotMatchNameLabel": { 68 | selector: &metav1.LabelSelector{ 69 | MatchExpressions: []metav1.LabelSelectorRequirement{ 70 | { 71 | Key: "kubernetes.io/metadata.name", 72 | Operator: "In", 73 | Values: []string{ 74 | "kube-system", 75 | }, 76 | }, 77 | }, 78 | }, 79 | labels: map[string]string{ 80 | "kubernetes.io/metadata.name": "kube-public", 81 | }, 82 | shouldMatch: false, 83 | }, 84 | "nameSelectorDoesNotMatchNilLabels": { 85 | selector: &metav1.LabelSelector{ 86 | MatchExpressions: []metav1.LabelSelectorRequirement{ 87 | { 88 | Key: "kubernetes.io/metadata.name", 89 | Operator: "In", 90 | Values: []string{ 91 | "kube-system", 92 | }, 93 | }, 94 | }, 95 | }, 96 | labels: nil, 97 | shouldMatch: false, 98 | }, 99 | "reverseNameSelectorDoesMatchNilLabels": { 100 | selector: &metav1.LabelSelector{ 101 | MatchExpressions: []metav1.LabelSelectorRequirement{ 102 | { 103 | Key: "kubernetes.io/metadata.name", 104 | Operator: "NotIn", 105 | Values: []string{ 106 | "kube-system", 107 | }, 108 | }, 109 | }, 110 | }, 111 | labels: nil, 112 | shouldMatch: true, 113 | }, 114 | } 115 | for name, test := range tests { 116 | t.Run(name, func(t *testing.T) { 117 | matches, err := SelectorMatchesLabels(test.selector, test.labels) 118 | require.NoError(t, err) 119 | require.Equal(t, test.shouldMatch, matches) 120 | }) 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /pkg/api/v1alpha1/zz_generated.deepcopy.go: -------------------------------------------------------------------------------- 1 | //go:build !ignore_autogenerated 2 | 3 | // Code generated by controller-gen. DO NOT EDIT. 4 | 5 | package v1alpha1 6 | 7 | import ( 8 | "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | runtime "k8s.io/apimachinery/pkg/runtime" 10 | ) 11 | 12 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 13 | func (in *CloudProvider) DeepCopyInto(out *CloudProvider) { 14 | *out = *in 15 | } 16 | 17 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CloudProvider. 18 | func (in *CloudProvider) DeepCopy() *CloudProvider { 19 | if in == nil { 20 | return nil 21 | } 22 | out := new(CloudProvider) 23 | in.DeepCopyInto(out) 24 | return out 25 | } 26 | 27 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
28 | func (in *CostManagerConfiguration) DeepCopyInto(out *CostManagerConfiguration) { 29 | *out = *in 30 | out.TypeMeta = in.TypeMeta 31 | if in.Controllers != nil { 32 | in, out := &in.Controllers, &out.Controllers 33 | *out = make([]string, len(*in)) 34 | copy(*out, *in) 35 | } 36 | out.CloudProvider = in.CloudProvider 37 | if in.SpotMigrator != nil { 38 | in, out := &in.SpotMigrator, &out.SpotMigrator 39 | *out = new(SpotMigrator) 40 | (*in).DeepCopyInto(*out) 41 | } 42 | if in.PodSafeToEvictAnnotator != nil { 43 | in, out := &in.PodSafeToEvictAnnotator, &out.PodSafeToEvictAnnotator 44 | *out = new(PodSafeToEvictAnnotator) 45 | (*in).DeepCopyInto(*out) 46 | } 47 | } 48 | 49 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CostManagerConfiguration. 50 | func (in *CostManagerConfiguration) DeepCopy() *CostManagerConfiguration { 51 | if in == nil { 52 | return nil 53 | } 54 | out := new(CostManagerConfiguration) 55 | in.DeepCopyInto(out) 56 | return out 57 | } 58 | 59 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 60 | func (in *CostManagerConfiguration) DeepCopyObject() runtime.Object { 61 | if c := in.DeepCopy(); c != nil { 62 | return c 63 | } 64 | return nil 65 | } 66 | 67 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 68 | func (in *PodSafeToEvictAnnotator) DeepCopyInto(out *PodSafeToEvictAnnotator) { 69 | *out = *in 70 | if in.NamespaceSelector != nil { 71 | in, out := &in.NamespaceSelector, &out.NamespaceSelector 72 | *out = new(v1.LabelSelector) 73 | (*in).DeepCopyInto(*out) 74 | } 75 | } 76 | 77 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodSafeToEvictAnnotator. 78 | func (in *PodSafeToEvictAnnotator) DeepCopy() *PodSafeToEvictAnnotator { 79 | if in == nil { 80 | return nil 81 | } 82 | out := new(PodSafeToEvictAnnotator) 83 | in.DeepCopyInto(out) 84 | return out 85 | } 86 | 87 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 88 | func (in *SpotMigrator) DeepCopyInto(out *SpotMigrator) { 89 | *out = *in 90 | if in.MigrationSchedule != nil { 91 | in, out := &in.MigrationSchedule, &out.MigrationSchedule 92 | *out = new(string) 93 | **out = **in 94 | } 95 | } 96 | 97 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SpotMigrator. 
98 | func (in *SpotMigrator) DeepCopy() *SpotMigrator { 99 | if in == nil { 100 | return nil 101 | } 102 | out := new(SpotMigrator) 103 | in.DeepCopyInto(out) 104 | return out 105 | } 106 | -------------------------------------------------------------------------------- /pkg/cloudprovider/gcp/cloud_provider_test.go: -------------------------------------------------------------------------------- 1 | package gcp 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "testing" 7 | "time" 8 | 9 | "github.com/stretchr/testify/require" 10 | corev1 "k8s.io/api/core/v1" 11 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 12 | ) 13 | 14 | func TestIsSpotInstance(t *testing.T) { 15 | tests := map[string]struct { 16 | node *corev1.Node 17 | isSpotInstance bool 18 | }{ 19 | "hasSpotLabelSetToTrue": { 20 | node: &corev1.Node{ 21 | ObjectMeta: metav1.ObjectMeta{ 22 | Labels: map[string]string{ 23 | "cloud.google.com/gke-spot": "true", 24 | }, 25 | }, 26 | }, 27 | isSpotInstance: true, 28 | }, 29 | "hasPreemptibleLabelSetToTrue": { 30 | node: &corev1.Node{ 31 | ObjectMeta: metav1.ObjectMeta{ 32 | Labels: map[string]string{ 33 | "cloud.google.com/gke-preemptible": "true", 34 | }, 35 | }, 36 | }, 37 | isSpotInstance: true, 38 | }, 39 | "hasSpotLabelSetToFalse": { 40 | node: &corev1.Node{ 41 | ObjectMeta: metav1.ObjectMeta{ 42 | Labels: map[string]string{ 43 | "cloud.google.com/gke-spot": "false", 44 | }, 45 | }, 46 | }, 47 | isSpotInstance: false, 48 | }, 49 | "hasPreemptibleLabelSetToFalse": { 50 | node: &corev1.Node{ 51 | ObjectMeta: metav1.ObjectMeta{ 52 | Labels: map[string]string{ 53 | "cloud.google.com/gke-preemptible": "false", 54 | }, 55 | }, 56 | }, 57 | isSpotInstance: false, 58 | }, 59 | "hasOtherLabel": { 60 | node: &corev1.Node{ 61 | ObjectMeta: metav1.ObjectMeta{ 62 | Labels: map[string]string{ 63 | "foo": "bar", 64 | }, 65 | }, 66 | }, 67 | isSpotInstance: false, 68 | }, 69 | "hasNoLabels": { 70 | node: &corev1.Node{ 71 | ObjectMeta: metav1.ObjectMeta{}, 72 | }, 73 | isSpotInstance: false, 74 | }, 75 | } 76 | for name, test := range tests { 77 | t.Run(name, func(t *testing.T) { 78 | cloudProvider := &CloudProvider{} 79 | isSpotInstance, err := cloudProvider.IsSpotInstance(context.Background(), test.node) 80 | require.NoError(t, err) 81 | require.Equal(t, test.isSpotInstance, isSpotInstance) 82 | }) 83 | } 84 | } 85 | 86 | func TestTimeSinceToBeDeletedTaintAdded(t *testing.T) { 87 | tests := map[string]struct { 88 | node *corev1.Node 89 | now time.Time 90 | timeSinceToBeDeletedTaintAdded time.Duration 91 | }{ 92 | "missingTaint": { 93 | node: &corev1.Node{}, 94 | now: time.Now(), 95 | timeSinceToBeDeletedTaintAdded: 0, 96 | }, 97 | "recentTaint": { 98 | node: &corev1.Node{ 99 | Spec: corev1.NodeSpec{ 100 | Taints: []corev1.Taint{ 101 | { 102 | Key: "ToBeDeletedByClusterAutoscaler", 103 | Value: fmt.Sprint(time.Date(0, 0, 0, 0, 0, 0, 0, time.UTC).Unix()), 104 | Effect: corev1.TaintEffectNoSchedule, 105 | }, 106 | }, 107 | }, 108 | }, 109 | now: time.Date(0, 0, 0, 0, 1, 0, 0, time.UTC), 110 | timeSinceToBeDeletedTaintAdded: time.Minute, 111 | }, 112 | "futureTaint": { 113 | node: &corev1.Node{ 114 | Spec: corev1.NodeSpec{ 115 | Taints: []corev1.Taint{ 116 | { 117 | Key: "ToBeDeletedByClusterAutoscaler", 118 | Value: fmt.Sprint(time.Date(0, 0, 0, 0, 1, 0, 0, time.UTC).Unix()), 119 | Effect: corev1.TaintEffectNoSchedule, 120 | }, 121 | }, 122 | }, 123 | }, 124 | now: time.Date(0, 0, 0, 0, 0, 0, 0, time.UTC), 125 | timeSinceToBeDeletedTaintAdded: 0, 126 | }, 127 | } 128 | for name, test := range tests { 129 
| t.Run(name, func(t *testing.T) { 130 | timeSinceToBeDeletedTaintAdded := timeSinceToBeDeletedTaintAdded(test.node, test.now) 131 | require.Equal(t, test.timeSinceToBeDeletedTaintAdded, timeSinceToBeDeletedTaintAdded) 132 | }) 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /pkg/config/config_test.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/hsbc/cost-manager/pkg/api/v1alpha1" 7 | "github.com/stretchr/testify/require" 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | "knative.dev/pkg/ptr" 10 | ) 11 | 12 | func TestDecode(t *testing.T) { 13 | tests := map[string]struct { 14 | configData []byte 15 | valid bool 16 | config *v1alpha1.CostManagerConfiguration 17 | }{ 18 | "default": { 19 | configData: []byte(` 20 | apiVersion: cost-manager.io/v1alpha1 21 | kind: CostManagerConfiguration 22 | controllers: 23 | - spot-migrator 24 | - pod-safe-to-evict-annotator 25 | cloudProvider: 26 | name: gcp 27 | spotMigrator: 28 | migrationSchedule: "* * * * *" 29 | podSafeToEvictAnnotator: 30 | namespaceSelector: 31 | matchExpressions: 32 | - key: kubernetes.io/metadata.name 33 | operator: In 34 | values: 35 | - kube-system 36 | `), 37 | valid: true, 38 | config: &v1alpha1.CostManagerConfiguration{ 39 | TypeMeta: metav1.TypeMeta{ 40 | APIVersion: "cost-manager.io/v1alpha1", 41 | Kind: "CostManagerConfiguration", 42 | }, 43 | Controllers: []string{ 44 | "spot-migrator", 45 | "pod-safe-to-evict-annotator", 46 | }, 47 | CloudProvider: v1alpha1.CloudProvider{ 48 | Name: "gcp", 49 | }, 50 | SpotMigrator: &v1alpha1.SpotMigrator{ 51 | MigrationSchedule: ptr.String("* * * * *"), 52 | }, 53 | PodSafeToEvictAnnotator: &v1alpha1.PodSafeToEvictAnnotator{ 54 | NamespaceSelector: &metav1.LabelSelector{ 55 | MatchExpressions: []metav1.LabelSelectorRequirement{ 56 | { 57 | Key: "kubernetes.io/metadata.name", 58 | Operator: "In", 59 | Values: []string{ 60 | "kube-system", 61 | }, 62 | }, 63 | }, 64 | }, 65 | }, 66 | }, 67 | }, 68 | "noFields": { 69 | configData: []byte(` 70 | apiVersion: cost-manager.io/v1alpha1 71 | kind: CostManagerConfiguration 72 | `), 73 | valid: true, 74 | config: &v1alpha1.CostManagerConfiguration{ 75 | TypeMeta: metav1.TypeMeta{ 76 | APIVersion: "cost-manager.io/v1alpha1", 77 | Kind: "CostManagerConfiguration", 78 | }, 79 | }, 80 | }, 81 | "unknownAPIVersion": { 82 | configData: []byte(` 83 | apiVersion: foo.io/v1alpha1 84 | kind: CostManagerConfiguration 85 | `), 86 | valid: false, 87 | }, 88 | "unknownKind": { 89 | configData: []byte(` 90 | apiVersion: cost-manager.io/v1alpha1 91 | kind: FooConfiguration 92 | `), 93 | valid: false, 94 | }, 95 | "unknownField": { 96 | configData: []byte(` 97 | apiVersion: cost-manager.io/v1alpha1 98 | kind: CostManagerConfiguration 99 | foo: bar 100 | `), 101 | valid: false, 102 | }, 103 | } 104 | for name, test := range tests { 105 | t.Run(name, func(t *testing.T) { 106 | config, err := decode(test.configData) 107 | if test.valid { 108 | require.NoError(t, err) 109 | require.Equal(t, test.config, config) 110 | } else { 111 | require.Error(t, err) 112 | } 113 | }) 114 | } 115 | } 116 | 117 | func TestValidate(t *testing.T) { 118 | tests := map[string]struct { 119 | config *v1alpha1.CostManagerConfiguration 120 | valid bool 121 | }{ 122 | "valid": { 123 | config: &v1alpha1.CostManagerConfiguration{}, 124 | valid: true, 125 | }, 126 | "knownController": { 127 | config: 
&v1alpha1.CostManagerConfiguration{ 128 | Controllers: []string{"spot-migrator"}, 129 | }, 130 | valid: true, 131 | }, 132 | "unknownController": { 133 | config: &v1alpha1.CostManagerConfiguration{ 134 | Controllers: []string{"unknown-controller"}, 135 | }, 136 | valid: false, 137 | }, 138 | } 139 | for name, test := range tests { 140 | t.Run(name, func(t *testing.T) { 141 | err := validate(test.config) 142 | if test.valid { 143 | require.NoError(t, err) 144 | } else { 145 | require.Error(t, err) 146 | } 147 | }) 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /pkg/controller/pod_safe_to_evict_annotator.go: -------------------------------------------------------------------------------- 1 | package controller 2 | 3 | import ( 4 | "context" 5 | "strings" 6 | 7 | "github.com/hsbc/cost-manager/pkg/api/v1alpha1" 8 | "github.com/hsbc/cost-manager/pkg/kubernetes" 9 | corev1 "k8s.io/api/core/v1" 10 | "k8s.io/apimachinery/pkg/api/errors" 11 | "k8s.io/apimachinery/pkg/types" 12 | ctrl "sigs.k8s.io/controller-runtime" 13 | "sigs.k8s.io/controller-runtime/pkg/client" 14 | "sigs.k8s.io/controller-runtime/pkg/reconcile" 15 | ) 16 | 17 | const ( 18 | podSafeToEvictAnnotatorControllerName = "pod-safe-to-evict-annotator" 19 | 20 | // We copy the annotation key to avoid depending on the autoscaler repository: 21 | // https://github.com/kubernetes/autoscaler/blob/389914758265a33e36683d6df7dbecf91de81802/cluster-autoscaler/utils/drain/drain.go#L33-L35 22 | podSafeToEvictAnnotationKey = "cluster-autoscaler.kubernetes.io/safe-to-evict" 23 | ) 24 | 25 | // podSafeToEvictAnnotator adds the `cluster-autoscaler.kubernetes.io/safe-to-evict: "true"` 26 | // annotation to Pods to ensure that they do not prevent cluster scale down: 27 | // https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#what-types-of-pods-can-prevent-ca-from-removing-a-node 28 | type podSafeToEvictAnnotator struct { 29 | Config *v1alpha1.PodSafeToEvictAnnotator 30 | Client client.Client 31 | } 32 | 33 | var _ reconcile.Reconciler = &podSafeToEvictAnnotator{} 34 | 35 | func (r *podSafeToEvictAnnotator) SetupWithManager(mgr ctrl.Manager) error { 36 | return ctrl.NewControllerManagedBy(mgr). 37 | For(&corev1.Pod{}). 38 | Named(strings.ReplaceAll(podSafeToEvictAnnotatorControllerName, "-", "_")). 39 | Complete(r) 40 | } 41 | 42 | func (r *podSafeToEvictAnnotator) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) { 43 | // We do nothing if the Namespace does not match the Namespace selector... 
44 | namespace := &corev1.Namespace{} 45 | err := r.Client.Get(ctx, types.NamespacedName{Name: request.Namespace}, namespace) 46 | if errors.IsNotFound(err) { 47 | return reconcile.Result{}, nil 48 | } 49 | if err != nil { 50 | return reconcile.Result{}, err 51 | } 52 | namespaceSelectorMatchesLabels, err := r.namespaceSelectorMatchesLabels(namespace) 53 | if err != nil { 54 | return reconcile.Result{}, err 55 | } 56 | if !namespaceSelectorMatchesLabels { 57 | return reconcile.Result{}, nil 58 | } 59 | 60 | // ...otherwise we continue to process the Pod 61 | pod := &corev1.Pod{} 62 | err = r.Client.Get(ctx, request.NamespacedName, pod) 63 | if errors.IsNotFound(err) { 64 | return reconcile.Result{}, nil 65 | } 66 | if err != nil { 67 | return reconcile.Result{}, err 68 | } 69 | 70 | // If the annotation is not already set then we set it to true 71 | if pod.Annotations == nil { 72 | pod.Annotations = map[string]string{} 73 | } 74 | _, ok := pod.Annotations[podSafeToEvictAnnotationKey] 75 | if ok { 76 | return reconcile.Result{}, nil 77 | } 78 | // https://github.com/kubernetes/autoscaler/blob/389914758265a33e36683d6df7dbecf91de81802/cluster-autoscaler/utils/drain/drain.go#L118-L121 79 | pod.Annotations[podSafeToEvictAnnotationKey] = "true" 80 | 81 | err = r.Client.Update(ctx, pod) 82 | // If the Pod has been deleted or there was a conflict then we ignore the error since there must 83 | // be another event queued for reconciliation 84 | if errors.IsNotFound(err) || errors.IsConflict(err) { 85 | return reconcile.Result{}, nil 86 | } 87 | if err != nil { 88 | return reconcile.Result{}, err 89 | } 90 | 91 | return reconcile.Result{}, nil 92 | } 93 | 94 | func (r *podSafeToEvictAnnotator) namespaceSelectorMatchesLabels(namespace *corev1.Namespace) (bool, error) { 95 | // If the Namespace selector is nil then we match all Namespaces... 
96 | if r.Config == nil || r.Config.NamespaceSelector == nil { 97 | return true, nil 98 | } 99 | // ...otherwise we match the Namespace against the selector 100 | return kubernetes.SelectorMatchesLabels(r.Config.NamespaceSelector, namespace.Labels) 101 | } 102 | -------------------------------------------------------------------------------- /e2e/prometheus_alerts_test.go: -------------------------------------------------------------------------------- 1 | package e2e 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "strings" 7 | "testing" 8 | "time" 9 | 10 | "github.com/hsbc/cost-manager/pkg/kubernetes" 11 | monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" 12 | "github.com/prometheus/client_golang/api" 13 | prometheusv1 "github.com/prometheus/client_golang/api/prometheus/v1" 14 | "github.com/prometheus/common/model" 15 | "github.com/stretchr/testify/require" 16 | "sigs.k8s.io/controller-runtime/pkg/client" 17 | ) 18 | 19 | const ( 20 | prometheusAlertsInactiveDuration = 30 * time.Second 21 | ) 22 | 23 | // TestPrometheusAlerts tests that all cost-manager PrometheusRule alerts are registered with 24 | // Prometheus and remain inactive for a period of time 25 | func TestPrometheusAlerts(t *testing.T) { 26 | t.Parallel() 27 | 28 | ctx := context.Background() 29 | 30 | kubeClient, restConfig, err := kubernetes.NewClient() 31 | require.NoError(t, err) 32 | 33 | // Port forward to Prometheus and create client using local forwarded port 34 | pod, err := kubernetes.WaitForAnyReadyPod(ctx, kubeClient, client.InNamespace("monitoring"), client.MatchingLabels{"app.kubernetes.io/name": "prometheus"}) 35 | require.NoError(t, err) 36 | forwardedPort, stop, err := kubernetes.PortForward(ctx, restConfig, pod.Namespace, pod.Name, 9090) 37 | require.NoError(t, err) 38 | defer func() { 39 | err := stop() 40 | require.NoError(t, err) 41 | }() 42 | prometheusAddress := fmt.Sprintf("http://127.0.0.1:%d", forwardedPort) 43 | prometheusClient, err := api.NewClient(api.Config{ 44 | Address: prometheusAddress, 45 | }) 46 | require.NoError(t, err) 47 | prometheusAPI := prometheusv1.NewAPI(prometheusClient) 48 | 49 | t.Log("Waiting for cost-manager alerts to be registered with Prometheus...") 50 | costManagerPrometheusRule := &monitoringv1.PrometheusRule{} 51 | err = kubeClient.Get(ctx, client.ObjectKey{Name: "cost-manager", Namespace: "cost-manager"}, costManagerPrometheusRule) 52 | require.NoError(t, err) 53 | for { 54 | prometheusRules, err := prometheusAPI.Rules(ctx) 55 | require.NoError(t, err) 56 | if prometheusHasAllCostManagerAlerts(costManagerPrometheusRule, prometheusRules) { 57 | break 58 | } 59 | time.Sleep(time.Second) 60 | } 61 | t.Log("All cost-manager alerts registered with Prometheus!") 62 | 63 | // Wait for Prometheus to scrape cost-manager 64 | for { 65 | results, _, err := prometheusAPI.Query(ctx, `up{job="cost-manager",namespace="cost-manager"}`, time.Now()) 66 | require.NoError(t, err) 67 | if len(results.(model.Vector)) > 0 { 68 | break 69 | } 70 | time.Sleep(time.Second) 71 | } 72 | 73 | t.Logf("Ensuring all Prometheus alerts remain inactive for %s...", prometheusAlertsInactiveDuration) 74 | err = waitForAllPrometheusAlertsToRemainInactive(ctx, prometheusAPI) 75 | require.NoError(t, err) 76 | t.Logf("All Prometheus alerts remained inactive for %s!", prometheusAlertsInactiveDuration) 77 | } 78 | 79 | func prometheusHasAllCostManagerAlerts(costManagerPrometheusRule *monitoringv1.PrometheusRule, prometheusRules prometheusv1.RulesResult) bool { 80 | for _, group 
:= range costManagerPrometheusRule.Spec.Groups { 81 | for _, rule := range group.Rules { 82 | if len(rule.Alert) > 0 { 83 | if !prometheusHasGroupAlert(group.Name, rule.Alert, prometheusRules) { 84 | return false 85 | } 86 | } 87 | } 88 | } 89 | return true 90 | } 91 | 92 | func prometheusHasGroupAlert(groupName string, alertName string, prometheusRules prometheusv1.RulesResult) bool { 93 | for _, group := range prometheusRules.Groups { 94 | if group.Name == groupName { 95 | for _, rule := range group.Rules { 96 | switch alert := rule.(type) { 97 | case prometheusv1.AlertingRule: 98 | if alertName == alert.Name { 99 | return true 100 | } 101 | default: 102 | continue 103 | } 104 | } 105 | } 106 | } 107 | return false 108 | } 109 | 110 | func waitForAllPrometheusAlertsToRemainInactive(ctx context.Context, prometheusAPI prometheusv1.API) error { 111 | ticker := time.NewTicker(time.Second) 112 | for { 113 | done := time.After(prometheusAlertsInactiveDuration) 114 | for { 115 | resetTimer := false 116 | select { 117 | case <-done: 118 | return nil 119 | case <-ticker.C: 120 | result, err := prometheusAPI.Alerts(ctx) 121 | if err != nil { 122 | return err 123 | } 124 | 125 | // If any alerts are firing then we return an error 126 | var firingAlertNames []string 127 | for _, alert := range result.Alerts { 128 | if alert.State == prometheusv1.AlertStateFiring { 129 | firingAlertNames = append(firingAlertNames, string(alert.Labels[model.AlertNameLabel])) 130 | } 131 | } 132 | if len(firingAlertNames) > 0 { 133 | return fmt.Errorf("Prometheus alerts firing: %s", strings.Join(firingAlertNames, ", ")) 134 | } 135 | 136 | // If any alerts are not inactive then we reset our timer 137 | resetTimer = !allPrometheusAlertsInactive(result.Alerts) 138 | } 139 | if resetTimer { 140 | break 141 | } 142 | } 143 | } 144 | } 145 | 146 | func allPrometheusAlertsInactive(alerts []prometheusv1.Alert) bool { 147 | for _, alert := range alerts { 148 | if alert.State != prometheusv1.AlertStateInactive { 149 | return false 150 | } 151 | } 152 | return true 153 | } 154 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: ci 2 | # Trigger on push to main branch and any pull requests to main branch 3 | on: 4 | push: 5 | branches: 6 | - main 7 | tags: 8 | # Match semantic version tags: 9 | # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#patterns-to-match-branches-and-tags 10 | - v[0-9]+.[0-9]+.[0-9]+ 11 | pull_request: 12 | branches: 13 | - main 14 | # https://github.com/golangci/golangci-lint-action?tab=readme-ov-file#comments-and-annotations 15 | permissions: 16 | contents: read 17 | pull-requests: read 18 | checks: write 19 | jobs: 20 | # https://github.com/golangci/golangci-lint-action?tab=readme-ov-file#how-to-use 21 | lint: 22 | runs-on: ubuntu-latest 23 | steps: 24 | - uses: actions/checkout@v4 25 | - uses: actions/setup-go@v4 26 | with: 27 | go-version: '1.23.4' 28 | cache: false 29 | - name: golangci-lint 30 | uses: golangci/golangci-lint-action@v6 31 | with: 32 | version: v1.60 33 | args: --timeout=10m 34 | # https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go 35 | test: 36 | runs-on: ubuntu-latest 37 | steps: 38 | - uses: actions/checkout@v4 39 | - uses: actions/setup-go@v4 40 | with: 41 | go-version: '1.23.4' 42 | - run: go mod download 43 | - name: Verify generated code 44 | run: make verify 
45 | - name: Run unit tests 46 | run: make test 47 | - name: Set up QEMU 48 | uses: docker/setup-qemu-action@v3 49 | - name: Set up Docker Buildx 50 | uses: docker/setup-buildx-action@v3 51 | # Build without pushing to first validate that the image works as expected: 52 | # https://docs.docker.com/build/ci/github-actions/test-before-push/ 53 | - name: Build Docker image 54 | uses: docker/build-push-action@v5 55 | id: docker-build 56 | with: 57 | context: . 58 | platforms: linux/amd64 59 | tags: docker.io/dippynark/cost-manager:test 60 | cache-from: type=gha 61 | cache-to: type=gha,mode=max 62 | # Export to Docker so we can load into kind cluster: 63 | # https://docs.docker.com/build/ci/github-actions/export-docker/ 64 | load: true 65 | - name: Setup Helm 66 | uses: azure/setup-helm@v4 67 | with: 68 | version: v3.16.4 69 | - name: Helm lint 70 | run: helm lint --strict ./charts/cost-manager 71 | - name: Install kind 72 | uses: helm/kind-action@v1 73 | with: 74 | install_only: true 75 | - name: Run E2E tests 76 | run: make e2e IMAGE=${{ fromJSON(steps.docker-build.outputs.metadata)['image.name'] }} 77 | release: 78 | # Make sure the tests have passed before releasing 79 | needs: 80 | - lint 81 | - test 82 | runs-on: ubuntu-latest 83 | # Do not release for forked repositories since secrets are not available: 84 | # https://docs.github.com/en/actions/security-guides/using-secrets-in-github-actions#using-secrets-in-a-workflow 85 | if: ${{ ! github.event.pull_request.head.repo.fork }} 86 | steps: 87 | - uses: actions/checkout@v4 88 | - name: Set up QEMU 89 | uses: docker/setup-qemu-action@v3 90 | - name: Set up Docker Buildx 91 | uses: docker/setup-buildx-action@v3 92 | - name: Login to Docker Hub 93 | uses: docker/login-action@v3 94 | id: docker-login 95 | with: 96 | username: ${{ secrets.DOCKERHUB_USERNAME }} 97 | password: ${{ secrets.DOCKERHUB_TOKEN }} 98 | # https://github.com/docker/metadata-action#basic 99 | - name: Docker meta 100 | uses: docker/metadata-action@v5 101 | id: meta 102 | with: 103 | images: docker.io/dippynark/cost-manager 104 | # Do not handle the latest tag by default. In particular this prevents the latest tag being 105 | # pushed when the repository is tagged with a semantic version: 106 | # https://github.com/docker/metadata-action#flavor-input 107 | flavor: latest=false 108 | tags: | 109 | # Push semantic version tags: https://github.com/docker/metadata-action#typesemver 110 | type=semver,pattern={{raw}} 111 | # To make sure that we cannot accidentally push a semantic version tag from a pull request 112 | # we only generate tags from the pull request number: 113 | # https://github.com/docker/metadata-action#typeref 114 | type=ref,event=pr 115 | # Set the latest tag for the main branch: 116 | # https://github.com/docker/metadata-action#latest-tag 117 | type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }} 118 | # Buildx does not currently support pushing a previously built image so we rebuild from cache: 119 | # https://github.com/docker/buildx/issues/1915 120 | - name: Push Docker image 121 | uses: docker/build-push-action@v5 122 | with: 123 | context: . 
124 | # We do not build the ARM64 image for pull requests since it takes a very long time: 125 | # https://github.com/docker/setup-qemu-action/issues/22 126 | # https://github.com/DSpace/DSpace/pull/8315 127 | platforms: linux/amd64${{ github.event_name != 'pull_request' && ',linux/arm64' || '' }} 128 | tags: ${{ steps.meta.outputs.tags }} 129 | cache-from: type=gha 130 | cache-to: type=gha,mode=max 131 | push: true 132 | -------------------------------------------------------------------------------- /pkg/controller/pod_safe_to_evict_annotator_test.go: -------------------------------------------------------------------------------- 1 | package controller 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/hsbc/cost-manager/pkg/api/v1alpha1" 8 | "github.com/hsbc/cost-manager/pkg/kubernetes" 9 | "github.com/stretchr/testify/require" 10 | corev1 "k8s.io/api/core/v1" 11 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 12 | "k8s.io/apimachinery/pkg/types" 13 | "sigs.k8s.io/controller-runtime/pkg/client" 14 | "sigs.k8s.io/controller-runtime/pkg/client/fake" 15 | "sigs.k8s.io/controller-runtime/pkg/reconcile" 16 | ) 17 | 18 | func TestPodSafeToEvictAnnotatorReconcile(t *testing.T) { 19 | name := "foo" 20 | namespace := "bar" 21 | tests := map[string]struct { 22 | pod *corev1.Pod 23 | namespace *corev1.Namespace 24 | config *v1alpha1.PodSafeToEvictAnnotator 25 | shouldAnnotate bool 26 | }{ 27 | "annotationMissing": { 28 | pod: &corev1.Pod{ 29 | ObjectMeta: metav1.ObjectMeta{ 30 | Name: name, 31 | Namespace: namespace, 32 | }, 33 | }, 34 | namespace: &corev1.Namespace{ 35 | ObjectMeta: metav1.ObjectMeta{ 36 | Name: namespace, 37 | }, 38 | }, 39 | shouldAnnotate: true, 40 | }, 41 | "annotationFalse": { 42 | pod: &corev1.Pod{ 43 | ObjectMeta: metav1.ObjectMeta{ 44 | Name: name, 45 | Namespace: namespace, 46 | Annotations: map[string]string{ 47 | "cluster-autoscaler.kubernetes.io/safe-to-evict": "false", 48 | }, 49 | }, 50 | }, 51 | namespace: &corev1.Namespace{ 52 | ObjectMeta: metav1.ObjectMeta{ 53 | Name: namespace, 54 | }, 55 | }, 56 | shouldAnnotate: false, 57 | }, 58 | "annotationMissingWithNilNamespaceSelector": { 59 | pod: &corev1.Pod{ 60 | ObjectMeta: metav1.ObjectMeta{ 61 | Name: name, 62 | Namespace: namespace, 63 | }, 64 | }, 65 | namespace: &corev1.Namespace{ 66 | ObjectMeta: metav1.ObjectMeta{ 67 | Name: namespace, 68 | }, 69 | }, 70 | config: &v1alpha1.PodSafeToEvictAnnotator{}, 71 | shouldAnnotate: true, 72 | }, 73 | "annotationMissingWithMatchingNamespaceSelector": { 74 | pod: &corev1.Pod{ 75 | ObjectMeta: metav1.ObjectMeta{ 76 | Name: name, 77 | Namespace: namespace, 78 | }, 79 | }, 80 | namespace: &corev1.Namespace{ 81 | ObjectMeta: metav1.ObjectMeta{ 82 | Name: namespace, 83 | Labels: map[string]string{ 84 | "kubernetes.io/metadata.name": "kube-system", 85 | }, 86 | }, 87 | }, 88 | config: &v1alpha1.PodSafeToEvictAnnotator{ 89 | NamespaceSelector: &metav1.LabelSelector{ 90 | MatchExpressions: []metav1.LabelSelectorRequirement{ 91 | { 92 | Key: "kubernetes.io/metadata.name", 93 | Operator: "In", 94 | Values: []string{ 95 | "kube-system", 96 | }, 97 | }, 98 | }, 99 | }, 100 | }, 101 | shouldAnnotate: true, 102 | }, 103 | "annotationMissingWithNonMatchingNamespaceSelector": { 104 | pod: &corev1.Pod{ 105 | ObjectMeta: metav1.ObjectMeta{ 106 | Name: name, 107 | Namespace: namespace, 108 | }, 109 | }, 110 | namespace: &corev1.Namespace{ 111 | ObjectMeta: metav1.ObjectMeta{ 112 | Name: namespace, 113 | Labels: map[string]string{ 114 | "kubernetes.io/metadata.name": 
"kube-system", 115 | }, 116 | }, 117 | }, 118 | config: &v1alpha1.PodSafeToEvictAnnotator{ 119 | NamespaceSelector: &metav1.LabelSelector{ 120 | MatchExpressions: []metav1.LabelSelectorRequirement{ 121 | { 122 | Key: "kubernetes.io/metadata.name", 123 | Operator: "In", 124 | Values: []string{ 125 | "kube-public", 126 | }, 127 | }, 128 | }, 129 | }, 130 | }, 131 | shouldAnnotate: false, 132 | }, 133 | "annotationMissingWithMissingNamespace": { 134 | pod: &corev1.Pod{ 135 | ObjectMeta: metav1.ObjectMeta{ 136 | Name: name, 137 | Namespace: namespace, 138 | }, 139 | }, 140 | namespace: nil, 141 | shouldAnnotate: false, 142 | }, 143 | } 144 | for name, test := range tests { 145 | t.Run(name, func(t *testing.T) { 146 | // Create fake client 147 | scheme, err := kubernetes.NewScheme() 148 | require.NoError(t, err) 149 | objects := []client.Object{test.pod} 150 | if test.namespace != nil { 151 | objects = append(objects, test.namespace) 152 | } 153 | client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(objects...).Build() 154 | 155 | // Setup controller 156 | podSafeToEvictAnnotator := &podSafeToEvictAnnotator{ 157 | Client: client, 158 | Config: test.config, 159 | } 160 | 161 | // Run reconciliation 162 | ctx := context.Background() 163 | _, err = podSafeToEvictAnnotator.Reconcile(ctx, reconcile.Request{NamespacedName: types.NamespacedName{Name: test.pod.Name, Namespace: test.pod.Namespace}}) 164 | require.NoError(t, err) 165 | 166 | // Determine whether the Pod has been annotated 167 | pod := &corev1.Pod{} 168 | err = client.Get(ctx, types.NamespacedName{Name: test.pod.Name, Namespace: test.pod.Namespace}, pod) 169 | require.NoError(t, err) 170 | annotated := true 171 | if pod.Annotations == nil { 172 | annotated = false 173 | } else { 174 | value, ok := pod.Annotations["cluster-autoscaler.kubernetes.io/safe-to-evict"] 175 | if !ok || value != "true" { 176 | annotated = false 177 | } 178 | } 179 | require.Equal(t, test.shouldAnnotate, annotated) 180 | }) 181 | } 182 | } 183 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cost-manager 2 | 3 | cost-manager is a Kubernetes controller manager that manages 4 | [controllers](https://kubernetes.io/docs/concepts/architecture/controller/) to automate cost 5 | optimisations. 6 | 7 | ## Controllers 8 | 9 | Here we provide details of the various controllers supported by cost-manager. 10 | 11 | ### spot-migrator 12 | 13 | Spot VMs are unused compute capacity that many cloud providers support access to at significantly 14 | reduced costs (e.g. on GCP spot VMs provide a [60-91% 15 | discount](https://cloud.google.com/compute/docs/instances/spot#pricing)). Since spot VM availability 16 | can fluctuate it is common to configure workloads to be able to run on spot VMs but to allow 17 | fallback to on-demand VMs if spot VMs are unavailable. However, even if spot VMs are available, if 18 | workloads are already running on on-demand VMs there is no reason for them to migrate. 
19 | 20 | To improve spot VM utilisation, [spot-migrator](./pkg/controller/spot_migrator.go) periodically 21 | attempts to migrate workloads from on-demand VMs to spot VMs by draining on-demand Nodes to force 22 | cluster scale up, relying on the fact that the cluster autoscaler [attempts to expand the least 23 | expensive possible node 24 | group](https://github.com/kubernetes/autoscaler/blob/600cda52cf764a1f08b06fc8cc29b1ef95f13c76/cluster-autoscaler/proposals/pricing.md), 25 | taking into account the reduced cost of spot VMs. If an on-demand VM is added to the cluster then 26 | spot-migrator assumes that there are currently no more spot VMs available and waits for the next 27 | migration attempt (currently every hour). However, if no on-demand VMs were added then spot-migrator 28 | continues to drain on-demand VMs until there are no more left in the cluster (and all workloads are 29 | running on spot VMs). Node draining respects 30 | [PodDisruptionBudgets](https://kubernetes.io/docs/concepts/workloads/pods/disruptions/) to ensure 31 | that workloads are migrated whilst maintaining desired levels of availability. 32 | 33 | Currently only [GKE 34 | Standard](https://cloud.google.com/kubernetes-engine/docs/concepts/types-of-clusters) clusters are 35 | supported. To allow spot-migrator to migrate workloads to spot VMs with fallback to on-demand VMs, 36 | your cluster must be running at least one on-demand node pool and at least one spot node pool. 37 | 38 | ```yaml 39 | apiVersion: cost-manager.io/v1alpha1 40 | kind: CostManagerConfiguration 41 | controllers: 42 | - spot-migrator 43 | cloudProvider: 44 | name: gcp 45 | ``` 46 | 47 | ### pod-safe-to-evict-annotator 48 | 49 | Certain [types of 50 | Pods](https://github.com/kubernetes/autoscaler/blob/bb72e46cb0697090683969c932a38afec9089978/cluster-autoscaler/FAQ.md#what-types-of-pods-can-prevent-ca-from-removing-a-node) 51 | can prevent the cluster autoscaler from removing a Node (e.g. Pods in the kube-system Namespace that 52 | do not have a PodDisruptionBudget), leading to more Nodes in the cluster than necessary. This can be 53 | particularly problematic for workloads that cluster operators are not in control of and can have a 54 | high number of replicas, such as kube-dns or the [Konnectivity 55 | agent](https://kubernetes.io/docs/tasks/extend-kubernetes/setup-konnectivity/), which are typically 56 | installed by cloud providers. 57 | 58 | To allow the cluster autoscaler to evict all Pods that have not been explicitly marked as unsafe for 59 | eviction, [pod-safe-to-evict-annotator](./pkg/controller/pod_safe_to_evict_annotator.go) adds the 60 | `cluster-autoscaler.kubernetes.io/safe-to-evict: "true"` annotation to all Pods that have not 61 | already been annotated; note that PodDisruptionBudgets can still be used to maintain desired levels 62 | of availability. 
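For illustration, a Pod that has been processed by this controller carries an annotation like the following (hypothetical Pod manifest fragment):

```yaml
metadata:
  annotations:
    cluster-autoscaler.kubernetes.io/safe-to-evict: "true"
```

The controller is enabled and scoped through the `CostManagerConfiguration`, for example: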
63 | 64 | ```yaml 65 | apiVersion: cost-manager.io/v1alpha1 66 | kind: CostManagerConfiguration 67 | controllers: 68 | - pod-safe-to-evict-annotator 69 | podSafeToEvictAnnotator: 70 | namespaceSelector: 71 | matchExpressions: 72 | - key: kubernetes.io/metadata.name 73 | operator: In 74 | values: 75 | - kube-system 76 | ``` 77 | 78 | ## Installation 79 | 80 | You can install cost-manager into a GKE cluster with [Workload 81 | Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity) enabled as 82 | follows: 83 | 84 | ```sh 85 | NAMESPACE="cost-manager" 86 | kubectl get namespace "$NAMESPACE" || kubectl create namespace "$NAMESPACE" 87 | LATEST_RELEASE_TAG="$(curl -s https://api.github.com/repos/hsbc/cost-manager/releases/latest | jq -r .tag_name)" 88 | # GCP service account bound to the roles/compute.instanceAdmin role 89 | GCP_SERVICE_ACCOUNT_EMAIL_ADDRESS="cost-manager@example.iam.gserviceaccount.com" 90 | cat <<EOF > values.yaml 91 | image: 92 | tag: $LATEST_RELEASE_TAG 93 | config: 94 | apiVersion: cost-manager.io/v1alpha1 95 | kind: CostManagerConfiguration 96 | controllers: 97 | - spot-migrator 98 | - pod-safe-to-evict-annotator 99 | cloudProvider: 100 | name: gcp 101 | podSafeToEvictAnnotator: 102 | namespaceSelector: 103 | matchExpressions: 104 | - key: kubernetes.io/metadata.name 105 | operator: In 106 | values: 107 | - kube-system 108 | serviceAccount: 109 | annotations: 110 | iam.gke.io/gcp-service-account: $GCP_SERVICE_ACCOUNT_EMAIL_ADDRESS 111 | EOF 112 | helm template ./charts/cost-manager -n "$NAMESPACE" -f values.yaml | kubectl apply -f - 113 | ``` 114 | 115 | ## Testing 116 | 117 | Build the Docker image and run the E2E tests using [kind](https://github.com/kubernetes-sigs/kind): 118 | 119 | ```sh 120 | make image e2e 121 | ``` 122 | 123 | ## Roadmap 124 | 125 | See [ROADMAP.md](ROADMAP.md) for details. 126 | 127 | ## Contributing 128 | 129 | Contributions are greatly appreciated. The project follows the typical GitHub pull request model. 130 | See [CONTRIBUTING.md](CONTRIBUTING.md) for more details. 
131 | -------------------------------------------------------------------------------- /pkg/cloudprovider/gcp/cloud_provider.go: -------------------------------------------------------------------------------- 1 | package gcp 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "strconv" 7 | "time" 8 | 9 | "github.com/hsbc/cost-manager/pkg/kubernetes" 10 | "github.com/pkg/errors" 11 | "google.golang.org/api/compute/v1" 12 | corev1 "k8s.io/api/core/v1" 13 | ) 14 | 15 | const ( 16 | // https://cloud.google.com/kubernetes-engine/docs/concepts/spot-vms#scheduling-workloads 17 | spotNodeLabelKey = "cloud.google.com/gke-spot" 18 | // https://cloud.google.com/kubernetes-engine/docs/how-to/preemptible-vms#use_nodeselector_to_schedule_pods_on_preemptible_vms 19 | preemptibleNodeLabelKey = "cloud.google.com/gke-preemptible" 20 | ) 21 | 22 | type CloudProvider struct { 23 | computeService *compute.Service 24 | } 25 | 26 | // NewCloudProvider creates a new GCP cloud provider 27 | func NewCloudProvider(ctx context.Context) (*CloudProvider, error) { 28 | computeService, err := compute.NewService(ctx) 29 | if err != nil { 30 | return nil, err 31 | } 32 | return &CloudProvider{ 33 | computeService, 34 | }, nil 35 | } 36 | 37 | // DeleteInstance drains any connections from GCP load balancers, retrieves the underlying compute 38 | // instance of the Kubernetes Node and then deletes it from its managed instance group 39 | func (gcp *CloudProvider) DeleteInstance(ctx context.Context, node *corev1.Node) error { 40 | // GCP Network Load Balancer health checks have an interval of 8 seconds with a timeout of 1 41 | // second and an unhealthy threshold of 3 so we wait for 3 * 8 + 1 = 25 seconds for instances to 42 | // be marked as unhealthy which triggers connection draining. We add an additional 30 seconds 43 | // since this is the connection draining timeout used when GKE subsetting is enabled. We then 44 | // add an additional 5 seconds to allow processing time for the various components involved 45 | // (e.g. GCP probes and kube-proxy): 46 | // https://github.com/kubernetes/ingress-gce/blob/2a08b1e4111a21c71455bbb2bcca13349bb6f4c0/pkg/healthchecksl4/healthchecksl4.go#L42 47 | time.Sleep(time.Minute - timeSinceToBeDeletedTaintAdded(node, time.Now())) 48 | 49 | // Retrieve instance details from the provider ID 50 | project, zone, instanceName, err := parseProviderID(node.Spec.ProviderID) 51 | if err != nil { 52 | return err 53 | } 54 | 55 | // Validate that the provider ID details match with the Node. 
This should not be necessary but 56 | // it provides an extra level of validation that we are retrieving the expected instance 57 | if instanceName != node.Name { 58 | return fmt.Errorf("provider ID instance name \"%s\" does not match with Node name \"%s\"", instanceName, node.Name) 59 | } 60 | if node.Labels == nil { 61 | return fmt.Errorf("failed to determine zone for Node %s", node.Name) 62 | } 63 | nodeZone, ok := node.Labels[corev1.LabelTopologyZone] 64 | if !ok { 65 | return fmt.Errorf("failed to determine zone for Node %s", node.Name) 66 | } 67 | if zone != nodeZone { 68 | return fmt.Errorf("provider ID zone \"%s\" does not match with Node zone \"%s\"", zone, nodeZone) 69 | } 70 | 71 | // Retrieve the compute instance corresponding to the Node 72 | instance, err := gcp.computeService.Instances.Get(project, zone, instanceName).Do() 73 | if err != nil { 74 | return errors.Wrapf(err, "failed to get compute instance: %s/%s/%s", project, zone, instanceName) 75 | } 76 | 77 | // Determine the managed instance group that created the instance 78 | managedInstanceGroupName, err := getManagedInstanceGroupFromInstance(instance) 79 | if err != nil { 80 | return err 81 | } 82 | 83 | // Delete the instance from the managed instance group 84 | instanceGroupManagedsDeleteInstancesRequest := &compute.InstanceGroupManagersDeleteInstancesRequest{ 85 | Instances: []string{instance.SelfLink}, 86 | // Do not error if the instance has already been deleted or is being deleted 87 | SkipInstancesOnValidationError: true, 88 | } 89 | r, err := gcp.computeService.InstanceGroupManagers.DeleteInstances(project, zone, managedInstanceGroupName, instanceGroupManagedsDeleteInstancesRequest).Do() 90 | if err != nil { 91 | return errors.Wrap(err, "failed to delete managed instance") 92 | } 93 | err = gcp.waitForZonalComputeOperation(project, zone, r.Name) 94 | if err != nil { 95 | return errors.Wrap(err, "failed to wait for compute operation to complete successfully") 96 | } 97 | err = gcp.waitForManagedInstanceGroupStability(project, zone, managedInstanceGroupName) 98 | if err != nil { 99 | return errors.Wrap(err, "failed to wait for managed instance group stability") 100 | } 101 | 102 | return nil 103 | } 104 | 105 | // IsSpotInstance determines whether the underlying compute instance is a spot VM. 
We consider 106 | // preemptible VMs to be spot VMs to align with the cluster autoscaler: 107 | // https://github.com/kubernetes/autoscaler/blob/10fafe758c118adeb55b28718dc826511cc5ba40/cluster-autoscaler/cloudprovider/gce/gce_price_model.go#L220-L230 108 | func (gcp *CloudProvider) IsSpotInstance(ctx context.Context, node *corev1.Node) (bool, error) { 109 | if node.Labels == nil { 110 | return false, nil 111 | } 112 | return node.Labels[spotNodeLabelKey] == "true" || node.Labels[preemptibleNodeLabelKey] == "true", nil 113 | } 114 | 115 | func timeSinceToBeDeletedTaintAdded(node *corev1.Node, now time.Time) time.Duration { 116 | // Retrieve taint value 117 | toBeDeletedTaintAddedValue := "" 118 | for _, taint := range node.Spec.Taints { 119 | if taint.Key == kubernetes.ToBeDeletedTaint && taint.Effect == corev1.TaintEffectNoSchedule { 120 | toBeDeletedTaintAddedValue = taint.Value 121 | break 122 | } 123 | } 124 | 125 | // Attempt to parse taint value as Unix timestamp 126 | unixTimeSeconds, err := strconv.ParseInt(toBeDeletedTaintAddedValue, 10, 64) 127 | if err != nil { 128 | return 0 129 | } 130 | 131 | timeSinceToBeDeletedTaintAdded := now.Sub(time.Unix(unixTimeSeconds, 0)) 132 | // Ignore negative durations to avoid waiting for an unbounded amount of time 133 | if timeSinceToBeDeletedTaintAdded < 0 { 134 | return 0 135 | } 136 | return timeSinceToBeDeletedTaintAdded 137 | } 138 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/hsbc/cost-manager 2 | 3 | go 1.23.4 4 | 5 | require ( 6 | github.com/go-logr/logr v1.4.2 7 | github.com/hashicorp/go-multierror v1.1.1 8 | github.com/pkg/errors v0.9.1 9 | github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.2 10 | github.com/prometheus/client_golang v1.20.5 11 | github.com/prometheus/common v0.61.0 12 | github.com/robfig/cron/v3 v3.0.1 13 | github.com/stretchr/testify v1.10.0 14 | google.golang.org/api v0.214.0 15 | k8s.io/api v0.32.0 16 | k8s.io/apimachinery v0.32.0 17 | k8s.io/client-go v0.32.0 18 | k8s.io/controller-manager v0.32.0 19 | k8s.io/kubectl v0.32.0 20 | knative.dev/pkg v0.0.0-20231102200604-fac3a4ffbc74 21 | sigs.k8s.io/cluster-api-provider-gcp v1.8.0 22 | sigs.k8s.io/controller-runtime v0.19.3 23 | sigs.k8s.io/controller-tools v0.16.5 24 | sigs.k8s.io/kubebuilder-release-tools/notes v0.3.0 25 | ) 26 | 27 | require ( 28 | cel.dev/expr v0.19.1 // indirect 29 | cloud.google.com/go/auth v0.13.0 // indirect 30 | cloud.google.com/go/auth/oauth2adapt v0.2.6 // indirect 31 | cloud.google.com/go/compute/metadata v0.6.0 // indirect 32 | github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect 33 | github.com/MakeNowJust/heredoc v1.0.0 // indirect 34 | github.com/NYTimes/gziphandler v1.1.1 // indirect 35 | github.com/antlr4-go/antlr/v4 v4.13.1 // indirect 36 | github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect 37 | github.com/beorn7/perks v1.0.1 // indirect 38 | github.com/blang/semver/v4 v4.0.0 // indirect 39 | github.com/cenkalti/backoff/v4 v4.3.0 // indirect 40 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 41 | github.com/chai2010/gettext-go v1.0.3 // indirect 42 | github.com/coreos/go-semver v0.3.1 // indirect 43 | github.com/coreos/go-systemd/v22 v22.5.0 // indirect 44 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect 45 | github.com/emicklei/go-restful/v3 v3.12.1 // indirect 46 | 
github.com/evanphx/json-patch v5.9.0+incompatible // indirect 47 | github.com/evanphx/json-patch/v5 v5.9.0 // indirect 48 | github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f // indirect 49 | github.com/fatih/color v1.18.0 // indirect 50 | github.com/felixge/httpsnoop v1.0.4 // indirect 51 | github.com/fsnotify/fsnotify v1.8.0 // indirect 52 | github.com/fxamacker/cbor/v2 v2.7.0 // indirect 53 | github.com/go-errors/errors v1.5.1 // indirect 54 | github.com/go-logr/stdr v1.2.2 // indirect 55 | github.com/go-logr/zapr v1.3.0 // indirect 56 | github.com/go-openapi/jsonpointer v0.21.0 // indirect 57 | github.com/go-openapi/jsonreference v0.21.0 // indirect 58 | github.com/go-openapi/swag v0.23.0 // indirect 59 | github.com/gobuffalo/flect v1.0.3 // indirect 60 | github.com/gogo/protobuf v1.3.2 // indirect 61 | github.com/golang/protobuf v1.5.4 // indirect 62 | github.com/google/btree v1.1.3 // indirect 63 | github.com/google/cel-go v0.22.1 // indirect 64 | github.com/google/gnostic-models v0.6.9 // indirect 65 | github.com/google/go-cmp v0.6.0 // indirect 66 | github.com/google/gofuzz v1.2.0 // indirect 67 | github.com/google/s2a-go v0.1.8 // indirect 68 | github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect 69 | github.com/google/uuid v1.6.0 // indirect 70 | github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect 71 | github.com/googleapis/gax-go/v2 v2.14.1 // indirect 72 | github.com/gorilla/websocket v1.5.3 // indirect 73 | github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect 74 | github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect 75 | github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1 // indirect 76 | github.com/hashicorp/errwrap v1.1.0 // indirect 77 | github.com/inconshreveable/mousetrap v1.1.0 // indirect 78 | github.com/josharian/intern v1.0.0 // indirect 79 | github.com/json-iterator/go v1.1.12 // indirect 80 | github.com/klauspost/compress v1.17.11 // indirect 81 | github.com/kylelemons/godebug v1.1.0 // indirect 82 | github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect 83 | github.com/mailru/easyjson v0.9.0 // indirect 84 | github.com/mattn/go-colorable v0.1.13 // indirect 85 | github.com/mattn/go-isatty v0.0.20 // indirect 86 | github.com/mitchellh/go-wordwrap v1.0.1 // indirect 87 | github.com/moby/spdystream v0.5.0 // indirect 88 | github.com/moby/term v0.5.0 // indirect 89 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 90 | github.com/modern-go/reflect2 v1.0.2 // indirect 91 | github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect 92 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 93 | github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect 94 | github.com/peterbourgon/diskv v2.0.1+incompatible // indirect 95 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect 96 | github.com/prometheus/client_model v0.6.1 // indirect 97 | github.com/prometheus/procfs v0.15.1 // indirect 98 | github.com/russross/blackfriday/v2 v2.1.0 // indirect 99 | github.com/spf13/cobra v1.8.1 // indirect 100 | github.com/spf13/pflag v1.0.5 // indirect 101 | github.com/stoewer/go-strcase v1.3.0 // indirect 102 | github.com/x448/float16 v0.8.4 // indirect 103 | github.com/xlab/treeprint v1.2.0 // indirect 104 | go.etcd.io/etcd/api/v3 v3.5.17 // indirect 105 | go.etcd.io/etcd/client/pkg/v3 v3.5.17 // indirect 106 | go.etcd.io/etcd/client/v3 v3.5.17 // indirect 107 
| go.opentelemetry.io/auto/sdk v1.1.0 // indirect 108 | go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0 // indirect 109 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 // indirect 110 | go.opentelemetry.io/otel v1.33.0 // indirect 111 | go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0 // indirect 112 | go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0 // indirect 113 | go.opentelemetry.io/otel/metric v1.33.0 // indirect 114 | go.opentelemetry.io/otel/sdk v1.33.0 // indirect 115 | go.opentelemetry.io/otel/trace v1.33.0 // indirect 116 | go.opentelemetry.io/proto/otlp v1.4.0 // indirect 117 | go.uber.org/multierr v1.11.0 // indirect 118 | go.uber.org/zap v1.27.0 // indirect 119 | golang.org/x/crypto v0.31.0 // indirect 120 | golang.org/x/exp v0.0.0-20241217172543-b2144cdd0a67 // indirect 121 | golang.org/x/mod v0.22.0 // indirect 122 | golang.org/x/net v0.33.0 // indirect 123 | golang.org/x/oauth2 v0.24.0 // indirect 124 | golang.org/x/sync v0.10.0 // indirect 125 | golang.org/x/sys v0.28.0 // indirect 126 | golang.org/x/term v0.27.0 // indirect 127 | golang.org/x/text v0.21.0 // indirect 128 | golang.org/x/time v0.8.0 // indirect 129 | golang.org/x/tools v0.28.0 // indirect 130 | gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect 131 | google.golang.org/genproto/googleapis/api v0.0.0-20241219192143-6b3ec007d9bb // indirect 132 | google.golang.org/genproto/googleapis/rpc v0.0.0-20241219192143-6b3ec007d9bb // indirect 133 | google.golang.org/grpc v1.69.2 // indirect 134 | google.golang.org/protobuf v1.36.0 // indirect 135 | gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect 136 | gopkg.in/inf.v0 v0.9.1 // indirect 137 | gopkg.in/yaml.v2 v2.4.0 // indirect 138 | gopkg.in/yaml.v3 v3.0.1 // indirect 139 | k8s.io/apiextensions-apiserver v0.32.0 // indirect 140 | k8s.io/apiserver v0.32.0 // indirect 141 | k8s.io/cli-runtime v0.32.0 // indirect 142 | k8s.io/component-base v0.32.0 // indirect 143 | k8s.io/klog/v2 v2.130.1 // indirect 144 | k8s.io/kube-openapi v0.0.0-20241212222426-2c72e554b1e7 // indirect 145 | k8s.io/utils v0.0.0-20241210054802-24370beab758 // indirect 146 | sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.1 // indirect 147 | sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect 148 | sigs.k8s.io/kustomize/api v0.18.0 // indirect 149 | sigs.k8s.io/kustomize/kyaml v0.18.1 // indirect 150 | sigs.k8s.io/structured-merge-diff/v4 v4.5.0 // indirect 151 | sigs.k8s.io/yaml v1.4.0 // indirect 152 | ) 153 | -------------------------------------------------------------------------------- /e2e/main_test.go: -------------------------------------------------------------------------------- 1 | package e2e 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "fmt" 7 | "os" 8 | "os/exec" 9 | "testing" 10 | "time" 11 | 12 | "github.com/hsbc/cost-manager/pkg/cloudprovider" 13 | cloudproviderfake "github.com/hsbc/cost-manager/pkg/cloudprovider/fake" 14 | "github.com/hsbc/cost-manager/pkg/kubernetes" 15 | corev1 "k8s.io/api/core/v1" 16 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 17 | "k8s.io/apimachinery/pkg/types" 18 | "sigs.k8s.io/controller-runtime/pkg/client" 19 | "sigs.k8s.io/controller-runtime/pkg/log" 20 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 21 | ) 22 | 23 | const ( 24 | kindClusterName = "cost-manager" 25 | ) 26 | 27 | func init() { 28 | log.SetLogger(zap.New()) 29 | } 30 | 31 | func TestMain(m *testing.M) { 32 | ctx := context.Background() 33 | logger := 
log.FromContext(ctx).WithName("e2e") 34 | 35 | // Parse flags 36 | image := flag.String("test.image", "cost-manager", "Local Docker image to test") 37 | flag.Parse() 38 | 39 | // Setup test suite 40 | err := setup(ctx, *image) 41 | if err != nil { 42 | logger.Error(err, "failed to setup E2E test suite") 43 | os.Exit(1) 44 | } 45 | 46 | code := m.Run() 47 | 48 | // If the E2E tests failed then we print some debug information 49 | if code > 0 { 50 | err := printDebugInformation() 51 | if err != nil { 52 | logger.Error(err, "failed to print debug information") 53 | os.Exit(1) 54 | } 55 | } 56 | 57 | // Teardown test suite 58 | err = teardown() 59 | if err != nil { 60 | logger.Error(err, "failed to teardown E2E test suite") 61 | os.Exit(1) 62 | } 63 | 64 | os.Exit(code) 65 | } 66 | 67 | func setup(ctx context.Context, image string) error { 68 | // Cleanup from any previous failed runs 69 | err := runCommand("kind", "delete", "cluster", "--name", kindClusterName) 70 | if err != nil { 71 | return err 72 | } 73 | 74 | // Create kind cluster 75 | err = createKindCluster(ctx) 76 | if err != nil { 77 | return err 78 | } 79 | 80 | // Load image into kind cluster 81 | err = runCommand("kind", "load", "docker-image", image, "--name", kindClusterName) 82 | if err != nil { 83 | return err 84 | } 85 | 86 | // Install CRDs 87 | err = runCommand("kubectl", "apply", "-f", "https://raw.githubusercontent.com/kubernetes/autoscaler/5469d7912072c1070eedc680c89e27d46b8f4f82/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml") 88 | if err != nil { 89 | return err 90 | } 91 | 92 | // Install Prometheus Operator and Prometheus 93 | err = installPrometheus() 94 | if err != nil { 95 | return err 96 | } 97 | 98 | // Install cost-manager 99 | err = installCostManager(ctx, image) 100 | if err != nil { 101 | return err 102 | } 103 | 104 | return nil 105 | } 106 | 107 | func createKindCluster(ctx context.Context) (rerr error) { 108 | err := runCommand("kind", "create", "cluster", "--name", kindClusterName, "--config", "./config/kind.yaml") 109 | if err != nil { 110 | return err 111 | } 112 | 113 | // Wait for all Nodes to be created 114 | kubeClient, _, err := kubernetes.NewClient() 115 | if err != nil { 116 | return err 117 | } 118 | for { 119 | nodeList := &corev1.NodeList{} 120 | err = kubeClient.List(ctx, nodeList) 121 | if err != nil { 122 | return err 123 | } 124 | if len(nodeList.Items) == 3 { 125 | break 126 | } 127 | time.Sleep(time.Second) 128 | } 129 | 130 | // Label all worker Nodes as spot Nodes until we are ready to test spot-migrator 131 | selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ 132 | MatchExpressions: []metav1.LabelSelectorRequirement{ 133 | { 134 | Key: "node-role.kubernetes.io/control-plane", 135 | Operator: "DoesNotExist", 136 | }, 137 | }, 138 | }) 139 | if err != nil { 140 | return err 141 | } 142 | nodeList := &corev1.NodeList{} 143 | err = kubeClient.List(ctx, nodeList, client.MatchingLabelsSelector{Selector: selector}) 144 | if err != nil { 145 | return err 146 | } 147 | for _, node := range nodeList.Items { 148 | patch := []byte(fmt.Sprintf(`{"metadata":{"labels":{"%s":"%s"}}}`, cloudproviderfake.SpotInstanceLabelKey, cloudproviderfake.SpotInstanceLabelValue)) 149 | err = kubeClient.Patch(ctx, &node, client.RawPatch(types.StrategicMergePatchType, patch)) 150 | if err != nil { 151 | return err 152 | } 153 | } 154 | 155 | return nil 156 | } 157 | 158 | func installCostManager(ctx context.Context, image string) (rerr error) { 159 | // Create temporary file to store Helm 
values 160 | valuesFile, err := os.CreateTemp("", "cost-manager-values-*.yaml") 161 | if err != nil { 162 | return err 163 | } 164 | defer func() { 165 | err := os.Remove(valuesFile.Name()) 166 | if rerr == nil { 167 | rerr = err 168 | } 169 | }() 170 | 171 | // Write Helm values 172 | _, err = valuesFile.WriteString(fmt.Sprintf(` 173 | image: 174 | repository: %s 175 | tag: "" 176 | pullPolicy: Never 177 | 178 | config: 179 | apiVersion: cost-manager.io/v1alpha1 180 | kind: CostManagerConfiguration 181 | controllers: 182 | - spot-migrator 183 | - pod-safe-to-evict-annotator 184 | cloudProvider: 185 | name: %s 186 | spotMigrator: 187 | migrationSchedule: "* * * * *" 188 | podSafeToEvictAnnotator: 189 | namespaceSelector: 190 | matchExpressions: 191 | - key: kubernetes.io/metadata.name 192 | operator: In 193 | values: 194 | - kube-system 195 | 196 | vpa: 197 | enabled: true 198 | 199 | prometheusRule: 200 | enabled: true 201 | 202 | podMonitor: 203 | enabled: true 204 | `, image, cloudprovider.FakeCloudProviderName)) 205 | if err != nil { 206 | return err 207 | } 208 | 209 | // Install cost-manager 210 | err = runCommand("helm", "upgrade", "--install", 211 | "cost-manager", "../charts/cost-manager", 212 | "--namespace", "cost-manager", "--create-namespace", 213 | "--values", valuesFile.Name(), 214 | "--wait", "--timeout", "2m") 215 | if err != nil { 216 | return err 217 | } 218 | 219 | // Wait for the cost-manager Deployment to become available 220 | kubeClient, _, err := kubernetes.NewClient() 221 | if err != nil { 222 | return err 223 | } 224 | return kubernetes.WaitUntilDeploymentAvailable(ctx, kubeClient, "cost-manager", "cost-manager") 225 | } 226 | 227 | func installPrometheus() (rerr error) { 228 | // Add prometheus-community Helm repository: 229 | // https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack#get-helm-repository-info 230 | err := runCommand("helm", "repo", "add", "prometheus-community", "https://prometheus-community.github.io/helm-charts") 231 | if err != nil { 232 | return err 233 | } 234 | err = runCommand("helm", "repo", "update", "prometheus-community") 235 | if err != nil { 236 | return err 237 | } 238 | 239 | // Install Prometheus Operator 240 | err = runCommand("helm", "upgrade", "--install", 241 | "kube-prometheus-stack", "prometheus-community/kube-prometheus-stack", 242 | "--namespace", "monitoring", "--create-namespace", 243 | "--values", "./config/kube-prometheus-stack-values.yaml", 244 | "--wait", "--timeout", "2m") 245 | if err != nil { 246 | return err 247 | } 248 | 249 | // Install Prometheus 250 | err = runCommand("kubectl", "apply", "-f", "./config/prometheus.yaml") 251 | if err != nil { 252 | return err 253 | } 254 | 255 | return nil 256 | } 257 | 258 | func teardown() error { 259 | err := runCommand("kind", "delete", "cluster", "--name", kindClusterName) 260 | if err != nil { 261 | return err 262 | } 263 | 264 | return nil 265 | } 266 | 267 | func printDebugInformation() error { 268 | err := runCommand("kubectl", "get", "nodes") 269 | if err != nil { 270 | return err 271 | } 272 | err = runCommand("kubectl", "describe", "deployment/cost-manager", "-n", "cost-manager") 273 | if err != nil { 274 | return err 275 | } 276 | err = runCommand("kubectl", "describe", "pod", "-n", "cost-manager", "-l", "app.kubernetes.io/name=cost-manager") 277 | if err != nil { 278 | return err 279 | } 280 | err = runCommand("kubectl", "logs", "-n", "cost-manager", "-l", "app.kubernetes.io/name=cost-manager") 281 | if err != nil { 282 | 
return err 283 | } 284 | return nil 285 | } 286 | 287 | func runCommand(name string, args ...string) error { 288 | cmd := exec.Command(name, args...) 289 | cmd.Stdout = os.Stdout 290 | cmd.Stderr = os.Stderr 291 | return cmd.Run() 292 | } 293 | -------------------------------------------------------------------------------- /e2e/spot_migrator_test.go: -------------------------------------------------------------------------------- 1 | package e2e 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "testing" 7 | "time" 8 | 9 | cloudproviderfake "github.com/hsbc/cost-manager/pkg/cloudprovider/fake" 10 | "github.com/hsbc/cost-manager/pkg/kubernetes" 11 | "github.com/hsbc/cost-manager/pkg/test" 12 | "github.com/prometheus/client_golang/api" 13 | prometheusv1 "github.com/prometheus/client_golang/api/prometheus/v1" 14 | "github.com/prometheus/common/model" 15 | "github.com/stretchr/testify/require" 16 | corev1 "k8s.io/api/core/v1" 17 | policyv1 "k8s.io/api/policy/v1" 18 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 19 | "k8s.io/apimachinery/pkg/types" 20 | "k8s.io/apimachinery/pkg/util/intstr" 21 | "k8s.io/apimachinery/pkg/util/wait" 22 | apiwatch "k8s.io/apimachinery/pkg/watch" 23 | "k8s.io/client-go/tools/watch" 24 | "sigs.k8s.io/controller-runtime/pkg/client" 25 | ) 26 | 27 | // TestSpotMigrator tests that spot-migrator successfully drains a worker Node while respecting 28 | // PodDisruptionBudgets and excludes control plane Nodes 29 | func TestSpotMigrator(t *testing.T) { 30 | t.Parallel() 31 | 32 | ctx := context.Background() 33 | 34 | kubeClient, restConfig, err := kubernetes.NewClient() 35 | require.NoError(t, err) 36 | 37 | // Port forward to Prometheus and create client using local forwarded port 38 | pod, err := kubernetes.WaitForAnyReadyPod(ctx, kubeClient, client.InNamespace("monitoring"), client.MatchingLabels{"app.kubernetes.io/name": "prometheus"}) 39 | require.NoError(t, err) 40 | forwardedPort, stop, err := kubernetes.PortForward(ctx, restConfig, pod.Namespace, pod.Name, 9090) 41 | require.NoError(t, err) 42 | defer func() { 43 | err := stop() 44 | require.NoError(t, err) 45 | }() 46 | prometheusAddress := fmt.Sprintf("http://127.0.0.1:%d", forwardedPort) 47 | prometheusClient, err := api.NewClient(api.Config{ 48 | Address: prometheusAddress, 49 | }) 50 | require.NoError(t, err) 51 | prometheusAPI := prometheusv1.NewAPI(prometheusClient) 52 | 53 | t.Log("Waiting for the failure metric to be scraped by Prometheus...") 54 | for { 55 | results, _, err := prometheusAPI.Query(ctx, `sum(cost_manager_spot_migrator_operation_failure_total{job="cost-manager",namespace="cost-manager"})`, time.Now()) 56 | require.NoError(t, err) 57 | if len(results.(model.Vector)) == 1 { 58 | break 59 | } 60 | time.Sleep(time.Second) 61 | } 62 | t.Log("Failure metric has been scraped by Prometheus!") 63 | 64 | // Find the worker Node to be drained by spot-migrator 65 | workerNodeSelector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ 66 | MatchExpressions: []metav1.LabelSelectorRequirement{ 67 | { 68 | Key: "node-role.kubernetes.io/control-plane", 69 | Operator: "DoesNotExist", 70 | }, 71 | { 72 | Key: "spot-migrator", 73 | Operator: "In", 74 | Values: []string{"true"}, 75 | }, 76 | }, 77 | }) 78 | require.NoError(t, err) 79 | nodeList := &corev1.NodeList{} 80 | err = kubeClient.List(ctx, nodeList, client.MatchingLabelsSelector{Selector: workerNodeSelector}) 81 | require.NoError(t, err) 82 | require.NotEmpty(t, nodeList.Items) 83 | nodeName := nodeList.Items[0].Name 84 | 85 | // Deploy a workload 
to the worker Node 86 | namespaceName := test.GenerateResourceName(t) 87 | namespace := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespaceName}} 88 | err = kubeClient.Create(ctx, namespace) 89 | require.NoError(t, err) 90 | deploymentName := namespaceName 91 | deployment, err := test.GenerateDeployment(namespaceName, deploymentName) 92 | require.NoError(t, err) 93 | deployment.Spec.Template.Spec.NodeSelector = map[string]string{"kubernetes.io/hostname": nodeName} 94 | deployment.Spec.Template.Spec.Tolerations = []corev1.Toleration{ 95 | { 96 | Key: "spot-migrator", 97 | Operator: corev1.TolerationOpEqual, 98 | Value: "true", 99 | Effect: corev1.TaintEffectNoSchedule, 100 | }, 101 | } 102 | err = kubeClient.Create(ctx, deployment) 103 | require.NoError(t, err) 104 | t.Logf("Waiting for Deployment %s/%s to become available...", deployment.Namespace, deployment.Name) 105 | err = kubernetes.WaitUntilDeploymentAvailable(ctx, kubeClient, namespaceName, deploymentName) 106 | require.NoError(t, err) 107 | t.Logf("Deployment %s/%s is available!", deployment.Namespace, deployment.Name) 108 | 109 | // Create PodDisruptionBudget... 110 | zero := intstr.FromInt(0) 111 | pdb := &policyv1.PodDisruptionBudget{ 112 | ObjectMeta: metav1.ObjectMeta{ 113 | Name: deploymentName, 114 | Namespace: namespaceName, 115 | }, 116 | Spec: policyv1.PodDisruptionBudgetSpec{ 117 | MaxUnavailable: &zero, 118 | Selector: &metav1.LabelSelector{ 119 | MatchLabels: map[string]string{"app.kubernetes.io/name": deploymentName}, 120 | }, 121 | }, 122 | } 123 | err = kubeClient.Create(ctx, pdb) 124 | require.NoError(t, err) 125 | // ...and wait until it's blocking eviction 126 | pdbName := pdb.Name 127 | listerWatcher := kubernetes.NewListerWatcher(ctx, kubeClient, &policyv1.PodDisruptionBudgetList{}, &client.ListOptions{Namespace: pdb.Namespace}) 128 | condition := func(event apiwatch.Event) (bool, error) { 129 | pdb, err := kubernetes.ParseWatchEventObject[*policyv1.PodDisruptionBudget](event) 130 | if err != nil { 131 | return false, err 132 | } 133 | return pdb.Name == pdbName && pdb.Status.DisruptionsAllowed == 0 && pdb.Generation == pdb.Status.ObservedGeneration, nil 134 | } 135 | _, err = watch.UntilWithSync(ctx, listerWatcher, &policyv1.PodDisruptionBudget{}, nil, condition) 136 | require.NoError(t, err) 137 | 138 | // Label worker Node as an on-demand Node to give spot-migrator something to drain 139 | node := &corev1.Node{} 140 | err = kubeClient.Get(ctx, types.NamespacedName{Name: nodeName}, node) 141 | require.NoError(t, err) 142 | patch := []byte(fmt.Sprintf(`{"metadata":{"labels":{"%s":"false"}}}`, cloudproviderfake.SpotInstanceLabelKey)) 143 | err = kubeClient.Patch(ctx, node, client.RawPatch(types.StrategicMergePatchType, patch)) 144 | require.NoError(t, err) 145 | 146 | // Wait for the Node to be marked as unschedulable. 
This should not take any longer than 2 147 | // minutes since spot-migrator is configured with a 1 minute migration interval 148 | t.Logf("Waiting for Node %s to be marked as unschedulable...", nodeName) 149 | ctxWithTimeout, cancel := context.WithTimeout(ctx, 2*time.Minute) 150 | defer cancel() 151 | listerWatcher = kubernetes.NewListerWatcher(ctx, kubeClient, &corev1.NodeList{}) 152 | condition = func(event apiwatch.Event) (bool, error) { 153 | node, err := kubernetes.ParseWatchEventObject[*corev1.Node](event) 154 | if err != nil { 155 | return false, err 156 | } 157 | return node.Name == nodeName && node.Spec.Unschedulable, nil 158 | } 159 | _, err = watch.UntilWithSync(ctxWithTimeout, listerWatcher, &corev1.Node{}, nil, condition) 160 | require.NoError(t, err) 161 | t.Logf("Node %s marked as unschedulable!", nodeName) 162 | 163 | // Make sure that the PodDisruptionBudget blocks eviction 164 | t.Logf("Ensuring that Deployment %s/%s is not evicted...", deployment.Namespace, deployment.Name) 165 | ctxWithTimeout, cancel = context.WithTimeout(ctx, 10*time.Second) 166 | defer cancel() 167 | err = kubernetes.WaitUntilDeploymentUnavailable(ctxWithTimeout, kubeClient, namespaceName, deploymentName) 168 | require.True(t, wait.Interrupted(err)) 169 | t.Logf("Deployment %s/%s has not been evicted!", deployment.Namespace, deployment.Name) 170 | 171 | // Delete PodDisruptionBudget... 172 | err = kubeClient.Delete(ctx, pdb) 173 | require.NoError(t, err) 174 | // ...and wait for the Deployment to become unavailable 175 | t.Logf("Waiting for Deployment %s/%s to become unavailable...", deployment.Namespace, deployment.Name) 176 | err = kubernetes.WaitUntilDeploymentUnavailable(ctx, kubeClient, namespaceName, deploymentName) 177 | require.NoError(t, err) 178 | t.Logf("Deployment %s/%s is unavailable!", deployment.Namespace, deployment.Name) 179 | 180 | // Delete Node; typically this would be done by the node controller but we simulate it here: 181 | // https://github.com/hsbc/cost-manager/blob/bf176ada100e19a765d276aee1a0a2d6038275e0/pkg/controller/spot_migrator.go#L242-L250 182 | // We delay deletion for a short period of time to ensure that any issues with draining the Node 183 | // or waiting for Node deletion are discovered due to the failure metric being incremented 184 | time.Sleep(10 * time.Second) 185 | err = kubeClient.Delete(ctx, node) 186 | require.NoError(t, err) 187 | 188 | // Delete Namespace 189 | err = kubeClient.Delete(ctx, namespace) 190 | require.NoError(t, err) 191 | 192 | // Make sure that the failure metric was never incremented 193 | results, _, err := prometheusAPI.Query(ctx, `sum(sum_over_time(cost_manager_spot_migrator_operation_failure_total{job="cost-manager",namespace="cost-manager"}[1h]))`, time.Now()) 194 | require.NoError(t, err) 195 | require.Len(t, results.(model.Vector), 1) 196 | require.InDelta(t, 0, float64(results.(model.Vector)[0].Value), 0.5, "spot-migrator failure metric was incremented!") 197 | 198 | // Finally, we verify that all control plane Nodes are still schedulable 199 | controlPlaneNodeSelector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ 200 | MatchExpressions: []metav1.LabelSelectorRequirement{ 201 | { 202 | Key: "node-role.kubernetes.io/control-plane", 203 | Operator: "Exists", 204 | }, 205 | }, 206 | }) 207 | require.NoError(t, err) 208 | nodeList = &corev1.NodeList{} 209 | err = kubeClient.List(ctx, nodeList, client.MatchingLabelsSelector{Selector: controlPlaneNodeSelector}) 210 | require.NoError(t, err) 211 | require.NotEmpty(t, 
nodeList.Items) 212 | for _, node := range nodeList.Items { 213 | require.False(t, node.Spec.Unschedulable) 214 | } 215 | } 216 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /pkg/controller/spot_migrator.go: -------------------------------------------------------------------------------- 1 | package controller 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "sort" 8 | "time" 9 | 10 | "github.com/hsbc/cost-manager/pkg/api/v1alpha1" 11 | "github.com/hsbc/cost-manager/pkg/cloudprovider" 12 | "github.com/hsbc/cost-manager/pkg/kubernetes" 13 | "github.com/pkg/errors" 14 | "github.com/prometheus/client_golang/prometheus" 15 | "github.com/prometheus/client_golang/prometheus/promauto" 16 | "github.com/robfig/cron/v3" 17 | corev1 "k8s.io/api/core/v1" 18 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 19 | "k8s.io/apimachinery/pkg/types" 20 | clientgo "k8s.io/client-go/kubernetes" 21 | "k8s.io/client-go/util/retry" 22 | "sigs.k8s.io/controller-runtime/pkg/log" 23 | "sigs.k8s.io/controller-runtime/pkg/manager" 24 | "sigs.k8s.io/controller-runtime/pkg/metrics" 25 | ) 26 | 27 | const ( 28 | spotMigratorControllerName = "spot-migrator" 29 | 30 | // Running spot migration hourly seems like a good tradeoff between cluster stability and 31 | // reactivity to spot availability. 
Note that this will schedule on the hour rather than 32 | // relative to the current time; this ensures that spot migration still has a good chance of 33 | // running even if cost-manager is being restarted regularly: 34 | // https://pkg.go.dev/github.com/robfig/cron#hdr-Predefined_schedules 35 | defaultMigrationSchedule = "@hourly" 36 | 37 | // https://kubernetes.io/docs/reference/labels-annotations-taints/#node-role-kubernetes-io-control-plane 38 | controlPlaneNodeRoleLabelKey = "node-role.kubernetes.io/control-plane" 39 | ) 40 | 41 | var ( 42 | spotMigratorOperationSuccessTotal = promauto.NewCounter(prometheus.CounterOpts{ 43 | Name: "cost_manager_spot_migrator_operation_success_total", 44 | Help: "The total number of successful spot-migrator operations", 45 | }) 46 | spotMigratorOperationFailureTotal = promauto.NewCounter(prometheus.CounterOpts{ 47 | Name: "cost_manager_spot_migrator_operation_failure_total", 48 | Help: "The total number of failed spot-migrator operations", 49 | }) 50 | 51 | // Label to add to Nodes before draining to allow them to be identified if we are restarted 52 | nodeSelectedForDeletionLabelKey = fmt.Sprintf("%s/%s", v1alpha1.GroupName, "selected-for-deletion") 53 | ) 54 | 55 | // spotMigrator periodically drains on-demand Nodes in an attempt to migrate workloads to spot 56 | // Nodes; this works because draining Nodes will eventually trigger cluster scale up and the cluster 57 | // autoscaler attempts to scale up the least expensive node pool, taking into account the reduced 58 | // cost of spot Nodes: 59 | // https://github.com/kubernetes/autoscaler/blob/600cda52cf764a1f08b06fc8cc29b1ef95f13c76/cluster-autoscaler/proposals/pricing.md 60 | type spotMigrator struct { 61 | Config *v1alpha1.SpotMigrator 62 | Clientset clientgo.Interface 63 | CloudProvider cloudprovider.CloudProvider 64 | } 65 | 66 | var _ manager.Runnable = &spotMigrator{} 67 | 68 | // Start starts spot-migrator and blocks until the context is cancelled 69 | func (sm *spotMigrator) Start(ctx context.Context) error { 70 | logger := log.FromContext(ctx).WithName(spotMigratorControllerName) 71 | ctx = log.IntoContext(ctx, logger) 72 | 73 | // Register Prometheus metrics 74 | metrics.Registry.MustRegister(spotMigratorOperationSuccessTotal) 75 | metrics.Registry.MustRegister(spotMigratorOperationFailureTotal) 76 | 77 | // Parse migration schedule 78 | migrationSchedule := defaultMigrationSchedule 79 | if sm.Config != nil && sm.Config.MigrationSchedule != nil { 80 | migrationSchedule = *sm.Config.MigrationSchedule 81 | } 82 | parsedMigrationSchedule, err := parseMigrationSchedule(migrationSchedule) 83 | if err != nil { 84 | return fmt.Errorf("failed to parse migration schedule: %s", err) 85 | } 86 | 87 | // If spot-migrator drains itself then any ongoing migration operations will be cancelled. To 88 | // mitigate this we first drain and delete any Nodes that have previously been selected for 89 | // deletion. 
Note that we do not run a full migration in this case because otherwise we could 90 | // get stuck in a continuous loop of draining and deleting the Node that spot-migrator is 91 | // running on; we will need to wait for the next schedule time for the migration to continue 92 | onDemandNodes, err := sm.listOnDemandNodes(ctx) 93 | if err != nil { 94 | return err 95 | } 96 | for _, onDemandNode := range onDemandNodes { 97 | if isSelectedForDeletion(onDemandNode) { 98 | err = sm.drainAndDeleteNode(ctx, onDemandNode) 99 | if err != nil { 100 | return err 101 | } 102 | } 103 | } 104 | 105 | for { 106 | // Wait until the next schedule time or the context is cancelled 107 | now := time.Now() 108 | nextScheduleTime := parsedMigrationSchedule.Next(now) 109 | sleepDuration := nextScheduleTime.Sub(now) 110 | logger.WithValues("sleepDuration", sleepDuration.String()).Info("Waiting before next spot migration") 111 | select { 112 | case <-time.After(sleepDuration): 113 | case <-ctx.Done(): 114 | return nil 115 | } 116 | 117 | err := sm.run(ctx) 118 | if err != nil { 119 | // We do not return the error to make sure other cost-manager processes/controllers 120 | // continue to run; we rely on Prometheus metrics to alert us to failures 121 | logger.Error(err, "Failed to run spot migration") 122 | spotMigratorOperationFailureTotal.Inc() 123 | } 124 | } 125 | } 126 | 127 | func parseMigrationSchedule(migrationSchedule string) (cron.Schedule, error) { 128 | return cron.ParseStandard(migrationSchedule) 129 | } 130 | 131 | // run runs spot migration 132 | func (sm *spotMigrator) run(ctx context.Context) error { 133 | logger := log.FromContext(ctx) 134 | for { 135 | // If the context has been cancelled then return instead of continuing with the migration 136 | select { 137 | case <-ctx.Done(): 138 | return nil 139 | default: 140 | } 141 | 142 | // List on-demand Nodes before draining 143 | beforeDrainOnDemandNodes, err := sm.listOnDemandNodes(ctx) 144 | if err != nil { 145 | return err 146 | } 147 | 148 | // If there are no on-demand Nodes then we are done 149 | if len(beforeDrainOnDemandNodes) == 0 { 150 | // Increment success metric since all workloads are already running on spot Nodes 151 | spotMigratorOperationSuccessTotal.Inc() 152 | return nil 153 | } 154 | 155 | // Select one of the on-demand Nodes to delete 156 | onDemandNode, err := selectNodeForDeletion(beforeDrainOnDemandNodes) 157 | if err != nil { 158 | return err 159 | } 160 | 161 | // Just before we drain and delete the Node we label it. 
If we happen to drain ourself this 162 | // will allow us to identify the Node again and continue after rescheduling 163 | err = sm.addSelectedForDeletionLabel(ctx, onDemandNode.Name) 164 | if err != nil { 165 | return err 166 | } 167 | 168 | // Drain and delete Node 169 | err = sm.drainAndDeleteNode(ctx, onDemandNode) 170 | if err != nil { 171 | return err 172 | } 173 | 174 | // List on-demand Nodes after draining 175 | afterDrainOnDemandNodes, err := sm.listOnDemandNodes(ctx) 176 | if err != nil { 177 | return err 178 | } 179 | 180 | // If any on-demand Nodes were created while draining then we assume that there are no more 181 | // spot VMs available and that spot migration is complete 182 | if nodeCreated(beforeDrainOnDemandNodes, afterDrainOnDemandNodes) { 183 | logger.Info("Spot migration complete") 184 | return nil 185 | } 186 | } 187 | } 188 | 189 | // listOnDemandNodes lists all Nodes that are not backed by a spot instance 190 | func (sm *spotMigrator) listOnDemandNodes(ctx context.Context) ([]*corev1.Node, error) { 191 | nodeList, err := sm.Clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) 192 | if err != nil { 193 | return nil, err 194 | } 195 | onDemandNodes := []*corev1.Node{} 196 | for _, node := range nodeList.Items { 197 | // We always ignore control plane Nodes to make sure that we do not drain them 198 | if isControlPlaneNode(&node) { 199 | continue 200 | } 201 | isSpotInstance, err := sm.CloudProvider.IsSpotInstance(ctx, &node) 202 | if err != nil { 203 | return onDemandNodes, err 204 | } 205 | if !isSpotInstance { 206 | onDemandNodes = append(onDemandNodes, node.DeepCopy()) 207 | } 208 | } 209 | return onDemandNodes, nil 210 | } 211 | 212 | // isControlPlaneNode returns true if the Node is part of the Kubernetes control plane 213 | func isControlPlaneNode(node *corev1.Node) bool { 214 | _, ok := node.Labels[controlPlaneNodeRoleLabelKey] 215 | return ok 216 | } 217 | 218 | // drainAndDeleteNode drains the specified Node and deletes the underlying instance 219 | func (sm *spotMigrator) drainAndDeleteNode(ctx context.Context, node *corev1.Node) error { 220 | logger := log.FromContext(ctx, "node", node.Name) 221 | 222 | logger.Info("Draining Node") 223 | err := kubernetes.DrainNode(ctx, sm.Clientset, node) 224 | if err != nil { 225 | return err 226 | } 227 | logger.Info("Drained Node successfully") 228 | 229 | logger.Info("Adding taint ToBeDeletedByClusterAutoscaler") 230 | err = sm.addToBeDeletedTaint(ctx, node) 231 | if err != nil { 232 | return err 233 | } 234 | logger.Info("Taint ToBeDeletedByClusterAutoscaler added successfully") 235 | 236 | logger.Info("Deleting instance") 237 | err = sm.CloudProvider.DeleteInstance(ctx, node) 238 | if err != nil { 239 | return err 240 | } 241 | logger.Info("Instance deleted successfully") 242 | 243 | // Since the underlying instance has been deleted we expect the Node object to be deleted from 244 | // the Kubernetes API server by the node controller: 245 | // https://kubernetes.io/docs/concepts/architecture/cloud-controller/#node-controller 246 | logger.Info("Waiting for Node object to be deleted") 247 | err = kubernetes.WaitForNodeToBeDeleted(ctx, sm.Clientset, node.Name) 248 | if err != nil { 249 | return err 250 | } 251 | logger.Info("Node deleted") 252 | 253 | // Increment success metric 254 | spotMigratorOperationSuccessTotal.Inc() 255 | 256 | return nil 257 | } 258 | 259 | func (sm *spotMigrator) addSelectedForDeletionLabel(ctx context.Context, nodeName string) error { 260 | patch := 
[]byte(fmt.Sprintf(`{"metadata":{"labels":{"%s":"true"}}}`, nodeSelectedForDeletionLabelKey)) 261 | _, err := sm.Clientset.CoreV1().Nodes().Patch(ctx, nodeName, types.StrategicMergePatchType, patch, metav1.PatchOptions{}) 262 | if err != nil { 263 | return err 264 | } 265 | return nil 266 | } 267 | 268 | func isSelectedForDeletion(node *corev1.Node) bool { 269 | if node.Labels == nil { 270 | return false 271 | } 272 | value, ok := node.Labels[nodeSelectedForDeletionLabelKey] 273 | return ok && value == "true" 274 | } 275 | 276 | // addToBeDeletedTaint adds the ToBeDeletedByClusterAutoscaler taint to the Node to tell kube-proxy 277 | // to start failing its healthz and subsequently load balancer health checks depending on provider: 278 | // https://github.com/kubernetes/enhancements/tree/27ef0d9a740ae5058472aac4763483f0e7218c0e/keps/sig-network/3836-kube-proxy-improved-ingress-connectivity-reliability 279 | func (sm *spotMigrator) addToBeDeletedTaint(ctx context.Context, node *corev1.Node) error { 280 | return retry.RetryOnConflict(retry.DefaultRetry, func() error { 281 | node, err := sm.Clientset.CoreV1().Nodes().Get(ctx, node.Name, metav1.GetOptions{}) 282 | if err != nil { 283 | return err 284 | } 285 | 286 | hasToBeDeletedTaint := false 287 | for _, taint := range node.Spec.Taints { 288 | if taint.Key == kubernetes.ToBeDeletedTaint { 289 | hasToBeDeletedTaint = true 290 | break 291 | } 292 | } 293 | if !hasToBeDeletedTaint { 294 | // https://github.com/kubernetes/autoscaler/blob/5bf33b23f2bcf5f9c8ccaf99d445e25366ee7f40/cluster-autoscaler/utils/taints/taints.go#L166-L174 295 | node.Spec.Taints = append(node.Spec.Taints, corev1.Taint{ 296 | Key: kubernetes.ToBeDeletedTaint, 297 | Value: fmt.Sprint(time.Now().Unix()), 298 | Effect: corev1.TaintEffectNoSchedule, 299 | }) 300 | _, err := sm.Clientset.CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{}) 301 | if err != nil { 302 | return err 303 | } 304 | } 305 | 306 | return nil 307 | }) 308 | } 309 | 310 | // selectNodeForDeletion attempts to find the best Node to delete using the following algorithm: 311 | // 1. If there are any Nodes that have previously been selected for deletion then return the oldest 312 | // 2. Otherwise if there are any unschedulable Nodes then return the oldest 313 | // 3. Otherwise if there are any Nodes marked for deletion or as deletion candidates by the cluster autoscaler then return the oldest 314 | // 4. Otherwise if there are any Nodes that are not running spot-migrator then return the oldest 315 | // 5. Otherwise return the oldest Node 316 | func selectNodeForDeletion(nodes []*corev1.Node) (*corev1.Node, error) { 317 | // There should always be at least 1 Node to select from 318 | if len(nodes) == 0 { 319 | return nil, errors.New("failed to select Node from empty list") 320 | } 321 | 322 | // Sort the Nodes in the order in which they were created 323 | sort.Slice(nodes, func(i, j int) bool { 324 | iTime := nodes[i].CreationTimestamp.Time 325 | jTime := nodes[j].CreationTimestamp.Time 326 | return iTime.Before(jTime) 327 | }) 328 | 329 | // If any Nodes have previously been selected for deletion then return the first one.
Note that 330 | // all such Nodes should have already been drained and deleted when spot-migrator started up 331 | for _, node := range nodes { 332 | if isSelectedForDeletion(node) { 333 | return node, nil 334 | } 335 | } 336 | 337 | // If any Nodes are unschedulable then return the first one; this reduces the chance of having 338 | // more than one unschedulable Node at any one time 339 | for _, node := range nodes { 340 | if node.Spec.Unschedulable { 341 | return node, nil 342 | } 343 | } 344 | 345 | // If any Nodes are about to be deleted by the cluster autoscaler then return the first one; 346 | // this reduces the chance of having more than one Node being drained at the same time 347 | for _, node := range nodes { 348 | for _, taint := range node.Spec.Taints { 349 | // https://github.com/kubernetes/autoscaler/blob/299c9637229fb2bf849c1d86243fe2948d14101e/cluster-autoscaler/utils/taints/taints.go#L119 350 | if taint.Key == kubernetes.ToBeDeletedTaint && taint.Effect == corev1.TaintEffectNoSchedule { 351 | return node, nil 352 | } 353 | } 354 | } 355 | 356 | // If any Nodes are candidates for deletion by the cluster autoscaler then return the first one; 357 | // this reduces the chance of having more than one Node being drained at the same time 358 | for _, node := range nodes { 359 | for _, taint := range node.Spec.Taints { 360 | // https://github.com/kubernetes/autoscaler/blob/299c9637229fb2bf849c1d86243fe2948d14101e/cluster-autoscaler/utils/taints/taints.go#L124 361 | if taint.Key == kubernetes.DeletionCandidateTaint && taint.Effect == corev1.TaintEffectPreferNoSchedule { 362 | return node, nil 363 | } 364 | } 365 | } 366 | 367 | // If any Nodes are not running spot-migrator then we return the first one; this reduces the 368 | // chance of spot-migrator draining itself and cancelling an ongoing migration operation. Note 369 | // that there is a very small possibility that the Node that spot-migrator is running on is the 370 | // only on-demand Node remaining in the cluster that could be drained to trigger the addition of 371 | // a new spot Node (e.g.
if all other on-demand Nodes are running Pods that cannot be scheduled 372 | // to spot Nodes) however this seems like the lesser evil compared to potentially repeatedly 373 | // cancelling migration operations 374 | for _, node := range nodes { 375 | if node.Name != os.Getenv("NODE_NAME") { 376 | return node, nil 377 | } 378 | } 379 | 380 | return nodes[0], nil 381 | } 382 | 383 | // nodeCreated compares the list of Nodes before and after to determine if any Nodes were created 384 | func nodeCreated(beforeNodes, afterNodes []*corev1.Node) bool { 385 | for _, afterNode := range afterNodes { 386 | nodeCreated := true 387 | for _, beforeNode := range beforeNodes { 388 | // We compare the UID to detect if a Node object was recreated with the same name 389 | if beforeNode.UID == afterNode.UID { 390 | nodeCreated = false 391 | break 392 | } 393 | } 394 | if nodeCreated { 395 | return true 396 | } 397 | } 398 | return false 399 | } 400 | -------------------------------------------------------------------------------- /pkg/controller/spot_migrator_test.go: -------------------------------------------------------------------------------- 1 | package controller 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "testing" 7 | "time" 8 | 9 | cloudproviderfake "github.com/hsbc/cost-manager/pkg/cloudprovider/fake" 10 | "github.com/stretchr/testify/require" 11 | corev1 "k8s.io/api/core/v1" 12 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 13 | "k8s.io/apimachinery/pkg/runtime" 14 | "k8s.io/apimachinery/pkg/types" 15 | "k8s.io/client-go/kubernetes/fake" 16 | "sigs.k8s.io/controller-runtime/pkg/metrics" 17 | ) 18 | 19 | func TestSpotMigratorNodeCreatedFalseOnNoChange(t *testing.T) { 20 | nodes := []*corev1.Node{ 21 | { 22 | ObjectMeta: metav1.ObjectMeta{ 23 | UID: types.UID("1"), 24 | }, 25 | }, 26 | } 27 | require.False(t, nodeCreated(nodes, nodes)) 28 | } 29 | 30 | func TestSpotMigratorNodeCreatedFalseOnNodeRemoved(t *testing.T) { 31 | beforeNodes := []*corev1.Node{ 32 | { 33 | ObjectMeta: metav1.ObjectMeta{ 34 | UID: types.UID("1"), 35 | }, 36 | }, 37 | { 38 | ObjectMeta: metav1.ObjectMeta{ 39 | UID: types.UID("2"), 40 | }, 41 | }, 42 | } 43 | afterNodes := []*corev1.Node{ 44 | { 45 | ObjectMeta: metav1.ObjectMeta{ 46 | UID: types.UID("1"), 47 | }, 48 | }, 49 | } 50 | require.False(t, nodeCreated(beforeNodes, afterNodes)) 51 | } 52 | 53 | func TestSpotMigratorNodeCreatedTrueOnNodeCreate(t *testing.T) { 54 | beforeNodes := []*corev1.Node{ 55 | { 56 | ObjectMeta: metav1.ObjectMeta{ 57 | UID: types.UID("1"), 58 | }, 59 | }, 60 | } 61 | afterNodes := []*corev1.Node{ 62 | { 63 | ObjectMeta: metav1.ObjectMeta{ 64 | UID: types.UID("1"), 65 | }, 66 | }, 67 | { 68 | ObjectMeta: metav1.ObjectMeta{ 69 | UID: types.UID("2"), 70 | }, 71 | }, 72 | } 73 | require.True(t, nodeCreated(beforeNodes, afterNodes)) 74 | } 75 | 76 | func TestSpotMigratorSelectNodeForDeletionErrorOnEmptyList(t *testing.T) { 77 | nodes := []*corev1.Node{} 78 | _, err := selectNodeForDeletion(nodes) 79 | require.Error(t, err) 80 | } 81 | 82 | func TestSpotMigratorSelectNodeForDeletionPreferOldest(t *testing.T) { 83 | nodes := []*corev1.Node{ 84 | { 85 | ObjectMeta: metav1.ObjectMeta{ 86 | Name: "secondoldest", 87 | CreationTimestamp: metav1.Time{ 88 | Time: time.Now().Add(2 * time.Hour), 89 | }, 90 | }, 91 | Spec: corev1.NodeSpec{ 92 | ProviderID: "gce://secondoldest", 93 | }, 94 | }, 95 | { 96 | ObjectMeta: metav1.ObjectMeta{ 97 | Name: "oldest", 98 | CreationTimestamp: metav1.Time{ 99 | Time: time.Now().Add(1 * time.Hour), 100 | }, 101 | }, 102 | 
Spec: corev1.NodeSpec{ 103 | ProviderID: "gce://oldest", 104 | }, 105 | }, 106 | { 107 | ObjectMeta: metav1.ObjectMeta{ 108 | Name: "thirdoldest", 109 | CreationTimestamp: metav1.Time{ 110 | Time: time.Now().Add(3 * time.Hour), 111 | }, 112 | }, 113 | Spec: corev1.NodeSpec{ 114 | ProviderID: "gce://thirdoldest", 115 | }, 116 | }, 117 | } 118 | node, err := selectNodeForDeletion(nodes) 119 | require.NoError(t, err) 120 | require.Equal(t, "oldest", node.Name) 121 | } 122 | 123 | func TestSpotMigratorSelectNodeForDeletionDoNotPreferLocalNode(t *testing.T) { 124 | err := os.Setenv("NODE_NAME", "oldest") 125 | require.NoError(t, err) 126 | nodes := []*corev1.Node{ 127 | { 128 | ObjectMeta: metav1.ObjectMeta{ 129 | Name: "secondoldest", 130 | CreationTimestamp: metav1.Time{ 131 | Time: time.Now().Add(2 * time.Hour), 132 | }, 133 | }, 134 | Spec: corev1.NodeSpec{ 135 | ProviderID: "gce://secondoldest", 136 | }, 137 | }, 138 | { 139 | ObjectMeta: metav1.ObjectMeta{ 140 | Name: "oldest", 141 | CreationTimestamp: metav1.Time{ 142 | Time: time.Now().Add(1 * time.Hour), 143 | }, 144 | }, 145 | Spec: corev1.NodeSpec{ 146 | ProviderID: "gce://oldest", 147 | }, 148 | }, 149 | { 150 | ObjectMeta: metav1.ObjectMeta{ 151 | Name: "thirdoldest", 152 | CreationTimestamp: metav1.Time{ 153 | Time: time.Now().Add(3 * time.Hour), 154 | }, 155 | }, 156 | Spec: corev1.NodeSpec{ 157 | ProviderID: "gce://thirdoldest", 158 | }, 159 | }, 160 | } 161 | node, err := selectNodeForDeletion(nodes) 162 | require.NoError(t, err) 163 | require.Equal(t, "secondoldest", node.Name) 164 | } 165 | 166 | func TestSpotMigratorSelectNodeForDeletionPreferNodesMarkedPreferNoScheduleByClusterAutoscaler(t *testing.T) { 167 | nodes := []*corev1.Node{ 168 | { 169 | ObjectMeta: metav1.ObjectMeta{ 170 | Name: "secondoldest", 171 | CreationTimestamp: metav1.Time{ 172 | Time: time.Now().Add(2 * time.Hour), 173 | }, 174 | }, 175 | Spec: corev1.NodeSpec{ 176 | ProviderID: "gce://secondoldest", 177 | Taints: []corev1.Taint{ 178 | { 179 | Key: "DeletionCandidateOfClusterAutoscaler", 180 | Effect: corev1.TaintEffectPreferNoSchedule, 181 | }, 182 | }, 183 | }, 184 | }, 185 | { 186 | ObjectMeta: metav1.ObjectMeta{ 187 | Name: "oldest", 188 | CreationTimestamp: metav1.Time{ 189 | Time: time.Now().Add(1 * time.Hour), 190 | }, 191 | }, 192 | Spec: corev1.NodeSpec{ 193 | ProviderID: "gce://oldest", 194 | }, 195 | }, 196 | { 197 | ObjectMeta: metav1.ObjectMeta{ 198 | Name: "thirdoldest", 199 | CreationTimestamp: metav1.Time{ 200 | Time: time.Now().Add(3 * time.Hour), 201 | }, 202 | }, 203 | Spec: corev1.NodeSpec{ 204 | ProviderID: "gce://thirdoldest", 205 | }, 206 | }, 207 | } 208 | node, err := selectNodeForDeletion(nodes) 209 | require.NoError(t, err) 210 | require.Equal(t, "secondoldest", node.Name) 211 | } 212 | 213 | func TestSpotMigratorSelectNodeForDeletionPreferNodesMarkedNoScheduleByClusterAutoscaler(t *testing.T) { 214 | nodes := []*corev1.Node{ 215 | { 216 | ObjectMeta: metav1.ObjectMeta{ 217 | Name: "secondoldest", 218 | CreationTimestamp: metav1.Time{ 219 | Time: time.Now().Add(2 * time.Hour), 220 | }, 221 | }, 222 | Spec: corev1.NodeSpec{ 223 | ProviderID: "gce://secondoldest", 224 | Taints: []corev1.Taint{ 225 | { 226 | Key: "DeletionCandidateOfClusterAutoscaler", 227 | Effect: corev1.TaintEffectPreferNoSchedule, 228 | }, 229 | }, 230 | }, 231 | }, 232 | { 233 | ObjectMeta: metav1.ObjectMeta{ 234 | Name: "oldest", 235 | CreationTimestamp: metav1.Time{ 236 | Time: time.Now().Add(1 * time.Hour), 237 | }, 238 | }, 239 | Spec: corev1.NodeSpec{ 
240 | ProviderID: "gce://oldest", 241 | }, 242 | }, 243 | { 244 | ObjectMeta: metav1.ObjectMeta{ 245 | Name: "thirdoldest", 246 | CreationTimestamp: metav1.Time{ 247 | Time: time.Now().Add(2 * time.Hour), 248 | }, 249 | }, 250 | Spec: corev1.NodeSpec{ 251 | ProviderID: "gce://thirdoldest", 252 | Taints: []corev1.Taint{ 253 | { 254 | Key: "ToBeDeletedByClusterAutoscaler", 255 | Effect: corev1.TaintEffectNoSchedule, 256 | }, 257 | }, 258 | }, 259 | }, 260 | } 261 | node, err := selectNodeForDeletion(nodes) 262 | require.NoError(t, err) 263 | require.Equal(t, "thirdoldest", node.Name) 264 | } 265 | 266 | func TestSpotMigratorSelectNodeForDeletionPreferUnschedulable(t *testing.T) { 267 | nodes := []*corev1.Node{ 268 | { 269 | ObjectMeta: metav1.ObjectMeta{ 270 | CreationTimestamp: metav1.Time{ 271 | Time: time.Now().Add(2 * time.Hour), 272 | }, 273 | }, 274 | Spec: corev1.NodeSpec{ 275 | Unschedulable: false, 276 | }, 277 | }, 278 | { 279 | ObjectMeta: metav1.ObjectMeta{ 280 | CreationTimestamp: metav1.Time{ 281 | Time: time.Now().Add(3 * time.Hour), 282 | }, 283 | }, 284 | Spec: corev1.NodeSpec{ 285 | Unschedulable: true, 286 | }, 287 | }, 288 | { 289 | ObjectMeta: metav1.ObjectMeta{ 290 | Name: "oldest", 291 | CreationTimestamp: metav1.Time{ 292 | Time: time.Now().Add(1 * time.Hour), 293 | }, 294 | }, 295 | Spec: corev1.NodeSpec{ 296 | Unschedulable: false, 297 | }, 298 | }, 299 | } 300 | node, err := selectNodeForDeletion(nodes) 301 | require.NoError(t, err) 302 | require.True(t, node.Spec.Unschedulable) 303 | } 304 | 305 | func TestSpotMigratorSelectNodeForDeletionPreferSelectedForDeletion(t *testing.T) { 306 | nodes := []*corev1.Node{ 307 | { 308 | ObjectMeta: metav1.ObjectMeta{ 309 | CreationTimestamp: metav1.Time{ 310 | Time: time.Now().Add(3 * time.Hour), 311 | }, 312 | }, 313 | Spec: corev1.NodeSpec{ 314 | Unschedulable: true, 315 | }, 316 | }, 317 | { 318 | ObjectMeta: metav1.ObjectMeta{ 319 | CreationTimestamp: metav1.Time{ 320 | Time: time.Now().Add(2 * time.Hour), 321 | }, 322 | Labels: map[string]string{ 323 | "cost-manager.io/selected-for-deletion": "true", 324 | }, 325 | }, 326 | Spec: corev1.NodeSpec{ 327 | Unschedulable: false, 328 | }, 329 | }, 330 | { 331 | ObjectMeta: metav1.ObjectMeta{ 332 | Name: "oldest", 333 | CreationTimestamp: metav1.Time{ 334 | Time: time.Now().Add(1 * time.Hour), 335 | }, 336 | }, 337 | Spec: corev1.NodeSpec{ 338 | Unschedulable: false, 339 | }, 340 | }, 341 | } 342 | node, err := selectNodeForDeletion(nodes) 343 | require.NoError(t, err) 344 | require.True(t, isSelectedForDeletion(node)) 345 | } 346 | 347 | // TestSpotMigratorDefaultMigrationScheduleHasFixedActivationTimes ensures that the default 348 | // migration schedule does not return activation times that are a fixed amount of time ahead of the 349 | // given time; otherwise, spot migration will never run if cost-manager is restarting more regularly 350 | // than the activation interval. 
For example, `@every 1h` would fail this test 351 | func TestSpotMigratorDefaultMigrationScheduleHasFixedActivationTimes(t *testing.T) { 352 | parsedMigrationSchedule, err := parseMigrationSchedule(defaultMigrationSchedule) 353 | require.NoError(t, err) 354 | 355 | testTime := time.Date(00, 00, 00, 00, 00, 00, 00, time.UTC) 356 | require.Equal(t, parsedMigrationSchedule.Next(testTime), parsedMigrationSchedule.Next(testTime.Add(time.Second))) 357 | } 358 | 359 | func TestSpotMigratorPrometheusMetricRegistration(t *testing.T) { 360 | // Create cancelled context so that spot-migrator returns after starting 361 | ctx, cancel := context.WithCancel(context.Background()) 362 | cancel() 363 | 364 | // Start spot-migrator 365 | err := (&spotMigrator{ 366 | Clientset: fake.NewSimpleClientset(), 367 | }).Start(ctx) 368 | require.NoError(t, err) 369 | 370 | // Make sure Prometheus metric has been registered 371 | metricFamilies, err := metrics.Registry.Gather() 372 | require.NoError(t, err) 373 | spotMigratorDrainSuccessMetricFound := false 374 | spotMigratorDrainFailureMetricFound := false 375 | for _, metricFamily := range metricFamilies { 376 | // This metric name should match with the corresponding PrometheusRule alert 377 | if metricFamily.Name != nil && *metricFamily.Name == "cost_manager_spot_migrator_operation_success_total" { 378 | spotMigratorDrainSuccessMetricFound = true 379 | } 380 | if metricFamily.Name != nil && *metricFamily.Name == "cost_manager_spot_migrator_operation_failure_total" { 381 | spotMigratorDrainFailureMetricFound = true 382 | } 383 | } 384 | require.True(t, spotMigratorDrainSuccessMetricFound) 385 | require.True(t, spotMigratorDrainFailureMetricFound) 386 | } 387 | 388 | func TestAnnotateNode(t *testing.T) { 389 | ctx := context.Background() 390 | clientset := fake.NewSimpleClientset() 391 | sm := &spotMigrator{ 392 | Clientset: clientset, 393 | } 394 | node, err := clientset.CoreV1().Nodes().Create(ctx, &corev1.Node{ 395 | ObjectMeta: metav1.ObjectMeta{ 396 | Name: "test", 397 | }, 398 | }, metav1.CreateOptions{}) 399 | require.NoError(t, err) 400 | err = sm.addSelectedForDeletionLabel(ctx, node.Name) 401 | require.NoError(t, err) 402 | node, err = clientset.CoreV1().Nodes().Get(ctx, node.Name, metav1.GetOptions{}) 403 | require.NoError(t, err) 404 | require.True(t, isSelectedForDeletion(node)) 405 | } 406 | 407 | func TestIsSelectedForDeletion(t *testing.T) { 408 | tests := map[string]struct { 409 | node *corev1.Node 410 | isSelectedForDeletion bool 411 | }{ 412 | "hasSelectedForDeletionLabelTrue": { 413 | node: &corev1.Node{ 414 | ObjectMeta: metav1.ObjectMeta{ 415 | Labels: map[string]string{ 416 | "cost-manager.io/selected-for-deletion": "true", 417 | }, 418 | }, 419 | }, 420 | isSelectedForDeletion: true, 421 | }, 422 | "hasSelectedForDeletionLabelFalse": { 423 | node: &corev1.Node{ 424 | ObjectMeta: metav1.ObjectMeta{ 425 | Labels: map[string]string{ 426 | "cost-manager.io/selected-for-deletion": "false", 427 | }, 428 | }, 429 | }, 430 | isSelectedForDeletion: false, 431 | }, 432 | "hasAnotherLabel": { 433 | node: &corev1.Node{ 434 | ObjectMeta: metav1.ObjectMeta{ 435 | Labels: map[string]string{ 436 | "foo": "bar", 437 | }, 438 | }, 439 | }, 440 | isSelectedForDeletion: false, 441 | }, 442 | "hasNoLabels": { 443 | node: &corev1.Node{ 444 | ObjectMeta: metav1.ObjectMeta{ 445 | Labels: map[string]string{}, 446 | }}, 447 | isSelectedForDeletion: false, 448 | }, 449 | "missingLabels": { 450 | node: &corev1.Node{}, 451 | isSelectedForDeletion: false, 452 | }, 453 | } 
454 | for name, test := range tests { 455 | t.Run(name, func(t *testing.T) { 456 | isSelectedForDeletion := isSelectedForDeletion(test.node) 457 | require.Equal(t, test.isSelectedForDeletion, isSelectedForDeletion) 458 | }) 459 | } 460 | } 461 | 462 | func TestAddToBeDeletedTaint(t *testing.T) { 463 | ctx := context.Background() 464 | node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "test"}} 465 | sm := &spotMigrator{ 466 | Clientset: fake.NewSimpleClientset(node), 467 | } 468 | 469 | err := sm.addToBeDeletedTaint(ctx, node) 470 | require.NoError(t, err) 471 | 472 | node, err = sm.Clientset.CoreV1().Nodes().Get(ctx, node.Name, metav1.GetOptions{}) 473 | require.NoError(t, err) 474 | 475 | hasToBeDeletedTaint := false 476 | for _, taint := range node.Spec.Taints { 477 | if taint.Key == "ToBeDeletedByClusterAutoscaler" && taint.Effect == "NoSchedule" { 478 | hasToBeDeletedTaint = true 479 | break 480 | } 481 | } 482 | require.True(t, hasToBeDeletedTaint) 483 | } 484 | 485 | func TestListOnDemandNodes(t *testing.T) { 486 | tests := map[string]struct { 487 | nodes []*corev1.Node 488 | onDemandNodes []*corev1.Node 489 | }{ 490 | "noNodes": { 491 | nodes: []*corev1.Node{}, 492 | onDemandNodes: []*corev1.Node{}, 493 | }, 494 | "oneSpotNode": { 495 | nodes: []*corev1.Node{ 496 | { 497 | ObjectMeta: metav1.ObjectMeta{ 498 | Name: "test", 499 | Labels: map[string]string{ 500 | cloudproviderfake.SpotInstanceLabelKey: cloudproviderfake.SpotInstanceLabelValue, 501 | }, 502 | }, 503 | }, 504 | }, 505 | onDemandNodes: []*corev1.Node{}, 506 | }, 507 | "oneOnDemandNode": { 508 | nodes: []*corev1.Node{ 509 | { 510 | ObjectMeta: metav1.ObjectMeta{ 511 | Name: "test", 512 | }, 513 | }, 514 | }, 515 | onDemandNodes: []*corev1.Node{ 516 | { 517 | ObjectMeta: metav1.ObjectMeta{ 518 | Name: "test", 519 | }, 520 | }, 521 | }, 522 | }, 523 | "oneControlPlaneNode": { 524 | nodes: []*corev1.Node{ 525 | { 526 | ObjectMeta: metav1.ObjectMeta{ 527 | Name: "test", 528 | Labels: map[string]string{ 529 | "node-role.kubernetes.io/control-plane": "", 530 | }, 531 | }, 532 | }, 533 | }, 534 | onDemandNodes: []*corev1.Node{}, 535 | }, 536 | "multipleNodes": { 537 | nodes: []*corev1.Node{ 538 | { 539 | ObjectMeta: metav1.ObjectMeta{ 540 | Name: "foo", 541 | Labels: map[string]string{ 542 | cloudproviderfake.SpotInstanceLabelKey: cloudproviderfake.SpotInstanceLabelValue, 543 | }, 544 | }, 545 | }, 546 | { 547 | ObjectMeta: metav1.ObjectMeta{ 548 | Name: "bar", 549 | }, 550 | }, 551 | }, 552 | onDemandNodes: []*corev1.Node{ 553 | { 554 | ObjectMeta: metav1.ObjectMeta{ 555 | Name: "bar", 556 | }, 557 | }, 558 | }, 559 | }, 560 | } 561 | for name, test := range tests { 562 | t.Run(name, func(t *testing.T) { 563 | ctx := context.Background() 564 | 565 | var objects []runtime.Object 566 | for _, node := range test.nodes { 567 | objects = append(objects, node) 568 | } 569 | sm := &spotMigrator{ 570 | Clientset: fake.NewSimpleClientset(objects...), 571 | CloudProvider: &cloudproviderfake.CloudProvider{}, 572 | } 573 | 574 | onDemandNodes, err := sm.listOnDemandNodes(ctx) 575 | require.NoError(t, err) 576 | require.Equal(t, test.onDemandNodes, onDemandNodes) 577 | }) 578 | } 579 | } 580 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | cel.dev/expr v0.19.1 h1:NciYrtDRIR0lNCnH1LFJegdjspNx9fI59O7TWcua/W4= 2 | cel.dev/expr v0.19.1/go.mod h1:MrpN08Q+lEBs+bGYdLxxHkZoUSsCp0nSKTs0nTymJgw= 3 | 
cloud.google.com/go/auth v0.13.0 h1:8Fu8TZy167JkW8Tj3q7dIkr2v4cndv41ouecJx0PAHs= 4 | cloud.google.com/go/auth v0.13.0/go.mod h1:COOjD9gwfKNKz+IIduatIhYJQIc0mG3H102r/EMxX6Q= 5 | cloud.google.com/go/auth/oauth2adapt v0.2.6 h1:V6a6XDu2lTwPZWOawrAa9HUK+DB2zfJyTuciBG5hFkU= 6 | cloud.google.com/go/auth/oauth2adapt v0.2.6/go.mod h1:AlmsELtlEBnaNTL7jCj8VQFLy6mbZv0s4Q7NGBeQ5E8= 7 | cloud.google.com/go/compute/metadata v0.6.0 h1:A6hENjEsCDtC1k8byVsgwvVcioamEHvZ4j01OwKxG9I= 8 | cloud.google.com/go/compute/metadata v0.6.0/go.mod h1:FjyFAW1MW0C203CEOMDTu3Dk1FlqW3Rga40jzHL4hfg= 9 | github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= 10 | github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= 11 | github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ= 12 | github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= 13 | github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I= 14 | github.com/NYTimes/gziphandler v1.1.1/go.mod h1:n/CVRwUEOgIxrgPvAQhUUr9oeUtvrhMomdKFjzJNB0c= 15 | github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ= 16 | github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw= 17 | github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= 18 | github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= 19 | github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= 20 | github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= 21 | github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= 22 | github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= 23 | github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= 24 | github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= 25 | github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= 26 | github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= 27 | github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= 28 | github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 29 | github.com/chai2010/gettext-go v1.0.3 h1:9liNh8t+u26xl5ddmWLmsOsdNLwkdRTg5AG+JnTiM80= 30 | github.com/chai2010/gettext-go v1.0.3/go.mod h1:y+wnP2cHYaVj19NZhYKAwEMH2CI1gNHeQQ+5AjwawxA= 31 | github.com/coreos/go-semver v0.3.1 h1:yi21YpKnrx1gt5R+la8n5WgS0kCrsPp33dmEyHReZr4= 32 | github.com/coreos/go-semver v0.3.1/go.mod h1:irMmmIw/7yzSRPWryHsK7EYSg09caPQL03VsM8rvUec= 33 | github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= 34 | github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= 35 | github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 36 | github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= 37 | github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= 38 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 39 | 
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 40 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= 41 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 42 | github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= 43 | github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= 44 | github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= 45 | github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= 46 | github.com/evanphx/json-patch v5.9.0+incompatible h1:fBXyNpNMuTTDdquAq/uisOr2lShz4oaXpDTX2bLe7ls= 47 | github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= 48 | github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg= 49 | github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= 50 | github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f h1:Wl78ApPPB2Wvf/TIe2xdyJxTlb6obmF18d8QdkxNDu4= 51 | github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f/go.mod h1:OSYXu++VVOHnXeitef/D8n/6y4QV8uLHSFXX4NeXMGc= 52 | github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= 53 | github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= 54 | github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= 55 | github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= 56 | github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M= 57 | github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= 58 | github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= 59 | github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= 60 | github.com/go-errors/errors v1.5.1 h1:ZwEMSLRCapFLflTpT7NKaAc7ukJ8ZPEjzlxt8rPN8bk= 61 | github.com/go-errors/errors v1.5.1/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= 62 | github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= 63 | github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= 64 | github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 65 | github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= 66 | github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= 67 | github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= 68 | github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= 69 | github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= 70 | github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= 71 | github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= 72 | github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= 73 | github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= 74 | github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= 75 | github.com/go-task/slim-sprig/v3 v3.0.0 
h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= 76 | github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= 77 | github.com/gobuffalo/flect v1.0.3 h1:xeWBM2nui+qnVvNM4S3foBhCAL2XgPU+a7FdpelbTq4= 78 | github.com/gobuffalo/flect v1.0.3/go.mod h1:A5msMlrHtLqh9umBSnvabjsMrCcCpAyzglnDvkbYKHs= 79 | github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= 80 | github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= 81 | github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= 82 | github.com/golang-jwt/jwt/v4 v4.5.0 h1:7cYmW1XlMY7h7ii7UhUyChSgS5wUJEnm9uZVTGqOWzg= 83 | github.com/golang-jwt/jwt/v4 v4.5.0/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= 84 | github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= 85 | github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= 86 | github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= 87 | github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= 88 | github.com/google/cel-go v0.22.1 h1:AfVXx3chM2qwoSbM7Da8g8hX8OVSkBFwX+rz2+PcK40= 89 | github.com/google/cel-go v0.22.1/go.mod h1:BuznPXXfQDpXKWQ9sPW3TzlAJN5zzFe+i9tIs0yC4s8= 90 | github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw= 91 | github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw= 92 | github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 93 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 94 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 95 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 96 | github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= 97 | github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 98 | github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo= 99 | github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= 100 | github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM= 101 | github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA= 102 | github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= 103 | github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= 104 | github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= 105 | github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 106 | github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw= 107 | github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA= 108 | github.com/googleapis/gax-go/v2 v2.14.1 h1:hb0FFeiPaQskmvakKu5EbCbpntQn48jyHuvrkurSS/Q= 109 | github.com/googleapis/gax-go/v2 v2.14.1/go.mod h1:Hb/NubMaVM88SrNkvl8X/o8XWwDJEPqouaLeN2IUxoA= 110 | github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= 111 | github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= 112 | github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 
h1:+ngKgrYPPJrOjhax5N+uePQ0Fh1Z7PheYoUI/0nzkPA= 113 | github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= 114 | github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= 115 | github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= 116 | github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= 117 | github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= 118 | github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= 119 | github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= 120 | github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1 h1:VNqngBF40hVlDloBruUehVYC3ArSgIyScOAyMRqBxRg= 121 | github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1/go.mod h1:RBRO7fro65R6tjKzYgLAFo0t1QEXY1Dp+i/bvpRiqiQ= 122 | github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= 123 | github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= 124 | github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= 125 | github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= 126 | github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= 127 | github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= 128 | github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= 129 | github.com/jonboulle/clockwork v0.4.0 h1:p4Cf1aMWXnXAUh8lVfewRBx1zaTSYKrKMF2g3ST4RZ4= 130 | github.com/jonboulle/clockwork v0.4.0/go.mod h1:xgRqUGwRcjKCO1vbZUEtSLrqKoPSsUpK7fnezOII0kc= 131 | github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= 132 | github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= 133 | github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= 134 | github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= 135 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 136 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 137 | github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= 138 | github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 139 | github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= 140 | github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= 141 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= 142 | github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= 143 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 144 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 145 | github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= 146 | github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= 147 | github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhnIaL+V+BEER86oLrvS+kWobKpbJuye0= 148 | github.com/liggitt/tabwriter 
v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE= 149 | github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= 150 | github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= 151 | github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= 152 | github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= 153 | github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= 154 | github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= 155 | github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 156 | github.com/mitchellh/go-wordwrap v1.0.1 h1:TLuKupo69TCn6TQSyGxwI1EblZZEsQ0vMlAFQflz0v0= 157 | github.com/mitchellh/go-wordwrap v1.0.1/go.mod h1:R62XHJLzvMFRBbcrT7m7WgmE1eOyTSsCt+hzestvNj0= 158 | github.com/moby/spdystream v0.5.0 h1:7r0J1Si3QO/kjRitvSLVVFUjxMEb/YLj6S9FF62JBCU= 159 | github.com/moby/spdystream v0.5.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI= 160 | github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= 161 | github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= 162 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 163 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 164 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 165 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 166 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 167 | github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 h1:n6/2gBQ3RWajuToeY6ZtZTIKv2v7ThUy5KKusIT0yc0= 168 | github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00/go.mod h1:Pm3mSP3c5uWn86xMLZ5Sa7JB9GsEZySvHYXCTK4E9q4= 169 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= 170 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= 171 | github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= 172 | github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= 173 | github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= 174 | github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= 175 | github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= 176 | github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= 177 | github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= 178 | github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= 179 | github.com/onsi/ginkgo/v2 v2.21.0 h1:7rg/4f3rB88pb5obDgNZrNHrQ4e6WpjonchcpuBRnZM= 180 | github.com/onsi/ginkgo/v2 v2.21.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= 181 | github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4= 182 | github.com/onsi/gomega v1.35.1/go.mod 
h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= 183 | github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+vxiaj6gdUUzhl4XmI= 184 | github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= 185 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 186 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 187 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 188 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= 189 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 190 | github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.2 h1:DGv150w4UyxnjNHlkCw85R3+lspOxegtdnbpP2vKRrk= 191 | github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.2/go.mod h1:AVMP4QEW8xuGWnxaWSpI3kKjP9fDA31nO68zsyREJZA= 192 | github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= 193 | github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= 194 | github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= 195 | github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= 196 | github.com/prometheus/common v0.61.0 h1:3gv/GThfX0cV2lpO7gkTUwZru38mxevy90Bj8YFSRQQ= 197 | github.com/prometheus/common v0.61.0/go.mod h1:zr29OCN/2BsJRaFwG8QOBr41D6kkchKbpeNH7pAjb/s= 198 | github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= 199 | github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= 200 | github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= 201 | github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= 202 | github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= 203 | github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= 204 | github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= 205 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 206 | github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ= 207 | github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= 208 | github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= 209 | github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= 210 | github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js= 211 | github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0= 212 | github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= 213 | github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= 214 | github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= 215 | github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 216 | github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs= 217 | github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= 218 | github.com/stretchr/objx v0.1.0/go.mod 
h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 219 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 220 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 221 | github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= 222 | github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= 223 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 224 | github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 225 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 226 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 227 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 228 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 229 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 230 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 231 | github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 h1:6fotK7otjonDflCTK0BCfls4SPy3NcCVb5dqqmbRknE= 232 | github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75/go.mod h1:KO6IkyS8Y3j8OdNO85qEYBsRPuteD+YciPomcXdrMnk= 233 | github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= 234 | github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= 235 | github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510 h1:S2dVYn90KE98chqDkyE9Z4N61UnQd+KOfgp5Iu53llk= 236 | github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= 237 | github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ= 238 | github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= 239 | github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 240 | github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 241 | go.etcd.io/bbolt v1.3.11 h1:yGEzV1wPz2yVCLsD8ZAiGHhHVlczyC9d1rP43/VCRJ0= 242 | go.etcd.io/bbolt v1.3.11/go.mod h1:dksAq7YMXoljX0xu6VF5DMZGbhYYoLUalEiSySYAS4I= 243 | go.etcd.io/etcd/api/v3 v3.5.17 h1:cQB8eb8bxwuxOilBpMJAEo8fAONyrdXTHUNcMd8yT1w= 244 | go.etcd.io/etcd/api/v3 v3.5.17/go.mod h1:d1hvkRuXkts6PmaYk2Vrgqbv7H4ADfAKhyJqHNLJCB4= 245 | go.etcd.io/etcd/client/pkg/v3 v3.5.17 h1:XxnDXAWq2pnxqx76ljWwiQ9jylbpC4rvkAeRVOUKKVw= 246 | go.etcd.io/etcd/client/pkg/v3 v3.5.17/go.mod h1:4DqK1TKacp/86nJk4FLQqo6Mn2vvQFBmruW3pP14H/w= 247 | go.etcd.io/etcd/client/v2 v2.305.16 h1:kQrn9o5czVNaukf2A2At43cE9ZtWauOtf9vRZuiKXow= 248 | go.etcd.io/etcd/client/v2 v2.305.16/go.mod h1:h9YxWCzcdvZENbfzBTFCnoNumr2ax3F19sKMqHFmXHE= 249 | go.etcd.io/etcd/client/v3 v3.5.17 h1:o48sINNeWz5+pjy/Z0+HKpj/xSnBkuVhVvXkjEXbqZY= 250 | go.etcd.io/etcd/client/v3 v3.5.17/go.mod h1:j2d4eXTHWkT2ClBgnnEPm/Wuu7jsqku41v9DZ3OtjQo= 251 | go.etcd.io/etcd/pkg/v3 v3.5.16 h1:cnavs5WSPWeK4TYwPYfmcr3Joz9BH+TZ6qoUtz6/+mc= 252 | go.etcd.io/etcd/pkg/v3 v3.5.16/go.mod h1:+lutCZHG5MBBFI/U4eYT5yL7sJfnexsoM20Y0t2uNuY= 253 | go.etcd.io/etcd/raft/v3 v3.5.16 h1:zBXA3ZUpYs1AwiLGPafYAKKl/CORn/uaxYDwlNwndAk= 254 | go.etcd.io/etcd/raft/v3 v3.5.16/go.mod h1:P4UP14AxofMJ/54boWilabqqWoW9eLodl6I5GdGzazI= 255 | go.etcd.io/etcd/server/v3 v3.5.16 
h1:d0/SAdJ3vVsZvF8IFVb1k8zqMZ+heGcNfft71ul9GWE= 256 | go.etcd.io/etcd/server/v3 v3.5.16/go.mod h1:ynhyZZpdDp1Gq49jkUg5mfkDWZwXnn3eIqCqtJnrD/s= 257 | go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= 258 | go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= 259 | go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0 h1:PS8wXpbyaDJQ2VDHHncMe9Vct0Zn1fEjpsjrLxGJoSc= 260 | go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0/go.mod h1:HDBUsEjOuRC0EzKZ1bSaRGZWUBAzo+MhAcUUORSr4D0= 261 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 h1:yd02MEjBdJkG3uabWP9apV+OuWRIXGDuJEUJbOHmCFU= 262 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0/go.mod h1:umTcuxiv1n/s/S6/c2AT/g2CQ7u5C59sHDNmfSwgz7Q= 263 | go.opentelemetry.io/otel v1.33.0 h1:/FerN9bax5LoK51X/sI0SVYrjSE0/yUL7DpxW4K3FWw= 264 | go.opentelemetry.io/otel v1.33.0/go.mod h1:SUUkR6csvUQl+yjReHu5uM3EtVV7MBm5FHKRlNx4I8I= 265 | go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0 h1:Vh5HayB/0HHfOQA7Ctx69E/Y/DcQSMPpKANYVMQ7fBA= 266 | go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0/go.mod h1:cpgtDBaqD/6ok/UG0jT15/uKjAY8mRA53diogHBg3UI= 267 | go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0 h1:5pojmb1U1AogINhN3SurB+zm/nIcusopeBNp42f45QM= 268 | go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0/go.mod h1:57gTHJSE5S1tqg+EKsLPlTWhpHMsWlVmer+LA926XiA= 269 | go.opentelemetry.io/otel/metric v1.33.0 h1:r+JOocAyeRVXD8lZpjdQjzMadVZp2M4WmQ+5WtEnklQ= 270 | go.opentelemetry.io/otel/metric v1.33.0/go.mod h1:L9+Fyctbp6HFTddIxClbQkjtubW6O9QS3Ann/M82u6M= 271 | go.opentelemetry.io/otel/sdk v1.33.0 h1:iax7M131HuAm9QkZotNHEfstof92xM+N8sr3uHXc2IM= 272 | go.opentelemetry.io/otel/sdk v1.33.0/go.mod h1:A1Q5oi7/9XaMlIWzPSxLRWOI8nG3FnzHJNbiENQuihM= 273 | go.opentelemetry.io/otel/sdk/metric v1.31.0 h1:i9hxxLJF/9kkvfHppyLL55aW7iIJz4JjxTeYusH7zMc= 274 | go.opentelemetry.io/otel/sdk/metric v1.31.0/go.mod h1:CRInTMVvNhUKgSAMbKyTMxqOBC0zgyxzW55lZzX43Y8= 275 | go.opentelemetry.io/otel/trace v1.33.0 h1:cCJuF7LRjUFso9LPnEAHJDB2pqzp+hbO8eu1qqW2d/s= 276 | go.opentelemetry.io/otel/trace v1.33.0/go.mod h1:uIcdVUZMpTAmz0tI1z04GoVSezK37CbGV4fr1f2nBck= 277 | go.opentelemetry.io/proto/otlp v1.4.0 h1:TA9WRvW6zMwP+Ssb6fLoUIuirti1gGbP28GcKG1jgeg= 278 | go.opentelemetry.io/proto/otlp v1.4.0/go.mod h1:PPBWZIP98o2ElSqI35IHfu7hIhSwvc5N38Jw8pXuGFY= 279 | go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= 280 | go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= 281 | go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= 282 | go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= 283 | go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= 284 | go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= 285 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 286 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 287 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 288 | golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= 289 | golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= 290 | golang.org/x/exp 
v0.0.0-20241217172543-b2144cdd0a67 h1:1UoZQm6f0P/ZO0w1Ri+f+ifG/gXhegadRdwBIXEFWDo= 291 | golang.org/x/exp v0.0.0-20241217172543-b2144cdd0a67/go.mod h1:qj5a5QZpwLU2NLQudwIN5koi3beDhSAlJwa67PuM98c= 292 | golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 293 | golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 294 | golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4= 295 | golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= 296 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 297 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 298 | golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 299 | golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= 300 | golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= 301 | golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= 302 | golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= 303 | golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= 304 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 305 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 306 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 307 | golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= 308 | golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 309 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 310 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 311 | golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 312 | golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 313 | golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 314 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 315 | golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= 316 | golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 317 | golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= 318 | golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= 319 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 320 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 321 | golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= 322 | golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= 323 | golang.org/x/time v0.8.0 h1:9i3RxcPv3PZnitoVGMPDKZSq1xW1gK1Xy3ArNOGZfEg= 324 | golang.org/x/time v0.8.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= 325 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 326 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 327 | 
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= 328 | golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= 329 | golang.org/x/tools v0.28.0 h1:WuB6qZ4RPCQo5aP3WdKZS7i595EdWqWR8vqJTlwTVK8= 330 | golang.org/x/tools v0.28.0/go.mod h1:dcIOrVd3mfQKTgrDVQHqCPMWy6lnhfhtX3hLXYVLfRw= 331 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 332 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 333 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 334 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 335 | gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= 336 | gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= 337 | google.golang.org/api v0.214.0 h1:h2Gkq07OYi6kusGOaT/9rnNljuXmqPnaig7WGPmKbwA= 338 | google.golang.org/api v0.214.0/go.mod h1:bYPpLG8AyeMWwDU6NXoB00xC0DFkikVvd5MfwoxjLqE= 339 | google.golang.org/genproto v0.0.0-20241007155032-5fefd90f89a9 h1:nFS3IivktIU5Mk6KQa+v6RKkHUpdQpphqGNLxqNnbEk= 340 | google.golang.org/genproto v0.0.0-20241007155032-5fefd90f89a9/go.mod h1:tEzYTYZxbmVNOu0OAFH9HzdJtLn6h4Aj89zzlBCdHms= 341 | google.golang.org/genproto/googleapis/api v0.0.0-20241219192143-6b3ec007d9bb h1:B7GIB7sr443wZ/EAEl7VZjmh1V6qzkt5V+RYcUYtS1U= 342 | google.golang.org/genproto/googleapis/api v0.0.0-20241219192143-6b3ec007d9bb/go.mod h1:E5//3O5ZIG2l71Xnt+P/CYUY8Bxs8E7WMoZ9tlcMbAY= 343 | google.golang.org/genproto/googleapis/rpc v0.0.0-20241219192143-6b3ec007d9bb h1:3oy2tynMOP1QbTC0MsNNAV+Se8M2Bd0A5+x1QHyw+pI= 344 | google.golang.org/genproto/googleapis/rpc v0.0.0-20241219192143-6b3ec007d9bb/go.mod h1:lcTa1sDdWEIHMWlITnIczmw5w60CF9ffkb8Z+DVmmjA= 345 | google.golang.org/grpc v1.69.2 h1:U3S9QEtbXC0bYNvRtcoklF3xGtLViumSYxWykJS+7AU= 346 | google.golang.org/grpc v1.69.2/go.mod h1:vyjdE6jLBI76dgpDojsFGNaHlxdjXN9ghpnd2o7JGZ4= 347 | google.golang.org/protobuf v1.36.0 h1:mjIs9gYtt56AzC4ZaffQuh88TZurBGhIJMBZGSxNerQ= 348 | google.golang.org/protobuf v1.36.0/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= 349 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 350 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 351 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 352 | gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= 353 | gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= 354 | gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= 355 | gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= 356 | gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc= 357 | gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc= 358 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= 359 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= 360 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 361 | 
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 362 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 363 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 364 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 365 | k8s.io/api v0.32.0 h1:OL9JpbvAU5ny9ga2fb24X8H6xQlVp+aJMFlgtQjR9CE= 366 | k8s.io/api v0.32.0/go.mod h1:4LEwHZEf6Q/cG96F3dqR965sYOfmPM7rq81BLgsE0p0= 367 | k8s.io/apiextensions-apiserver v0.32.0 h1:S0Xlqt51qzzqjKPxfgX1xh4HBZE+p8KKBq+k2SWNOE0= 368 | k8s.io/apiextensions-apiserver v0.32.0/go.mod h1:86hblMvN5yxMvZrZFX2OhIHAuFIMJIZ19bTvzkP+Fmw= 369 | k8s.io/apimachinery v0.32.0 h1:cFSE7N3rmEEtv4ei5X6DaJPHHX0C+upp+v5lVPiEwpg= 370 | k8s.io/apimachinery v0.32.0/go.mod h1:GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE= 371 | k8s.io/apiserver v0.32.0 h1:VJ89ZvQZ8p1sLeiWdRJpRD6oLozNZD2+qVSLi+ft5Qs= 372 | k8s.io/apiserver v0.32.0/go.mod h1:HFh+dM1/BE/Hm4bS4nTXHVfN6Z6tFIZPi649n83b4Ag= 373 | k8s.io/cli-runtime v0.32.0 h1:dP+OZqs7zHPpGQMCGAhectbHU2SNCuZtIimRKTv2T1c= 374 | k8s.io/cli-runtime v0.32.0/go.mod h1:Mai8ht2+esoDRK5hr861KRy6z0zHsSTYttNVJXgP3YQ= 375 | k8s.io/client-go v0.32.0 h1:DimtMcnN/JIKZcrSrstiwvvZvLjG0aSxy8PxN8IChp8= 376 | k8s.io/client-go v0.32.0/go.mod h1:boDWvdM1Drk4NJj/VddSLnx59X3OPgwrOo0vGbtq9+8= 377 | k8s.io/component-base v0.32.0 h1:d6cWHZkCiiep41ObYQS6IcgzOUQUNpywm39KVYaUqzU= 378 | k8s.io/component-base v0.32.0/go.mod h1:JLG2W5TUxUu5uDyKiH2R/7NnxJo1HlPoRIIbVLkK5eM= 379 | k8s.io/controller-manager v0.32.0 h1:tpQl1rvH4huFB6Avl1nhowZHtZoCNWqn6OYdZPl7Ybc= 380 | k8s.io/controller-manager v0.32.0/go.mod h1:JRuYnYCkKj3NgBTy+KNQKIUm/lJRoDAvGbfdEmk9LhY= 381 | k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= 382 | k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= 383 | k8s.io/kube-openapi v0.0.0-20241212222426-2c72e554b1e7 h1:hcha5B1kVACrLujCKLbr8XWMxCxzQx42DY8QKYJrDLg= 384 | k8s.io/kube-openapi v0.0.0-20241212222426-2c72e554b1e7/go.mod h1:GewRfANuJ70iYzvn+i4lezLDAFzvjxZYK1gn1lWcfas= 385 | k8s.io/kubectl v0.32.0 h1:rpxl+ng9qeG79YA4Em9tLSfX0G8W0vfaiPVrc/WR7Xw= 386 | k8s.io/kubectl v0.32.0/go.mod h1:qIjSX+QgPQUgdy8ps6eKsYNF+YmFOAO3WygfucIqFiE= 387 | k8s.io/utils v0.0.0-20241210054802-24370beab758 h1:sdbE21q2nlQtFh65saZY+rRM6x6aJJI8IUa1AmH/qa0= 388 | k8s.io/utils v0.0.0-20241210054802-24370beab758/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= 389 | knative.dev/pkg v0.0.0-20231102200604-fac3a4ffbc74 h1:phT8Z3zNyL0OPh9z0EpTJdDMT9sn2x9wtFB5IqI4vJA= 390 | knative.dev/pkg v0.0.0-20231102200604-fac3a4ffbc74/go.mod h1:i7ifWvAVgStXzSwZcfwA3T6D00IPJrqu3XRKKSiYZvw= 391 | sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.1 h1:uOuSLOMBWkJH0TWa9X6l+mj5nZdm6Ay6Bli8HL8rNfk= 392 | sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.1/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= 393 | sigs.k8s.io/cluster-api-provider-gcp v1.8.0 h1:K3/fa4VEPCIgtzGsKKPs3qwbJEkMxt+YjT+fkmE7CG8= 394 | sigs.k8s.io/cluster-api-provider-gcp v1.8.0/go.mod h1:dHC23Chv/PpH2M8pvkVpleW9auCsXuxmEQUz8UUwk7A= 395 | sigs.k8s.io/controller-runtime v0.19.3 h1:XO2GvC9OPftRst6xWCpTgBZO04S2cbp0Qqkj8bX1sPw= 396 | sigs.k8s.io/controller-runtime v0.19.3/go.mod h1:j4j87DqtsThvwTv5/Tc5NFRyyF/RF0ip4+62tbTSIUM= 397 | sigs.k8s.io/controller-tools v0.16.5 h1:5k9FNRqziBPwqr17AMEPPV/En39ZBplLAdOwwQHruP4= 398 | sigs.k8s.io/controller-tools v0.16.5/go.mod h1:8vztuRVzs8IuuJqKqbXCSlXcw+lkAv/M2sTpg55qjMY= 399 | 
sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= 400 | sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= 401 | sigs.k8s.io/kubebuilder-release-tools/notes v0.3.0 h1:EjwSxrFH6xcB1PKb1msI7ZjbAQPG6jq1HKhqliUHKzc= 402 | sigs.k8s.io/kubebuilder-release-tools/notes v0.3.0/go.mod h1:0MFmf9EVf9hm7ULQMpWPWHfh0w7Ut4Ze09onqy2zDSM= 403 | sigs.k8s.io/kustomize/api v0.18.0 h1:hTzp67k+3NEVInwz5BHyzc9rGxIauoXferXyjv5lWPo= 404 | sigs.k8s.io/kustomize/api v0.18.0/go.mod h1:f8isXnX+8b+SGLHQ6yO4JG1rdkZlvhaCf/uZbLVMb0U= 405 | sigs.k8s.io/kustomize/kyaml v0.18.1 h1:WvBo56Wzw3fjS+7vBjN6TeivvpbW9GmRaWZ9CIVmt4E= 406 | sigs.k8s.io/kustomize/kyaml v0.18.1/go.mod h1:C3L2BFVU1jgcddNBE1TxuVLgS46TjObMwW5FT9FcjYo= 407 | sigs.k8s.io/structured-merge-diff/v4 v4.5.0 h1:nbCitCK2hfnhyiKo6uf2HxUPTCodY6Qaf85SbDIaMBk= 408 | sigs.k8s.io/structured-merge-diff/v4 v4.5.0/go.mod h1:N8f93tFZh9U6vpxwRArLiikrE5/2tiu1w1AGfACIGE4= 409 | sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= 410 | sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= 411 | --------------------------------------------------------------------------------