├── .codecov.yml
├── .dockerignore
├── .github
│   ├── CODEOWNERS
│   └── workflows
│       └── go.yml
├── .gitignore
├── .golangci.yml
├── .goreleaser.yml
├── Dockerfile
├── LICENSE
├── Makefile
├── PROJECT
├── README.md
├── api
│   └── v1alpha1
│       ├── common_types.go
│       ├── daskcluster_types.go
│       ├── daskcluster_webhook.go
│       ├── groupversion_info.go
│       ├── mpicluster_types.go
│       ├── mpicluster_webhook.go
│       ├── raycluster_types.go
│       ├── raycluster_webhook.go
│       ├── raycluster_webhook_integration_test.go
│       ├── sparkcluster_types.go
│       ├── sparkcluster_webhook.go
│       ├── sparkcluster_webhook_integration_test.go
│       ├── validations.go
│       ├── webhook_suite_test.go
│       └── zz_generated.deepcopy.go
├── cluster-testing
│   ├── dask.yaml
│   └── ray.yaml
├── cmd
│   ├── crdapply.go
│   ├── crddelete.go
│   ├── root.go
│   └── start.go
├── config
│   ├── certmanager
│   │   ├── certificate.yaml
│   │   ├── kustomization.yaml
│   │   └── kustomizeconfig.yaml
│   ├── crd
│   │   ├── bases
│   │   │   ├── distributed-compute.dominodatalab.com_daskclusters.yaml
│   │   │   ├── distributed-compute.dominodatalab.com_mpiclusters.yaml
│   │   │   ├── distributed-compute.dominodatalab.com_rayclusters.yaml
│   │   │   └── distributed-compute.dominodatalab.com_sparkclusters.yaml
│   │   ├── embed.go
│   │   ├── kustomization.yaml
│   │   ├── kustomizeconfig.yaml
│   │   └── patches
│   │       ├── cainjection_in_daskclusters.yaml
│   │       ├── cainjection_in_mpiclusters.yaml
│   │       ├── cainjection_in_rayclusters.yaml
│   │       ├── cainjection_in_sparkclusters.yaml
│   │       ├── webhook_in_daskclusters.yaml
│   │       ├── webhook_in_mpiclusters.yaml
│   │       ├── webhook_in_rayclusters.yaml
│   │       └── webhook_in_sparkclusters.yaml
│   ├── default
│   │   ├── kustomization.yaml
│   │   ├── manager_auth_proxy_patch.yaml
│   │   ├── manager_config_patch.yaml
│   │   ├── manager_webhook_patch.yaml
│   │   └── webhookcainjection_patch.yaml
│   ├── manager
│   │   ├── controller_manager_config.yaml
│   │   ├── kustomization.yaml
│   │   └── manager.yaml
│   ├── prometheus
│   │   ├── kustomization.yaml
│   │   └── monitor.yaml
│   ├── rbac
│   │   ├── auth_proxy_client_clusterrole.yaml
│   │   ├── auth_proxy_role.yaml
│   │   ├── auth_proxy_role_binding.yaml
│   │   ├── auth_proxy_service.yaml
│   │   ├── daskcluster_editor_role.yaml
│   │   ├── daskcluster_viewer_role.yaml
│   │   ├── kustomization.yaml
│   │   ├── leader_election_role.yaml
│   │   ├── leader_election_role_binding.yaml
│   │   ├── mpicluster_editor_role.yaml
│   │   ├── mpicluster_viewer_role.yaml
│   │   ├── raycluster_editor_role.yaml
│   │   ├── raycluster_viewer_role.yaml
│   │   ├── role.yaml
│   │   └── role_binding.yaml
│   ├── samples
│   │   ├── distributed-compute_v1alpha1_daskcluster.yaml
│   │   ├── distributed-compute_v1alpha1_mpicluster.yaml
│   │   ├── distributed-compute_v1alpha1_raycluster.yaml
│   │   └── distributed-compute_v1alpha1_sparkcluster.yaml
│   └── webhook
│       ├── kustomization.yaml
│       ├── kustomizeconfig.yaml
│       ├── manifests.yaml
│       └── service.yaml
├── controllers
│   ├── config.go
│   ├── controllers.go
│   ├── daskcluster_controller.go
│   ├── mpicluster_controller.go
│   ├── raycluster_controller.go
│   ├── raycluster_controller_integration_test.go
│   ├── sparkcluster_controller.go
│   ├── sparkcluster_controller_integration_test.go
│   ├── suite_test.go
│   └── variables.go
├── deploy
│   └── helm
│       └── distributed-compute-operator
│           ├── .helmignore
│           ├── Chart.lock
│           ├── Chart.yaml
│           ├── charts
│           │   └── common-1.4.1.tgz
│           ├── dco-values.yaml
│           ├── templates
│           │   ├── NOTES.txt
│           │   ├── _helpers.tpl
│           │   ├── clusterrole.yaml
│           │   ├── clusterrolebinding.yaml
│           │   ├── deployment.yaml
│           │   ├── hooks.yaml
│           │   ├── istio.yaml
│           │   ├── networkpolicy.yaml
│           │   ├── serviceaccount.yaml
│           │   ├── webhook-cert-manager.yaml
│           │   ├── webhook-configuration-mutating.yaml
│           │   ├── webhook-configuration-validating.yaml
│           │   └── webhook-service.yaml
│           └── values.yaml
├── dockerfiles
│   ├── mpi-init.Dockerfile
│   ├── mpi-sync.Dockerfile
│   ├── mpi-worker-start.sh
│   ├── openssh.gpgkey
│   ├── rsync-start.sh
│   └── rsyncd.conf
├── docs
│   ├── development.md
│   └── img
│       └── logo.png
├── go.mod
├── go.sum
├── hack
│   └── boilerplate.go.txt
├── istio
│   ├── global-strict-mtls.yaml
│   └── operator-minimal.yaml
├── main.go
├── pkg
│   ├── cluster
│   │   ├── dask
│   │   │   ├── clientports.go
│   │   │   ├── clusterstatusupdate.go
│   │   │   ├── configmap.go
│   │   │   ├── dask_test.go
│   │   │   ├── horizonalpodautoscaler.go
│   │   │   ├── horizontalpodautoscaler_test.go
│   │   │   ├── istiopeerauthentication.go
│   │   │   ├── metadata.go
│   │   │   ├── networkpolicy.go
│   │   │   ├── networkpolicy_test.go
│   │   │   ├── rbac.go
│   │   │   ├── rbac_test.go
│   │   │   ├── service.go
│   │   │   ├── service_test.go
│   │   │   ├── serviceaccount.go
│   │   │   ├── serviceaccount_test.go
│   │   │   └── statefulset.go
│   │   ├── metadata
│   │   │   └── metadata.go
│   │   └── mpi
│   │       ├── clientports.go
│   │       ├── configmap.go
│   │       ├── istiopeerauthentication.go
│   │       ├── metadata.go
│   │       ├── mpi.go
│   │       ├── networkpolicy.go
│   │       ├── podsecuritypolicy.go
│   │       ├── service.go
│   │       ├── serviceaccount.go
│   │       ├── statefulset.go
│   │       └── statusupdate.go
│   ├── controller
│   │   ├── actions
│   │   │   └── actions.go
│   │   ├── components
│   │   │   ├── clientports.go
│   │   │   ├── clusterstatusupdate.go
│   │   │   ├── configmap.go
│   │   │   ├── horizontalpodautoscaler.go
│   │   │   ├── istiopeerauthentication.go
│   │   │   ├── networkpolicy.go
│   │   │   ├── rbac.go
│   │   │   ├── service.go
│   │   │   ├── serviceaccount.go
│   │   │   └── statefulset.go
│   │   └── core
│   │       ├── components.go
│   │       ├── context.go
│   │       ├── patch.go
│   │       └── reconciler.go
│   ├── crd
│   │   ├── crd.go
│   │   ├── crd_test.go
│   │   └── istio.go
│   ├── manager
│   │   └── manager.go
│   ├── resources
│   │   ├── istio
│   │   │   ├── peerauthentication.go
│   │   │   └── peerauthentication_test.go
│   │   ├── metadata.go
│   │   ├── metadata_test.go
│   │   ├── ray
│   │   │   ├── helpers_test.go
│   │   │   ├── horizontalpodautoscaler.go
│   │   │   ├── horizontalpodautoscaler_test.go
│   │   │   ├── networkpolicy.go
│   │   │   ├── networkpolicy_test.go
│   │   │   ├── podsecuritypolicy.go
│   │   │   ├── podsecuritypolicy_test.go
│   │   │   ├── ray.go
│   │   │   ├── ray_test.go
│   │   │   ├── service.go
│   │   │   ├── service_test.go
│   │   │   ├── serviceaccount.go
│   │   │   ├── serviceaccount_test.go
│   │   │   ├── statefulset.go
│   │   │   └── statefulset_test.go
│   │   └── spark
│   │       ├── configmap.go
│   │       ├── configmap_test.go
│   │       ├── envoyfilter.go
│   │       ├── envoyfilter_test.go
│   │       ├── helpers_test.go
│   │       ├── horizontalpodautoscaler.go
│   │       ├── horizontalpodautoscaler_test.go
│   │       ├── networkpolicy.go
│   │       ├── networkpolicy_test.go
│   │       ├── podsecuritypolicy.go
│   │       ├── podsecuritypolicy_test.go
│   │       ├── service.go
│   │       ├── service_test.go
│   │       ├── serviceaccount.go
│   │       ├── serviceaccount_test.go
│   │       ├── spark.go
│   │       ├── spark_test.go
│   │       ├── statefulset.go
│   │       └── statefulset_test.go
│   └── util
│       ├── util.go
│       └── util_test.go
├── scripts
│   ├── development.sh
│   ├── hotpatch.sh
│   └── release
│       ├── before-hook.sh
│       └── helm.sh
└── test
    └── test.go
/.codecov.yml:
--------------------------------------------------------------------------------
1 | coverage:
2 | status:
3 | project:
4 | default:
5 | target: 75%
6 | threshold: 10%
7 | patch: off
8 | ignore:
9 | - "api/**/zz_generated.deepcopy.go"
10 |
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | # More info: https://docs.docker.com/engine/reference/builder/#dockerignore-file
2 | # Ignore everything that is not a Go source, module, or checksum file
3 | !**/*.go
4 | !**/*.mod
5 | !**/*.sum
6 |
7 | # Ignore bin directories
8 | bin/*
9 | testbin/*
10 |
--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @dominodatalab/workbench-train
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | # Binaries for programs and plugins
3 | *.exe
4 | *.exe~
5 | *.dll
6 | *.so
7 | *.dylib
8 | bin
9 | testbin/*
10 |
11 | # Test binary, build with `go test -c`
12 | *.test
13 |
14 | # Output of the go coverage tool, specifically when used with LiteIDE
15 | *.out
16 |
17 | # Kubernetes Generated files - skip generated files, except for vendored files
18 |
19 | !vendor/**/zz_generated.*
20 |
21 | # editor and IDE paraphernalia
22 | .idea
23 | *.swp
24 | *.swo
25 | *~
26 |
27 | # goreleaser directories
28 | dist/
29 | custom-resource-definitions/
30 |
31 | # miscellaneous local files
32 | exclude/
33 |
--------------------------------------------------------------------------------
/.golangci.yml:
--------------------------------------------------------------------------------
1 | run:
2 | timeout: 2m
3 |
4 | linters-settings:
5 | dupl:
6 | threshold: 100
7 | exhaustive:
8 | default-signifies-exhaustive: true
9 | funlen:
10 | lines: 150
11 | statements: 70
12 | gci:
13 | local-prefixes: github.com/dominodatalab/distributed-compute-operator
14 | goconst:
15 | min-len: 2
16 | min-occurrences: 2
17 | gocyclo:
18 | min-complexity: 25
19 | goimports:
20 | local-prefixes: github.com/dominodatalab/distributed-compute-operator
21 | gomnd:
22 | settings:
23 | mnd:
24 | checks: argument,case,condition,return
25 | ignored-functions: log.V
26 | govet:
27 | check-shadowing: true
28 | lll:
29 | line-length: 140
30 | maligned:
31 | suggest-new: true
32 | misspell:
33 | locale: US
34 | nestif:
35 | min-complexity: 6
36 |
37 | linters:
38 | disable-all: true
39 | enable:
40 | - bodyclose
41 | - depguard
42 | - dogsled
43 | - dupl
44 | - errcheck
45 | - errorlint
46 | - exhaustive
47 | - funlen
48 | - goconst
49 | - gocritic
50 | - gocyclo
51 | - gofmt
52 | - goimports
53 | - revive
54 | - gomnd
55 | - goprintffuncname
56 | - gosec
57 | - gosimple
58 | - govet
59 | - ineffassign
60 | - lll
61 | - misspell
62 | - nakedret
63 | - nestif
64 | - noctx
65 | - exportloopref
66 | - staticcheck
67 | - stylecheck
68 | - typecheck
69 | - unconvert
70 | - unparam
71 | - unused
72 | - whitespace
73 |
74 | issues:
75 | exclude-rules:
76 | - path: _test\.go
77 | linters:
78 | - dupl
79 | - exhaustive
80 | - gocyclo
81 | - gomnd
82 | - gosec
83 | - funlen
84 | - path: test/test.go
85 | linters:
86 | - dogsled
87 | - source: "^//\\s*\\+kubebuilder:.+"
88 | linters:
89 | - lll
90 | exclude:
91 | - Using the variable on range scope `tc` in function literal
92 |
93 |
--------------------------------------------------------------------------------
/.goreleaser.yml:
--------------------------------------------------------------------------------
1 | before:
2 | hooks:
3 | - scripts/release/before-hook.sh
4 | builds:
5 | - env:
6 | - CGO_ENABLED=0
7 | goos:
8 | - linux
9 | - darwin
10 | goarch:
11 | - amd64
12 | archives:
13 | - replacements:
14 | amd64: x86_64
15 | files:
16 | - LICENSE
17 | - README.md
18 | - custom-resource-definitions/*.yaml
19 | - deploy/*
20 | checksum:
21 | name_template: 'checksums.txt'
22 | snapshot:
23 | name_template: "{{ .Tag }}-next"
24 | changelog:
25 | sort: asc
26 | filters:
27 | exclude:
28 | - '^docs:'
29 | - '^test:'
30 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Build the manager binary
2 | FROM golang:1.21.3 as builder
3 |
4 | WORKDIR /workspace
5 | # Copy the Go Modules manifests
6 | COPY go.mod go.mod
7 | COPY go.sum go.sum
8 | # cache deps before building and copying source so that we don't need to re-download as much
9 | # and so that source changes don't invalidate our downloaded layer
10 | RUN go mod download
11 |
12 | # Copy the go source
13 | COPY main.go main.go
14 | COPY cmd/ cmd/
15 | COPY api/ api/
16 | COPY config/crd/ config/crd/
17 | COPY controllers/ controllers/
18 | COPY pkg/ pkg/
19 |
20 | # Build
21 | RUN CGO_ENABLED=0 GOOS=linux GO111MODULE=on go build -a -o manager main.go
22 |
23 | # Use distroless as minimal base image to package the manager binary
24 | # Refer to https://github.com/GoogleContainerTools/distroless for more details
25 | FROM gcr.io/distroless/static-debian11:nonroot
26 | WORKDIR /
27 | COPY --from=builder /workspace/manager .
28 | USER 65532:65532
29 |
30 | ENTRYPOINT ["/manager"]
31 |
--------------------------------------------------------------------------------
/PROJECT:
--------------------------------------------------------------------------------
1 | domain: dominodatalab.com
2 | layout:
3 | - go.kubebuilder.io/v3
4 | projectName: distributed-compute-operator
5 | repo: github.com/dominodatalab/distributed-compute-operator
6 | resources:
7 | - api:
8 | crdVersion: v1
9 | namespaced: true
10 | controller: true
11 | domain: dominodatalab.com
12 | group: distributed-compute
13 | kind: RayCluster
14 | path: github.com/dominodatalab/distributed-compute-operator/api/v1alpha1
15 | version: v1alpha1
16 | webhooks:
17 | defaulting: true
18 | validation: true
19 | webhookVersion: v1
20 | - api:
21 | crdVersion: v1
22 | namespaced: true
23 | controller: true
24 | domain: dominodatalab.com
25 | group: distributed-compute
26 | kind: DaskCluster
27 | path: github.com/dominodatalab/distributed-compute-operator/api/v1alpha1
28 | version: v1alpha1
29 | webhooks:
30 | defaulting: true
31 | validation: true
32 | webhookVersion: v1
33 | - api:
34 | crdVersion: v1
35 | namespaced: true
36 | controller: true
37 | domain: dominodatalab.com
38 | group: distributed-compute
39 | kind: SparkCluster
40 | path: github.com/dominodatalab/distributed-compute-operator/api/v1alpha1
41 | version: v1alpha1
42 | webhooks:
43 | defaulting: true
44 | validation: true
45 | webhookVersion: v1
46 | - api:
47 | crdVersion: v1
48 | namespaced: true
49 | controller: true
50 | domain: dominodatalab.com
51 | group: distributed-compute
52 | kind: MPICluster
53 | path: github.com/dominodatalab/distributed-compute-operator/api/v1alpha1
54 | version: v1alpha1
55 | webhooks:
56 | defaulting: true
57 | validation: true
58 | webhookVersion: v1
59 | version: "3"
60 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
25 | # IMPORTANT
26 |
27 | This repository has now been archived. No further work will be merged here.
28 |
29 | # Distributed Compute Operator
30 |
31 | Kubernetes operator that provides Ray, Spark, and Dask clusters on demand via [Custom Resource Definitions][custom resources].
32 |
33 | ## Overview
34 |
35 | TODO
36 |
37 | ## Installation
38 |
39 | The easiest way to install DCO is to use the provided Helm chart.
40 |
41 | ### Prerequisites
42 |
43 | Before you get started using the DCO, make sure you have the following:
44 | 
45 | - Access to a Kubernetes cluster running version **1.16** or above. As of this
46 |   version, the CRD API is stable and supports our required features.
47 | - The [helm] client, version **3.0.0** or above.
48 | - The [cert-manager] operator. DCO makes extensive use of [webhooks],
49 |   which require TLS.
50 |
51 | ### Install
52 |
53 | ```shell
54 | $ helm install distributed-compute-operator ./deploy/helm/distributed-compute-operator
55 | ```
56 |
57 | ## Development
58 |
59 | The following instructions will help you create a local Kubernetes environment
60 | that can be used to test every feature supported by this operator.
61 |
62 | 1. Install [minikube] and create a new cluster.
63 |
64 | ```shell
65 | # tested using minikube v1.17.1 and k8s v1.21.3
66 | $ minikube start \
67 | --cpus=6 --memory=16384 --driver=hyperkit \
68 | --extra-config=apiserver.enable-admission-plugins=PodSecurityPolicy \
69 | --addons=pod-security-policy
70 | ```
71 |
72 | 1. Install cert-manager
73 | 1. Install metrics-server
74 | 1. Launch operator
75 |
76 | [custom resources]: https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/
77 | [helm]: https://helm.sh/docs/intro/install/
78 | [cert-manager]: https://cert-manager.io/docs/
79 | [webhooks]: https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/
80 | [minikube]: https://minikube.sigs.k8s.io/docs/
81 |
--------------------------------------------------------------------------------
/api/v1alpha1/daskcluster_types.go:
--------------------------------------------------------------------------------
1 | package v1alpha1
2 |
3 | import (
4 | corev1 "k8s.io/api/core/v1"
5 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
6 | )
7 |
8 | // DaskClusterWorker defines worker-specific workload settings.
9 | type DaskClusterWorker struct {
10 | WorkloadConfig `json:",inline"`
11 | Replicas *int32 `json:"replicas,omitempty"`
12 | }
13 |
14 | // DaskClusterSpec defines the desired state of DaskCluster.
15 | type DaskClusterSpec struct {
16 | ScalableClusterConfig `json:",inline"`
17 |
18 | Scheduler WorkloadConfig `json:"scheduler,omitempty"`
19 | Worker DaskClusterWorker `json:"worker,omitempty"`
20 |
21 | SchedulerPort int32 `json:"schedulerPort,omitempty"`
22 | DashboardPort int32 `json:"dashboardPort,omitempty"`
23 | WorkerPort int32 `json:"workerPort,omitempty"`
24 | NannyPort int32 `json:"nannyPort,omitempty"`
25 |
26 | // AdditionalClientPorts are extra ports through which cluster nodes could connect to the client.
27 | AdditionalClientPorts []corev1.ServicePort `json:"additionalClientPorts,omitempty"`
28 | }
29 |
30 | // DaskClusterStatus defines the observed state of DaskCluster
31 | type DaskClusterStatus struct {
32 | ClusterStatusConfig `json:",inline"`
33 | }
34 |
35 | //+kubebuilder:object:root=true
36 | //+kubebuilder:resource:shortName=dask
37 | //+kubebuilder:subresource:status
38 | //+kubebuilder:subresource:scale:specpath=.spec.worker.replicas,statuspath=.status.workerReplicas,selectorpath=.status.workerSelector
39 | //+kubebuilder:printcolumn:name="Workers",type=integer,JSONPath=".spec.worker.replicas"
40 | //+kubebuilder:printcolumn:name="Status",type=string,JSONPath=".status.clusterStatus"
41 | //+kubebuilder:printcolumn:name="Age",type=date,JSONPath=".metadata.creationTimestamp"
42 | //+kubebuilder:printcolumn:name="Image",type=string,JSONPath=".spec.image"
43 | //+kubebuilder:printcolumn:name="Network Policy",type=boolean,JSONPath=".spec.networkPolicy.enabled",priority=10
44 | //+kubebuilder:printcolumn:name="Pods",type=string,JSONPath=".status.nodes",priority=10
45 |
46 | // DaskCluster is the Schema for the daskclusters API.
47 | type DaskCluster struct {
48 | metav1.TypeMeta `json:",inline"`
49 | metav1.ObjectMeta `json:"metadata,omitempty"`
50 |
51 | Spec DaskClusterSpec `json:"spec,omitempty"`
52 | Status DaskClusterStatus `json:"status,omitempty"`
53 | }
54 |
55 | //+kubebuilder:object:root=true
56 |
57 | // DaskClusterList contains a list of DaskCluster.
58 | type DaskClusterList struct {
59 | metav1.TypeMeta `json:",inline"`
60 | metav1.ListMeta `json:"metadata,omitempty"`
61 | Items []DaskCluster `json:"items"`
62 | }
63 |
64 | func init() {
65 | SchemeBuilder.Register(&DaskCluster{}, &DaskClusterList{})
66 | }
67 |
--------------------------------------------------------------------------------
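
The types above, together with the scheme registration in `groupversion_info.go` (next file), are what clients use to talk to the `daskclusters` API. As a rough illustration only: the name, namespace, and replica count below are made up, and unset fields are assumed to be filled in by the defaulting webhook in `daskcluster_webhook.go` (not reproduced here), a DaskCluster could be created programmatically like this:

```go
package main

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"

	dcov1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
)

func main() {
	// Register the distributed-compute types with a fresh scheme.
	scheme := runtime.NewScheme()
	if err := dcov1alpha1.AddToScheme(scheme); err != nil {
		panic(err)
	}

	// Build a client from the local kubeconfig or in-cluster config.
	c, err := client.New(ctrl.GetConfigOrDie(), client.Options{Scheme: scheme})
	if err != nil {
		panic(err)
	}

	// Ask for three workers; the remaining fields (image, ports, etc.) are
	// assumed to be populated by the defaulting webhook.
	replicas := int32(3)
	cluster := &dcov1alpha1.DaskCluster{
		ObjectMeta: metav1.ObjectMeta{Name: "example", Namespace: "default"},
		Spec: dcov1alpha1.DaskClusterSpec{
			Worker: dcov1alpha1.DaskClusterWorker{Replicas: &replicas},
		},
	}

	if err := c.Create(context.Background(), cluster); err != nil {
		panic(err)
	}
	fmt.Println("created", cluster.Name)
}
```

Nothing in this sketch is specific to Dask; the same pattern applies to the MPICluster, RayCluster, and SparkCluster types registered with the same SchemeBuilder.
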
/api/v1alpha1/groupversion_info.go:
--------------------------------------------------------------------------------
1 | // Package v1alpha1 contains API Schema definitions for the distributed-compute v1alpha1 API group
2 | // +kubebuilder:object:generate=true
3 | // +groupName=distributed-compute.dominodatalab.com
4 | package v1alpha1
5 |
6 | import (
7 | "k8s.io/apimachinery/pkg/runtime/schema"
8 | "sigs.k8s.io/controller-runtime/pkg/scheme"
9 | )
10 |
11 | var (
12 | // GroupVersion is group version used to register these objects
13 | GroupVersion = schema.GroupVersion{Group: "distributed-compute.dominodatalab.com", Version: "v1alpha1"}
14 |
15 | // SchemeBuilder is used to add go types to the GroupVersionKind scheme
16 | SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
17 |
18 | // AddToScheme adds the types in this group-version to the given scheme.
19 | AddToScheme = SchemeBuilder.AddToScheme
20 | )
21 |
--------------------------------------------------------------------------------
/api/v1alpha1/mpicluster_types.go:
--------------------------------------------------------------------------------
1 | package v1alpha1
2 |
3 | import (
4 | corev1 "k8s.io/api/core/v1"
5 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
6 | )
7 |
8 | // MPIClusterWorker defines worker-specific workload settings.
9 | type MPIClusterWorker struct {
10 | WorkloadConfig `json:",inline"`
11 | Replicas *int32 `json:"replicas,omitempty"`
12 | SharedSSHSecret string `json:"sharedSSHSecret"`
13 | UserName string `json:"userName,omitempty"`
14 | UserID *int64 `json:"userID,omitempty"`
15 | GroupName string `json:"groupName,omitempty"`
16 | GroupID *int64 `json:"groupID,omitempty"`
17 | HomeDir string `json:"homeDir,omitempty"`
18 | }
19 |
20 | // MPIClusterSpec defines the desired state of MPICluster.
21 | type MPIClusterSpec struct {
22 | ClusterConfig `json:",inline"`
23 | Worker MPIClusterWorker `json:"worker,omitempty"`
24 |
25 | // WorkerPorts specifies the range of ports used by worker processes for communication.
26 | WorkerPorts []int32 `json:"workerPorts,omitempty"`
27 | // AdditionalClientPorts are extra ports through which cluster nodes could connect to the client.
28 | AdditionalClientPorts []corev1.ServicePort `json:"additionalClientPorts,omitempty"`
29 | }
30 |
31 | //+kubebuilder:object:root=true
32 | //+kubebuilder:resource:shortName=mpi
33 | //+kubebuilder:subresource:status
34 | //+kubebuilder:printcolumn:name="Workers",type=integer,JSONPath=".spec.worker.replicas"
35 | //+kubebuilder:printcolumn:name="Status",type=string,JSONPath=".status.clusterStatus"
36 | //+kubebuilder:printcolumn:name="Age",type=date,JSONPath=".metadata.creationTimestamp"
37 | //+kubebuilder:printcolumn:name="Image",type=string,JSONPath=".status.image",priority=10
38 | //+kubebuilder:printcolumn:name="Bound PSP",type=string,JSONPath=".spec.podSecurityPolicy",priority=10
39 | //+kubebuilder:printcolumn:name="Network Policy",type=boolean,JSONPath=".spec.networkPolicy.enabled",priority=10
40 | //+kubebuilder:printcolumn:name="Pods",type=string,JSONPath=".status.nodes",priority=10
41 |
42 | // MPICluster is the Schema for the MPI Clusters API.
43 | type MPICluster struct {
44 | metav1.TypeMeta `json:",inline"`
45 | metav1.ObjectMeta `json:"metadata,omitempty"`
46 | Spec MPIClusterSpec `json:"spec,omitempty"`
47 | Status ClusterStatusConfig `json:"status,omitempty"`
48 | }
49 |
50 | //+kubebuilder:object:root=true
51 |
52 | // MPIClusterList contains a list of MPICluster.
53 | type MPIClusterList struct {
54 | metav1.TypeMeta `json:",inline"`
55 | metav1.ListMeta `json:"metadata,omitempty"`
56 | Items []MPICluster `json:"items"`
57 | }
58 |
59 | func init() {
60 | SchemeBuilder.Register(&MPICluster{}, &MPIClusterList{})
61 | }
62 |
--------------------------------------------------------------------------------
/cluster-testing/dask.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ServiceAccount
3 | metadata:
4 | name: dask-notebook
5 |
6 | ---
7 | apiVersion: rbac.authorization.k8s.io/v1
8 | kind: Role
9 | metadata:
10 | name: dask-notebook
11 | rules:
12 | - apiGroups: ['policy']
13 | resources: ['podsecuritypolicies']
14 | verbs: ['use']
15 | resourceNames:
16 | - privileged
17 |
18 | ---
19 | apiVersion: rbac.authorization.k8s.io/v1
20 | kind: RoleBinding
21 | metadata:
22 | name: dask-notebook
23 | roleRef:
24 | kind: Role
25 | name: dask-notebook
26 | apiGroup: rbac.authorization.k8s.io
27 | subjects:
28 | - kind: ServiceAccount
29 | name: dask-notebook
30 |
31 | ---
32 | apiVersion: apps/v1
33 | kind: Deployment
34 | metadata:
35 | name: dask-notebook
36 | labels:
37 | app: dask-notebook
38 | dask-client: "true"
39 | spec:
40 | replicas: 1
41 | selector:
42 | matchLabels:
43 | app: dask-notebook
44 | template:
45 | metadata:
46 | labels:
47 | app: dask-notebook
48 | dask-client: "true"
49 | spec:
50 | serviceAccountName: dask-notebook
51 | containers:
52 | - name: dask-notebook
53 | image: daskdev/dask-notebook:2021.7.2
54 | ports:
55 | - containerPort: 8888
56 |
57 | ---
58 | apiVersion: v1
59 | kind: Service
60 | metadata:
61 | name: dask-notebook
62 | spec:
63 | type: NodePort
64 | selector:
65 | app: dask-notebook
66 | ports:
67 | - protocol: TCP
68 | name: tcp-ui
69 | port: 8888
70 | targetPort: 8888
71 |
--------------------------------------------------------------------------------
/cluster-testing/ray.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ServiceAccount
3 | metadata:
4 | name: test-ray
5 |
6 | ---
7 | apiVersion: rbac.authorization.k8s.io/v1
8 | kind: Role
9 | metadata:
10 | name: test-ray
11 | rules:
12 | - apiGroups: ["policy"]
13 | resources: ["podsecuritypolicies"]
14 | verbs: ["use"]
15 | resourceNames:
16 | - privileged
17 |
18 | ---
19 | apiVersion: rbac.authorization.k8s.io/v1
20 | kind: RoleBinding
21 | metadata:
22 | name: test-ray
23 | roleRef:
24 | kind: Role
25 | name: test-ray
26 | apiGroup: rbac.authorization.k8s.io
27 | subjects:
28 | - kind: ServiceAccount
29 | name: test-ray
30 |
31 | ---
32 | apiVersion: apps/v1
33 | kind: Deployment
34 | metadata:
35 | name: test-ray
36 | labels:
37 | app: test-ray
38 | version: 1.6.0-cpu
39 | ray-client: "true"
40 | spec:
41 | replicas: 1
42 | selector:
43 | matchLabels:
44 | app: test-ray
45 | template:
46 | metadata:
47 | labels:
48 | app: test-ray
49 | version: 1.6.0-cpu
50 | ray-client: "true"
51 | spec:
52 | serviceAccountName: test-ray
53 | containers:
54 | - name: ray
55 | image: rayproject/ray:1.6.0-cpu
56 | command: ["sleep", "86400"]
57 |
--------------------------------------------------------------------------------
/cmd/crdapply.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "context"
5 |
6 | "github.com/spf13/cobra"
7 |
8 | "github.com/dominodatalab/distributed-compute-operator/pkg/crd"
9 | )
10 |
11 | var crdApplyCmd = &cobra.Command{
12 | Use: "crd-apply",
13 | Short: "Apply custom resource definitions to a cluster",
14 | Long: `Apply all "distributed-compute.dominodatalab.com" CRDs to a cluster.
15 |
16 | Apply Rules:
17 | - When a definition is missing, it will be created
18 | - If a definition is already present, then it will be updated
19 | - Updating definitions that have not changed results in a no-op`,
20 | RunE: func(cmd *cobra.Command, args []string) error {
21 | return crd.Apply(context.Background(), istioEnabled)
22 | },
23 | }
24 |
25 | func init() {
26 | rootCmd.AddCommand(crdApplyCmd)
27 | }
28 |
--------------------------------------------------------------------------------
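
The apply rules listed in the command's long description correspond to the usual create-or-update pattern from controller-runtime. The actual logic lives in `pkg/crd` (not shown in this dump); the following is only a sketch of that general pattern using `controllerutil.CreateOrUpdate`, with `applyCRD` as a hypothetical helper name:

```go
package example

import (
	"context"
	"fmt"

	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

// applyCRD creates the definition when it is missing and updates it when it
// is already present; an unchanged definition yields OperationResultNone,
// i.e. a no-op.
func applyCRD(ctx context.Context, c client.Client, desired *apiextensionsv1.CustomResourceDefinition) error {
	existing := &apiextensionsv1.CustomResourceDefinition{}
	existing.Name = desired.Name

	op, err := controllerutil.CreateOrUpdate(ctx, c, existing, func() error {
		// Copy the desired state onto whatever currently exists (or onto the
		// empty object that is about to be created).
		existing.Labels = desired.Labels
		existing.Annotations = desired.Annotations
		existing.Spec = desired.Spec
		return nil
	})
	if err != nil {
		return err
	}

	fmt.Printf("crd %q: %s\n", desired.Name, op)
	return nil
}
```
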
/cmd/crddelete.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "context"
5 |
6 | "github.com/spf13/cobra"
7 |
8 | "github.com/dominodatalab/distributed-compute-operator/pkg/crd"
9 | )
10 |
11 | var crdDeleteCmd = &cobra.Command{
12 | Use: "crd-delete",
13 | Short: "Delete custom resource definitions from a cluster",
14 | Long: `Delete all "distributed-compute.dominodatalab.com" CRDs from a cluster.
15 |
16 | Any running distributed compute resources will be decommissioned when this
17 | operation runs (i.e. your deployments will be deleted immediately). This will
18 | only attempt to remove definitions that are already present in Kubernetes.`,
19 | RunE: func(cmd *cobra.Command, args []string) error {
20 | return crd.Delete(context.Background(), istioEnabled)
21 | },
22 | }
23 |
24 | func init() {
25 | rootCmd.AddCommand(crdDeleteCmd)
26 | }
27 |
--------------------------------------------------------------------------------
/cmd/root.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "fmt"
5 | "os"
6 |
7 | "github.com/spf13/cobra"
8 | )
9 |
10 | var istioEnabled bool
11 |
12 | var rootCmd = &cobra.Command{
13 | Use: "distributed-compute-operator",
14 | Short: "Kubernetes operator that manages parallel computing clusters.",
15 | Long: `Kubernetes operator that manages parallel computing clusters.`,
16 | }
17 |
18 | // Execute launches the command line tool.
19 | func Execute() {
20 | if err := rootCmd.Execute(); err != nil {
21 | fmt.Println(err)
22 | os.Exit(1)
23 | }
24 | }
25 |
26 | func init() {
27 | // NOTE: required until https://github.com/spf13/cobra/issues/587
28 | rootCmd.SetHelpCommand(&cobra.Command{Hidden: true})
29 | rootCmd.PersistentFlags().BoolVar(&istioEnabled, "istio-enabled", false, "Enable support for Istio sidecar container")
30 | }
31 |
--------------------------------------------------------------------------------
/cmd/start.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "flag"
5 |
6 | "github.com/dominodatalab/distributed-compute-operator/controllers"
7 |
8 | "github.com/spf13/cobra"
9 | "sigs.k8s.io/controller-runtime/pkg/log/zap"
10 |
11 | "github.com/dominodatalab/distributed-compute-operator/pkg/manager"
12 | )
13 |
14 | const WebhookPort = 9443
15 |
16 | var (
17 | namespaces []string
18 | probeAddr string
19 | metricsAddr string
20 | webhookPort int
21 | enableLeaderElection bool
22 | zapOpts = zap.Options{}
23 | mpiInitImage string
24 | mpiSyncImage string
25 | )
26 |
27 | var startCmd = &cobra.Command{
28 | Use: "start",
29 | Short: "Start the controller manager",
30 | RunE: func(cmd *cobra.Command, args []string) error {
31 | cfg := &controllers.Config{
32 | Namespaces: namespaces,
33 | MetricsAddr: metricsAddr,
34 | HealthProbeAddr: probeAddr,
35 | WebhookServerPort: webhookPort,
36 | EnableLeaderElection: enableLeaderElection,
37 | IstioEnabled: istioEnabled,
38 | ZapOptions: zapOpts,
39 | MPIInitImage: mpiInitImage,
40 | MPISyncImage: mpiSyncImage,
41 | }
42 |
43 | return manager.Start(cfg)
44 | },
45 | }
46 |
47 | func init() {
48 | startCmd.Flags().SortFlags = false
49 |
50 | fs := new(flag.FlagSet)
51 | zapOpts.BindFlags(fs)
52 |
53 | startCmd.Flags().AddGoFlagSet(fs)
54 | startCmd.Flags().StringSliceVar(&namespaces, "namespaces", nil,
55 | "Only reconcile resources in these namespaces")
56 | startCmd.Flags().IntVar(&webhookPort, "webhook-server-port", WebhookPort,
57 | "Webhook server will bind to this port")
58 | startCmd.Flags().StringVar(&metricsAddr, "metrics-bind-address", ":8080",
59 | "Metrics endpoint will bind to this address")
60 | startCmd.Flags().StringVar(&probeAddr, "health-probe-bind-address", ":8081",
61 | "Health probe endpoint will bind to this address")
62 | startCmd.Flags().BoolVar(&enableLeaderElection, "leader-elect", false,
63 | "Enable leader election to ensure there is only one active controller manager")
64 | startCmd.Flags().StringVar(&mpiInitImage, "mpi-init-image", "",
65 | "Image for MPI worker init container")
66 | startCmd.Flags().StringVar(&mpiSyncImage, "mpi-sync-image", "",
67 | "Image for MPI worker sync container")
68 |
69 | rootCmd.AddCommand(startCmd)
70 | }
71 |
--------------------------------------------------------------------------------
/config/certmanager/certificate.yaml:
--------------------------------------------------------------------------------
1 | # The following manifests contain a self-signed issuer CR and a certificate CR.
2 | # More documentation can be found at https://docs.cert-manager.io
3 | # WARNING: Targets CertManager v1.0. Check https://cert-manager.io/docs/installation/upgrading/ for breaking changes.
4 | apiVersion: cert-manager.io/v1
5 | kind: Issuer
6 | metadata:
7 | name: selfsigned-issuer
8 | namespace: system
9 | spec:
10 | selfSigned: {}
11 | ---
12 | apiVersion: cert-manager.io/v1
13 | kind: Certificate
14 | metadata:
15 | name: serving-cert # this name should match the one appeared in kustomizeconfig.yaml
16 | namespace: system
17 | spec:
18 | # $(SERVICE_NAME) and $(SERVICE_NAMESPACE) will be substituted by kustomize
19 | dnsNames:
20 | - $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc
21 | - $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc.cluster.local
22 | issuerRef:
23 | kind: Issuer
24 | name: selfsigned-issuer
25 | secretName: webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize
26 |
--------------------------------------------------------------------------------
/config/certmanager/kustomization.yaml:
--------------------------------------------------------------------------------
1 | resources:
2 | - certificate.yaml
3 |
4 | configurations:
5 | - kustomizeconfig.yaml
6 |
--------------------------------------------------------------------------------
/config/certmanager/kustomizeconfig.yaml:
--------------------------------------------------------------------------------
1 | # This configuration is for teaching kustomize how to update name ref and var substitution
2 | nameReference:
3 | - kind: Issuer
4 | group: cert-manager.io
5 | fieldSpecs:
6 | - kind: Certificate
7 | group: cert-manager.io
8 | path: spec/issuerRef/name
9 |
10 | varReference:
11 | - kind: Certificate
12 | group: cert-manager.io
13 | path: spec/commonName
14 | - kind: Certificate
15 | group: cert-manager.io
16 | path: spec/dnsNames
17 |
--------------------------------------------------------------------------------
/config/crd/embed.go:
--------------------------------------------------------------------------------
1 | package crd
2 |
3 | import (
4 | "embed"
5 | "path/filepath"
6 | )
7 |
8 | // NOTE: If we start using conversion webhooks in the future and need to
9 | // "patch" our CRD bases with `kustomize', we can (1) pre-process CRDs during
10 | // build time and store them in "config/crd/processed", (2) git ignore that
11 | // directory, and (3) embed that directory instead of "bases".
12 |
13 | //go:embed bases/*.yaml
14 | var bases embed.FS
15 |
16 | const contentDir = "bases"
17 |
18 | // Definition represents the metadata and contents of a single custom resource definition.
19 | type Definition struct {
20 | Filename string
21 | Contents []byte
22 | }
23 |
24 | // ReadAll returns a slice of custom resource Definition objects.
25 | func ReadAll() (definitions []Definition, err error) {
26 | files, err := bases.ReadDir(contentDir)
27 | if err != nil {
28 | return
29 | }
30 |
31 | for _, f := range files {
32 | if f.IsDir() {
33 | continue
34 | }
35 |
36 | var contents []byte
37 | contents, err = bases.ReadFile(filepath.Join(contentDir, f.Name()))
38 | if err != nil {
39 | return
40 | }
41 |
42 | definitions = append(definitions, Definition{
43 | Filename: f.Name(),
44 | Contents: contents,
45 | })
46 | }
47 |
48 | return definitions, nil
49 | }
50 |
--------------------------------------------------------------------------------
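
ReadAll only returns raw bytes; a caller still has to decode each Definition before it can be applied to a cluster. The sketch below shows one way to do that with `sigs.k8s.io/yaml` (assumed to be available as a dependency; whether `pkg/crd` actually decodes the documents this way is not shown in this dump):

```go
package example

import (
	"fmt"

	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
	"sigs.k8s.io/yaml"

	"github.com/dominodatalab/distributed-compute-operator/config/crd"
)

// decodeDefinitions turns the embedded YAML documents returned by crd.ReadAll
// into typed CustomResourceDefinition objects.
func decodeDefinitions() ([]*apiextensionsv1.CustomResourceDefinition, error) {
	defs, err := crd.ReadAll()
	if err != nil {
		return nil, err
	}

	var out []*apiextensionsv1.CustomResourceDefinition
	for _, def := range defs {
		obj := &apiextensionsv1.CustomResourceDefinition{}
		if err := yaml.Unmarshal(def.Contents, obj); err != nil {
			return nil, fmt.Errorf("decoding %s: %w", def.Filename, err)
		}
		out = append(out, obj)
	}
	return out, nil
}
```
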
/config/crd/kustomization.yaml:
--------------------------------------------------------------------------------
1 | # This kustomization.yaml is not intended to be run by itself,
2 | # since it depends on service name and namespace that are out of this kustomize package.
3 | # It should be run by config/default
4 | resources:
5 | - bases/distributed-compute.dominodatalab.com_rayclusters.yaml
6 | - bases/distributed-compute.dominodatalab.com_sparkclusters.yaml
7 | - bases/distributed-compute.dominodatalab.com_daskclusters.yaml
8 | - bases/distributed-compute.dominodatalab.com_mpiclusters.yaml
9 | #+kubebuilder:scaffold:crdkustomizeresource
10 |
11 | patchesStrategicMerge:
12 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix.
13 | # patches here are for enabling the conversion webhook for each CRD
14 | - patches/webhook_in_rayclusters.yaml
15 | - patches/webhook_in_sparkclusters.yaml
16 | - patches/webhook_in_daskclusters.yaml
17 | #- patches/webhook_in_mpiclusters.yaml
18 | #+kubebuilder:scaffold:crdkustomizewebhookpatch
19 |
20 | # [CERTMANAGER] To enable webhook, uncomment all the sections with [CERTMANAGER] prefix.
21 | # patches here are for enabling the CA injection for each CRD
22 | #- patches/cainjection_in_rayclusters.yaml
23 | #- patches/cainjection_in_sparkclusters.yaml
24 | #- patches/cainjection_in_daskclusters.yaml
25 | #- patches/cainjection_in_mpiclusters.yaml
26 | #+kubebuilder:scaffold:crdkustomizecainjectionpatch
27 |
28 | # the following config is for teaching kustomize how to do kustomization for CRDs.
29 | configurations:
30 | - kustomizeconfig.yaml
31 |
--------------------------------------------------------------------------------
/config/crd/kustomizeconfig.yaml:
--------------------------------------------------------------------------------
1 | # This file is for teaching kustomize how to substitute name and namespace reference in CRD
2 | nameReference:
3 | - kind: Service
4 | version: v1
5 | fieldSpecs:
6 | - kind: CustomResourceDefinition
7 | version: v1
8 | group: apiextensions.k8s.io
9 | path: spec/conversion/webhook/clientConfig/service/name
10 |
11 | namespace:
12 | - kind: CustomResourceDefinition
13 | version: v1
14 | group: apiextensions.k8s.io
15 | path: spec/conversion/webhook/clientConfig/service/namespace
16 | create: false
17 |
18 | varReference:
19 | - path: metadata/annotations
20 |
--------------------------------------------------------------------------------
/config/crd/patches/cainjection_in_daskclusters.yaml:
--------------------------------------------------------------------------------
1 | # The following patch adds a directive for certmanager to inject CA into the CRD
2 | apiVersion: apiextensions.k8s.io/v1
3 | kind: CustomResourceDefinition
4 | metadata:
5 | annotations:
6 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
7 | name: daskclusters.distributed-compute.dominodatalab.com
8 |
--------------------------------------------------------------------------------
/config/crd/patches/cainjection_in_mpiclusters.yaml:
--------------------------------------------------------------------------------
1 | # The following patch adds a directive for certmanager to inject CA into the CRD
2 | apiVersion: apiextensions.k8s.io/v1
3 | kind: CustomResourceDefinition
4 | metadata:
5 | annotations:
6 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
7 | name: mpiclusters.distributed-compute.dominodatalab.com
8 |
--------------------------------------------------------------------------------
/config/crd/patches/cainjection_in_rayclusters.yaml:
--------------------------------------------------------------------------------
1 | # The following patch adds a directive for certmanager to inject CA into the CRD
2 | apiVersion: apiextensions.k8s.io/v1
3 | kind: CustomResourceDefinition
4 | metadata:
5 | annotations:
6 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
7 | name: rayclusters.distributed-compute.dominodatalab.com
8 |
--------------------------------------------------------------------------------
/config/crd/patches/cainjection_in_sparkclusters.yaml:
--------------------------------------------------------------------------------
1 | # The following patch adds a directive for certmanager to inject CA into the CRD
2 | apiVersion: apiextensions.k8s.io/v1
3 | kind: CustomResourceDefinition
4 | metadata:
5 | annotations:
6 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
7 | name: sparkclusters.distributed-compute.dominodatalab.com
8 |
--------------------------------------------------------------------------------
/config/crd/patches/webhook_in_daskclusters.yaml:
--------------------------------------------------------------------------------
1 | # The following patch enables a conversion webhook for the CRD
2 | apiVersion: apiextensions.k8s.io/v1
3 | kind: CustomResourceDefinition
4 | metadata:
5 | name: daskclusters.distributed-compute.dominodatalab.com
6 | spec:
7 | conversion:
8 | strategy: Webhook
9 | webhook:
10 | conversionReviewVersions: ["v1","v1beta1"]
11 | clientConfig:
12 | service:
13 | namespace: system
14 | name: webhook-service
15 | path: /convert
16 |
--------------------------------------------------------------------------------
/config/crd/patches/webhook_in_mpiclusters.yaml:
--------------------------------------------------------------------------------
1 | # The following patch enables a conversion webhook for the CRD
2 | apiVersion: apiextensions.k8s.io/v1
3 | kind: CustomResourceDefinition
4 | metadata:
5 | name: mpiclusters.distributed-compute.dominodatalab.com
6 | spec:
7 | conversion:
8 | strategy: Webhook
9 | webhook:
10 | clientConfig:
11 | service:
12 | namespace: system
13 | name: webhook-service
14 | path: /convert
15 | conversionReviewVersions:
16 | - v1
17 |
--------------------------------------------------------------------------------
/config/crd/patches/webhook_in_rayclusters.yaml:
--------------------------------------------------------------------------------
1 | # The following patch enables a conversion webhook for the CRD
2 | apiVersion: apiextensions.k8s.io/v1
3 | kind: CustomResourceDefinition
4 | metadata:
5 | name: rayclusters.distributed-compute.dominodatalab.com
6 | spec:
7 | conversion:
8 | strategy: Webhook
9 | webhook:
10 | conversionReviewVersions: ["v1","v1beta1"]
11 | clientConfig:
12 | service:
13 | namespace: system
14 | name: webhook-service
15 | path: /convert
16 |
--------------------------------------------------------------------------------
/config/crd/patches/webhook_in_sparkclusters.yaml:
--------------------------------------------------------------------------------
1 | # The following patch enables a conversion webhook for the CRD
2 | apiVersion: apiextensions.k8s.io/v1
3 | kind: CustomResourceDefinition
4 | metadata:
5 | name: sparkclusters.distributed-compute.dominodatalab.com
6 | spec:
7 | conversion:
8 | strategy: Webhook
9 | webhook:
10 | conversionReviewVersions: ["v1","v1beta1"]
11 | clientConfig:
12 | service:
13 | namespace: system
14 | name: webhook-service
15 | path: /convert
16 |
--------------------------------------------------------------------------------
/config/default/kustomization.yaml:
--------------------------------------------------------------------------------
1 | # Adds namespace to all resources.
2 | namespace: distributed-compute-operator-system
3 |
4 | # Value of this field is prepended to the
5 | # names of all resources, e.g. a deployment named
6 | # "wordpress" becomes "alices-wordpress".
7 | # Note that it should also match with the prefix (text before '-') of the namespace
8 | # field above.
9 | namePrefix: distributed-compute-operator-
10 |
11 | # Labels to add to all resources and selectors.
12 | #commonLabels:
13 | # someName: someValue
14 |
15 | bases:
16 | - ../crd
17 | - ../rbac
18 | - ../manager
19 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
20 | # crd/kustomization.yaml
21 | - ../webhook
22 | # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required.
23 | - ../certmanager
24 | # [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'.
25 | #- ../prometheus
26 |
27 | patchesStrategicMerge:
28 | # Protect the /metrics endpoint by putting it behind auth.
29 | # If you want your controller-manager to expose the /metrics
30 | # endpoint w/o any authn/z, please comment the following line.
31 | #- manager_auth_proxy_patch.yaml
32 |
33 | # Mount the controller config file for loading manager configurations
34 | # through a ComponentConfig type
35 | #- manager_config_patch.yaml
36 |
37 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
38 | # crd/kustomization.yaml
39 | - manager_webhook_patch.yaml
40 |
41 | # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'.
42 | # Uncomment 'CERTMANAGER' sections in crd/kustomization.yaml to enable the CA injection in the admission webhooks.
43 | # 'CERTMANAGER' needs to be enabled to use ca injection
44 | - webhookcainjection_patch.yaml
45 |
46 | # the following config is for teaching kustomize how to do var substitution
47 | vars:
48 | # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix.
49 | - name: CERTIFICATE_NAMESPACE # namespace of the certificate CR
50 | objref:
51 | kind: Certificate
52 | group: cert-manager.io
53 | version: v1
54 | name: serving-cert # this name should match the one in certificate.yaml
55 | fieldref:
56 | fieldpath: metadata.namespace
57 | - name: CERTIFICATE_NAME
58 | objref:
59 | kind: Certificate
60 | group: cert-manager.io
61 | version: v1
62 | name: serving-cert # this name should match the one in certificate.yaml
63 | - name: SERVICE_NAMESPACE # namespace of the service
64 | objref:
65 | kind: Service
66 | version: v1
67 | name: webhook-service
68 | fieldref:
69 | fieldpath: metadata.namespace
70 | - name: SERVICE_NAME
71 | objref:
72 | kind: Service
73 | version: v1
74 | name: webhook-service
75 |
--------------------------------------------------------------------------------
/config/default/manager_auth_proxy_patch.yaml:
--------------------------------------------------------------------------------
1 | # This patch injects a sidecar container which is an HTTP proxy for the
2 | # controller manager; it performs RBAC authorization against the Kubernetes API using SubjectAccessReviews.
3 | apiVersion: apps/v1
4 | kind: Deployment
5 | metadata:
6 | name: controller-manager
7 | namespace: system
8 | spec:
9 | template:
10 | spec:
11 | containers:
12 | - name: kube-rbac-proxy
13 | image: gcr.io/kubebuilder/kube-rbac-proxy:v0.8.0
14 | args:
15 | - "--secure-listen-address=0.0.0.0:8443"
16 | - "--upstream=http://127.0.0.1:8080/"
17 | - "--logtostderr=true"
18 | - "--v=10"
19 | ports:
20 | - containerPort: 8443
21 | name: https
22 | - name: manager
23 | args:
24 | - "--health-probe-bind-address=:8081"
25 | - "--metrics-bind-address=127.0.0.1:8080"
26 | - "--leader-elect"
27 |
--------------------------------------------------------------------------------
/config/default/manager_config_patch.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: controller-manager
5 | namespace: system
6 | spec:
7 | template:
8 | spec:
9 | containers:
10 | - name: manager
11 | args:
12 | - "--config=controller_manager_config.yaml"
13 | volumeMounts:
14 | - name: manager-config
15 | mountPath: /controller_manager_config.yaml
16 | subPath: controller_manager_config.yaml
17 | volumes:
18 | - name: manager-config
19 | configMap:
20 | name: manager-config
21 |
--------------------------------------------------------------------------------
/config/default/manager_webhook_patch.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: controller-manager
5 | namespace: system
6 | spec:
7 | template:
8 | spec:
9 | containers:
10 | - name: manager
11 | ports:
12 | - containerPort: 9443
13 | name: webhook-server
14 | protocol: TCP
15 | volumeMounts:
16 | - mountPath: /tmp/k8s-webhook-server/serving-certs
17 | name: cert
18 | readOnly: true
19 | volumes:
20 | - name: cert
21 | secret:
22 | defaultMode: 420
23 | secretName: webhook-server-cert
24 |
--------------------------------------------------------------------------------
/config/default/webhookcainjection_patch.yaml:
--------------------------------------------------------------------------------
1 | # This patch adds annotations to the admission webhook configs, and
2 | # the variables $(CERTIFICATE_NAMESPACE) and $(CERTIFICATE_NAME) will be substituted by kustomize.
3 | apiVersion: admissionregistration.k8s.io/v1
4 | kind: MutatingWebhookConfiguration
5 | metadata:
6 | name: mutating-webhook-configuration
7 | annotations:
8 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
9 | ---
10 | apiVersion: admissionregistration.k8s.io/v1
11 | kind: ValidatingWebhookConfiguration
12 | metadata:
13 | name: validating-webhook-configuration
14 | annotations:
15 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
16 |
--------------------------------------------------------------------------------
/config/manager/controller_manager_config.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: controller-runtime.sigs.k8s.io/v1alpha1
2 | kind: ControllerManagerConfig
3 | health:
4 | healthProbeBindAddress: :8081
5 | metrics:
6 | bindAddress: 127.0.0.1:8080
7 | webhook:
8 | port: 9443
9 | leaderElection:
10 | leaderElect: true
11 | resourceName: a846cbf2.dominodatalab.com
12 |
--------------------------------------------------------------------------------
/config/manager/kustomization.yaml:
--------------------------------------------------------------------------------
1 | resources:
2 | - manager.yaml
3 |
4 | generatorOptions:
5 | disableNameSuffixHash: true
6 |
7 | configMapGenerator:
8 | - files:
9 | - controller_manager_config.yaml
10 | name: manager-config
11 | apiVersion: kustomize.config.k8s.io/v1beta1
12 | kind: Kustomization
13 | images:
14 | - name: controller
15 | newName: ghcr.io/dominodatalab/distributed-compute-operator
16 | newTag: latest
17 |
--------------------------------------------------------------------------------
/config/manager/manager.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Namespace
3 | metadata:
4 | labels:
5 | control-plane: controller-manager
6 | name: system
7 | ---
8 | apiVersion: apps/v1
9 | kind: Deployment
10 | metadata:
11 | name: controller-manager
12 | namespace: system
13 | labels:
14 | control-plane: controller-manager
15 | spec:
16 | selector:
17 | matchLabels:
18 | control-plane: controller-manager
19 | replicas: 1
20 | template:
21 | metadata:
22 | labels:
23 | control-plane: controller-manager
24 | spec:
25 | securityContext:
26 | runAsNonRoot: true
27 | containers:
28 | - command:
29 | - /manager
30 | - start
31 | args:
32 | - --leader-elect
33 | - --zap-log-level=5
34 | image: controller:latest
35 | imagePullPolicy: IfNotPresent # changed to aid development
36 | name: manager
37 | securityContext:
38 | allowPrivilegeEscalation: false
39 | livenessProbe:
40 | httpGet:
41 | path: /healthz
42 | port: 8081
43 | initialDelaySeconds: 15
44 | periodSeconds: 20
45 | readinessProbe:
46 | httpGet:
47 | path: /readyz
48 | port: 8081
49 | initialDelaySeconds: 5
50 | periodSeconds: 10
51 | resources:
52 | limits:
53 | cpu: 100m
54 | memory: 30Mi
55 | requests:
56 | cpu: 100m
57 | memory: 20Mi
58 | terminationGracePeriodSeconds: 10
59 |
--------------------------------------------------------------------------------
/config/prometheus/kustomization.yaml:
--------------------------------------------------------------------------------
1 | resources:
2 | - monitor.yaml
3 |
--------------------------------------------------------------------------------
/config/prometheus/monitor.yaml:
--------------------------------------------------------------------------------
1 |
2 | # Prometheus Monitor Service (Metrics)
3 | apiVersion: monitoring.coreos.com/v1
4 | kind: ServiceMonitor
5 | metadata:
6 | labels:
7 | control-plane: controller-manager
8 | name: controller-manager-metrics-monitor
9 | namespace: system
10 | spec:
11 | endpoints:
12 | - path: /metrics
13 | port: https
14 | selector:
15 | matchLabels:
16 | control-plane: controller-manager
17 |
--------------------------------------------------------------------------------
/config/rbac/auth_proxy_client_clusterrole.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: rbac.authorization.k8s.io/v1
2 | kind: ClusterRole
3 | metadata:
4 | name: metrics-reader
5 | rules:
6 | - nonResourceURLs: ["/metrics"]
7 | verbs: ["get"]
8 |
--------------------------------------------------------------------------------
/config/rbac/auth_proxy_role.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: rbac.authorization.k8s.io/v1
2 | kind: ClusterRole
3 | metadata:
4 | name: proxy-role
5 | rules:
6 | - apiGroups: ["authentication.k8s.io"]
7 | resources:
8 | - tokenreviews
9 | verbs: ["create"]
10 | - apiGroups: ["authorization.k8s.io"]
11 | resources:
12 | - subjectaccessreviews
13 | verbs: ["create"]
14 |
--------------------------------------------------------------------------------
/config/rbac/auth_proxy_role_binding.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: rbac.authorization.k8s.io/v1
2 | kind: ClusterRoleBinding
3 | metadata:
4 | name: proxy-rolebinding
5 | roleRef:
6 | apiGroup: rbac.authorization.k8s.io
7 | kind: ClusterRole
8 | name: proxy-role
9 | subjects:
10 | - kind: ServiceAccount
11 | name: default
12 | namespace: system
13 |
--------------------------------------------------------------------------------
/config/rbac/auth_proxy_service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 | labels:
5 | control-plane: controller-manager
6 | name: controller-manager-metrics-service
7 | namespace: system
8 | spec:
9 | ports:
10 | - name: https
11 | port: 8443
12 | targetPort: https
13 | selector:
14 | control-plane: controller-manager
15 |
--------------------------------------------------------------------------------
/config/rbac/daskcluster_editor_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to edit daskclusters.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: daskcluster-editor-role
6 | rules:
7 | - apiGroups:
8 | - distributed-compute.dominodatalab.com
9 | resources:
10 | - daskclusters
11 | verbs:
12 | - create
13 | - delete
14 | - get
15 | - list
16 | - patch
17 | - update
18 | - watch
19 | - apiGroups:
20 | - distributed-compute.dominodatalab.com
21 | resources:
22 | - daskclusters/status
23 | verbs:
24 | - get
25 |
--------------------------------------------------------------------------------
/config/rbac/daskcluster_viewer_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to view daskclusters.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: daskcluster-viewer-role
6 | rules:
7 | - apiGroups:
8 | - distributed-compute.dominodatalab.com
9 | resources:
10 | - daskclusters
11 | verbs:
12 | - get
13 | - list
14 | - watch
15 | - apiGroups:
16 | - distributed-compute.dominodatalab.com
17 | resources:
18 | - daskclusters/status
19 | verbs:
20 | - get
21 |
--------------------------------------------------------------------------------
/config/rbac/kustomization.yaml:
--------------------------------------------------------------------------------
1 | resources:
2 | - role.yaml
3 | - role_binding.yaml
4 | - leader_election_role.yaml
5 | - leader_election_role_binding.yaml
6 | # Comment the following 4 lines if you want to disable
7 | # the auth proxy (https://github.com/brancz/kube-rbac-proxy)
8 | # which protects your /metrics endpoint.
9 | - auth_proxy_service.yaml
10 | - auth_proxy_role.yaml
11 | - auth_proxy_role_binding.yaml
12 | - auth_proxy_client_clusterrole.yaml
13 |
--------------------------------------------------------------------------------
/config/rbac/leader_election_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions to do leader election.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: Role
4 | metadata:
5 | name: leader-election-role
6 | rules:
7 | - apiGroups:
8 | - ""
9 | - coordination.k8s.io
10 | resources:
11 | - configmaps
12 | - leases
13 | verbs:
14 | - get
15 | - list
16 | - watch
17 | - create
18 | - update
19 | - patch
20 | - delete
21 | - apiGroups:
22 | - ""
23 | resources:
24 | - events
25 | verbs:
26 | - create
27 | - patch
28 |
--------------------------------------------------------------------------------
/config/rbac/leader_election_role_binding.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: rbac.authorization.k8s.io/v1
2 | kind: RoleBinding
3 | metadata:
4 | name: leader-election-rolebinding
5 | roleRef:
6 | apiGroup: rbac.authorization.k8s.io
7 | kind: Role
8 | name: leader-election-role
9 | subjects:
10 | - kind: ServiceAccount
11 | name: default
12 | namespace: system
13 |
--------------------------------------------------------------------------------
/config/rbac/mpicluster_editor_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to edit mpiclusters.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: mpicluster-editor-role
6 | rules:
7 | - apiGroups:
8 | - distributed-compute.dominodatalab.com
9 | resources:
10 | - mpiclusters
11 | verbs:
12 | - create
13 | - delete
14 | - get
15 | - list
16 | - patch
17 | - update
18 | - watch
19 | - apiGroups:
20 | - distributed-compute.dominodatalab.com
21 | resources:
22 | - mpiclusters/status
23 | verbs:
24 | - get
25 |
--------------------------------------------------------------------------------
/config/rbac/mpicluster_viewer_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to view mpiclusters.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: mpicluster-viewer-role
6 | rules:
7 | - apiGroups:
8 | - distributed-compute.dominodatalab.com
9 | resources:
10 | - mpiclusters
11 | verbs:
12 | - get
13 | - list
14 | - watch
15 | - apiGroups:
16 | - distributed-compute.dominodatalab.com
17 | resources:
18 | - mpiclusters/status
19 | verbs:
20 | - get
21 |
--------------------------------------------------------------------------------
/config/rbac/raycluster_editor_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to edit rayclusters.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: raycluster-editor-role
6 | rules:
7 | - apiGroups:
8 | - distributed-compute.dominodatalab.com
9 | resources:
10 | - rayclusters
11 | verbs:
12 | - create
13 | - delete
14 | - get
15 | - list
16 | - patch
17 | - update
18 | - watch
19 | - apiGroups:
20 | - distributed-compute.dominodatalab.com
21 | resources:
22 | - rayclusters/status
23 | verbs:
24 | - get
25 |
--------------------------------------------------------------------------------
/config/rbac/raycluster_viewer_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to view rayclusters.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: raycluster-viewer-role
6 | rules:
7 | - apiGroups:
8 | - distributed-compute.dominodatalab.com
9 | resources:
10 | - rayclusters
11 | verbs:
12 | - get
13 | - list
14 | - watch
15 | - apiGroups:
16 | - distributed-compute.dominodatalab.com
17 | resources:
18 | - rayclusters/status
19 | verbs:
20 | - get
21 |
--------------------------------------------------------------------------------
/config/rbac/role.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | creationTimestamp: null
6 | name: manager-role
7 | rules:
8 | - apiGroups:
9 | - ""
10 | resources:
11 | - pods
12 | verbs:
13 | - list
14 | - watch
15 | - apiGroups:
16 | - ""
17 | resources:
18 | - serviceaccounts
19 | - services
20 | verbs:
21 | - create
22 | - list
23 | - update
24 | - watch
25 | - apiGroups:
26 | - ""
27 | resources:
28 | - configmaps
29 | verbs:
30 | - create
31 | - list
32 | - update
33 | - watch
34 | - apiGroups:
35 | - apps
36 | resources:
37 | - statefulsets
38 | verbs:
39 | - create
40 | - list
41 | - update
42 | - watch
43 | - apiGroups:
44 | - autoscaling
45 | resources:
46 | - horizontalpodautoscalers
47 | verbs:
48 | - create
49 | - delete
50 | - list
51 | - update
52 | - watch
53 | - apiGroups:
54 | - distributed-compute.dominodatalab.com
55 | resources:
56 | - daskclusters
57 | verbs:
58 | - create
59 | - delete
60 | - get
61 | - list
62 | - patch
63 | - update
64 | - watch
65 | - apiGroups:
66 | - distributed-compute.dominodatalab.com
67 | resources:
68 | - daskclusters/finalizers
69 | verbs:
70 | - update
71 | - apiGroups:
72 | - distributed-compute.dominodatalab.com
73 | resources:
74 | - daskclusters/status
75 | verbs:
76 | - get
77 | - patch
78 | - update
79 | - apiGroups:
80 | - distributed-compute.dominodatalab.com
81 | resources:
82 | - mpiclusters
83 | verbs:
84 | - create
85 | - delete
86 | - get
87 | - list
88 | - patch
89 | - update
90 | - watch
91 | - apiGroups:
92 | - distributed-compute.dominodatalab.com
93 | resources:
94 | - mpiclusters/finalizers
95 | verbs:
96 | - update
97 | - apiGroups:
98 | - distributed-compute.dominodatalab.com
99 | resources:
100 | - mpiclusters/status
101 | verbs:
102 | - get
103 | - patch
104 | - update
105 | - apiGroups:
106 | - distributed-compute.dominodatalab.com
107 | resources:
108 | - rayclusters
109 | verbs:
110 | - create
111 | - delete
112 | - get
113 | - list
114 | - patch
115 | - update
116 | - watch
117 | - apiGroups:
118 | - distributed-compute.dominodatalab.com
119 | resources:
120 | - rayclusters/finalizers
121 | verbs:
122 | - update
123 | - apiGroups:
124 | - distributed-compute.dominodatalab.com
125 | resources:
126 | - rayclusters/status
127 | verbs:
128 | - get
129 | - patch
130 | - update
131 | - apiGroups:
132 | - distributed-compute.dominodatalab.com
133 | resources:
134 | - sparkclusters
135 | verbs:
136 | - create
137 | - delete
138 | - get
139 | - list
140 | - patch
141 | - update
142 | - watch
143 | - apiGroups:
144 | - distributed-compute.dominodatalab.com
145 | resources:
146 | - sparkclusters/finalizers
147 | verbs:
148 | - update
149 | - apiGroups:
150 | - distributed-compute.dominodatalab.com
151 | resources:
152 | - sparkclusters/status
153 | verbs:
154 | - get
155 | - patch
156 | - update
157 | - apiGroups:
158 | - networking.k8s.io
159 | resources:
160 | - networkpolicies
161 | verbs:
162 | - create
163 | - delete
164 | - list
165 | - update
166 | - watch
167 | - apiGroups:
168 | - rbac.authorization.k8s.io
169 | resources:
170 | - rolebindings
171 | - roles
172 | verbs:
173 | - create
174 | - delete
175 | - list
176 | - update
177 | - watch
178 |
--------------------------------------------------------------------------------
/config/rbac/role_binding.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: rbac.authorization.k8s.io/v1
2 | kind: ClusterRoleBinding
3 | metadata:
4 | name: manager-rolebinding
5 | roleRef:
6 | apiGroup: rbac.authorization.k8s.io
7 | kind: ClusterRole
8 | name: manager-role
9 | subjects:
10 | - kind: ServiceAccount
11 | name: default
12 | namespace: system
13 |
--------------------------------------------------------------------------------
/config/samples/distributed-compute_v1alpha1_daskcluster.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: distributed-compute.dominodatalab.com/v1alpha1
2 | kind: DaskCluster
3 | metadata:
4 | name: example
5 | spec:
6 | # schedulerPort: 8786
7 | # dashboardPort: 8787
8 | # workerPort: 3000
9 | # nannyPort: 3001
10 |
11 | # additionalClientPorts:
12 | # - name: http-api-proxy
13 | # port: 8899
14 | # targetPort: 8899
15 | # protocol: TCP
16 |
17 | # image:
18 | # registry: ""
19 | # repository: daskdev/dask
20 | # tag: 2021.6.1
21 | # pullPolicy: IfNotPresent
22 |
23 | # autoscaling:
24 | # minReplicas:
25 | # maxReplicas:
26 | # averageCPUUtilization:
27 | # averageMemoryUtilization:
28 | # scaleDownStabilizationWindowSeconds:
29 |
30 | # networkPolicy:
31 | # enabled: true
32 | # clientLabels: {}
33 | # dashboardLabels: {}
34 | # dashboardNamespaceLabels: {}
35 |
36 | # serviceAccount:
37 | # name: ""
38 | # automountServiceAccountToken: false
39 |
40 | # podSecurityContext:
41 | # runAsUser:
42 | # runAsGroup:
43 | # fsGroup:
44 |
45 | # kerberosKeytab:
46 | # contents:
47 | # mountPath:
48 |
49 | # globalLabels: {}
50 | # envVars: []
51 | # imagePullSecrets: []
52 | # podSecurityPolicy: ""
53 | # istioMutualTLSMode: ""
54 |
55 | scheduler:
56 | # labels: {}
57 | # annotations: {}
58 | # nodeSelector: {}
59 | # affinity: {}
60 | # tolerations: []
61 | # initContainers: []
62 | # volumes: []
63 | # volumeMounts: []
64 | # volumeClaimTemplates: []
65 | # resources: {}
66 |
67 | worker:
68 | # replicas: 1
69 | # labels: {}
70 | # annotations: {}
71 | # nodeSelector: {}
72 | # affinity: {}
73 | # tolerations: []
74 | # initContainers: []
75 | # volumes: []
76 | # volumeMounts: []
77 | # volumeClaimTemplates: []
78 | resources:
79 | requests:
80 | cpu: 250m
81 | memory: 250Mi
82 |
--------------------------------------------------------------------------------
/config/samples/distributed-compute_v1alpha1_mpicluster.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: distributed-compute.dominodatalab.com/v1alpha1
2 | kind: MPICluster
3 | metadata:
4 | name: example
5 | spec:
6 | # image:
7 | # registry: ""
8 | # repository: horovod/horovod
9 | # tag: 0.22.1
10 | # pullPolicy: IfNotPresent
11 |
12 | # networkPolicy:
13 | # enabled: true
14 | # clientLabels: {}
15 | # dashboardLabels: {}
16 |
17 | # serviceAccount:
18 | # name: ""
19 | # automountServiceAccountToken: false
20 |
21 | # podSecurityContext:
22 | # runAsUser:
23 | # runAsGroup:
24 | # fsGroup:
25 |
26 | # kerberosKeytab:
27 | # contents:
28 | # mountPath:
29 |
30 | # globalLabels: {}
31 | # envVars: []
32 | # imagePullSecrets: []
33 | # podSecurityPolicy: ""
34 | # istioMutualTLSMode: ""
35 |
36 | # additionalClientPorts:
37 | # - name: http-api-proxy
38 | # port: 8899
39 | # targetPort: 8899
40 | # protocol: TCP
41 |
42 | worker:
43 | # replicas: 1
44 | sharedSSHSecret: ""
45 | # userName:
46 | # userID:
47 | # groupName:
48 | # groupID:
49 | # homeDir: /mnt
50 | # labels: {}
51 | # annotations: {}
52 | # nodeSelector: {}
53 | # affinity: {}
54 | # tolerations: []
55 | # initContainers: []
56 | # volumes: []
57 | # volumeMounts: []
58 | # volumeClaimTemplates: []
59 | # resources: {}
60 |
--------------------------------------------------------------------------------
/config/samples/distributed-compute_v1alpha1_raycluster.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: distributed-compute.dominodatalab.com/v1alpha1
2 | kind: RayCluster
3 | metadata:
4 | name: example
5 | spec:
6 | # redis port and additional redis shard ports used by the head node
7 | # port: 1234
8 | # redisShardPorts:
9 | # - 6380
10 | # - 6381
11 |
12 | # additionalClientPorts:
13 | # - name: http-api-proxy
14 | # port: 8899
15 | # targetPort: 8899
16 | # protocol: TCP
17 |
18 | # port used to connect clients to ray
19 | # clientServerPort: 10001
20 |
21 | # enable dashboard ui and set port
22 | # dashboardPort: 8265
23 | # enableDashboard: true
24 |
25 | # set the object store's port and initial memory
26 | # objectManagerPort: 2384
27 | # objectStoreMemoryBytes: 52428800
28 |
29 | # internal port overrides
30 | # nodeManagerPort: 2385
31 | # gcsServerPort: 2386
32 | # workerPorts:
33 | # - 12000
34 | # - 12001
35 | # - 12002
36 | # - 12003
37 | # - 12004
38 |
39 | # image:
40 | # registry: ""
41 | # repository: rayproject/ray
42 | # tag: nightly
43 | # pullPolicy: IfNotPresent
44 |
45 | # autoscaling:
46 | # minReplicas:
47 | # maxReplicas:
48 | # averageCPUUtilization:
49 | # averageMemoryUtilization:
50 | # scaleDownStabilizationWindowSeconds:
51 |
52 | # networkPolicy:
53 | # enabled: true
54 | # clientLabels: {}
55 | # dashboardLabels: {}
56 | # dashboardNamespaceLabels: {}
57 |
58 | # serviceAccount:
59 | # name: ""
60 | # automountServiceAccountToken: false
61 |
62 | # podSecurityContext:
63 | # runAsUser:
64 | # runAsGroup:
65 | # fsGroup:
66 |
67 | # kerberosKeytab:
68 | # contents:
69 | # mountPath:
70 |
71 | # globalLabels: {}
72 | # envVars: []
73 | # imagePullSecrets: []
74 | # podSecurityPolicy: ""
75 | # istioMutualTLSMode: ""
76 |
77 | head:
78 | # labels: {}
79 | # annotations: {}
80 | # nodeSelector: {}
81 | # affinity: {}
82 | # tolerations: []
83 | # initContainers: []
84 | # volumes: []
85 | # volumeMounts: []
86 | # volumeClaimTemplates: []
87 | # resources: {}
88 |
89 | worker:
90 | # replicas: 2
91 | # labels: {}
92 | # annotations: {}
93 | # nodeSelector: {}
94 | # affinity: {}
95 | # tolerations: []
96 | # initContainers: []
97 | # volumes: []
98 | # volumeMounts: []
99 | # volumeClaimTemplates: []
100 | resources:
101 | requests:
102 | cpu: 100m
103 | memory: 250Mi
104 |
--------------------------------------------------------------------------------
/config/samples/distributed-compute_v1alpha1_sparkcluster.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: distributed-compute.dominodatalab.com/v1alpha1
2 | kind: SparkCluster
3 | metadata:
4 | name: example
5 | spec:
6 | workerMemoryLimit: 100m
7 |
8 | # envoyFilterLabels: {}
9 | # clusterPort: 7077
10 | # masterWebPort: 8080
11 | # workerWebPort: 8081
12 |
13 | # additionalClientPorts:
14 | # - name: http-api-proxy
15 | # port: 8899
16 | # targetPort: 8899
17 | # protocol: TCP
18 |
19 | # image:
20 | # registry: ""
21 | # repository: bitnami/spark
22 | # tag: 3.0.2-debian-10-r0
23 | # pullPolicy: IfNotPresent
24 |
25 | # autoscaling:
26 | # minReplicas:
27 | # maxReplicas:
28 | # averageCPUUtilization:
29 | # averageMemoryUtilization:
30 | # scaleDownStabilizationWindowSeconds:
31 |
32 | # networkPolicy:
33 | # enabled: true
34 | # clientLabels: {}
35 | # dashboardLabels: {}
36 |
37 | # serviceAccount:
38 | # name: ""
39 | # automountServiceAccountToken: false
40 |
41 | # podSecurityContext:
42 | # runAsUser:
43 | # runAsGroup:
44 | # fsGroup:
45 |
46 | # kerberosKeytab:
47 | # contents:
48 | # mountPath:
49 |
50 | # globalLabels: {}
51 | # envVars: []
52 | # imagePullSecrets: []
53 | # podSecurityPolicy: ""
54 | # istioMutualTLSMode: ""
55 |
56 | master:
57 | # defaultConfiguration:
58 | # spark.driver.host: "driver-service.ns.svc.cluster.local"
59 | # spark.executor.cores: "4"
60 | # spark.executor.instances: "1"
61 | # spark.executor.memory: 15360m
62 | # spark.ui.proxyBase: "/master/proxy/url"
63 | # spark.ui.reverseProxy: "true"
64 | # spark.ui.reverseProxyUrl: "/master/proxy/base"
65 | # labels: {}
66 | # annotations: {}
67 | # nodeSelector: {}
68 | # affinity: {}
69 | # tolerations: []
70 | # initContainers: []
71 | # volumes: []
72 | # volumeMounts: []
73 | # volumeClaimTemplates: []
74 | resources:
75 | requests:
76 | cpu: 100m
77 | memory: 250Mi
78 |
79 | worker:
80 | # defaultConfiguration:
81 | # spark.driver.host: "driver-svc.ns.svc.cluster.local"
82 | # spark.executor.cores: "4"
83 | # spark.executor.instances: "1"
84 | # spark.executor.memory: 15360m
85 | # spark.ui.proxyBase: "/worker/proxy/base"
86 | # spark.ui.reverseProxy: "true"
87 | # spark.ui.reverseProxyUrl: "/worker/proxy/url"
88 | # replicas: 1
89 | # labels: {}
90 | # annotations: {}
91 | # nodeSelector: {}
92 | # affinity: {}
93 | # tolerations: []
94 | # initContainers: []
95 | # volumes: []
96 | # volumeMounts: []
97 | # volumeClaimTemplates: []
98 | resources:
99 | requests:
100 | cpu: 1
101 | memory: 250Mi
102 |
103 | driver:
104 | # port: 4041
105 | # uiPort: 4040
106 | # blockManagerPort: 4042
107 | selector:
108 | app.kubernetes.io/instance: driver-pod
109 |
--------------------------------------------------------------------------------
/config/webhook/kustomization.yaml:
--------------------------------------------------------------------------------
1 | resources:
2 | - manifests.yaml
3 | - service.yaml
4 |
5 | configurations:
6 | - kustomizeconfig.yaml
7 |
--------------------------------------------------------------------------------
/config/webhook/kustomizeconfig.yaml:
--------------------------------------------------------------------------------
1 | # the following config teaches kustomize where to look when substituting vars.
2 | # It requires kustomize v2.1.0 or newer to work properly.
3 | nameReference:
4 | - kind: Service
5 | version: v1
6 | fieldSpecs:
7 | - kind: MutatingWebhookConfiguration
8 | group: admissionregistration.k8s.io
9 | path: webhooks/clientConfig/service/name
10 | - kind: ValidatingWebhookConfiguration
11 | group: admissionregistration.k8s.io
12 | path: webhooks/clientConfig/service/name
13 |
14 | namespace:
15 | - kind: MutatingWebhookConfiguration
16 | group: admissionregistration.k8s.io
17 | path: webhooks/clientConfig/service/namespace
18 | create: true
19 | - kind: ValidatingWebhookConfiguration
20 | group: admissionregistration.k8s.io
21 | path: webhooks/clientConfig/service/namespace
22 | create: true
23 |
24 | varReference:
25 | - path: metadata/annotations
26 |
--------------------------------------------------------------------------------
/config/webhook/service.yaml:
--------------------------------------------------------------------------------
1 |
2 | apiVersion: v1
3 | kind: Service
4 | metadata:
5 | name: webhook-service
6 | namespace: system
7 | spec:
8 | ports:
9 | - port: 443
10 | targetPort: 9443
11 | selector:
12 | control-plane: controller-manager
13 |
--------------------------------------------------------------------------------
/controllers/config.go:
--------------------------------------------------------------------------------
1 | package controllers
2 |
3 | import "sigs.k8s.io/controller-runtime/pkg/log/zap"
4 |
5 | // Config options for the controller manager.
6 | type Config struct {
7 | Namespaces []string
8 | MetricsAddr string
9 | HealthProbeAddr string
10 | WebhookServerPort int
11 | EnableLeaderElection bool
12 | IstioEnabled bool
13 | ZapOptions zap.Options
14 | MPIInitImage string
15 | MPISyncImage string
16 | }
17 |
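
The Config struct above is consumed by the operator's start-up command. As a minimal sketch (the flag names and defaults below are assumptions for illustration; the real wiring lives in cmd/start.go), its fields could be bound to CLI flags like this:

// Illustrative sketch only: flag names and defaults are assumptions,
// not the operator's actual CLI surface (see cmd/start.go).
package example

import (
	"flag"
	"strings"

	"github.com/dominodatalab/distributed-compute-operator/controllers"
)

// loadConfig binds controller manager options to command-line flags.
func loadConfig() controllers.Config {
	var cfg controllers.Config

	namespaces := flag.String("namespaces", "", "comma-separated namespaces to watch (empty = all namespaces)")
	flag.StringVar(&cfg.MetricsAddr, "metrics-bind-address", ":8080", "address the metrics endpoint binds to")
	flag.StringVar(&cfg.HealthProbeAddr, "health-probe-bind-address", ":8081", "address the health probe endpoint binds to")
	flag.IntVar(&cfg.WebhookServerPort, "webhook-server-port", 9443, "port used by the admission webhook server")
	flag.BoolVar(&cfg.EnableLeaderElection, "leader-elect", false, "enable leader election for the controller manager")
	flag.BoolVar(&cfg.IstioEnabled, "istio-enabled", false, "create Istio resources for managed clusters")
	flag.StringVar(&cfg.MPIInitImage, "mpi-init-image", "", "image used by the MPI worker init container")
	flag.StringVar(&cfg.MPISyncImage, "mpi-sync-image", "", "image used for MPI file sync")
	flag.Parse()

	if *namespaces != "" {
		cfg.Namespaces = strings.Split(*namespaces, ",")
	}
	return cfg
}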
--------------------------------------------------------------------------------
/controllers/controllers.go:
--------------------------------------------------------------------------------
1 | package controllers
2 |
3 | import (
4 | ctrl "sigs.k8s.io/controller-runtime"
5 | )
6 |
7 | type Builder func(manager ctrl.Manager, webhooksEnabled bool, cfg *Config) error
8 |
9 | var BuilderFuncs = []Builder{
10 | DaskCluster,
11 | MPICluster,
12 | }
13 |
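
BuilderFuncs is the single registration point for cluster controllers: start-up code iterates the slice and invokes each Builder against the shared manager (the test suite does the same). A minimal sketch of that pattern, with FooCluster standing in as a hypothetical additional builder:

// Sketch of the registration pattern; FooCluster is a hypothetical Builder
// mentioned only to illustrate how a new cluster type would be added.
package example

import (
	ctrl "sigs.k8s.io/controller-runtime"

	"github.com/dominodatalab/distributed-compute-operator/controllers"
)

// registerControllers wires every registered Builder into the manager,
// mirroring the loop used by the start-up code and the test suite.
func registerControllers(mgr ctrl.Manager, webhooksEnabled bool, cfg *controllers.Config) error {
	// Adding a new cluster type is a one-line change:
	// controllers.BuilderFuncs = append(controllers.BuilderFuncs, FooCluster)
	for _, build := range controllers.BuilderFuncs {
		if err := build(mgr, webhooksEnabled, cfg); err != nil {
			return err
		}
	}
	return nil
}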
--------------------------------------------------------------------------------
/controllers/daskcluster_controller.go:
--------------------------------------------------------------------------------
1 | package controllers
2 |
3 | import (
4 | ctrl "sigs.k8s.io/controller-runtime"
5 |
6 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
7 | "github.com/dominodatalab/distributed-compute-operator/pkg/cluster/dask"
8 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
9 | )
10 |
11 | //+kubebuilder:rbac:groups=distributed-compute.dominodatalab.com,resources=daskclusters,verbs=get;list;watch;create;update;patch;delete
12 | //+kubebuilder:rbac:groups=distributed-compute.dominodatalab.com,resources=daskclusters/status,verbs=get;update;patch
13 | //+kubebuilder:rbac:groups=distributed-compute.dominodatalab.com,resources=daskclusters/finalizers,verbs=update
14 | // DaskCluster builds a controller that reconciles DaskCluster objects and registers it with the manager.
15 | func DaskCluster(mgr ctrl.Manager, webhooksEnabled bool, cfg *Config) error {
16 | reconciler := core.NewReconciler(mgr).
17 | For(&dcv1alpha1.DaskCluster{}).
18 | Component("istio-peerauthentication", dask.IstioPeerAuthentication(cfg.IstioEnabled)).
19 | Component("serviceaccount", dask.ServiceAccount()).
20 | Component("configmap-keytab", dask.ConfigMapKeyTab()).
21 | Component("role-podsecuritypolicy", dask.RolePodSecurityPolicy()).
22 | Component("rolebinding-podsecuritypolicy", dask.RoleBindingPodSecurityPolicy()).
23 | Component("service-scheduler", dask.ServiceScheduler()).
24 | Component("service-worker", dask.ServiceWorker()).
25 | Component("service-proxy", dask.ClientPortsService()).
26 | Component("networkpolicy-scheduler", dask.NetworkPolicyScheduler()).
27 | Component("networkpolicy-worker", dask.NetworkPolicyWorker()).
28 | Component("networkpolicy-proxy", dask.ClientPortsNetworkPolicy()).
29 | Component("statefulset-scheduler", dask.StatefulSetScheduler()).
30 | Component("statefulset-worker", dask.StatefulSetWorker()).
31 | Component("horizontalpodautoscaler", dask.HorizontalPodAutoscaler()).
32 | Component("statusupdate", dask.ClusterStatusUpdate())
33 |
34 | if webhooksEnabled {
35 | reconciler.WithWebhooks()
36 | }
37 | return reconciler.Complete()
38 | }
39 |
--------------------------------------------------------------------------------
/controllers/mpicluster_controller.go:
--------------------------------------------------------------------------------
1 | package controllers
2 |
3 | import (
4 | ctrl "sigs.k8s.io/controller-runtime"
5 |
6 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
7 | "github.com/dominodatalab/distributed-compute-operator/pkg/cluster/mpi"
8 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
9 | )
10 |
11 | //+kubebuilder:rbac:groups=distributed-compute.dominodatalab.com,resources=mpiclusters,verbs=get;list;watch;create;update;patch;delete
12 | //+kubebuilder:rbac:groups=distributed-compute.dominodatalab.com,resources=mpiclusters/status,verbs=get;update;patch
13 | //+kubebuilder:rbac:groups=distributed-compute.dominodatalab.com,resources=mpiclusters/finalizers,verbs=update
14 |
15 | // MPICluster builds a controller that reconciles MPICluster objects and registers it with the manager.
16 | func MPICluster(mgr ctrl.Manager, webhooksEnabled bool, cfg *Config) error {
17 | reconciler := core.NewReconciler(mgr).
18 | For(&dcv1alpha1.MPICluster{}).
19 | Component("istio-peerauthentication", mpi.IstioPeerAuthentication(cfg.IstioEnabled)).
20 | Component("istio-client-peerauthentication", mpi.IstioClientPeerAuthentication(cfg.IstioEnabled)).
21 | Component("serviceaccount", mpi.ServiceAccount()).
22 | Component("role", mpi.RolePodSecurityPolicy()).
23 | Component("rolebinding", mpi.RoleBindingPodSecurityPolicy()).
24 | Component("configmap", mpi.ConfigMap()).
25 | Component("service-worker", mpi.ServiceWorker()).
26 | Component("service-proxy", mpi.ClientPortsService()).
27 | Component("service-client", mpi.ServiceClient()).
28 | Component("networkpolicy-worker", mpi.NetworkPolicyWorker()).
29 | Component("networkpolicy-client", mpi.NetworkPolicyClient()).
30 | Component("networkpolicy-proxy", mpi.ClientPortsNetworkPolicy()).
31 | Component("workers", mpi.StatefulSet(cfg.MPIInitImage, cfg.MPISyncImage)).
32 | Component("statusupdate", mpi.StatusUpdate())
33 |
34 | if webhooksEnabled {
35 | reconciler.WithWebhooks()
36 | }
37 | return reconciler.Complete()
38 | }
39 |
--------------------------------------------------------------------------------
/controllers/suite_test.go:
--------------------------------------------------------------------------------
1 | package controllers
2 |
3 | import (
4 | "context"
5 | "path/filepath"
6 | "testing"
7 |
8 | . "github.com/onsi/ginkgo"
9 | . "github.com/onsi/gomega"
10 | "k8s.io/client-go/kubernetes/scheme"
11 | "k8s.io/client-go/rest"
12 | ctrl "sigs.k8s.io/controller-runtime"
13 | "sigs.k8s.io/controller-runtime/pkg/client"
14 | "sigs.k8s.io/controller-runtime/pkg/envtest"
15 | logf "sigs.k8s.io/controller-runtime/pkg/log"
16 | "sigs.k8s.io/controller-runtime/pkg/log/zap"
17 |
18 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
19 | // +kubebuilder:scaffold:imports
20 | )
21 |
22 | var cfg *rest.Config
23 | var k8sClient client.Client
24 | var testEnv *envtest.Environment
25 | var ctx context.Context
26 | var cancel context.CancelFunc
27 |
28 | func TestAPIs(t *testing.T) {
29 | if testing.Short() {
30 | t.Skip("skipping controller suite in short mode")
31 | }
32 |
33 | RegisterFailHandler(Fail)
34 | RunSpecs(t, "Controller Suite")
35 | }
36 |
37 | var _ = BeforeSuite(func() {
38 | logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)))
39 |
40 | ctx, cancel = context.WithCancel(context.TODO())
41 |
42 | By("bootstrapping test environment")
43 | testEnv = &envtest.Environment{
44 | CRDDirectoryPaths: []string{filepath.Join("..", "config", "crd", "bases")},
45 | ErrorIfCRDPathMissing: true,
46 | }
47 |
48 | var err error
49 | cfg, err = testEnv.Start()
50 | Expect(err).NotTo(HaveOccurred())
51 | Expect(cfg).NotTo(BeNil())
52 |
53 | err = dcv1alpha1.AddToScheme(scheme.Scheme)
54 | Expect(err).NotTo(HaveOccurred())
55 |
56 | //+kubebuilder:scaffold:scheme
57 |
58 | k8sManager, err := ctrl.NewManager(cfg, ctrl.Options{Scheme: scheme.Scheme})
59 | Expect(err).NotTo(HaveOccurred())
60 |
61 | k8sClient = k8sManager.GetClient()
62 | Expect(k8sClient).NotTo(BeNil())
63 |
64 | config := Config{}
65 |
66 | for _, controller := range BuilderFuncs {
67 | err = controller(k8sManager, false, &config)
68 | Expect(err).ToNot(HaveOccurred())
69 | }
70 |
71 | err = (&RayClusterReconciler{
72 | Client: k8sClient,
73 | Scheme: k8sManager.GetScheme(),
74 | Log: ctrl.Log.WithName("controllers").WithName("RayCluster"),
75 | IstioEnabled: false,
76 | }).SetupWithManager(k8sManager)
77 | Expect(err).ToNot(HaveOccurred())
78 |
79 | err = (&SparkClusterReconciler{
80 | Client: k8sClient,
81 | Scheme: k8sManager.GetScheme(),
82 | Log: ctrl.Log.WithName("controllers").WithName("SparkCluster"),
83 | IstioEnabled: false,
84 | }).SetupWithManager(k8sManager)
85 | Expect(err).ToNot(HaveOccurred())
86 |
87 | go func() {
88 | err = k8sManager.Start(ctx)
89 | Expect(err).ToNot(HaveOccurred())
90 | }()
91 | }, 60)
92 |
93 | var _ = AfterSuite(func() {
94 | cancel()
95 | By("tearing down the test environment")
96 | err := testEnv.Stop()
97 | Expect(err).ToNot(HaveOccurred())
98 | })
99 |
--------------------------------------------------------------------------------
/controllers/variables.go:
--------------------------------------------------------------------------------
1 | package controllers
2 |
3 | import (
4 | "path"
5 |
6 | "github.com/banzaicloud/k8s-objectmatcher/patch"
7 |
8 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
9 | )
10 |
11 | var (
12 | // DistributedComputeFinalizer is the custom identifier used to mark
13 | // controller-managed resources that require pre-delete hook logic.
14 | DistributedComputeFinalizer = path.Join(dcv1alpha1.GroupVersion.Group, "finalizer")
15 |
16 | // PatchAnnotator applies state annotations to owned components.
17 | PatchAnnotator = patch.NewAnnotator(path.Join(dcv1alpha1.GroupVersion.Group, "last-applied"))
18 | // PatchMaker calculates changes to state annotations on owned components.
19 | PatchMaker = patch.NewPatchMaker(PatchAnnotator, &patch.K8sStrategicMergePatcher{}, &patch.BaseJSONMergePatcher{})
20 | // PatchCalculateOpts define the exclusion rules used when calculating the
21 | // difference between two k8s resources.
22 | PatchCalculateOpts = []patch.CalculateOption{
23 | patch.IgnoreStatusFields(),
24 | patch.IgnoreVolumeClaimTemplateTypeMetaAndStatus(),
25 | }
26 | )
27 |
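
These package-level values are typically combined in a reconcile update path: PatchMaker.Calculate diffs the live object against the desired one (ignoring the fields excluded by PatchCalculateOpts), and PatchAnnotator records the last-applied state when an update is actually issued. A minimal sketch, assuming the surrounding reconcile plumbing:

// Illustrative sketch, not the operator's actual update path: it combines the
// package-level patch helpers above to skip updates when nothing has changed.
package example

import (
	"context"

	"sigs.k8s.io/controller-runtime/pkg/client"

	"github.com/dominodatalab/distributed-compute-operator/controllers"
)

// updateIfChanged issues an update only when the desired object differs from
// the live one, ignoring the fields excluded by PatchCalculateOpts.
func updateIfChanged(ctx context.Context, c client.Client, current, desired client.Object) error {
	result, err := controllers.PatchMaker.Calculate(current, desired, controllers.PatchCalculateOpts...)
	if err != nil {
		return err
	}
	if result.IsEmpty() {
		return nil // live state already matches the desired state
	}

	// Record the applied configuration so the next reconcile can diff against it.
	if err := controllers.PatchAnnotator.SetLastAppliedAnnotation(desired); err != nil {
		return err
	}
	return c.Update(ctx, desired)
}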
--------------------------------------------------------------------------------
/deploy/helm/distributed-compute-operator/.helmignore:
--------------------------------------------------------------------------------
1 | # Patterns to ignore when building packages.
2 | # This supports shell glob matching, relative path matching, and
3 | # negation (prefixed with !). Only one pattern per line.
4 | .DS_Store
5 | # Common VCS dirs
6 | .git/
7 | .gitignore
8 | .bzr/
9 | .bzrignore
10 | .hg/
11 | .hgignore
12 | .svn/
13 | # Common backup files
14 | *.swp
15 | *.bak
16 | *.tmp
17 | *.orig
18 | *~
19 | # Various IDEs
20 | .project
21 | .idea/
22 | *.tmproj
23 | .vscode/
24 |
--------------------------------------------------------------------------------
/deploy/helm/distributed-compute-operator/Chart.lock:
--------------------------------------------------------------------------------
1 | dependencies:
2 | - name: common
3 | repository: https://charts.bitnami.com/bitnami
4 | version: 1.4.1
5 | digest: sha256:c53e3c3325fc8b9b8b41efd417bad52765452600992fe8612c8cb062725b505a
6 | generated: "2021-03-11T13:33:51.03347-07:00"
7 |
--------------------------------------------------------------------------------
/deploy/helm/distributed-compute-operator/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v2
2 | type: application
3 | name: distributed-compute-operator
4 | version: 0.0.0
5 | appVersion: "latest"
6 | kubeVersion: ">= 1.16.0-0"
7 | description: Kubernetes operator that manages parallel computing clusters.
8 | home: https://github.com/dominodatalab/distributed-compute-operator
9 | icon: https://raw.githubusercontent.com/dominodatalab/distributed-compute-operator/main/docs/img/logo.png
10 | maintainers:
11 | - name: sonnysideup
12 | email: eng-platform@dominodatalab.com
13 | dependencies:
14 | - name: common
15 | version: 1.4.1
16 | repository: https://charts.bitnami.com/bitnami
17 |
--------------------------------------------------------------------------------
/deploy/helm/distributed-compute-operator/charts/common-1.4.1.tgz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dominodatalab/distributed-compute-operator/2f119d377b1632ea2e9817cacc24a6a54414aed1/deploy/helm/distributed-compute-operator/charts/common-1.4.1.tgz
--------------------------------------------------------------------------------
/deploy/helm/distributed-compute-operator/dco-values.yaml:
--------------------------------------------------------------------------------
1 | USER-SUPPLIED VALUES:
2 | image:
3 | registry: quay.io
4 | repository: domino/distributed-compute-operator
5 | tag: v0.7.3
6 | pullPolicy: Always
7 | imagePullSecrets:
8 | - name: domino-quay-repos
9 | installCRDs: true
10 | global:
11 | istio:
12 | cni: true
13 | enabled: false
14 | install: false
15 | mpi:
16 | initImage:
17 | registry: quay.io
18 | repository: domino/distributed-compute-operator-mpi-init
19 | tag: v0.7.3
20 | syncImage:
21 | registry: quay.io
22 | repository: domino/distributed-compute-operator-mpi-sync
23 | tag: v0.7.3
24 | networkPolicy:
25 | enabled: true
26 | nodeSelector:
27 | dominodatalab.com/node-pool: default
28 | podAnnotations: {}
29 | podEnv: []
30 | podLabels: {}
31 | podSecurityPolicy:
32 | enabled: true
33 | priorityClassName: domino-default
34 | prometheus:
35 | enabled: true
36 | namespaceLabels:
37 | domino-platform: "true"
38 | rbac:
39 | pspEnabled: true
40 | replicaCount: 1
41 | securityContextConstraints:
42 | enabled: false
43 |
--------------------------------------------------------------------------------
/deploy/helm/distributed-compute-operator/templates/NOTES.txt:
--------------------------------------------------------------------------------
1 | Thank you for installing the {{ .Chart.Name }}.
2 |
3 | Your release is named {{ .Release.Name }}.
4 |
5 | To learn more about the release, try:
6 |
7 | $ helm status {{ .Release.Name }}
8 | $ helm get all {{ .Release.Name }}
9 |
10 | To list the available cluster types this operator manages, try:
11 |
12 | $ kubectl get crds | grep distributed-compute
13 |
14 | See the following samples to learn how to create a new cluster:
15 |
16 | https://github.com/dominodatalab/distributed-compute-operator/tree/main/config/samples
17 |
--------------------------------------------------------------------------------
/deploy/helm/distributed-compute-operator/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/*
2 | Return the proper image name
3 | */}}
4 | {{- define "dco.image" -}}
5 | {{- $imageRoot := .Values.image -}}
6 | {{- $_ := set $imageRoot "tag" (.Values.image.tag | default .Chart.AppVersion) -}}
7 | {{- include "common.images.image" (dict "imageRoot" $imageRoot "global" $) -}}
8 | {{- end -}}
9 |
10 | {{/*
11 | Create the name of the service account to use
12 | */}}
13 | {{- define "dco.serviceAccountName" -}}
14 | {{- if .Values.serviceAccount.create -}}
15 | {{- default (include "common.names.fullname" .) .Values.serviceAccount.name -}}
16 | {{- else -}}
17 | {{- default "default" .Values.serviceAccount.name -}}
18 | {{- end -}}
19 | {{- end -}}
20 |
21 | {{/*
22 | Webhook service name
23 | */}}
24 | {{- define "dco.webhook.service" -}}
25 | {{ include "common.names.fullname" . }}-webhook-server
26 | {{- end -}}
27 |
28 | {{/*
29 | Webhook certificate issuer name
30 | */}}
31 | {{- define "dco.webhook.issuer" -}}
32 | {{ include "common.names.fullname" . }}-selfsigned-issuer
33 | {{- end -}}
34 |
35 | {{/*
36 | Webhook certificate name
37 | */}}
38 | {{- define "dco.webhook.certificate" -}}
39 | {{ include "common.names.fullname" . }}-webhook
40 | {{- end -}}
41 |
42 | {{/*
43 | Webhook certificate secret name
44 | */}}
45 | {{- define "dco.webhook.secret" -}}
46 | {{ include "common.names.fullname" . }}-webhook-cert
47 | {{- end -}}
48 |
49 | {{/*
50 | Webhook certificate injection annotation
51 | */}}
52 | {{- define "dco.webhook.annotation" -}}
53 | cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "dco.webhook.certificate" . }}
54 | {{- end -}}
55 |
56 | {{/*
57 | Returns a name suitable for all manager RBAC objects
58 | */}}
59 | {{- define "dco.rbac.managerName" -}}
60 | dominodatalab:operator:{{ include "common.names.fullname" . }}:manager
61 | {{- end -}}
62 |
63 | {{/*
64 | Returns a name suitable for all hook RBAC objects
65 | */}}
66 | {{- define "dco.rbac.hookName" -}}
67 | dominodatalab:operator:{{ include "common.names.fullname" . }}:hook
68 | {{- end -}}
69 |
--------------------------------------------------------------------------------
/deploy/helm/distributed-compute-operator/templates/clusterrole.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: rbac.authorization.k8s.io/v1
2 | kind: ClusterRole
3 | metadata:
4 | name: {{ include "dco.rbac.managerName" . }}.{{ .Release.Namespace }}
5 | labels:
6 | {{- include "common.labels.standard" . | nindent 4 }}
7 | rules:
8 | - apiGroups:
9 | - distributed-compute.dominodatalab.com
10 | resources:
11 | - daskclusters
12 | - rayclusters
13 | - sparkclusters
14 | - mpiclusters
15 | verbs:
16 | - patch
17 | - update
18 | - list
19 | - watch
20 | - apiGroups:
21 | - distributed-compute.dominodatalab.com
22 | resources:
23 | - daskclusters/status
24 | - rayclusters/status
25 | - sparkclusters/status
26 | - mpiclusters/status
27 | verbs:
28 | - update
29 | - apiGroups:
30 | - distributed-compute.dominodatalab.com
31 | resources:
32 | - daskclusters/finalizers
33 | - rayclusters/finalizers
34 | - sparkclusters/finalizers
35 | - mpiclusters/finalizers
36 | verbs:
37 | - update
38 | - apiGroups:
39 | - ""
40 | resources:
41 | - endpoints
42 | - pods
43 | verbs:
44 | - list
45 | - watch
46 | - apiGroups:
47 | - ""
48 | resources:
49 | - configmaps
50 | - secrets
51 | - serviceaccounts
52 | verbs:
53 | - create
54 | - update
55 | - delete
56 | - list
57 | - watch
58 | - apiGroups:
59 | - ""
60 | resources:
61 | - services
62 | verbs:
63 | - create
64 | - update
65 | - list
66 | - watch
67 | - apiGroups:
68 | - ""
69 | resources:
70 | - persistentvolumeclaims
71 | verbs:
72 | - delete
73 | - list
74 | - watch
75 | - apiGroups:
76 | - apps
77 | resources:
78 | - statefulsets
79 | verbs:
80 | - create
81 | - update
82 | - list
83 | - watch
84 | - delete
85 | - apiGroups:
86 | - autoscaling
87 | resources:
88 | - horizontalpodautoscalers
89 | verbs:
90 | - create
91 | - update
92 | - delete
93 | - list
94 | - watch
95 | - apiGroups:
96 | - batch
97 | resources:
98 | - jobs
99 | verbs:
100 | - create
101 | - update
102 | - delete
103 | - list
104 | - watch
105 | - apiGroups:
106 | - networking.k8s.io
107 | resources:
108 | - networkpolicies
109 | verbs:
110 | - create
111 | - update
112 | - delete
113 | - list
114 | - watch
115 | - apiGroups:
116 | - policy
117 | resources:
118 | - podsecuritypolicies
119 | resourceNames:
120 | - domino-restricted
121 | verbs:
122 | - use
123 | - list
124 | - watch
125 | - apiGroups:
126 | - rbac.authorization.k8s.io
127 | resources:
128 | - roles
129 | - rolebindings
130 | verbs:
131 | - create
132 | - update
133 | - delete
134 | - list
135 | - watch
136 | - apiGroups:
137 | - security.istio.io
138 | resources:
139 | - peerauthentications
140 | verbs:
141 | - create
142 | - update
143 | - delete
144 | - list
145 | - watch
146 | - apiGroups:
147 | - networking.istio.io
148 | resources:
149 | - envoyfilters
150 | verbs:
151 | - create
152 | - update
153 | - list
154 | - watch
155 | {{- if .Values.config.enableLeaderElection }}
156 | - apiGroups:
157 | - ""
158 | resources:
159 | - configmaps
160 | verbs:
161 | - get
162 | - apiGroups:
163 | - ""
164 | resources:
165 | - events
166 | verbs:
167 | - create
168 | - apiGroups:
169 | - coordination.k8s.io
170 | resources:
171 | - leases
172 | verbs:
173 | - get
174 | - create
175 | - update
176 | {{- end }}
177 |
--------------------------------------------------------------------------------
/deploy/helm/distributed-compute-operator/templates/clusterrolebinding.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: rbac.authorization.k8s.io/v1
2 | kind: ClusterRoleBinding
3 | metadata:
4 | name: {{ include "dco.rbac.managerName" . }}.{{ .Release.Namespace }}
5 | labels:
6 | {{- include "common.labels.standard" . | nindent 4 }}
7 | roleRef:
8 | apiGroup: rbac.authorization.k8s.io
9 | kind: ClusterRole
10 | name: {{ include "dco.rbac.managerName" . }}.{{ .Release.Namespace }}
11 | subjects:
12 | - kind: ServiceAccount
13 | name: {{ include "dco.serviceAccountName" . }}
14 | namespace: {{ .Release.Namespace }}
15 |
--------------------------------------------------------------------------------
/deploy/helm/distributed-compute-operator/templates/istio.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.global.istio.enabled }}
2 | apiVersion: security.istio.io/v1beta1
3 | kind: PeerAuthentication
4 | metadata:
5 | name: {{ include "dco.webhook.service" .}}
6 | labels:
7 | {{- include "common.labels.standard" . | nindent 4 }}
8 | spec:
9 | selector:
10 | matchLabels:
11 | {{- include "common.labels.matchLabels" . | nindent 6 }}
12 | mtls:
13 | mode: UNSET
14 | portLevelMtls:
15 | {{ .Values.config.webhookPort }}:
16 | mode: DISABLE
17 | {{- end }}
18 |
--------------------------------------------------------------------------------
/deploy/helm/distributed-compute-operator/templates/networkpolicy.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.networkPolicy.enabled }}
2 | apiVersion: networking.k8s.io/v1
3 | kind: NetworkPolicy
4 | metadata:
5 | name: {{ include "common.names.fullname" . }}
6 | labels:
7 | {{- include "common.labels.standard" . | nindent 4 }}
8 | spec:
9 | podSelector:
10 | matchLabels:
11 | {{- include "common.labels.matchLabels" . | nindent 6 }}
12 | policyTypes:
13 | - Ingress
14 | ingress:
15 | - ports:
16 | - port: {{ .Values.config.webhookPort }}
17 | protocol: TCP
18 | - port: {{ .Values.config.healthProbePort }}
19 | protocol: TCP
20 | - ports:
21 | - port: {{ .Values.config.metricsPort }}
22 | protocol: TCP
23 | from:
24 | - podSelector:
25 | matchLabels:
26 | {{- toYaml .Values.prometheus.podLabels | trimSuffix "\n" | nindent 10 }}
27 | {{- with .Values.prometheus.namespaceLabels }}
28 | namespaceSelector:
29 | matchLabels:
30 | {{- toYaml . | trimSuffix "\n" | nindent 10 }}
31 | {{- end }}
32 | {{- end }}
33 |
--------------------------------------------------------------------------------
/deploy/helm/distributed-compute-operator/templates/serviceaccount.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.serviceAccount.create }}
2 | apiVersion: v1
3 | kind: ServiceAccount
4 | metadata:
5 | name: {{ include "dco.serviceAccountName" . }}
6 | labels:
7 | {{- include "common.labels.standard" . | nindent 4 }}
8 | {{- with .Values.serviceAccount.annotations }}
9 | annotations:
10 | {{- toYaml . | nindent 4 }}
11 | {{- end }}
12 | {{- end }}
13 |
--------------------------------------------------------------------------------
/deploy/helm/distributed-compute-operator/templates/webhook-cert-manager.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: cert-manager.io/v1
2 | kind: Issuer
3 | metadata:
4 | name: {{ include "dco.webhook.issuer" . }}
5 | labels:
6 | {{- include "common.labels.standard" . | nindent 4 }}
7 | spec:
8 | selfSigned: {}
9 |
10 | ---
11 | apiVersion: cert-manager.io/v1
12 | kind: Certificate
13 | metadata:
14 | name: {{ include "dco.webhook.certificate" . }}
15 | labels:
16 | {{- include "common.labels.standard" . | nindent 4 }}
17 | spec:
18 | dnsNames:
19 | - {{ include "dco.webhook.service" . }}.{{ .Release.Namespace }}.svc
20 | - {{ include "dco.webhook.service" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}
21 | issuerRef:
22 | kind: Issuer
23 | name: {{ include "dco.webhook.issuer" . }}
24 | secretName: {{ include "dco.webhook.secret" . }}
25 |
--------------------------------------------------------------------------------
/deploy/helm/distributed-compute-operator/templates/webhook-configuration-mutating.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: admissionregistration.k8s.io/v1
2 | kind: MutatingWebhookConfiguration
3 | metadata:
4 | name: {{ include "common.names.fullname" . }}.{{ .Release.Namespace }}
5 | labels:
6 | {{- include "common.labels.standard" . | nindent 4 }}
7 | annotations:
8 | {{- include "dco.webhook.annotation" . | nindent 4 }}
9 | webhooks:
10 | - admissionReviewVersions:
11 | - v1
12 | - v1beta1
13 | clientConfig:
14 | service:
15 | name: {{ include "dco.webhook.service" . }}
16 | namespace: {{ .Release.Namespace }}
17 | path: /mutate-distributed-compute-dominodatalab-com-v1alpha1-mpicluster
18 | failurePolicy: Fail
19 | name: mmpicluster.kb.io
20 | rules:
21 | - apiGroups:
22 | - distributed-compute.dominodatalab.com
23 | apiVersions:
24 | - v1alpha1
25 | operations:
26 | - CREATE
27 | - UPDATE
28 | resources:
29 | - mpiclusters
30 | sideEffects: None
31 | - admissionReviewVersions:
32 | - v1
33 | - v1beta1
34 | clientConfig:
35 | service:
36 | name: {{ include "dco.webhook.service" . }}
37 | namespace: {{ .Release.Namespace }}
38 | path: /mutate-distributed-compute-dominodatalab-com-v1alpha1-daskcluster
39 | failurePolicy: Fail
40 | name: mdaskcluster.kb.io
41 | rules:
42 | - apiGroups:
43 | - distributed-compute.dominodatalab.com
44 | apiVersions:
45 | - v1alpha1
46 | operations:
47 | - CREATE
48 | - UPDATE
49 | resources:
50 | - daskclusters
51 | sideEffects: None
52 | - admissionReviewVersions:
53 | - v1
54 | - v1beta1
55 | clientConfig:
56 | service:
57 | name: {{ include "dco.webhook.service" . }}
58 | namespace: {{ .Release.Namespace }}
59 | path: /mutate-distributed-compute-dominodatalab-com-v1alpha1-raycluster
60 | failurePolicy: Fail
61 | name: mraycluster.kb.io
62 | rules:
63 | - apiGroups:
64 | - distributed-compute.dominodatalab.com
65 | apiVersions:
66 | - v1alpha1
67 | operations:
68 | - CREATE
69 | - UPDATE
70 | resources:
71 | - rayclusters
72 | sideEffects: None
73 | - admissionReviewVersions:
74 | - v1
75 | - v1beta1
76 | clientConfig:
77 | service:
78 | name: {{ include "dco.webhook.service" . }}
79 | namespace: {{ .Release.Namespace }}
80 | path: /mutate-distributed-compute-dominodatalab-com-v1alpha1-sparkcluster
81 | failurePolicy: Fail
82 | name: msparkcluster.kb.io
83 | rules:
84 | - apiGroups:
85 | - distributed-compute.dominodatalab.com
86 | apiVersions:
87 | - v1alpha1
88 | operations:
89 | - CREATE
90 | - UPDATE
91 | resources:
92 | - sparkclusters
93 | sideEffects: None
94 |
--------------------------------------------------------------------------------
/deploy/helm/distributed-compute-operator/templates/webhook-configuration-validating.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: admissionregistration.k8s.io/v1
2 | kind: ValidatingWebhookConfiguration
3 | metadata:
4 | name: {{ include "common.names.fullname" . }}.{{ .Release.Namespace }}
5 | labels:
6 | {{- include "common.labels.standard" . | nindent 4 }}
7 | annotations:
8 | {{- include "dco.webhook.annotation" . | nindent 4 }}
9 | webhooks:
10 | - admissionReviewVersions:
11 | - v1
12 | - v1beta1
13 | clientConfig:
14 | service:
15 | name: {{ include "dco.webhook.service" . }}
16 | namespace: {{ .Release.Namespace }}
17 | path: /validate-distributed-compute-dominodatalab-com-v1alpha1-mpicluster
18 | failurePolicy: Fail
19 | name: vmpicluster.kb.io
20 | rules:
21 | - apiGroups:
22 | - distributed-compute.dominodatalab.com
23 | apiVersions:
24 | - v1alpha1
25 | operations:
26 | - CREATE
27 | - UPDATE
28 | resources:
29 | - mpiclusters
30 | sideEffects: None
31 | - admissionReviewVersions:
32 | - v1
33 | - v1beta1
34 | clientConfig:
35 | service:
36 | name: {{ include "dco.webhook.service" . }}
37 | namespace: {{ .Release.Namespace }}
38 | path: /validate-distributed-compute-dominodatalab-com-v1alpha1-daskcluster
39 | failurePolicy: Fail
40 | name: vdaskcluster.kb.io
41 | rules:
42 | - apiGroups:
43 | - distributed-compute.dominodatalab.com
44 | apiVersions:
45 | - v1alpha1
46 | operations:
47 | - CREATE
48 | - UPDATE
49 | resources:
50 | - daskclusters
51 | sideEffects: None
52 | - admissionReviewVersions:
53 | - v1
54 | - v1beta1
55 | clientConfig:
56 | service:
57 | name: {{ include "dco.webhook.service" . }}
58 | namespace: {{ .Release.Namespace }}
59 | path: /validate-distributed-compute-dominodatalab-com-v1alpha1-raycluster
60 | failurePolicy: Fail
61 | name: vraycluster.kb.io
62 | rules:
63 | - apiGroups:
64 | - distributed-compute.dominodatalab.com
65 | apiVersions:
66 | - v1alpha1
67 | operations:
68 | - CREATE
69 | - UPDATE
70 | resources:
71 | - rayclusters
72 | sideEffects: None
73 | - admissionReviewVersions:
74 | - v1
75 | - v1beta1
76 | clientConfig:
77 | service:
78 | name: {{ include "dco.webhook.service" . }}
79 | namespace: {{ .Release.Namespace }}
80 | path: /validate-distributed-compute-dominodatalab-com-v1alpha1-sparkcluster
81 | failurePolicy: Fail
82 | name: vsparkcluster.kb.io
83 | rules:
84 | - apiGroups:
85 | - distributed-compute.dominodatalab.com
86 | apiVersions:
87 | - v1alpha1
88 | operations:
89 | - CREATE
90 | - UPDATE
91 | resources:
92 | - sparkclusters
93 | sideEffects: None
94 |
--------------------------------------------------------------------------------
/deploy/helm/distributed-compute-operator/templates/webhook-service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 | name: {{ include "dco.webhook.service" . }}
5 | labels:
6 | {{- include "common.labels.standard" . | nindent 4 }}
7 | spec:
8 | ports:
9 | - name: tcp-webhook
10 | port: 443
11 | targetPort: webhooks
12 | selector:
13 | {{- include "common.labels.matchLabels" . | nindent 4 }}
14 |
--------------------------------------------------------------------------------
/dockerfiles/mpi-init.Dockerfile:
--------------------------------------------------------------------------------
1 | # A specific version of the Linux OS here is very important because it determines the versions
2 | # of core libraries (libc, etc.) that the compiled binaries will be linked against.
3 | # FYI, debian-9.13 -> libc-2.24
4 | # OSRP is not necessary here because this is just the build environment; see the final image FROM at the bottom.
5 | FROM quay.io/domino/debian:10.11-368763
6 |
7 | ARG OPENSSH_VERSION=8.8p1
8 | ARG OPENSSH_URL=https://mirrors.mit.edu/pub/OpenBSD/OpenSSH/portable/openssh-${OPENSSH_VERSION}.tar.gz
9 | ARG OPENSSH_SIG_URL=https://mirrors.mit.edu/pub/OpenBSD/OpenSSH/portable/openssh-${OPENSSH_VERSION}.tar.gz.asc
10 |
11 | ARG INSTALL_DIR=/opt/domino/mpi-cluster
12 | ARG INSTALL_BIN=${INSTALL_DIR}/bin
13 |
14 | WORKDIR /root
15 |
16 | ADD *.gpgkey ./
17 |
18 | # Install common dependencies for compiling openssh and setting things up
19 | RUN \
20 | apt-get update && \
21 | apt-get -y install \
22 | build-essential \
23 | curl \
24 | gnupg && \
25 | mkdir -p \
26 | ${INSTALL_DIR} \
27 | ${INSTALL_BIN} && \
28 | gpg --import -q openssh.gpgkey > /dev/null && \
29 | rm -f *.gpgkey
30 |
31 | # Download and compile openssh
32 | RUN \
33 | # Newer versions of openssh include a mandatory privilege separation mechanism
34 | # that requires a special user to be available in the system. Although this
35 | # image does not execute sshd, such a user must exist for proper deployment.
36 | useradd -g 65534 -d /var/empty -s /bin/false sshd && \
37 | curl -o openssh-src.tgz -LSsf ${OPENSSH_URL} && \
38 | curl -o openssh-src.sig -LSsf ${OPENSSH_SIG_URL} && \
39 | gpg --trust-model always -q --verify openssh-src.sig openssh-src.tgz && \
40 | tar -xf openssh-src.tgz --no-same-permissions && \
41 | cd openssh-${OPENSSH_VERSION} && \
42 | ./configure \
43 | --prefix=${INSTALL_DIR} \
44 | --without-zlib \
45 | --without-openssl && \
46 | make && \
47 | make install && \
48 | cd -
49 |
50 | ADD mpi-worker-start.sh ${INSTALL_BIN}
51 |
52 | # Create a tarball containing all the necessary artifacts
53 | RUN \
54 | rm -f ${INSTALL_DIR}/etc/ssh_host_* && \
55 | chmod 755 ${INSTALL_BIN}/mpi-worker-start.sh && \
56 | tar -czf worker-utils.tgz \
57 | ${INSTALL_DIR}/bin \
58 | ${INSTALL_DIR}/etc \
59 | ${INSTALL_DIR}/libexec \
60 | ${INSTALL_DIR}/sbin
61 |
62 | # The final image only contains built artifacts.
63 | # The base image should be up-to-date, but a specific version is not important.
64 | FROM quay.io/domino/debian:10.11-368763
65 | WORKDIR /root
66 | COPY --from=0 /root/worker-utils.tgz ./
67 | CMD tar -C / -xf /root/worker-utils.tgz
68 |
--------------------------------------------------------------------------------
/dockerfiles/mpi-sync.Dockerfile:
--------------------------------------------------------------------------------
1 | FROM quay.io/domino/debian:10.11-368763
2 |
3 | ARG DOMINO_UID=12574
4 | ARG DOMINO_USER=domino
5 | ARG DOMINO_GID=12574
6 | ARG DOMINO_GROUP=domino
7 |
8 | ARG DOMINO_DIR=/opt/domino/rsync
9 | ARG DOMINO_BIN=$DOMINO_DIR/bin
10 | ARG DOMINO_ETC=$DOMINO_DIR/etc
11 |
12 | ARG RSYNC_RUN_DIR=/run/rsyncd-${DOMINO_USER}
13 | ARG RSYNC_CONFIG_FILE=rsyncd.conf
14 | ARG RSYNC_START_SCRIPT=rsync-start.sh
15 |
16 | ARG ALLENV="\$RSYNC_RUN_DIR,\$DOMINO_ETC,\$RSYNC_CONFIG_FILE"
17 |
18 | WORKDIR /root
19 |
20 | RUN \
21 | apt-get update && \
22 | apt-get -y install \
23 | rsync \
24 | gettext-base \
25 | procps && \
26 | rm -rf /var/lib/apt/lists/* && \
27 | mkdir -p \
28 | "$DOMINO_DIR" \
29 | "$DOMINO_BIN" \
30 | "$DOMINO_ETC" \
31 | "$RSYNC_RUN_DIR"
32 |
33 | ADD $RSYNC_START_SCRIPT $RSYNC_CONFIG_FILE ./
34 |
35 | RUN \
36 | groupadd -g $DOMINO_GID $DOMINO_GROUP && \
37 | useradd -u $DOMINO_UID -g $DOMINO_GID -mN -s /bin/bash $DOMINO_USER && \
38 | envsubst "$ALLENV" < "$RSYNC_START_SCRIPT" > "$DOMINO_BIN/$RSYNC_START_SCRIPT" && \
39 | envsubst "$ALLENV" < "$RSYNC_CONFIG_FILE" > "$DOMINO_ETC/$RSYNC_CONFIG_FILE" && \
40 | chown -R $DOMINO_USER:$DOMINO_GROUP "$RSYNC_RUN_DIR" && \
41 | chown -R $DOMINO_USER:$DOMINO_GROUP "$DOMINO_DIR" && \
42 | chmod 755 "$DOMINO_BIN/$RSYNC_START_SCRIPT" && \
43 | chmod 644 "$DOMINO_ETC/$RSYNC_CONFIG_FILE"
44 |
45 | # For testing -- to be removed
46 | RUN \
47 | chown -R $DOMINO_USER:$DOMINO_GROUP /mnt
48 |
--------------------------------------------------------------------------------
/dockerfiles/mpi-worker-start.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -o nounset
4 | set -o errexit
5 |
6 | INSTALL_DIR="/opt/domino/mpi-cluster"
7 | SSH_USER="sshd"
8 | SSH_RUN_DIR="/run/sshd-${DOMINO_USER}"
9 |
10 | mkdir -p "$SSH_RUN_DIR"
11 | chmod 777 "$SSH_RUN_DIR"
12 |
13 | if ! id $SSH_USER >/dev/null 2>&1; then
14 | useradd -g 65534 -mN -s "/usr/sbin/nologin" -d "$SSH_RUN_DIR" $SSH_USER
15 | fi
16 |
17 | if ! cut -d: -f3 < /etc/group | grep "^${DOMINO_GID}$" >/dev/null 2>&1; then
18 | groupadd -g $DOMINO_GID $DOMINO_GROUP
19 | fi
20 | if ! id $DOMINO_UID >/dev/null 2>&1; then
21 | useradd -u $DOMINO_UID -g $DOMINO_GID -mN -s /bin/bash -d "$DOMINO_HOME_DIR" $DOMINO_USER
22 | else
23 | # Change username of user with matching userid if needed
24 | EXISTING_USER=$(id -nu $DOMINO_UID)
25 | if [ "$EXISTING_USER" != "$DOMINO_USER" ]; then
26 | usermod -l $DOMINO_USER $EXISTING_USER
27 | fi
28 |
29 | # Change groupname of group with matching groupid if needed
30 | EXISTING_GROUP=$(id -ng $DOMINO_GID)
31 | if [ "$EXISTING_GROUP" != "$DOMINO_GROUP" ]; then
32 | groupmod --new-name $DOMINO_GROUP $EXISTING_GROUP
33 | fi
34 |
35 | # Change home directory (idempotent)
36 | usermod -d "$DOMINO_HOME_DIR" $DOMINO_USER
37 |
38 | # Add to domino group (idempotent)
39 | usermod -a -G $DOMINO_GROUP $DOMINO_USER
40 | fi
41 |
42 |
43 | # Add the new domino user to the non-root groups of the current container user
44 | for gid in `id -G`; do
45 | if [ $gid != 0 ]; then
46 | # Add user to a new/existing group with desired id.
47 | # https://askubuntu.com/a/639998
48 | group_name=$(cut -d: -f1 < <(getent group $gid))
49 | if [ -z "$group_name" ]; then
50 | group_name="group-$gid"
51 | groupadd -g $gid $group_name
52 | fi
53 | usermod -a -G $group_name $DOMINO_USER
54 | fi
55 | done
56 |
57 | cat << EOF > "$DOMINO_HOME_DIR/.profile"
58 | if [ "\$BASH" ] && [ -f ~/.bashrc ]; then
59 | . ~/.bashrc
60 | fi
61 | EOF
62 | chmod 644 "$DOMINO_HOME_DIR/.profile"
63 | chown $DOMINO_UID:$DOMINO_GID "$DOMINO_HOME_DIR/.profile"
64 |
65 | rm -f "$DOMINO_HOME_DIR/.bashrc"
66 | touch "$DOMINO_HOME_DIR/.bashrc"
67 | printenv | grep PATH | sed 's;^;export ;' >> "$DOMINO_HOME_DIR/.bashrc"
68 | printenv | grep MPI | sed 's;^;export ;' >> "$DOMINO_HOME_DIR/.bashrc"
69 | printenv | grep DOMINO | sed 's;^;export ;' >> "$DOMINO_HOME_DIR/.bashrc"
70 | chmod 644 "$DOMINO_HOME_DIR/.bashrc"
71 | chown $DOMINO_UID:$DOMINO_GID "$DOMINO_HOME_DIR/.bashrc"
72 |
73 | CONFIG_DIR="$INSTALL_DIR/etc"
74 | mkdir -p "$CONFIG_DIR"
75 |
76 | rm -f "$CONFIG_DIR"/ssh_host_*
77 | "$INSTALL_DIR/bin/ssh-keygen" -f "$CONFIG_DIR/ssh_host_key" -N '' -t ed25519
78 | chmod 400 "$CONFIG_DIR/ssh_host_key"
79 | chown $DOMINO_UID:$DOMINO_GID "$CONFIG_DIR/ssh_host_key"
80 |
81 | cat << EOF > "$CONFIG_DIR/sshd_config"
82 | Port $DOMINO_SSH_PORT
83 | HostKey "$CONFIG_DIR/ssh_host_key"
84 | AuthorizedKeysFile "$DOMINO_KEYS_PATH"
85 | PidFile "$SSH_RUN_DIR/sshd.pid"
86 | AllowUsers $DOMINO_USER
87 | EOF
88 | chmod 444 "$CONFIG_DIR/sshd_config"
89 |
90 | su -c "$INSTALL_DIR/sbin/sshd -f \"$CONFIG_DIR/sshd_config\" -De" - $DOMINO_USER
91 |
--------------------------------------------------------------------------------
/dockerfiles/rsync-start.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -o nounset
4 | set -o errexit
5 |
6 | /usr/bin/rsync \
7 | --daemon \
8 | --no-detach \
9 | --verbose \
10 | --config="$DOMINO_ETC/$RSYNC_CONFIG_FILE" \
11 | --port=$RSYNC_PORT
12 |
--------------------------------------------------------------------------------
/dockerfiles/rsyncd.conf:
--------------------------------------------------------------------------------
1 | pid file = $RSYNC_RUN_DIR/rsync.pid
2 | lock file = $RSYNC_RUN_DIR/rsync.lock
3 | log file = /dev/stdout
4 | use chroot = false
5 | read only = false
6 | timeout = 300
7 |
8 | [mnt]
9 | path = /mnt
10 |
11 | [repos]
12 | path = /repos
13 |
14 | [imported]
15 | path = /mnt/imported
16 |
--------------------------------------------------------------------------------
/docs/development.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dominodatalab/distributed-compute-operator/2f119d377b1632ea2e9817cacc24a6a54414aed1/docs/development.md
--------------------------------------------------------------------------------
/docs/img/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dominodatalab/distributed-compute-operator/2f119d377b1632ea2e9817cacc24a6a54414aed1/docs/img/logo.png
--------------------------------------------------------------------------------
/hack/boilerplate.go.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dominodatalab/distributed-compute-operator/2f119d377b1632ea2e9817cacc24a6a54414aed1/hack/boilerplate.go.txt
--------------------------------------------------------------------------------
/istio/global-strict-mtls.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: security.istio.io/v1beta1
2 | kind: PeerAuthentication
3 | metadata:
4 | name: default
5 | namespace: istio-system
6 | spec:
7 | mtls:
8 | mode: STRICT
9 |
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import "github.com/dominodatalab/distributed-compute-operator/cmd"
4 |
5 | func main() {
6 | cmd.Execute()
7 | }
8 |
--------------------------------------------------------------------------------
/pkg/cluster/dask/clientports.go:
--------------------------------------------------------------------------------
1 | package dask
2 |
3 | import (
4 | corev1 "k8s.io/api/core/v1"
5 | "sigs.k8s.io/controller-runtime/pkg/client"
6 |
7 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/components"
8 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
9 | )
10 |
11 | func ClientPortsService() core.OwnedComponent {
12 | return components.ClientPortsServiceComponent{
13 | ClientPorts: func(obj *client.Object) []corev1.ServicePort {
14 | return daskCluster(*obj).Spec.AdditionalClientPorts
15 | },
16 | ClientLabels: func(obj *client.Object) map[string]string {
17 | return daskCluster(*obj).Spec.NetworkPolicy.ClientLabels
18 | },
19 | Meta: meta,
20 | }
21 | }
22 |
23 | func ClientPortsNetworkPolicy() core.OwnedComponent {
24 | return components.ClientPortsNetworkPolicyComponent{
25 | ClientPorts: func(obj *client.Object) []corev1.ServicePort {
26 | return daskCluster(*obj).Spec.AdditionalClientPorts
27 | },
28 | ClientLabels: func(obj *client.Object) map[string]string {
29 | return daskCluster(*obj).Spec.NetworkPolicy.ClientLabels
30 | },
31 | Meta: meta,
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/pkg/cluster/dask/clusterstatusupdate.go:
--------------------------------------------------------------------------------
1 | package dask
2 |
3 | import (
4 | appsv1 "k8s.io/api/apps/v1"
5 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
6 | "sigs.k8s.io/controller-runtime/pkg/client"
7 |
8 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
9 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/components"
10 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
11 | )
12 |
13 | func ClusterStatusUpdate() core.Component {
14 | return components.ClusterStatusUpdate(func(obj client.Object) components.ClusterStatusUpdateDataSource {
15 | return &clusterStatusUpdateDS{dc: daskCluster(obj)}
16 | })
17 | }
18 |
19 | type clusterStatusUpdateDS struct {
20 | dc *dcv1alpha1.DaskCluster
21 | }
22 |
23 | func (c *clusterStatusUpdateDS) ListOpts() []client.ListOption {
24 | return []client.ListOption{
25 | client.InNamespace(c.dc.Namespace),
26 | client.MatchingLabels(meta.StandardLabels(c.dc)),
27 | }
28 | }
29 |
30 | func (c *clusterStatusUpdateDS) StatefulSet() *appsv1.StatefulSet {
31 | return &appsv1.StatefulSet{
32 | ObjectMeta: metav1.ObjectMeta{
33 | Name: meta.InstanceName(c.dc, ComponentWorker),
34 | Namespace: c.dc.Namespace,
35 | },
36 | }
37 | }
38 |
39 | func (c *clusterStatusUpdateDS) ClusterStatusConfig() *dcv1alpha1.ClusterStatusConfig {
40 | return &c.dc.Status.ClusterStatusConfig
41 | }
42 |
43 | func (c *clusterStatusUpdateDS) Image() *dcv1alpha1.OCIImageDefinition {
44 | return c.dc.Spec.Image
45 | }
46 |
--------------------------------------------------------------------------------
/pkg/cluster/dask/configmap.go:
--------------------------------------------------------------------------------
1 | package dask
2 |
3 | import (
4 | corev1 "k8s.io/api/core/v1"
5 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
6 | "sigs.k8s.io/controller-runtime/pkg/client"
7 |
8 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
9 | "github.com/dominodatalab/distributed-compute-operator/pkg/cluster/metadata"
10 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/components"
11 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
12 | )
13 |
14 | func ConfigMapKeyTab() core.OwnedComponent {
15 | return components.ConfigMap(func(obj client.Object) components.ConfigMapDataSource {
16 | return &configMapDS{dc: daskCluster(obj)}
17 | })
18 | }
19 |
20 | type configMapDS struct {
21 | dc *dcv1alpha1.DaskCluster
22 | }
23 |
24 | func (s *configMapDS) ConfigMap() *corev1.ConfigMap {
25 | cm := &corev1.ConfigMap{
26 | ObjectMeta: metav1.ObjectMeta{
27 | Name: meta.InstanceName(s.dc, metadata.ComponentNone),
28 | Namespace: s.dc.Namespace,
29 | Labels: meta.StandardLabels(s.dc),
30 | },
31 | }
32 |
33 | if s.dc.Spec.KerberosKeytab == nil {
34 | return cm
35 | }
36 | cm.BinaryData = map[string][]byte{"keytab": s.dc.Spec.KerberosKeytab.Contents}
37 |
38 | return cm
39 | }
40 |
41 | func (s *configMapDS) Delete() bool {
42 | return s.dc.Spec.KerberosKeytab == nil
43 | }
44 |
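
A hypothetical companion test (not part of the repository) for the data source above; it reuses the testDaskCluster fixture defined in dask_test.go and only exercises the nil-keytab path:

package dask

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// With no KerberosKeytab configured, the ConfigMap carries only metadata and
// the component deletes any previously created ConfigMap.
func TestConfigMapDS_NoKeytab(t *testing.T) {
	dc := testDaskCluster() // fixture leaves Spec.KerberosKeytab nil
	ds := configMapDS{dc: dc}

	cm := ds.ConfigMap()
	assert.Equal(t, "test-dask", cm.Name)
	assert.Equal(t, "ns", cm.Namespace)
	assert.Nil(t, cm.BinaryData)

	assert.True(t, ds.Delete())
}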
--------------------------------------------------------------------------------
/pkg/cluster/dask/dask_test.go:
--------------------------------------------------------------------------------
1 | package dask
2 |
3 | import (
4 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
5 |
6 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
7 | )
8 |
9 | func testDaskCluster() *dcv1alpha1.DaskCluster {
10 | return &dcv1alpha1.DaskCluster{
11 | TypeMeta: metav1.TypeMeta{
12 | Kind: "DaskCluster",
13 | APIVersion: "distributed-compute.dominodatalab.com/v1test1",
14 | },
15 | ObjectMeta: metav1.ObjectMeta{
16 | Name: "test",
17 | Namespace: "ns",
18 | },
19 | Spec: dcv1alpha1.DaskClusterSpec{
20 | ScalableClusterConfig: dcv1alpha1.ScalableClusterConfig{
21 | ClusterConfig: dcv1alpha1.ClusterConfig{
22 | Image: &dcv1alpha1.OCIImageDefinition{
23 | Registry: "",
24 | Repository: "daskdev/dask",
25 | Tag: "test-tag",
26 | },
27 | NetworkPolicy: dcv1alpha1.NetworkPolicyConfig{
28 | ClientLabels: map[string]string{
29 | "test-client": "true",
30 | },
31 | DashboardLabels: map[string]string{
32 | "test-ui-client": "true",
33 | },
34 | DashboardNamespaceLabels: map[string]string{
35 | "domino-platform": "true",
36 | },
37 | },
38 | PodSecurityPolicy: "privileged",
39 | },
40 | },
41 | Scheduler: dcv1alpha1.WorkloadConfig{},
42 | Worker: dcv1alpha1.DaskClusterWorker{},
43 | SchedulerPort: 8786,
44 | DashboardPort: 8787,
45 | WorkerPort: 3000,
46 | NannyPort: 3001,
47 | },
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/pkg/cluster/dask/horizonalpodautoscaler.go:
--------------------------------------------------------------------------------
1 | package dask
2 |
3 | import (
4 | autoscalingv2 "k8s.io/api/autoscaling/v2"
5 | corev1 "k8s.io/api/core/v1"
6 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
7 | "sigs.k8s.io/controller-runtime/pkg/client"
8 |
9 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
10 | "github.com/dominodatalab/distributed-compute-operator/pkg/cluster/metadata"
11 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/components"
12 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
13 | )
14 |
15 | func HorizontalPodAutoscaler() core.OwnedComponent {
16 | return components.HorizontalPodAutoscaler(func(obj client.Object) components.HorizontalPodAutoscalerDataSource {
17 | return &horizontalPodAutoscalerDS{dc: daskCluster(obj)}
18 | })
19 | }
20 |
21 | type horizontalPodAutoscalerDS struct {
22 | dc *dcv1alpha1.DaskCluster
23 | }
24 |
25 | func (s *horizontalPodAutoscalerDS) HorizontalPodAutoscaler() *autoscalingv2.HorizontalPodAutoscaler {
26 | hpa := &autoscalingv2.HorizontalPodAutoscaler{
27 | ObjectMeta: metav1.ObjectMeta{
28 | Name: meta.InstanceName(s.dc, metadata.ComponentNone),
29 | Namespace: s.dc.Namespace,
30 | Labels: meta.StandardLabels(s.dc),
31 | },
32 | }
33 |
34 | as := s.dc.Spec.Autoscaling
35 | if as == nil {
36 | return hpa
37 | }
38 |
39 | var behavior *autoscalingv2.HorizontalPodAutoscalerBehavior
40 | if as.ScaleDownStabilizationWindowSeconds != nil {
41 | behavior = &autoscalingv2.HorizontalPodAutoscalerBehavior{
42 | ScaleDown: &autoscalingv2.HPAScalingRules{
43 | StabilizationWindowSeconds: as.ScaleDownStabilizationWindowSeconds,
44 | },
45 | }
46 | }
47 |
48 | var metrics []autoscalingv2.MetricSpec
49 | if as.AverageCPUUtilization != nil {
50 | metrics = append(metrics, autoscalingv2.MetricSpec{
51 | Type: autoscalingv2.ResourceMetricSourceType,
52 | Resource: &autoscalingv2.ResourceMetricSource{
53 | Name: corev1.ResourceCPU,
54 | Target: autoscalingv2.MetricTarget{
55 | Type: autoscalingv2.UtilizationMetricType,
56 | AverageUtilization: s.dc.Spec.Autoscaling.AverageCPUUtilization,
57 | },
58 | },
59 | })
60 | }
61 | if as.AverageMemoryUtilization != nil {
62 | metrics = append(metrics, autoscalingv2.MetricSpec{
63 | Type: autoscalingv2.ResourceMetricSourceType,
64 | Resource: &autoscalingv2.ResourceMetricSource{
65 | Name: corev1.ResourceMemory,
66 | Target: autoscalingv2.MetricTarget{
67 | Type: autoscalingv2.UtilizationMetricType,
68 | AverageUtilization: s.dc.Spec.Autoscaling.AverageMemoryUtilization,
69 | },
70 | },
71 | })
72 | }
73 |
74 | hpa.Spec = autoscalingv2.HorizontalPodAutoscalerSpec{
75 | ScaleTargetRef: autoscalingv2.CrossVersionObjectReference{
76 | Kind: s.dc.Kind,
77 | Name: s.dc.Name,
78 | APIVersion: s.dc.APIVersion,
79 | },
80 | MinReplicas: s.dc.Spec.Autoscaling.MinReplicas,
81 | MaxReplicas: s.dc.Spec.Autoscaling.MaxReplicas,
82 | Metrics: metrics,
83 | Behavior: behavior,
84 | }
85 |
86 | return hpa
87 | }
88 |
89 | func (s *horizontalPodAutoscalerDS) Delete() bool {
90 | return s.dc.Spec.Autoscaling == nil
91 | }
92 |
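
A hypothetical test sketch (not in the repository) for the nil-autoscaling path above; it assumes only the testDaskCluster fixture from dask_test.go, which leaves Spec.Autoscaling unset:

package dask

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// Without an autoscaling config, only object metadata is populated and the
// component deletes any existing HorizontalPodAutoscaler.
func TestHorizontalPodAutoscalerDS_NoAutoscaling(t *testing.T) {
	dc := testDaskCluster()
	ds := horizontalPodAutoscalerDS{dc: dc}

	hpa := ds.HorizontalPodAutoscaler()
	assert.Equal(t, "test-dask", hpa.Name)
	assert.Equal(t, "ns", hpa.Namespace)
	assert.Empty(t, hpa.Spec.Metrics)

	assert.True(t, ds.Delete())
}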
--------------------------------------------------------------------------------
/pkg/cluster/dask/istiopeerauthentication.go:
--------------------------------------------------------------------------------
1 | package dask
2 |
3 | import (
4 | "sigs.k8s.io/controller-runtime/pkg/client"
5 |
6 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
7 | "github.com/dominodatalab/distributed-compute-operator/pkg/cluster/metadata"
8 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/components"
9 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
10 | "github.com/dominodatalab/distributed-compute-operator/pkg/resources/istio"
11 | )
12 |
13 | func IstioPeerAuthentication(enabled bool) core.Component {
14 | return components.IstioPeerAuthentication(func(obj client.Object) components.IstioPeerAuthenticationDataSource {
15 | return &istioPeerAuthenticationDS{dc: daskCluster(obj), enabled: enabled}
16 | })
17 | }
18 |
19 | type istioPeerAuthenticationDS struct {
20 | dc *dcv1alpha1.DaskCluster
21 | enabled bool
22 | }
23 |
24 | func (s *istioPeerAuthenticationDS) PeerAuthInfo() *istio.PeerAuthInfo {
25 | return &istio.PeerAuthInfo{
26 | Name: meta.InstanceName(s.dc, metadata.ComponentNone),
27 | Namespace: s.dc.Namespace,
28 | Labels: meta.StandardLabels(s.dc),
29 | Selector: meta.MatchLabels(s.dc),
30 | Mode: s.dc.Spec.MutualTLSMode,
31 | }
32 | }
33 |
34 | func (s *istioPeerAuthenticationDS) Enabled() bool {
35 | return s.enabled
36 | }
37 |
38 | func (s *istioPeerAuthenticationDS) Delete() bool {
39 | return s.dc.Spec.MutualTLSMode == ""
40 | }
41 |
--------------------------------------------------------------------------------
/pkg/cluster/dask/metadata.go:
--------------------------------------------------------------------------------
1 | package dask
2 |
3 | import (
4 | "sigs.k8s.io/controller-runtime/pkg/client"
5 |
6 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
7 | "github.com/dominodatalab/distributed-compute-operator/pkg/cluster/metadata"
8 | )
9 |
10 | const (
11 | ApplicationName = "dask"
12 | ComponentScheduler metadata.Component = "scheduler"
13 | ComponentWorker metadata.Component = "worker"
14 | )
15 |
16 | var meta = metadata.NewProvider(
17 | ApplicationName,
18 | func(obj client.Object) string { return daskCluster(obj).Spec.Image.Tag },
19 | func(obj client.Object) map[string]string { return daskCluster(obj).Spec.GlobalLabels },
20 | )
21 |
22 | func daskCluster(obj client.Object) *dcv1alpha1.DaskCluster {
23 | return obj.(*dcv1alpha1.DaskCluster)
24 | }
25 |
--------------------------------------------------------------------------------
/pkg/cluster/dask/rbac.go:
--------------------------------------------------------------------------------
1 | package dask
2 |
3 | import (
4 | rbacv1 "k8s.io/api/rbac/v1"
5 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
6 | "sigs.k8s.io/controller-runtime/pkg/client"
7 |
8 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
9 | "github.com/dominodatalab/distributed-compute-operator/pkg/cluster/metadata"
10 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/components"
11 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
12 | )
13 |
14 | var (
15 | policyAPIGroups = []string{"policy"}
16 | podSecurityPolicyResources = []string{"podsecuritypolicies"}
17 | useVerbs = []string{"use"}
18 | )
19 |
20 | func RolePodSecurityPolicy() core.OwnedComponent {
21 | return components.Role(func(obj client.Object) components.RoleDataSource {
22 | return &pspDS{dc: daskCluster(obj)}
23 | })
24 | }
25 |
26 | func RoleBindingPodSecurityPolicy() core.OwnedComponent {
27 | return components.RoleBinding(func(obj client.Object) components.RoleBindingDataSource {
28 | return &pspDS{dc: daskCluster(obj)}
29 | })
30 | }
31 |
32 | type pspDS struct {
33 | dc *dcv1alpha1.DaskCluster
34 | }
35 |
36 | func (s *pspDS) Role() *rbacv1.Role {
37 | return &rbacv1.Role{
38 | ObjectMeta: s.objectMeta(),
39 | Rules: []rbacv1.PolicyRule{
40 | {
41 | APIGroups: policyAPIGroups,
42 | Resources: podSecurityPolicyResources,
43 | Verbs: useVerbs,
44 | ResourceNames: []string{s.dc.Spec.PodSecurityPolicy},
45 | },
46 | },
47 | }
48 | }
49 |
50 | func (s *pspDS) RoleBinding() *rbacv1.RoleBinding {
51 | om := s.objectMeta()
52 |
53 | return &rbacv1.RoleBinding{
54 | ObjectMeta: om,
55 | RoleRef: rbacv1.RoleRef{
56 | APIGroup: rbacv1.GroupName,
57 | Kind: "Role",
58 | Name: om.Name,
59 | },
60 | Subjects: []rbacv1.Subject{
61 | {
62 | Kind: rbacv1.ServiceAccountKind,
63 | Name: om.Name,
64 | Namespace: s.dc.Namespace,
65 | },
66 | },
67 | }
68 | }
69 |
70 | func (s *pspDS) Delete() bool {
71 | return s.dc.Spec.PodSecurityPolicy == ""
72 | }
73 |
74 | func (s *pspDS) objectMeta() metav1.ObjectMeta {
75 | return metav1.ObjectMeta{
76 | Name: meta.InstanceName(s.dc, metadata.ComponentNone),
77 | Namespace: s.dc.Namespace,
78 | Labels: meta.StandardLabels(s.dc),
79 | }
80 | }
81 |
--------------------------------------------------------------------------------
/pkg/cluster/dask/rbac_test.go:
--------------------------------------------------------------------------------
1 | package dask
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | rbacv1 "k8s.io/api/rbac/v1"
8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9 | )
10 |
11 | func TestPspDS_Role(t *testing.T) {
12 | dc := testDaskCluster()
13 | ds := pspDS{dc: dc}
14 |
15 | actual := ds.Role()
16 | expected := &rbacv1.Role{
17 | ObjectMeta: metav1.ObjectMeta{
18 | Name: "test-dask",
19 | Namespace: "ns",
20 | Labels: map[string]string{
21 | "app.kubernetes.io/instance": "test",
22 | "app.kubernetes.io/managed-by": "distributed-compute-operator",
23 | "app.kubernetes.io/name": "dask",
24 | "app.kubernetes.io/version": "test-tag",
25 | },
26 | },
27 | Rules: []rbacv1.PolicyRule{
28 | {
29 | APIGroups: []string{"policy"},
30 | Resources: []string{"podsecuritypolicies"},
31 | Verbs: []string{"use"},
32 | ResourceNames: []string{"privileged"},
33 | },
34 | },
35 | }
36 |
37 | assert.Equal(t, expected, actual)
38 | }
39 |
40 | func TestPspDS_RoleBinding(t *testing.T) {
41 | dc := testDaskCluster()
42 | ds := pspDS{dc: dc}
43 |
44 | actual := ds.RoleBinding()
45 | expected := &rbacv1.RoleBinding{
46 | ObjectMeta: metav1.ObjectMeta{
47 | Name: "test-dask",
48 | Namespace: "ns",
49 | Labels: map[string]string{
50 | "app.kubernetes.io/instance": "test",
51 | "app.kubernetes.io/managed-by": "distributed-compute-operator",
52 | "app.kubernetes.io/name": "dask",
53 | "app.kubernetes.io/version": "test-tag",
54 | },
55 | },
56 | RoleRef: rbacv1.RoleRef{
57 | APIGroup: rbacv1.GroupName,
58 | Kind: "Role",
59 | Name: "test-dask",
60 | },
61 | Subjects: []rbacv1.Subject{
62 | {
63 | Kind: rbacv1.ServiceAccountKind,
64 | Name: "test-dask",
65 | Namespace: "ns",
66 | },
67 | },
68 | }
69 |
70 | assert.Equal(t, expected, actual)
71 | }
72 |
73 | func TestPspDS_Delete(t *testing.T) {
74 | dc := testDaskCluster()
75 | ds := pspDS{dc: dc}
76 |
77 | t.Run("provided_name", func(t *testing.T) {
78 | dc.Spec.PodSecurityPolicy = "restricted"
79 | assert.False(t, ds.Delete())
80 | })
81 |
82 | t.Run("empty_name", func(t *testing.T) {
83 | dc.Spec.PodSecurityPolicy = ""
84 | assert.True(t, ds.Delete())
85 | })
86 | }
87 |
--------------------------------------------------------------------------------
/pkg/cluster/dask/service.go:
--------------------------------------------------------------------------------
1 | package dask
2 |
3 | import (
4 | corev1 "k8s.io/api/core/v1"
5 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
6 | "k8s.io/apimachinery/pkg/util/intstr"
7 | "sigs.k8s.io/controller-runtime/pkg/client"
8 |
9 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
10 | "github.com/dominodatalab/distributed-compute-operator/pkg/cluster/metadata"
11 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/components"
12 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
13 | )
14 |
15 | func ServiceScheduler() core.OwnedComponent {
16 | return components.Service(func(obj client.Object) components.ServiceDataSource {
17 | return &serviceDS{dc: daskCluster(obj), comp: ComponentScheduler}
18 | })
19 | }
20 |
21 | func ServiceWorker() core.OwnedComponent {
22 | return components.Service(func(obj client.Object) components.ServiceDataSource {
23 | return &serviceDS{dc: daskCluster(obj), comp: ComponentWorker}
24 | })
25 | }
26 |
27 | type serviceDS struct {
28 | dc *dcv1alpha1.DaskCluster
29 | comp metadata.Component
30 | }
31 |
32 | func (s *serviceDS) Service() *corev1.Service {
33 | return &corev1.Service{
34 | ObjectMeta: metav1.ObjectMeta{
35 | Name: meta.InstanceName(s.dc, s.comp),
36 | Namespace: s.dc.Namespace,
37 | Labels: meta.StandardLabelsWithComponent(s.dc, s.comp, nil),
38 | },
39 | Spec: corev1.ServiceSpec{
40 | ClusterIP: corev1.ClusterIPNone,
41 | Selector: meta.MatchLabelsWithComponent(s.dc, s.comp),
42 | Ports: s.ports(),
43 | },
44 | }
45 | }
46 |
47 | func (s *serviceDS) ports() []corev1.ServicePort {
48 | if s.comp == ComponentScheduler {
49 | return []corev1.ServicePort{
50 | {
51 | Name: "tcp-serve",
52 | Port: s.dc.Spec.SchedulerPort,
53 | TargetPort: intstr.FromString("serve"),
54 | },
55 | {
56 | Name: "tcp-dashboard",
57 | Port: s.dc.Spec.DashboardPort,
58 | TargetPort: intstr.FromString("dashboard"),
59 | },
60 | }
61 | }
62 |
63 | return []corev1.ServicePort{
64 | {
65 | Name: "tcp-worker",
66 | Port: s.dc.Spec.WorkerPort,
67 | TargetPort: intstr.FromString("worker"),
68 | },
69 | {
70 | Name: "tcp-nanny",
71 | Port: s.dc.Spec.NannyPort,
72 | TargetPort: intstr.FromString("nanny"),
73 | },
74 | {
75 | Name: "tcp-dashboard",
76 | Port: s.dc.Spec.DashboardPort,
77 | TargetPort: intstr.FromString("dashboard"),
78 | },
79 | }
80 | }
81 |
--------------------------------------------------------------------------------
/pkg/cluster/dask/service_test.go:
--------------------------------------------------------------------------------
1 | package dask
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | corev1 "k8s.io/api/core/v1"
8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9 | "k8s.io/apimachinery/pkg/util/intstr"
10 | )
11 |
12 | func TestServiceDataSource_Service(t *testing.T) {
13 | dc := testDaskCluster()
14 |
15 | t.Run("scheduler", func(t *testing.T) {
16 | ds := serviceDS{dc: dc, comp: ComponentScheduler}
17 |
18 | actual := ds.Service()
19 | expected := &corev1.Service{
20 | ObjectMeta: metav1.ObjectMeta{
21 | Name: "test-dask-scheduler",
22 | Namespace: "ns",
23 | Labels: map[string]string{
24 | "app.kubernetes.io/component": "scheduler",
25 | "app.kubernetes.io/instance": "test",
26 | "app.kubernetes.io/managed-by": "distributed-compute-operator",
27 | "app.kubernetes.io/name": "dask",
28 | "app.kubernetes.io/version": "test-tag",
29 | },
30 | },
31 | Spec: corev1.ServiceSpec{
32 | ClusterIP: corev1.ClusterIPNone,
33 | Selector: map[string]string{
34 | "app.kubernetes.io/component": "scheduler",
35 | "app.kubernetes.io/instance": "test",
36 | "app.kubernetes.io/name": "dask",
37 | },
38 | Ports: []corev1.ServicePort{
39 | {
40 | Name: "tcp-serve",
41 | Port: 8786,
42 | TargetPort: intstr.FromString("serve"),
43 | },
44 | {
45 | Name: "tcp-dashboard",
46 | Port: 8787,
47 | TargetPort: intstr.FromString("dashboard"),
48 | },
49 | },
50 | },
51 | }
52 |
53 | assert.Equal(t, expected, actual)
54 | })
55 |
56 | t.Run("worker", func(t *testing.T) {
57 | ds := serviceDS{dc: dc, comp: ComponentWorker}
58 |
59 | actual := ds.Service()
60 | expected := &corev1.Service{
61 | ObjectMeta: metav1.ObjectMeta{
62 | Name: "test-dask-worker",
63 | Namespace: "ns",
64 | Labels: map[string]string{
65 | "app.kubernetes.io/component": "worker",
66 | "app.kubernetes.io/instance": "test",
67 | "app.kubernetes.io/managed-by": "distributed-compute-operator",
68 | "app.kubernetes.io/name": "dask",
69 | "app.kubernetes.io/version": "test-tag",
70 | },
71 | },
72 | Spec: corev1.ServiceSpec{
73 | ClusterIP: corev1.ClusterIPNone,
74 | Selector: map[string]string{
75 | "app.kubernetes.io/component": "worker",
76 | "app.kubernetes.io/instance": "test",
77 | "app.kubernetes.io/name": "dask",
78 | },
79 | Ports: []corev1.ServicePort{
80 | {
81 | Name: "tcp-worker",
82 | Port: 3000,
83 | TargetPort: intstr.FromString("worker"),
84 | },
85 | {
86 | Name: "tcp-nanny",
87 | Port: 3001,
88 | TargetPort: intstr.FromString("nanny"),
89 | },
90 | {
91 | Name: "tcp-dashboard",
92 | Port: 8787,
93 | TargetPort: intstr.FromString("dashboard"),
94 | },
95 | },
96 | },
97 | }
98 |
99 | assert.Equal(t, expected, actual)
100 | })
101 | }
102 |
--------------------------------------------------------------------------------
/pkg/cluster/dask/serviceaccount.go:
--------------------------------------------------------------------------------
1 | package dask
2 |
3 | import (
4 | corev1 "k8s.io/api/core/v1"
5 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
6 | "k8s.io/utils/pointer"
7 | "sigs.k8s.io/controller-runtime/pkg/client"
8 |
9 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
10 | "github.com/dominodatalab/distributed-compute-operator/pkg/cluster/metadata"
11 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/components"
12 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
13 | )
14 |
15 | func ServiceAccount() core.OwnedComponent {
16 | factory := func(obj client.Object) components.ServiceAccountDataSource {
17 | return &serviceAccountDS{dc: daskCluster(obj)}
18 | }
19 |
20 | return components.ServiceAccount(factory)
21 | }
22 |
23 | type serviceAccountDS struct {
24 | dc *dcv1alpha1.DaskCluster
25 | }
26 |
27 | func (s *serviceAccountDS) ServiceAccount() *corev1.ServiceAccount {
28 | return &corev1.ServiceAccount{
29 | ObjectMeta: metav1.ObjectMeta{
30 | Name: meta.InstanceName(s.dc, metadata.ComponentNone),
31 | Namespace: s.dc.Namespace,
32 | Labels: meta.StandardLabels(s.dc),
33 | },
34 | AutomountServiceAccountToken: pointer.Bool(s.dc.Spec.ServiceAccount.AutomountServiceAccountToken),
35 | }
36 | }
37 |
38 | func (s *serviceAccountDS) Delete() bool {
39 | return s.dc.Spec.ServiceAccount.Name != ""
40 | }
41 |
--------------------------------------------------------------------------------
/pkg/cluster/dask/serviceaccount_test.go:
--------------------------------------------------------------------------------
1 | package dask
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | "github.com/stretchr/testify/require"
8 | corev1 "k8s.io/api/core/v1"
9 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10 | "k8s.io/utils/pointer"
11 | )
12 |
13 | func TestServiceAccountDS_ServiceAccount(t *testing.T) {
14 | dc := testDaskCluster()
15 | ds := serviceAccountDS{dc: dc}
16 |
17 | actual := ds.ServiceAccount()
18 | expected := &corev1.ServiceAccount{
19 | ObjectMeta: metav1.ObjectMeta{
20 | Name: "test-dask",
21 | Namespace: "ns",
22 | Labels: map[string]string{
23 | "app.kubernetes.io/instance": "test",
24 | "app.kubernetes.io/managed-by": "distributed-compute-operator",
25 | "app.kubernetes.io/name": "dask",
26 | "app.kubernetes.io/version": "test-tag",
27 | },
28 | },
29 | AutomountServiceAccountToken: pointer.Bool(false),
30 | }
31 |
32 | require.Equal(t, expected, actual)
33 |
34 | dc.Spec.ServiceAccount.AutomountServiceAccountToken = true
35 | actual = ds.ServiceAccount()
36 |
37 | 	assert.Equal(t, pointer.Bool(true), actual.AutomountServiceAccountToken)
38 | }
39 |
40 | func TestServiceAccountDS_Delete(t *testing.T) {
41 | dc := testDaskCluster()
42 | ds := serviceAccountDS{dc: dc}
43 |
44 | t.Run("empty_name", func(t *testing.T) {
45 | dc.Spec.ServiceAccount.Name = ""
46 | assert.False(t, ds.Delete())
47 | })
48 |
49 | t.Run("provided_name", func(t *testing.T) {
50 | dc.Spec.ServiceAccount.Name = "other"
51 | assert.True(t, ds.Delete())
52 | })
53 | }
54 |
--------------------------------------------------------------------------------
/pkg/cluster/metadata/metadata.go:
--------------------------------------------------------------------------------
1 | package metadata
2 |
3 | import (
4 | "fmt"
5 |
6 | "sigs.k8s.io/controller-runtime/pkg/client"
7 |
8 | "github.com/dominodatalab/distributed-compute-operator/pkg/util"
9 | )
10 |
11 | const (
12 | // ApplicationNameLabelKey indicates the name of the application.
13 | ApplicationNameLabelKey = "app.kubernetes.io/name"
14 | // ApplicationInstanceLabelKey indicates a unique name identifying the instance of an application.
15 | ApplicationInstanceLabelKey = "app.kubernetes.io/instance"
16 | // ApplicationVersionLabelKey indicates the current version of the application.
17 | ApplicationVersionLabelKey = "app.kubernetes.io/version"
18 | // ApplicationComponentLabelKey indicates the component within the architecture of an application.
19 | ApplicationComponentLabelKey = "app.kubernetes.io/component"
20 | // ApplicationManagedByLabelKey indicates the tool being used to manage the operation of an application.
21 | ApplicationManagedByLabelKey = "app.kubernetes.io/managed-by"
22 | // ApplicationManagedByLabelValue is the specific tool being used to manage applications created by this project.
23 | ApplicationManagedByLabelValue = "distributed-compute-operator"
24 | // DescriptionAnnotationKey can be used to add extra information to a Kubernetes object via its annotations.
25 | DescriptionAnnotationKey = "distributed-compute.dominodatalab.com/description"
26 | )
27 |
28 | // Component is used to drive Kubernetes object generation for different types.
29 | type Component string
30 |
31 | // ComponentNone indicates a generic resource.
32 | const ComponentNone Component = "none"
33 |
34 | type versionExtractor func(client.Object) string
35 | type globalLabelsFn func(client.Object) map[string]string
36 |
37 | type Provider struct {
38 | application string
39 | version versionExtractor
40 | globalLabels globalLabelsFn
41 | }
42 |
43 | func NewProvider(name string, version versionExtractor, globalLabels globalLabelsFn) *Provider {
44 | return &Provider{
45 | application: name,
46 | version: version,
47 | globalLabels: globalLabels,
48 | }
49 | }
50 |
51 | func (p *Provider) InstanceName(obj client.Object, comp Component) string {
52 | if comp == ComponentNone {
53 | return fmt.Sprintf("%s-%s", obj.GetName(), p.application)
54 | }
55 |
56 | return fmt.Sprintf("%s-%s-%s", obj.GetName(), p.application, comp)
57 | }
58 |
59 | func (p *Provider) StandardLabels(obj client.Object) map[string]string {
60 | labels := map[string]string{
61 | ApplicationNameLabelKey: p.application,
62 | ApplicationInstanceLabelKey: obj.GetName(),
63 | ApplicationVersionLabelKey: p.version(obj),
64 | ApplicationManagedByLabelKey: ApplicationManagedByLabelValue,
65 | }
66 |
67 | return util.MergeStringMaps(p.globalLabels(obj), labels)
68 | }
69 |
70 | func (p *Provider) StandardLabelsWithComponent(obj client.Object, comp Component, extraLabels map[string]string) map[string]string {
71 | labels := p.StandardLabels(obj)
72 | labels[ApplicationComponentLabelKey] = string(comp)
73 |
74 | if extraLabels != nil {
75 | labels = util.MergeStringMaps(extraLabels, labels)
76 | }
77 |
78 | return labels
79 | }
80 |
81 | func (p *Provider) MatchLabels(obj client.Object) map[string]string {
82 | return map[string]string{
83 | ApplicationNameLabelKey: p.application,
84 | ApplicationInstanceLabelKey: obj.GetName(),
85 | }
86 | }
87 |
88 | func (p *Provider) MatchLabelsWithComponent(obj client.Object, comp Component) map[string]string {
89 | labels := p.MatchLabels(obj)
90 | labels[ApplicationComponentLabelKey] = string(comp)
91 |
92 | return labels
93 | }
94 |
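
A minimal, self-contained sketch (not in the repository) of how a Provider derives instance names and labels; the "dask"/"test-tag" values and the Pod stand-in are illustrative placeholders consistent with the dask tests earlier in this package tree:

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"

	"github.com/dominodatalab/distributed-compute-operator/pkg/cluster/metadata"
)

func main() {
	p := metadata.NewProvider(
		"dask",
		func(obj client.Object) string { return "test-tag" },
		func(obj client.Object) map[string]string { return map[string]string{} },
	)

	// Any client.Object works; only its name feeds into the generated values.
	obj := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "test"}}

	fmt.Println(p.InstanceName(obj, metadata.ComponentNone)) // test-dask
	fmt.Println(p.InstanceName(obj, "scheduler"))            // test-dask-scheduler
	fmt.Println(p.StandardLabels(obj))
	// map[app.kubernetes.io/instance:test app.kubernetes.io/managed-by:distributed-compute-operator
	//     app.kubernetes.io/name:dask app.kubernetes.io/version:test-tag]
}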
--------------------------------------------------------------------------------
/pkg/cluster/mpi/clientports.go:
--------------------------------------------------------------------------------
1 | package mpi
2 |
3 | import (
4 | corev1 "k8s.io/api/core/v1"
5 | "sigs.k8s.io/controller-runtime/pkg/client"
6 |
7 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/components"
8 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
9 | )
10 |
11 | func ClientPortsService() core.OwnedComponent {
12 | return components.ClientPortsServiceComponent{
13 | ClientPorts: func(obj *client.Object) []corev1.ServicePort {
14 | return objToMPICluster(*obj).Spec.AdditionalClientPorts
15 | },
16 | ClientLabels: func(obj *client.Object) map[string]string {
17 | return objToMPICluster(*obj).Spec.NetworkPolicy.ClientLabels
18 | },
19 | Meta: meta,
20 | }
21 | }
22 |
23 | func ClientPortsNetworkPolicy() core.OwnedComponent {
24 | return components.ClientPortsNetworkPolicyComponent{
25 | ClientPorts: func(obj *client.Object) []corev1.ServicePort {
26 | return objToMPICluster(*obj).Spec.AdditionalClientPorts
27 | },
28 | ClientLabels: func(obj *client.Object) map[string]string {
29 | return objToMPICluster(*obj).Spec.NetworkPolicy.ClientLabels
30 | },
31 | Meta: meta,
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/pkg/cluster/mpi/configmap.go:
--------------------------------------------------------------------------------
1 | package mpi
2 |
3 | import (
4 | "fmt"
5 | "strings"
6 |
7 | corev1 "k8s.io/api/core/v1"
8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9 | ctrl "sigs.k8s.io/controller-runtime"
10 | "sigs.k8s.io/controller-runtime/pkg/client"
11 |
12 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
13 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/actions"
14 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
15 | )
16 |
17 | func ConfigMap() core.OwnedComponent {
18 | return &configMapComponent{}
19 | }
20 |
21 | type configMapComponent struct{}
22 |
23 | func (c configMapComponent) Reconcile(ctx *core.Context) (ctrl.Result, error) {
24 | cr := objToMPICluster(ctx.Object)
25 |
26 | hostFileConfig := createHostFileConfig(cr)
27 | err := actions.CreateOrUpdateOwnedResource(ctx, cr, hostFileConfig)
28 | if err != nil {
29 | return ctrl.Result{}, fmt.Errorf("cannot reconcile hostfile configmap: %w", err)
30 | }
31 |
32 | keytabConfig := createKeytabConfig(cr)
33 | if keytabConfig != nil {
34 | err := actions.CreateOrUpdateOwnedResource(ctx, cr, keytabConfig)
35 | if err != nil {
36 | return ctrl.Result{}, fmt.Errorf("cannot reconcile keytab configmap: %w", err)
37 | }
38 | }
39 |
40 | return ctrl.Result{}, nil
41 | }
42 |
43 | func (c configMapComponent) Kind() client.Object {
44 | return &corev1.ConfigMap{}
45 | }
46 |
47 | func createHostFileConfig(cr *dcv1alpha1.MPICluster) *corev1.ConfigMap {
48 | svcName := serviceName(cr, ComponentWorker)
49 | workerName := workerStatefulSetName(cr)
50 | workerReplicas := *cr.Spec.Worker.Replicas
51 |
52 | var hostFileBuilder strings.Builder
53 | for idx := 0; idx < int(workerReplicas); idx++ {
54 | entry := fmt.Sprintf("%s-%d.%s\n", workerName, idx, svcName)
55 | hostFileBuilder.WriteString(entry)
56 | }
57 |
58 | return &corev1.ConfigMap{
59 | ObjectMeta: metav1.ObjectMeta{
60 | Name: configMapName(cr) + "-" + hostFileName,
61 | Namespace: cr.Namespace,
62 | Labels: meta.StandardLabels(cr),
63 | },
64 | Data: map[string]string{
65 | hostFileName: hostFileBuilder.String(),
66 | },
67 | }
68 | }
69 |
70 | func createKeytabConfig(cr *dcv1alpha1.MPICluster) *corev1.ConfigMap {
71 | if cr.Spec.KerberosKeytab == nil {
72 | return nil
73 | }
74 | return &corev1.ConfigMap{
75 | ObjectMeta: metav1.ObjectMeta{
76 | Name: configMapName(cr) + "-" + keytabName,
77 | Namespace: cr.Namespace,
78 | Labels: meta.StandardLabels(cr),
79 | },
80 | BinaryData: map[string][]byte{
81 | keytabName: cr.Spec.KerberosKeytab.Contents,
82 | },
83 | }
84 | }
85 |
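
For reference, a standalone sketch (not in the repository; it assumes a cluster named "test" with three worker replicas) of the hostfile content createHostFileConfig builds, where each entry is a worker pod's DNS name under the headless worker service:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Both names resolve to "test-mpi-worker" for a cluster named "test",
	// per workerStatefulSetName and serviceName(cr, ComponentWorker).
	workerName := "test-mpi-worker"
	svcName := "test-mpi-worker"

	var hostFile strings.Builder
	for idx := 0; idx < 3; idx++ {
		hostFile.WriteString(fmt.Sprintf("%s-%d.%s\n", workerName, idx, svcName))
	}

	fmt.Print(hostFile.String())
	// Output:
	// test-mpi-worker-0.test-mpi-worker
	// test-mpi-worker-1.test-mpi-worker
	// test-mpi-worker-2.test-mpi-worker
}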
--------------------------------------------------------------------------------
/pkg/cluster/mpi/istiopeerauthentication.go:
--------------------------------------------------------------------------------
1 | package mpi
2 |
3 | import (
4 | "sigs.k8s.io/controller-runtime/pkg/client"
5 |
6 | authenticationv1alpha1 "istio.io/api/authentication/v1alpha1"
7 |
8 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
9 | "github.com/dominodatalab/distributed-compute-operator/pkg/cluster/metadata"
10 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/components"
11 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
12 | "github.com/dominodatalab/distributed-compute-operator/pkg/resources/istio"
13 | )
14 |
15 | func IstioPeerAuthentication(enabled bool) core.Component {
16 | return components.IstioPeerAuthentication(func(obj client.Object) components.IstioPeerAuthenticationDataSource {
17 | return &istioPeerAuthenticationDS{mpi: objToMPICluster(obj), enabled: enabled}
18 | })
19 | }
20 |
21 | type istioPeerAuthenticationDS struct {
22 | mpi *dcv1alpha1.MPICluster
23 | enabled bool
24 | }
25 |
26 | func (s *istioPeerAuthenticationDS) PeerAuthInfo() *istio.PeerAuthInfo {
27 | return &istio.PeerAuthInfo{
28 | Name: meta.InstanceName(s.mpi, metadata.ComponentNone),
29 | Namespace: s.mpi.Namespace,
30 | Labels: meta.StandardLabels(s.mpi),
31 | Selector: meta.MatchLabels(s.mpi),
32 | Mode: s.mpi.Spec.MutualTLSMode,
33 | }
34 | }
35 |
36 | func (s *istioPeerAuthenticationDS) Enabled() bool {
37 | return s.enabled
38 | }
39 |
40 | func (s *istioPeerAuthenticationDS) Delete() bool {
41 | return s.mpi.Spec.MutualTLSMode == ""
42 | }
43 |
44 | func IstioClientPeerAuthentication(enabled bool) core.Component {
45 | return components.IstioPeerAuthentication(func(obj client.Object) components.IstioPeerAuthenticationDataSource {
46 | return &istioClientPeerAuthenticationDS{mpi: objToMPICluster(obj), enabled: enabled}
47 | })
48 | }
49 |
50 | type istioClientPeerAuthenticationDS struct {
51 | mpi *dcv1alpha1.MPICluster
52 | enabled bool
53 | }
54 |
55 | func (s *istioClientPeerAuthenticationDS) PeerAuthInfo() *istio.PeerAuthInfo {
56 | return &istio.PeerAuthInfo{
57 | Name: meta.InstanceName(s.mpi, ComponentClient),
58 | Namespace: s.mpi.Namespace,
59 | Labels: meta.StandardLabels(s.mpi),
60 | Selector: s.mpi.Spec.NetworkPolicy.ClientLabels,
61 | Mode: authenticationv1alpha1.MutualTls_PERMISSIVE.String(),
62 | }
63 | }
64 |
65 | func (s *istioClientPeerAuthenticationDS) Enabled() bool {
66 | return s.enabled && s.mpi.Spec.Worker.Annotations["sidecar.istio.io/inject"] == "false"
67 | }
68 |
69 | func (s *istioClientPeerAuthenticationDS) Delete() bool {
70 | return false
71 | }
72 |
--------------------------------------------------------------------------------
/pkg/cluster/mpi/metadata.go:
--------------------------------------------------------------------------------
1 | package mpi
2 |
3 | import (
4 | "sigs.k8s.io/controller-runtime/pkg/client"
5 |
6 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
7 | "github.com/dominodatalab/distributed-compute-operator/pkg/cluster/metadata"
8 | )
9 |
10 | const (
11 | ApplicationName = "mpi"
12 | ComponentWorker metadata.Component = "worker"
13 | ComponentClient metadata.Component = "client"
14 | )
15 |
16 | var meta = metadata.NewProvider(
17 | ApplicationName,
18 | func(obj client.Object) string { return objToMPICluster(obj).Spec.Image.Tag },
19 | func(obj client.Object) map[string]string { return objToMPICluster(obj).Spec.GlobalLabels },
20 | )
21 |
22 | func objToMPICluster(obj client.Object) *dcv1alpha1.MPICluster {
23 | return obj.(*dcv1alpha1.MPICluster)
24 | }
25 |
--------------------------------------------------------------------------------
/pkg/cluster/mpi/mpi.go:
--------------------------------------------------------------------------------
1 | package mpi
2 |
3 | import (
4 | "time"
5 |
6 | "sigs.k8s.io/controller-runtime/pkg/client"
7 |
8 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
9 | "github.com/dominodatalab/distributed-compute-operator/pkg/cluster/metadata"
10 | )
11 |
12 | const (
13 | // SSH port used by MPI worker
14 | sshdPort = 2222
15 | sshdPortName = "tcp-ssh"
16 |
17 | // Locations of the mounted files and their modes
18 | authorizedKeysPath = "/etc/mpi/authorized_keys"
19 | authorizedKeysMode = 0444 // octal!
20 |
21 | // Location of common Domino utilities
22 | customUtilPath = "/opt/domino/mpi-cluster"
23 |
24 | // Default parameters of a user account for executing MPI workload.
25 | defaultUserID = 12574
26 | defaultUserName = "domino"
27 | defaultGroupID = 12574
28 | defaultGroupName = "domino"
29 | defaultHomeDir = "/mnt"
30 |
31 | // Ports used by the rsync sidecar
32 | rsyncPort = 2223
33 | rsyncPortName = "tcp-rsync"
34 |
35 | // User and group for running the sidecar container;
36 | // they should match a user provisioned in the sidecar image.
37 | rsyncUserID = 12574
38 | rsyncGroupID = 12574
39 |
40 | // Name of an MPI hostfile; also a key in the config map and its prefix
41 | hostFileName = "hostfile"
42 |
43 | // Name of a Kerberos keytab file; also a key in the config map and its prefix
44 | keytabName = "keytab"
45 |
46 | // Period of rerunning resource finalizers
47 | finalizerRetryPeriod = 1 * time.Second
48 | )
49 |
50 | func configMapName(cr client.Object) string {
51 | return meta.InstanceName(cr, "config")
52 | }
53 |
54 | func selectServiceAccount(cr *dcv1alpha1.MPICluster) string {
55 | if cr.Spec.ServiceAccount.Name != "" {
56 | return cr.Spec.ServiceAccount.Name
57 | }
58 |
59 | return serviceAccountName(cr)
60 | }
61 |
62 | func serviceAccountName(cr client.Object) string {
63 | return meta.InstanceName(cr, metadata.ComponentNone)
64 | }
65 |
66 | func serviceName(cr client.Object, comp metadata.Component) string {
67 | return meta.InstanceName(cr, comp)
68 | }
69 |
70 | func sshSecretName(cr *dcv1alpha1.MPICluster) string {
71 | worker := cr.Spec.Worker
72 | return worker.SharedSSHSecret
73 | }
74 |
75 | func workerStatefulSetName(cr client.Object) string {
76 | return meta.InstanceName(cr, ComponentWorker)
77 | }
78 |
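
A hypothetical test sketch (not in the repository) pinning down the names the helpers above produce for a cluster named "test"; a bare Pod stands in for the MPICluster because the helpers only read the object's name:

package mpi

import (
	"testing"

	"github.com/stretchr/testify/assert"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func TestNamingHelpers_Sketch(t *testing.T) {
	obj := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "test"}}

	assert.Equal(t, "test-mpi", serviceAccountName(obj))
	assert.Equal(t, "test-mpi-config", configMapName(obj))
	assert.Equal(t, "test-mpi-worker", workerStatefulSetName(obj))
	assert.Equal(t, "test-mpi-worker", serviceName(obj, ComponentWorker))
}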
--------------------------------------------------------------------------------
/pkg/cluster/mpi/networkpolicy.go:
--------------------------------------------------------------------------------
1 | package mpi
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 |
7 | networkingv1 "k8s.io/api/networking/v1"
8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9 | ctrl "sigs.k8s.io/controller-runtime"
10 | "sigs.k8s.io/controller-runtime/pkg/client"
11 |
12 | "github.com/dominodatalab/distributed-compute-operator/pkg/cluster/metadata"
13 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/actions"
14 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
15 | )
16 |
17 | func NetworkPolicyWorker() core.OwnedComponent {
18 | return &networkPolicyComponent{
19 | comp: ComponentWorker,
20 | }
21 | }
22 |
23 | func NetworkPolicyClient() core.OwnedComponent {
24 | return &networkPolicyComponent{
25 | comp: ComponentClient,
26 | }
27 | }
28 |
29 | type networkPolicyComponent struct {
30 | comp metadata.Component
31 | }
32 |
33 | func (c networkPolicyComponent) Reconcile(ctx *core.Context) (ctrl.Result, error) {
34 | cr := objToMPICluster(ctx.Object)
35 |
36 | matchLabels := meta.MatchLabels(cr)
37 |
38 | var podSelectorMatchLabels map[string]string
39 | var ingressRules []networkingv1.NetworkPolicyPeer
40 |
41 | switch c.comp {
42 | case ComponentWorker:
43 | podSelectorMatchLabels = matchLabels
44 | ingressRules = []networkingv1.NetworkPolicyPeer{
45 | {
46 | PodSelector: &metav1.LabelSelector{
47 | MatchLabels: matchLabels,
48 | },
49 | },
50 | {
51 | PodSelector: &metav1.LabelSelector{
52 | MatchLabels: cr.Spec.NetworkPolicy.ClientLabels,
53 | },
54 | },
55 | }
56 | case ComponentClient:
57 | podSelectorMatchLabels = cr.Spec.NetworkPolicy.ClientLabels
58 | ingressRules = []networkingv1.NetworkPolicyPeer{
59 | {
60 | PodSelector: &metav1.LabelSelector{
61 | MatchLabels: matchLabels,
62 | },
63 | },
64 | }
65 | case metadata.ComponentNone:
66 | err := errors.New("unknown component for NetworkPolicy")
67 | return ctrl.Result{}, err
68 | }
69 |
70 | netpol := &networkingv1.NetworkPolicy{
71 | ObjectMeta: metav1.ObjectMeta{
72 | Name: meta.InstanceName(cr, c.comp),
73 | Namespace: cr.Namespace,
74 | Labels: meta.StandardLabels(cr),
75 | },
76 | Spec: networkingv1.NetworkPolicySpec{
77 | PodSelector: metav1.LabelSelector{
78 | MatchLabels: podSelectorMatchLabels,
79 | },
80 | Ingress: []networkingv1.NetworkPolicyIngressRule{
81 | {
82 | From: ingressRules,
83 | },
84 | },
85 | PolicyTypes: []networkingv1.PolicyType{
86 | networkingv1.PolicyTypeIngress,
87 | },
88 | },
89 | }
90 |
91 | err := actions.CreateOrUpdateOwnedResource(ctx, cr, netpol)
92 | if err != nil {
93 | err = fmt.Errorf("cannot reconcile networkpolicy: %w", err)
94 | }
95 |
96 | return ctrl.Result{}, err
97 | }
98 |
99 | func (c networkPolicyComponent) Kind() client.Object {
100 | return &networkingv1.NetworkPolicy{}
101 | }
102 |
--------------------------------------------------------------------------------
/pkg/cluster/mpi/podsecuritypolicy.go:
--------------------------------------------------------------------------------
1 | package mpi
2 |
3 | import (
4 | "fmt"
5 |
6 | rbacv1 "k8s.io/api/rbac/v1"
7 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
8 | ctrl "sigs.k8s.io/controller-runtime"
9 | "sigs.k8s.io/controller-runtime/pkg/client"
10 |
11 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
12 | "github.com/dominodatalab/distributed-compute-operator/pkg/cluster/metadata"
13 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/actions"
14 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
15 | )
16 |
17 | var (
18 | policyAPIGroups = []string{"policy"}
19 | podSecurityPolicyResources = []string{"podsecuritypolicies"}
20 | useVerbs = []string{"use"}
21 | )
22 |
23 | func RolePodSecurityPolicy() core.OwnedComponent {
24 | return &podSecurityPolicyComponent{kind: &rbacv1.Role{}}
25 | }
26 |
27 | func RoleBindingPodSecurityPolicy() core.OwnedComponent {
28 | return &podSecurityPolicyComponent{kind: &rbacv1.RoleBinding{}}
29 | }
30 |
31 | type podSecurityPolicyComponent struct {
32 | kind client.Object
33 | }
34 |
35 | func (c podSecurityPolicyComponent) Reconcile(ctx *core.Context) (ctrl.Result, error) {
36 | cr := objToMPICluster(ctx.Object)
37 |
38 | desc, resource := c.buildResource(cr)
39 |
40 | if cr.Spec.PodSecurityPolicy == "" {
41 | return ctrl.Result{}, actions.DeleteIfExists(ctx, resource)
42 | }
43 |
44 | err := actions.CreateOrUpdateOwnedResource(ctx, ctx.Object, resource)
45 | if err != nil {
46 | err = fmt.Errorf("cannot reconcile %s: %w", desc, err)
47 | }
48 |
49 | return ctrl.Result{}, err
50 | }
51 |
52 | func (c podSecurityPolicyComponent) Kind() client.Object {
53 | return c.kind
54 | }
55 |
56 | func (c podSecurityPolicyComponent) buildResource(cr *dcv1alpha1.MPICluster) (string, client.Object) {
57 | om := metav1.ObjectMeta{
58 | Name: meta.InstanceName(cr, metadata.ComponentNone),
59 | Namespace: cr.Namespace,
60 | Labels: meta.StandardLabels(cr),
61 | }
62 |
63 | switch c.Kind().(type) {
64 | case *rbacv1.Role:
65 | return "role", &rbacv1.Role{
66 | ObjectMeta: om,
67 | Rules: []rbacv1.PolicyRule{
68 | {
69 | APIGroups: policyAPIGroups,
70 | Resources: podSecurityPolicyResources,
71 | Verbs: useVerbs,
72 | ResourceNames: []string{cr.Spec.PodSecurityPolicy},
73 | },
74 | },
75 | }
76 | case *rbacv1.RoleBinding:
77 | return "role binding", &rbacv1.RoleBinding{
78 | ObjectMeta: om,
79 | RoleRef: rbacv1.RoleRef{
80 | APIGroup: rbacv1.GroupName,
81 | Kind: "Role",
82 | Name: om.Name,
83 | },
84 | Subjects: []rbacv1.Subject{
85 | {
86 | Kind: rbacv1.ServiceAccountKind,
87 | Name: serviceAccountName(cr),
88 | Namespace: cr.Namespace,
89 | },
90 | },
91 | }
92 | }
93 |
94 | panic(fmt.Sprintf("unsupported kind for pod security policy component: %T", c.kind))
95 | }
96 |
--------------------------------------------------------------------------------
/pkg/cluster/mpi/service.go:
--------------------------------------------------------------------------------
1 | package mpi
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 |
7 | corev1 "k8s.io/api/core/v1"
8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9 | "k8s.io/apimachinery/pkg/util/intstr"
10 | ctrl "sigs.k8s.io/controller-runtime"
11 | "sigs.k8s.io/controller-runtime/pkg/client"
12 |
13 | "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
14 | "github.com/dominodatalab/distributed-compute-operator/pkg/cluster/metadata"
15 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/actions"
16 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
17 | )
18 |
19 | func ServiceWorker() core.OwnedComponent {
20 | return &serviceComponent{
21 | comp: ComponentWorker,
22 | }
23 | }
24 |
25 | func ServiceClient() core.OwnedComponent {
26 | return &serviceComponent{
27 | comp: ComponentClient,
28 | }
29 | }
30 |
31 | type serviceComponent struct {
32 | comp metadata.Component
33 | }
34 |
35 | func (c serviceComponent) Reconcile(ctx *core.Context) (ctrl.Result, error) {
36 | cr := objToMPICluster(ctx.Object)
37 |
38 | ports := []corev1.ServicePort{}
39 | var selector map[string]string
40 | var extraLabels map[string]string
41 | switch c.comp {
42 | case ComponentClient:
43 | selector = cr.Spec.NetworkPolicy.ClientLabels
44 | extraLabels = map[string]string{}
45 | case ComponentWorker:
46 | ports = append(ports,
47 | corev1.ServicePort{
48 | Name: sshdPortName,
49 | Port: sshdPort,
50 | TargetPort: intstr.FromString(sshdPortName),
51 | Protocol: corev1.ProtocolTCP,
52 | },
53 | corev1.ServicePort{
54 | Name: rsyncPortName,
55 | Port: rsyncPort,
56 | TargetPort: intstr.FromString(rsyncPortName),
57 | Protocol: corev1.ProtocolTCP,
58 | })
59 |
60 | selector = meta.MatchLabelsWithComponent(cr, c.comp)
61 | extraLabels = cr.Spec.Worker.Labels
62 | case metadata.ComponentNone:
63 | err := errors.New("unknown component for Service")
64 | return ctrl.Result{}, err
65 | }
66 |
67 | ports = append(ports, mpiPorts(cr)...)
68 |
69 | svc := &corev1.Service{
70 | ObjectMeta: metav1.ObjectMeta{
71 | Name: serviceName(cr, c.comp),
72 | Namespace: cr.Namespace,
73 | Labels: meta.StandardLabelsWithComponent(cr, c.comp, extraLabels),
74 | },
75 | Spec: corev1.ServiceSpec{
76 | ClusterIP: corev1.ClusterIPNone,
77 | Selector: selector,
78 | Ports: ports,
79 | },
80 | }
81 |
82 | err := actions.CreateOrUpdateOwnedResource(ctx, cr, svc)
83 | if err != nil {
84 | err = fmt.Errorf("cannot reconcile service: %w", err)
85 | }
86 |
87 | return ctrl.Result{}, err
88 | }
89 |
90 | func (c serviceComponent) Kind() client.Object {
91 | return &corev1.Service{}
92 | }
93 |
94 | func mpiPorts(cr *v1alpha1.MPICluster) []corev1.ServicePort {
95 | ports := []corev1.ServicePort{}
96 | for idx, port := range cr.Spec.WorkerPorts {
97 | ports = append(ports, corev1.ServicePort{
98 | Name: fmt.Sprintf("tcp-mpi-%d", idx),
99 | Port: port,
100 | TargetPort: intstr.FromInt(int(port)),
101 | })
102 | }
103 | return ports
104 | }
105 |
--------------------------------------------------------------------------------
/pkg/cluster/mpi/serviceaccount.go:
--------------------------------------------------------------------------------
1 | package mpi
2 |
3 | import (
4 | "fmt"
5 |
6 | corev1 "k8s.io/api/core/v1"
7 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
8 | "k8s.io/utils/pointer"
9 | ctrl "sigs.k8s.io/controller-runtime"
10 | "sigs.k8s.io/controller-runtime/pkg/client"
11 |
12 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/actions"
13 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
14 | )
15 |
16 | func ServiceAccount() core.OwnedComponent {
17 | return &serviceAccountComponent{}
18 | }
19 |
20 | type serviceAccountComponent struct{}
21 |
22 | func (c serviceAccountComponent) Reconcile(ctx *core.Context) (ctrl.Result, error) {
23 | cr := objToMPICluster(ctx.Object)
24 | conf := cr.Spec.ServiceAccount
25 |
26 | sa := &corev1.ServiceAccount{
27 | ObjectMeta: metav1.ObjectMeta{
28 | Name: serviceAccountName(cr),
29 | Namespace: cr.Namespace,
30 | Labels: meta.StandardLabels(cr),
31 | },
32 | AutomountServiceAccountToken: pointer.Bool(conf.AutomountServiceAccountToken),
33 | }
34 |
35 | if conf.Name != "" {
36 | return ctrl.Result{}, actions.DeleteIfExists(ctx, sa)
37 | }
38 |
39 | err := actions.CreateOrUpdateOwnedResource(ctx, cr, sa)
40 | if err != nil {
41 | err = fmt.Errorf("cannot reconcile serviceaccount: %w", err)
42 | }
43 |
44 | return ctrl.Result{}, err
45 | }
46 |
47 | func (c serviceAccountComponent) Kind() client.Object {
48 | return &corev1.ServiceAccount{}
49 | }
50 |
--------------------------------------------------------------------------------
/pkg/controller/components/configmap.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "fmt"
5 |
6 | corev1 "k8s.io/api/core/v1"
7 | ctrl "sigs.k8s.io/controller-runtime"
8 | "sigs.k8s.io/controller-runtime/pkg/client"
9 |
10 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/actions"
11 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
12 | )
13 |
14 | type ConfigMapDataSource interface {
15 | ConfigMap() *corev1.ConfigMap
16 | Delete() bool
17 | }
18 |
19 | type ConfigMapDataSourceFactory func(object client.Object) ConfigMapDataSource
20 |
21 | func ConfigMap(f ConfigMapDataSourceFactory) core.OwnedComponent {
22 | return &configMapComponent{factory: f}
23 | }
24 |
25 | type configMapComponent struct {
26 | factory ConfigMapDataSourceFactory
27 | }
28 |
29 | func (c *configMapComponent) Kind() client.Object {
30 | return &corev1.ConfigMap{}
31 | }
32 |
33 | func (c *configMapComponent) Reconcile(ctx *core.Context) (ctrl.Result, error) {
34 | ds := c.factory(ctx.Object)
35 | cm := ds.ConfigMap()
36 |
37 | if ds.Delete() {
38 | return ctrl.Result{}, actions.DeleteIfExists(ctx, cm)
39 | }
40 |
41 | err := actions.CreateOrUpdateOwnedResource(ctx, ctx.Object, cm)
42 | if err != nil {
43 | err = fmt.Errorf("cannot reconcile config map: %w", err)
44 | }
45 |
46 | return ctrl.Result{}, err
47 | }
48 |
--------------------------------------------------------------------------------
/pkg/controller/components/horizontalpodautoscaler.go:
--------------------------------------------------------------------------------
1 | //nolint:dupl
2 | package components
3 |
4 | import (
5 | "fmt"
6 |
7 | autoscalingv2 "k8s.io/api/autoscaling/v2"
8 | ctrl "sigs.k8s.io/controller-runtime"
9 | "sigs.k8s.io/controller-runtime/pkg/client"
10 |
11 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/actions"
12 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
13 | )
14 |
15 | type HorizontalPodAutoscalerDataSource interface {
16 | HorizontalPodAutoscaler() *autoscalingv2.HorizontalPodAutoscaler
17 | Delete() bool
18 | }
19 |
20 | type HorizontalPodAutoscalerDataSourceFactory func(client.Object) HorizontalPodAutoscalerDataSource
21 |
22 | func HorizontalPodAutoscaler(f HorizontalPodAutoscalerDataSourceFactory) core.OwnedComponent {
23 | return &horizontalPodAutoscaler{factory: f}
24 | }
25 |
26 | type horizontalPodAutoscaler struct {
27 | factory HorizontalPodAutoscalerDataSourceFactory
28 | }
29 |
30 | func (c *horizontalPodAutoscaler) Kind() client.Object {
31 | return &autoscalingv2.HorizontalPodAutoscaler{}
32 | }
33 |
34 | func (c *horizontalPodAutoscaler) Reconcile(ctx *core.Context) (ctrl.Result, error) {
35 | ds := c.factory(ctx.Object)
36 | hpa := ds.HorizontalPodAutoscaler()
37 |
38 | if ds.Delete() {
39 | return ctrl.Result{}, actions.DeleteIfExists(ctx, hpa)
40 | }
41 |
42 | err := actions.CreateOrUpdateOwnedResource(ctx, ctx.Object, hpa)
43 | if err != nil {
44 | err = fmt.Errorf("cannot reconcile horizontal pod autoscaler: %w", err)
45 | }
46 |
47 | return ctrl.Result{}, err
48 | }
49 |
--------------------------------------------------------------------------------
/pkg/controller/components/istiopeerauthentication.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "fmt"
5 |
6 | ctrl "sigs.k8s.io/controller-runtime"
7 | "sigs.k8s.io/controller-runtime/pkg/client"
8 |
9 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/actions"
10 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
11 | "github.com/dominodatalab/distributed-compute-operator/pkg/resources/istio"
12 | )
13 |
14 | type IstioPeerAuthenticationDataSource interface {
15 | PeerAuthInfo() *istio.PeerAuthInfo
16 | Enabled() bool
17 | Delete() bool
18 | }
19 |
20 | type IstioPeerAuthenticationDataSourceFactory func(client.Object) IstioPeerAuthenticationDataSource
21 |
22 | func IstioPeerAuthentication(f IstioPeerAuthenticationDataSourceFactory) core.Component {
23 | return &istioPeerAuthenticationComponent{factory: f}
24 | }
25 |
26 | type istioPeerAuthenticationComponent struct {
27 | factory IstioPeerAuthenticationDataSourceFactory
28 | }
29 |
30 | func (c *istioPeerAuthenticationComponent) Reconcile(ctx *core.Context) (ctrl.Result, error) {
31 | ds := c.factory(ctx.Object)
32 |
33 | if !ds.Enabled() {
34 | return ctrl.Result{}, nil
35 | }
36 |
37 | peerAuth := istio.NewPeerAuthentication(ds.PeerAuthInfo())
38 | if ds.Delete() {
39 | return ctrl.Result{}, actions.DeleteIfExists(ctx, peerAuth)
40 | }
41 |
42 | err := actions.CreateOrUpdateOwnedResource(ctx, ctx.Object, peerAuth)
43 | if err != nil {
44 | err = fmt.Errorf("cannot reconcile istio peer authentication: %w", err)
45 | }
46 |
47 | return ctrl.Result{}, err
48 | }
49 |
--------------------------------------------------------------------------------
/pkg/controller/components/networkpolicy.go:
--------------------------------------------------------------------------------
1 | //nolint:dupl
2 | package components
3 |
4 | import (
5 | "fmt"
6 |
7 | networkingv1 "k8s.io/api/networking/v1"
8 | ctrl "sigs.k8s.io/controller-runtime"
9 | "sigs.k8s.io/controller-runtime/pkg/client"
10 |
11 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/actions"
12 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
13 | )
14 |
15 | type NetworkPolicyDataSource interface {
16 | NetworkPolicy() *networkingv1.NetworkPolicy
17 | Delete() bool
18 | }
19 |
20 | type NetworkPolicyDataSourceFactory func(client.Object) NetworkPolicyDataSource
21 |
22 | func NetworkPolicy(f NetworkPolicyDataSourceFactory) core.OwnedComponent {
23 | return &networkPolicyComponent{factory: f}
24 | }
25 |
26 | type networkPolicyComponent struct {
27 | factory NetworkPolicyDataSourceFactory
28 | }
29 |
30 | func (c *networkPolicyComponent) Kind() client.Object {
31 | return &networkingv1.NetworkPolicy{}
32 | }
33 |
34 | func (c *networkPolicyComponent) Reconcile(ctx *core.Context) (ctrl.Result, error) {
35 | ds := c.factory(ctx.Object)
36 | netpol := ds.NetworkPolicy()
37 |
38 | if ds.Delete() {
39 | return ctrl.Result{}, actions.DeleteIfExists(ctx, netpol)
40 | }
41 |
42 | err := actions.CreateOrUpdateOwnedResource(ctx, ctx.Object, netpol)
43 | if err != nil {
44 | err = fmt.Errorf("cannot reconcile network policy: %w", err)
45 | }
46 |
47 | return ctrl.Result{}, err
48 | }
49 |
--------------------------------------------------------------------------------
/pkg/controller/components/rbac.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "fmt"
5 |
6 | rbacv1 "k8s.io/api/rbac/v1"
7 | ctrl "sigs.k8s.io/controller-runtime"
8 | "sigs.k8s.io/controller-runtime/pkg/client"
9 |
10 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/actions"
11 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
12 | )
13 |
14 | type RoleDataSource interface {
15 | Role() *rbacv1.Role
16 | Delete() bool
17 | }
18 |
19 | type RoleDataSourceFactory func(client.Object) RoleDataSource
20 |
21 | func Role(f RoleDataSourceFactory) core.OwnedComponent {
22 | return &roleComponent{factory: f}
23 | }
24 |
25 | type roleComponent struct {
26 | factory RoleDataSourceFactory
27 | }
28 |
29 | func (c *roleComponent) Kind() client.Object {
30 | return &rbacv1.Role{}
31 | }
32 |
33 | func (c *roleComponent) Reconcile(ctx *core.Context) (ctrl.Result, error) {
34 | ds := c.factory(ctx.Object)
35 | role := ds.Role()
36 |
37 | if ds.Delete() {
38 | return ctrl.Result{}, actions.DeleteIfExists(ctx, role)
39 | }
40 |
41 | err := actions.CreateOrUpdateOwnedResource(ctx, ctx.Object, role)
42 | if err != nil {
43 | err = fmt.Errorf("cannot reconcile role: %w", err)
44 | }
45 |
46 | return ctrl.Result{}, err
47 | }
48 |
49 | type RoleBindingDataSource interface {
50 | RoleBinding() *rbacv1.RoleBinding
51 | Delete() bool
52 | }
53 |
54 | type RoleBindingDataSourceFactory func(client.Object) RoleBindingDataSource
55 |
56 | func RoleBinding(f RoleBindingDataSourceFactory) core.OwnedComponent {
57 | return &roleBindingComponent{factory: f}
58 | }
59 |
60 | type roleBindingComponent struct {
61 | factory RoleBindingDataSourceFactory
62 | }
63 |
64 | func (c *roleBindingComponent) Kind() client.Object {
65 | return &rbacv1.RoleBinding{}
66 | }
67 |
68 | func (c *roleBindingComponent) Reconcile(ctx *core.Context) (ctrl.Result, error) {
69 | ds := c.factory(ctx.Object)
70 | rb := ds.RoleBinding()
71 |
72 | if ds.Delete() {
73 | return ctrl.Result{}, actions.DeleteIfExists(ctx, rb)
74 | }
75 |
76 | err := actions.CreateOrUpdateOwnedResource(ctx, ctx.Object, rb)
77 | if err != nil {
78 | err = fmt.Errorf("cannot reconcile role binding: %w", err)
79 | }
80 |
81 | return ctrl.Result{}, err
82 | }
83 |
--------------------------------------------------------------------------------
/pkg/controller/components/service.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "fmt"
5 |
6 | corev1 "k8s.io/api/core/v1"
7 | ctrl "sigs.k8s.io/controller-runtime"
8 | "sigs.k8s.io/controller-runtime/pkg/client"
9 |
10 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/actions"
11 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
12 | )
13 |
14 | type ServiceDataSource interface {
15 | Service() *corev1.Service
16 | }
17 |
18 | type ServiceDataSourceFactory func(client.Object) ServiceDataSource
19 |
20 | func Service(f ServiceDataSourceFactory) core.OwnedComponent {
21 | return &serviceComponent{factory: f}
22 | }
23 |
24 | type serviceComponent struct {
25 | factory ServiceDataSourceFactory
26 | }
27 |
28 | func (c *serviceComponent) Kind() client.Object {
29 | return &corev1.Service{}
30 | }
31 |
32 | func (c *serviceComponent) Reconcile(ctx *core.Context) (ctrl.Result, error) {
33 | ds := c.factory(ctx.Object)
34 | svc := ds.Service()
35 |
36 | err := actions.CreateOrUpdateOwnedResource(ctx, ctx.Object, svc)
37 | if err != nil {
38 | err = fmt.Errorf("cannot reconcile service: %w", err)
39 | }
40 |
41 | return ctrl.Result{}, err
42 | }
43 |
--------------------------------------------------------------------------------
/pkg/controller/components/serviceaccount.go:
--------------------------------------------------------------------------------
1 | //nolint:dupl
2 | package components
3 |
4 | import (
5 | "fmt"
6 |
7 | corev1 "k8s.io/api/core/v1"
8 | ctrl "sigs.k8s.io/controller-runtime"
9 | "sigs.k8s.io/controller-runtime/pkg/client"
10 |
11 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/actions"
12 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
13 | )
14 |
15 | type ServiceAccountDataSource interface {
16 | ServiceAccount() *corev1.ServiceAccount
17 | Delete() bool
18 | }
19 |
20 | type ServiceAccountDataSourceFactory func(client.Object) ServiceAccountDataSource
21 |
22 | func ServiceAccount(f ServiceAccountDataSourceFactory) core.OwnedComponent {
23 | return &serviceAccountComponent{factory: f}
24 | }
25 |
26 | type serviceAccountComponent struct {
27 | factory ServiceAccountDataSourceFactory
28 | }
29 |
30 | func (c *serviceAccountComponent) Kind() client.Object {
31 | return &corev1.ServiceAccount{}
32 | }
33 |
34 | func (c *serviceAccountComponent) Reconcile(ctx *core.Context) (ctrl.Result, error) {
35 | ds := c.factory(ctx.Object)
36 | sa := ds.ServiceAccount()
37 |
38 | if ds.Delete() {
39 | return ctrl.Result{}, actions.DeleteIfExists(ctx, sa)
40 | }
41 |
42 | err := actions.CreateOrUpdateOwnedResource(ctx, ctx.Object, sa)
43 | if err != nil {
44 | err = fmt.Errorf("cannot reconcile service account: %w", err)
45 | }
46 |
47 | return ctrl.Result{}, err
48 | }
49 |
--------------------------------------------------------------------------------
/pkg/controller/components/statefulset.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "fmt"
5 |
6 | appsv1 "k8s.io/api/apps/v1"
7 | ctrl "sigs.k8s.io/controller-runtime"
8 | "sigs.k8s.io/controller-runtime/pkg/client"
9 |
10 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/actions"
11 | "github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
12 | )
13 |
14 | type StatefulSetDataSource interface {
15 | StatefulSet() (*appsv1.StatefulSet, error)
16 | PVCListOpts() []client.ListOption
17 | }
18 |
19 | type StatefulSetDataSourceFactory func(client.Object) StatefulSetDataSource
20 |
21 | func StatefulSet(f StatefulSetDataSourceFactory) core.OwnedComponent {
22 | return &statefulSetComponent{factory: f}
23 | }
24 |
25 | type statefulSetComponent struct {
26 | factory StatefulSetDataSourceFactory
27 | }
28 |
29 | func (c *statefulSetComponent) Kind() client.Object {
30 | return &appsv1.StatefulSet{}
31 | }
32 |
33 | func (c *statefulSetComponent) Reconcile(ctx *core.Context) (ctrl.Result, error) {
34 | ds := c.factory(ctx.Object)
35 |
36 | sts, err := ds.StatefulSet()
37 | if err != nil {
38 | return ctrl.Result{}, fmt.Errorf("failed to build statefulset: %w", err)
39 | }
40 |
41 | err = actions.CreateOrUpdateOwnedResource(ctx, ctx.Object, sts)
42 | if err != nil {
43 | err = fmt.Errorf("cannot reconcile stateful set: %w", err)
44 | }
45 |
46 | return ctrl.Result{}, err
47 | }
48 |
49 | func (c *statefulSetComponent) Finalize(ctx *core.Context) (ctrl.Result, bool, error) {
50 | ds := c.factory(ctx.Object)
51 | err := actions.DeleteStorage(ctx, ds.PVCListOpts())
52 |
53 | return ctrl.Result{}, err == nil, err
54 | }
55 |
--------------------------------------------------------------------------------
/pkg/controller/core/components.go:
--------------------------------------------------------------------------------
1 | package core
2 |
3 | import (
4 | ctrl "sigs.k8s.io/controller-runtime"
5 | "sigs.k8s.io/controller-runtime/pkg/client"
6 | )
7 |
8 | type Component interface {
9 | Reconcile(*Context) (ctrl.Result, error)
10 | }
11 |
12 | type OwnedComponent interface {
13 | Component
14 | Kind() client.Object
15 | }
16 |
17 | type FinalizerComponent interface {
18 | Finalize(*Context) (ctrl.Result, bool, error)
19 | }
20 |
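For reference, a minimal sketch (not part of the repository; the ConfigMap-based component is purely illustrative) of what it takes to satisfy these interfaces:

package example

import (
	corev1 "k8s.io/api/core/v1"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"

	"github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
)

// configMapComponent is a hypothetical OwnedComponent used only to show the contract.
type configMapComponent struct{}

// Kind reports the owned object type so the reconciler can watch it.
func (c *configMapComponent) Kind() client.Object {
	return &corev1.ConfigMap{}
}

// Reconcile would normally build the desired object from ctx.Object and apply it
// through an action helper; here it is a no-op.
func (c *configMapComponent) Reconcile(ctx *core.Context) (ctrl.Result, error) {
	return ctrl.Result{}, nil
}

// Compile-time assertion that the sketch satisfies core.OwnedComponent.
var _ core.OwnedComponent = &configMapComponent{}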
--------------------------------------------------------------------------------
/pkg/controller/core/context.go:
--------------------------------------------------------------------------------
1 | package core
2 |
3 | import (
4 | "context"
5 |
6 | "github.com/go-logr/logr"
7 | "k8s.io/apimachinery/pkg/runtime"
8 | "k8s.io/client-go/tools/record"
9 | "sigs.k8s.io/controller-runtime/pkg/client"
10 | )
11 |
12 | type Context struct {
13 | context.Context
14 |
15 | Log logr.Logger
16 | Object client.Object
17 | Client client.Client
18 | Scheme *runtime.Scheme
19 | Recorder record.EventRecorder
20 | Patch *Patch
21 | }
22 |
--------------------------------------------------------------------------------
/pkg/controller/core/patch.go:
--------------------------------------------------------------------------------
1 | package core
2 |
3 | import (
4 | "path"
5 |
6 | "github.com/banzaicloud/k8s-objectmatcher/patch"
7 | "k8s.io/apimachinery/pkg/runtime/schema"
8 | )
9 |
10 | var defaultCalculateOpts = []patch.CalculateOption{
11 | patch.IgnoreStatusFields(),
12 | patch.IgnoreVolumeClaimTemplateTypeMetaAndStatus(),
13 | }
14 |
15 | type Patch struct {
16 | Annotator *patch.Annotator
17 | Maker patch.Maker
18 | CalculateOpts []patch.CalculateOption
19 | }
20 |
21 | func NewPatch(gvk schema.GroupVersionKind) *Patch {
22 | a := patch.NewAnnotator(path.Join(gvk.Group, "last-applied"))
23 | m := patch.NewPatchMaker(a, &patch.K8sStrategicMergePatcher{}, &patch.BaseJSONMergePatcher{})
24 |
25 | return &Patch{
26 | Annotator: a,
27 | Maker: m,
28 | CalculateOpts: defaultCalculateOpts,
29 | }
30 | }
31 |
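A sketch of how this Patch is typically consumed with the banzaicloud object matcher (assumed usage, not code from this repository): annotate the desired object, then diff it against the live object and only update when the calculated patch is non-empty.

package example

import (
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime/schema"

	"github.com/dominodatalab/distributed-compute-operator/pkg/controller/core"
)

// needsUpdate is a hypothetical helper showing the assumed flow.
func needsUpdate(current, desired *unstructured.Unstructured) (bool, error) {
	p := core.NewPatch(schema.GroupVersionKind{Group: "distributed-compute.dominodatalab.com"})

	// Record the desired state in the "<group>/last-applied" annotation.
	if err := p.Annotator.SetLastAppliedAnnotation(desired); err != nil {
		return false, err
	}

	// Compare live vs. desired, ignoring status and volume claim template noise.
	result, err := p.Maker.Calculate(current, desired, p.CalculateOpts...)
	if err != nil {
		return false, err
	}

	return !result.IsEmpty(), nil
}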
--------------------------------------------------------------------------------
/pkg/crd/istio.go:
--------------------------------------------------------------------------------
1 | package crd
2 |
3 | import (
4 | "time"
5 |
6 | "github.com/hashicorp/go-retryablehttp"
7 | )
8 |
9 | const (
10 | checkURL = "http://localhost:15021/healthz/ready"
11 | finishURL = "http://localhost:15020/quitquitquit"
12 | )
13 |
14 | var retryClient *retryablehttp.Client
15 |
16 | func waitForIstioSidecar() (func(), error) {
17 | log.Info("Checking istio sidecar")
18 | resp, err := retryClient.Head(checkURL)
19 | if err != nil {
20 | log.Error(err, "Istio sidecar is not ready")
21 | return nil, err
22 | }
23 | defer resp.Body.Close()
24 |
25 | log.Info("Istio sidecar available")
26 | fn := func() {
27 | log.Info("Triggering istio termination")
28 | _, _ = retryClient.Post(finishURL, "", nil)
29 | }
30 |
31 | 	return fn, nil
32 | }
33 |
34 | func init() {
35 | retryClient = retryablehttp.NewClient()
36 | retryClient.RetryMax = 10
37 | retryClient.RetryWaitMin = 1 * time.Second
38 | retryClient.RetryWaitMax = 1 * time.Second
39 | }
40 |
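A hypothetical caller (not in the repository) illustrating the intended pattern: wait for the proxy, run the network-bound work, then hit /quitquitquit so the pod can terminate cleanly.

package crd

// applyWithSidecar is an illustrative wrapper around waitForIstioSidecar.
func applyWithSidecar(work func() error) error {
	finish, err := waitForIstioSidecar()
	if err != nil {
		return err
	}
	// Always ask the sidecar to exit, even if the work fails.
	defer finish()

	return work()
}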
--------------------------------------------------------------------------------
/pkg/resources/istio/peerauthentication.go:
--------------------------------------------------------------------------------
1 | package istio
2 |
3 | import (
4 | securityv1beta1 "istio.io/api/security/v1beta1"
5 | "istio.io/api/type/v1beta1"
6 | istio "istio.io/client-go/pkg/apis/security/v1beta1"
7 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
8 | )
9 |
10 | // PeerAuthInfo defines fields used to generate Istio PeerAuthentication objects.
11 | type PeerAuthInfo struct {
12 | Name string
13 | Namespace string
14 | Labels map[string]string
15 | Selector map[string]string
16 | Mode string
17 | }
18 |
19 | // NewPeerAuthentication uses PeerAuthInfo to generate and return a new PeerAuthentication object.
20 | func NewPeerAuthentication(info *PeerAuthInfo) *istio.PeerAuthentication {
21 | modeVal := securityv1beta1.PeerAuthentication_MutualTLS_Mode_value[info.Mode]
22 |
23 | return &istio.PeerAuthentication{
24 | ObjectMeta: metav1.ObjectMeta{
25 | Name: info.Name,
26 | Namespace: info.Namespace,
27 | Labels: info.Labels,
28 | },
29 | Spec: securityv1beta1.PeerAuthentication{
30 | Selector: &v1beta1.WorkloadSelector{
31 | MatchLabels: info.Selector,
32 | },
33 | Mtls: &securityv1beta1.PeerAuthentication_MutualTLS{
34 | Mode: securityv1beta1.PeerAuthentication_MutualTLS_Mode(modeVal),
35 | },
36 | },
37 | }
38 | }
39 |
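Example usage with illustrative values; unrecognized mode strings resolve to enum value 0 (UNSET), as the test below confirms.

package istio

// examplePeerAuthentication builds a STRICT mTLS policy scoped to one cluster's pods.
func examplePeerAuthentication() {
	pa := NewPeerAuthentication(&PeerAuthInfo{
		Name:      "my-cluster",    // illustrative
		Namespace: "my-namespace",  // illustrative
		Labels:    map[string]string{"app.kubernetes.io/name": "ray"},
		Selector:  map[string]string{"app.kubernetes.io/instance": "my-cluster"},
		Mode:      "STRICT",
	})
	_ = pa
}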
--------------------------------------------------------------------------------
/pkg/resources/istio/peerauthentication_test.go:
--------------------------------------------------------------------------------
1 | package istio
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | securityv1beta1 "istio.io/api/security/v1beta1"
8 | "istio.io/api/type/v1beta1"
9 | istio "istio.io/client-go/pkg/apis/security/v1beta1"
10 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
11 | )
12 |
13 | func TestNewPeerAuthentication(t *testing.T) {
14 | testcases := []struct {
15 | smode string
16 | mode securityv1beta1.PeerAuthentication_MutualTLS_Mode
17 | }{
18 | {"UNSET", securityv1beta1.PeerAuthentication_MutualTLS_UNSET},
19 | {"DISABLE", securityv1beta1.PeerAuthentication_MutualTLS_DISABLE},
20 | {"PERMISSIVE", securityv1beta1.PeerAuthentication_MutualTLS_PERMISSIVE},
21 | {"STRICT", securityv1beta1.PeerAuthentication_MutualTLS_STRICT},
22 | {"GARBAGE", securityv1beta1.PeerAuthentication_MutualTLS_UNSET},
23 | }
24 | for _, tc := range testcases {
25 | info := &PeerAuthInfo{
26 | Name: "cluster",
27 | Namespace: "ns",
28 | Labels: map[string]string{
29 | "awesome": "true",
30 | },
31 | Selector: map[string]string{
32 | "app.kubernetes.io/name": "compute-r",
33 | },
34 | Mode: tc.smode,
35 | }
36 | actual := NewPeerAuthentication(info)
37 |
38 | expected := &istio.PeerAuthentication{
39 | ObjectMeta: metav1.ObjectMeta{
40 | Name: "cluster",
41 | Namespace: "ns",
42 | Labels: map[string]string{
43 | "awesome": "true",
44 | },
45 | },
46 | Spec: securityv1beta1.PeerAuthentication{
47 | Selector: &v1beta1.WorkloadSelector{
48 | MatchLabels: map[string]string{
49 | "app.kubernetes.io/name": "compute-r",
50 | },
51 | },
52 | Mtls: &securityv1beta1.PeerAuthentication_MutualTLS{
53 | Mode: tc.mode,
54 | },
55 | },
56 | }
57 |
58 | assert.Equal(t, expected, actual)
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/pkg/resources/metadata.go:
--------------------------------------------------------------------------------
1 | package resources
2 |
3 | const (
4 | // ApplicationNameLabelKey indicates the name of the application.
5 | ApplicationNameLabelKey = "app.kubernetes.io/name"
6 | // ApplicationInstanceLabelKey indicates a unique name identifying the instance of an application.
7 | ApplicationInstanceLabelKey = "app.kubernetes.io/instance"
8 | // ApplicationVersionLabelKey indicates the current version of the application.
9 | ApplicationVersionLabelKey = "app.kubernetes.io/version"
10 | // ApplicationComponentLabelKey indicates the component within the architecture of an application.
11 | ApplicationComponentLabelKey = "app.kubernetes.io/component"
12 | // ApplicationManagedByLabelKey indicates the tool being used to manage the operation of an application.
13 | ApplicationManagedByLabelKey = "app.kubernetes.io/managed-by"
14 | // ApplicationManagedByLabelValue is the specific tool being used to manage applications created by this project.
15 | ApplicationManagedByLabelValue = "distributed-compute-operator"
16 | // DescriptionAnnotationKey can be used to add extra information to a Kubernetes object via its annotations.
17 | DescriptionAnnotationKey = "distributed-compute.dominodatalab.com/description"
18 | )
19 |
20 | // MetadataLabels returns a map used to label Kubernetes resources.
21 | func MetadataLabels(name, instance, version string) map[string]string {
22 | return map[string]string{
23 | ApplicationNameLabelKey: name,
24 | ApplicationInstanceLabelKey: instance,
25 | ApplicationVersionLabelKey: version,
26 | ApplicationManagedByLabelKey: ApplicationManagedByLabelValue,
27 | }
28 | }
29 |
30 | // MetadataLabelsWithComponent returns a map used to label Kubernetes resources that act as unique components.
31 | func MetadataLabelsWithComponent(name, instance, version, component string) map[string]string {
32 | labels := MetadataLabels(name, instance, version)
33 | labels[ApplicationComponentLabelKey] = component
34 |
35 | return labels
36 | }
37 |
38 | // SelectorLabels returns a map used to select Kubernetes objects that have
39 | // been labeled with output from MetadataLabels.
40 | func SelectorLabels(name, instance string) map[string]string {
41 | return map[string]string{
42 | ApplicationNameLabelKey: name,
43 | ApplicationInstanceLabelKey: instance,
44 | }
45 | }
46 |
47 | // SelectorLabelsWithComponent returns a map used to select Kubernetes objects
48 | // that have been labeled with output from MetadataLabelsWithComponent.
49 | func SelectorLabelsWithComponent(name, instance, component string) map[string]string {
50 | labels := SelectorLabels(name, instance)
51 | labels[ApplicationComponentLabelKey] = component
52 |
53 | return labels
54 | }
55 |
--------------------------------------------------------------------------------
/pkg/resources/metadata_test.go:
--------------------------------------------------------------------------------
1 | package resources
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | )
8 |
9 | func TestMetadataLabels(t *testing.T) {
10 | actual := MetadataLabels("my-app", "inst", "v1.0.0")
11 | expected := map[string]string{
12 | "app.kubernetes.io/name": "my-app",
13 | "app.kubernetes.io/instance": "inst",
14 | "app.kubernetes.io/version": "v1.0.0",
15 | "app.kubernetes.io/managed-by": "distributed-compute-operator",
16 | }
17 |
18 | assert.Equal(t, expected, actual)
19 | }
20 |
21 | func TestMetadataLabelsWithComponent(t *testing.T) {
22 | actual := MetadataLabelsWithComponent("my-app", "inst", "v1.0.0", "comp")
23 | expected := map[string]string{
24 | "app.kubernetes.io/name": "my-app",
25 | "app.kubernetes.io/instance": "inst",
26 | "app.kubernetes.io/version": "v1.0.0",
27 | "app.kubernetes.io/managed-by": "distributed-compute-operator",
28 | "app.kubernetes.io/component": "comp",
29 | }
30 |
31 | assert.Equal(t, expected, actual)
32 | }
33 |
34 | func TestSelectorLabels(t *testing.T) {
35 | actual := SelectorLabels("my-app", "inst")
36 | expected := map[string]string{
37 | "app.kubernetes.io/name": "my-app",
38 | "app.kubernetes.io/instance": "inst",
39 | }
40 |
41 | assert.Equal(t, expected, actual)
42 | }
43 |
44 | func TestSelectorLabelsWithComponent(t *testing.T) {
45 | actual := SelectorLabelsWithComponent("my-app", "inst", "comp")
46 | expected := map[string]string{
47 | "app.kubernetes.io/name": "my-app",
48 | "app.kubernetes.io/instance": "inst",
49 | "app.kubernetes.io/component": "comp",
50 | }
51 |
52 | assert.Equal(t, expected, actual)
53 | }
54 |
--------------------------------------------------------------------------------
/pkg/resources/ray/helpers_test.go:
--------------------------------------------------------------------------------
1 | package ray
2 |
3 | import (
4 | corev1 "k8s.io/api/core/v1"
5 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
6 | "k8s.io/utils/pointer"
7 |
8 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
9 | )
10 |
11 | // rayClusterFixture should be used for all ray unit testing.
12 | func rayClusterFixture() *dcv1alpha1.RayCluster {
13 | return &dcv1alpha1.RayCluster{
14 | TypeMeta: metav1.TypeMeta{
15 | Kind: "RayCluster",
16 | APIVersion: "distributed-compute.dominodatalab.com/v1test1",
17 | },
18 | ObjectMeta: metav1.ObjectMeta{
19 | Name: "test-id",
20 | Namespace: "fake-ns",
21 | },
22 | Spec: dcv1alpha1.RayClusterSpec{
23 | ScalableClusterConfig: dcv1alpha1.ScalableClusterConfig{
24 | ClusterConfig: dcv1alpha1.ClusterConfig{
25 | Image: &dcv1alpha1.OCIImageDefinition{
26 | Registry: "fake-reg",
27 | Repository: "fake-repo",
28 | Tag: "fake-tag",
29 | PullPolicy: corev1.PullIfNotPresent,
30 | },
31 | },
32 | },
33 | Port: 6379,
34 | RedisShardPorts: []int32{
35 | 6380,
36 | 6381,
37 | },
38 | ClientServerPort: 10001,
39 | ObjectManagerPort: 2384,
40 | NodeManagerPort: 2385,
41 | GCSServerPort: 2386,
42 | WorkerPorts: []int32{11000, 11001},
43 | DashboardPort: 8265,
44 | Worker: dcv1alpha1.RayClusterWorker{
45 | Replicas: pointer.Int32(5),
46 | },
47 | },
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/pkg/resources/ray/horizontalpodautoscaler.go:
--------------------------------------------------------------------------------
1 | package ray
2 |
3 | import (
4 | "fmt"
5 |
6 | autoscalingv2 "k8s.io/api/autoscaling/v2"
7 | corev1 "k8s.io/api/core/v1"
8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9 |
10 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
11 | )
12 |
13 | // NewHorizontalPodAutoscaler generates an HPA that targets a RayCluster resource.
14 | //
15 | // The metrics-server needs to be launched separately and the worker stateful
16 | // set requires cpu resource requests in order for this object to have any
17 | // effect.
18 | func NewHorizontalPodAutoscaler(rc *dcv1alpha1.RayCluster) (*autoscalingv2.HorizontalPodAutoscaler, error) {
19 | autoscaling := rc.Spec.Autoscaling
20 | if autoscaling == nil {
21 | return nil, fmt.Errorf("cannot build HPA without autoscaling config")
22 | }
23 |
24 | var behavior *autoscalingv2.HorizontalPodAutoscalerBehavior
25 | if autoscaling.ScaleDownStabilizationWindowSeconds != nil {
26 | behavior = &autoscalingv2.HorizontalPodAutoscalerBehavior{
27 | ScaleDown: &autoscalingv2.HPAScalingRules{
28 | StabilizationWindowSeconds: autoscaling.ScaleDownStabilizationWindowSeconds,
29 | },
30 | }
31 | }
32 |
33 | var metrics []autoscalingv2.MetricSpec
34 | if autoscaling.AverageCPUUtilization != nil {
35 | metrics = append(metrics, autoscalingv2.MetricSpec{
36 | Type: autoscalingv2.ResourceMetricSourceType,
37 | Resource: &autoscalingv2.ResourceMetricSource{
38 | Name: corev1.ResourceCPU,
39 | Target: autoscalingv2.MetricTarget{
40 | Type: autoscalingv2.UtilizationMetricType,
41 | AverageUtilization: autoscaling.AverageCPUUtilization,
42 | },
43 | },
44 | })
45 | }
46 | if autoscaling.AverageMemoryUtilization != nil {
47 | metrics = append(metrics, autoscalingv2.MetricSpec{
48 | Type: autoscalingv2.ResourceMetricSourceType,
49 | Resource: &autoscalingv2.ResourceMetricSource{
50 | Name: corev1.ResourceMemory,
51 | Target: autoscalingv2.MetricTarget{
52 | Type: autoscalingv2.UtilizationMetricType,
53 | AverageUtilization: autoscaling.AverageMemoryUtilization,
54 | },
55 | },
56 | })
57 | }
58 |
59 | hpa := &autoscalingv2.HorizontalPodAutoscaler{
60 | ObjectMeta: HorizontalPodAutoscalerObjectMeta(rc),
61 | Spec: autoscalingv2.HorizontalPodAutoscalerSpec{
62 | ScaleTargetRef: autoscalingv2.CrossVersionObjectReference{
63 | APIVersion: rc.APIVersion,
64 | Kind: rc.Kind,
65 | Name: rc.Name,
66 | },
67 | MinReplicas: autoscaling.MinReplicas,
68 | MaxReplicas: autoscaling.MaxReplicas,
69 | Metrics: metrics,
70 | Behavior: behavior,
71 | },
72 | }
73 |
74 | return hpa, nil
75 | }
76 |
77 | // HorizontalPodAutoscalerObjectMeta returns the ObjectMeta object used to identify new HPA objects.
78 | func HorizontalPodAutoscalerObjectMeta(rc *dcv1alpha1.RayCluster) metav1.ObjectMeta {
79 | return metav1.ObjectMeta{
80 | Name: InstanceObjectName(rc.Name, ComponentNone),
81 | Namespace: rc.Namespace,
82 | Labels: AddGlobalLabels(MetadataLabels(rc), rc.Spec.GlobalLabels),
83 | }
84 | }
85 |
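A small sketch of the guard behavior, written as if it lived next to the tests (rayClusterFixture is only visible to the test files): without an autoscaling config the constructor returns an error rather than a nil HPA.

package ray

// exampleHPAGuard is illustrative only; rayClusterFixture ships with a nil
// Spec.Autoscaling, so the constructor refuses to build an HPA.
func exampleHPAGuard() error {
	rc := rayClusterFixture()

	if _, err := NewHorizontalPodAutoscaler(rc); err != nil {
		// err: "cannot build HPA without autoscaling config"
		return err
	}
	return nil
}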
--------------------------------------------------------------------------------
/pkg/resources/ray/podsecuritypolicy.go:
--------------------------------------------------------------------------------
1 | package ray
2 |
3 | import (
4 | rbacv1 "k8s.io/api/rbac/v1"
5 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
6 |
7 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
8 | )
9 |
10 | var (
11 | policyAPIGroups = []string{"policy"}
12 | podSecurityPolicyResources = []string{"podsecuritypolicies"}
13 | useVerbs = []string{"use"}
14 | )
15 |
16 | // NewPodSecurityPolicyRBAC generates the role and role binding required to use a pod security policy.
17 | // The role is bound to the service account used by the ray cluster pods.
18 | func NewPodSecurityPolicyRBAC(rc *dcv1alpha1.RayCluster) (*rbacv1.Role, *rbacv1.RoleBinding) {
19 | name := InstanceObjectName(rc.Name, ComponentNone)
20 |
21 | role := &rbacv1.Role{
22 | ObjectMeta: metav1.ObjectMeta{
23 | Name: name,
24 | Namespace: rc.Namespace,
25 | Labels: AddGlobalLabels(MetadataLabels(rc), rc.Spec.GlobalLabels),
26 | },
27 | Rules: []rbacv1.PolicyRule{
28 | {
29 | APIGroups: policyAPIGroups,
30 | Resources: podSecurityPolicyResources,
31 | Verbs: useVerbs,
32 | ResourceNames: []string{rc.Spec.PodSecurityPolicy},
33 | },
34 | },
35 | }
36 |
37 | binding := &rbacv1.RoleBinding{
38 | ObjectMeta: metav1.ObjectMeta{
39 | Name: name,
40 | Namespace: rc.Namespace,
41 | Labels: AddGlobalLabels(MetadataLabels(rc), rc.Spec.GlobalLabels),
42 | },
43 | RoleRef: rbacv1.RoleRef{
44 | APIGroup: rbacv1.GroupName,
45 | Kind: "Role",
46 | Name: role.Name,
47 | },
48 | Subjects: []rbacv1.Subject{
49 | {
50 | Kind: rbacv1.ServiceAccountKind,
51 | Name: InstanceObjectName(rc.Name, ComponentNone),
52 | Namespace: rc.Namespace,
53 | },
54 | },
55 | }
56 |
57 | return role, binding
58 | }
59 |
--------------------------------------------------------------------------------
/pkg/resources/ray/podsecuritypolicy_test.go:
--------------------------------------------------------------------------------
1 | package ray
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | rbacv1 "k8s.io/api/rbac/v1"
8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9 | )
10 |
11 | func TestNewPodSecurityPolicyRBAC(t *testing.T) {
12 | rc := rayClusterFixture()
13 | rc.Spec.PodSecurityPolicy = "test-psp"
14 | role, roleBinding := NewPodSecurityPolicyRBAC(rc)
15 |
16 | t.Run("role", func(t *testing.T) {
17 | expected := &rbacv1.Role{
18 | ObjectMeta: metav1.ObjectMeta{
19 | Name: "test-id-ray",
20 | Namespace: "fake-ns",
21 | Labels: map[string]string{
22 | "app.kubernetes.io/name": "ray",
23 | "app.kubernetes.io/instance": "test-id",
24 | "app.kubernetes.io/version": "fake-tag",
25 | "app.kubernetes.io/managed-by": "distributed-compute-operator",
26 | },
27 | },
28 | Rules: []rbacv1.PolicyRule{
29 | {
30 | APIGroups: []string{"policy"},
31 | Resources: []string{"podsecuritypolicies"},
32 | Verbs: []string{"use"},
33 | ResourceNames: []string{"test-psp"},
34 | },
35 | },
36 | }
37 | assert.Equal(t, expected, role)
38 | })
39 |
40 | t.Run("role_binding", func(t *testing.T) {
41 | expected := &rbacv1.RoleBinding{
42 | ObjectMeta: metav1.ObjectMeta{
43 | Name: "test-id-ray",
44 | Namespace: "fake-ns",
45 | Labels: map[string]string{
46 | "app.kubernetes.io/name": "ray",
47 | "app.kubernetes.io/instance": "test-id",
48 | "app.kubernetes.io/version": "fake-tag",
49 | "app.kubernetes.io/managed-by": "distributed-compute-operator",
50 | },
51 | },
52 | RoleRef: rbacv1.RoleRef{
53 | APIGroup: "rbac.authorization.k8s.io",
54 | Kind: "Role",
55 | Name: "test-id-ray",
56 | },
57 | Subjects: []rbacv1.Subject{
58 | {
59 | Kind: "ServiceAccount",
60 | Name: "test-id-ray",
61 | Namespace: "fake-ns",
62 | },
63 | },
64 | }
65 | assert.Equal(t, expected, roleBinding)
66 | })
67 | }
68 |
--------------------------------------------------------------------------------
/pkg/resources/ray/ray.go:
--------------------------------------------------------------------------------
1 | package ray
2 |
3 | import (
4 | "fmt"
5 |
6 | "sigs.k8s.io/controller-runtime/pkg/client"
7 |
8 | "github.com/dominodatalab/distributed-compute-operator/pkg/cluster/metadata"
9 |
10 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
11 | "github.com/dominodatalab/distributed-compute-operator/pkg/resources"
12 | "github.com/dominodatalab/distributed-compute-operator/pkg/util"
13 | )
14 |
15 | // Component is used to drive Kubernetes object generation for different ray types.
16 | type Component string
17 |
18 | const (
19 | // ComponentNone indicates a generic ray resource.
20 | ComponentNone Component = "none"
21 | // ComponentHead indicates a ray head resource.
22 | ComponentHead Component = "head"
23 | // ComponentWorker indicates a ray worker resource.
24 | ComponentWorker Component = "worker"
25 | // ApplicationName defines the static name used to generate ray object metadata.
26 | ApplicationName = "ray"
27 | )
28 |
29 | // InstanceObjectName returns the name that will be used to create most owned cluster resources.
30 | func InstanceObjectName(instance string, comp Component) string {
31 | if comp == ComponentNone {
32 | return fmt.Sprintf("%s-%s", instance, ApplicationName)
33 | }
34 |
35 | return fmt.Sprintf("%s-%s-%s", instance, ApplicationName, comp)
36 | }
37 |
38 | // HeadlessHeadServiceName returns the name of the headless service used to
39 | // register the head ray pod.
40 | func HeadlessHeadServiceName(name string) string {
41 | return InstanceObjectName(name, ComponentHead)
42 | }
43 |
44 | // HeadlessWorkerServiceName returns the name of the headless service used to
45 | // register ray worker pods.
46 | func HeadlessWorkerServiceName(name string) string {
47 | return InstanceObjectName(name, ComponentWorker)
48 | }
49 |
50 | // MetadataLabels returns standard metadata for ray resources.
51 | func MetadataLabels(rc *dcv1alpha1.RayCluster) map[string]string {
52 | return resources.MetadataLabels(ApplicationName, rc.Name, rc.Spec.Image.Tag)
53 | }
54 |
55 | // MetadataLabelsWithComponent returns standard component metadata for ray resources.
56 | func MetadataLabelsWithComponent(rc *dcv1alpha1.RayCluster, comp Component) map[string]string {
57 | return resources.MetadataLabelsWithComponent(ApplicationName, rc.Name, rc.Spec.Image.Tag, string(comp))
58 | }
59 |
60 | // SelectorLabels returns a resource selector clause for ray resources.
61 | func SelectorLabels(rc *dcv1alpha1.RayCluster) map[string]string {
62 | return resources.SelectorLabels(ApplicationName, rc.Name)
63 | }
64 |
65 | // SelectorLabelsWithComponent returns a resource component selector clause for ray resources.
66 | func SelectorLabelsWithComponent(rc *dcv1alpha1.RayCluster, comp Component) map[string]string {
67 | return resources.SelectorLabelsWithComponent(ApplicationName, rc.Name, string(comp))
68 | }
69 |
70 | func AddGlobalLabels(labels map[string]string, globalLabels map[string]string) map[string]string {
71 | if globalLabels != nil {
72 | labels = util.MergeStringMaps(globalLabels, labels)
73 | }
74 | return labels
75 | }
76 |
77 | var Meta = metadata.NewProvider(
78 | ApplicationName,
79 | func(obj client.Object) string { return objToRayCluster(obj).Spec.Image.Tag },
80 | func(obj client.Object) map[string]string { return objToRayCluster(obj).Spec.GlobalLabels },
81 | )
82 |
83 | func objToRayCluster(obj client.Object) *dcv1alpha1.RayCluster {
84 | return obj.(*dcv1alpha1.RayCluster)
85 | }
86 |
--------------------------------------------------------------------------------
/pkg/resources/ray/ray_test.go:
--------------------------------------------------------------------------------
1 | package ray
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | )
8 |
9 | func TestInstanceObjectName(t *testing.T) {
10 | t.Run("with_component", func(t *testing.T) {
11 | comp := Component("test")
12 | actual := InstanceObjectName("steve-o", comp)
13 | assert.Equal(t, "steve-o-ray-test", actual)
14 | })
15 |
16 | t.Run("component_none", func(t *testing.T) {
17 | actual := InstanceObjectName("steve-o", ComponentNone)
18 | assert.Equal(t, "steve-o-ray", actual)
19 | })
20 | }
21 |
22 | func TestHeadlessHeadServiceName(t *testing.T) {
23 | actual := HeadlessHeadServiceName("steve-o")
24 | assert.Equal(t, "steve-o-ray-head", actual)
25 | }
26 |
27 | func TestHeadlessWorkerServiceName(t *testing.T) {
28 | actual := HeadlessWorkerServiceName("steve-o")
29 | assert.Equal(t, "steve-o-ray-worker", actual)
30 | }
31 |
32 | func TestMetadataLabels(t *testing.T) {
33 | rc := rayClusterFixture()
34 | actual := MetadataLabels(rc)
35 |
36 | expected := map[string]string{
37 | "app.kubernetes.io/name": "ray",
38 | "app.kubernetes.io/instance": "test-id",
39 | "app.kubernetes.io/version": "fake-tag",
40 | "app.kubernetes.io/managed-by": "distributed-compute-operator",
41 | }
42 | assert.Equal(t, expected, actual)
43 | }
44 |
45 | func TestMetadataLabelsWithComponent(t *testing.T) {
46 | rc := rayClusterFixture()
47 | actual := MetadataLabelsWithComponent(rc, "something")
48 |
49 | expected := map[string]string{
50 | "app.kubernetes.io/name": "ray",
51 | "app.kubernetes.io/instance": "test-id",
52 | "app.kubernetes.io/version": "fake-tag",
53 | "app.kubernetes.io/managed-by": "distributed-compute-operator",
54 | "app.kubernetes.io/component": "something",
55 | }
56 | assert.Equal(t, expected, actual)
57 | }
58 |
59 | func TestSelectorLabels(t *testing.T) {
60 | rc := rayClusterFixture()
61 | actual := SelectorLabels(rc)
62 |
63 | expected := map[string]string{
64 | "app.kubernetes.io/name": "ray",
65 | "app.kubernetes.io/instance": "test-id",
66 | }
67 | assert.Equal(t, expected, actual)
68 | }
69 |
70 | func TestSelectorLabelsWithComponent(t *testing.T) {
71 | rc := rayClusterFixture()
72 | actual := SelectorLabelsWithComponent(rc, "something")
73 |
74 | expected := map[string]string{
75 | "app.kubernetes.io/name": "ray",
76 | "app.kubernetes.io/instance": "test-id",
77 | "app.kubernetes.io/component": "something",
78 | }
79 | assert.Equal(t, expected, actual)
80 | }
81 |
--------------------------------------------------------------------------------
/pkg/resources/ray/serviceaccount.go:
--------------------------------------------------------------------------------
1 | package ray
2 |
3 | import (
4 | corev1 "k8s.io/api/core/v1"
5 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
6 | "k8s.io/utils/pointer"
7 |
8 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
9 | )
10 |
11 | // NewServiceAccount generates a service account resource without API access.
12 | func NewServiceAccount(rc *dcv1alpha1.RayCluster) *corev1.ServiceAccount {
13 | return &corev1.ServiceAccount{
14 | ObjectMeta: metav1.ObjectMeta{
15 | Name: InstanceObjectName(rc.Name, ComponentNone),
16 | Namespace: rc.Namespace,
17 | Labels: AddGlobalLabels(MetadataLabels(rc), rc.Spec.GlobalLabels),
18 | },
19 | AutomountServiceAccountToken: pointer.Bool(false),
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/pkg/resources/ray/serviceaccount_test.go:
--------------------------------------------------------------------------------
1 | package ray
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | corev1 "k8s.io/api/core/v1"
8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9 | "k8s.io/utils/pointer"
10 | )
11 |
12 | func TestNewServiceAccount(t *testing.T) {
13 | rc := rayClusterFixture()
14 | sa := NewServiceAccount(rc)
15 |
16 | expected := &corev1.ServiceAccount{
17 | ObjectMeta: metav1.ObjectMeta{
18 | Name: "test-id-ray",
19 | Namespace: "fake-ns",
20 | Labels: map[string]string{
21 | "app.kubernetes.io/name": "ray",
22 | "app.kubernetes.io/instance": "test-id",
23 | "app.kubernetes.io/version": "fake-tag",
24 | "app.kubernetes.io/managed-by": "distributed-compute-operator",
25 | },
26 | },
27 | AutomountServiceAccountToken: pointer.Bool(false),
28 | }
29 | assert.Equal(t, expected, sa)
30 | }
31 |
--------------------------------------------------------------------------------
/pkg/resources/spark/configmap.go:
--------------------------------------------------------------------------------
1 | package spark
2 |
3 | import (
4 | "fmt"
5 | "sort"
6 | "strings"
7 |
8 | corev1 "k8s.io/api/core/v1"
9 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10 |
11 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
12 | )
13 |
14 | // NewFrameworkConfigMap generates a ConfigMap representing a spark-defaults.conf file from the provided configuration.
15 | func NewFrameworkConfigMap(sc *dcv1alpha1.SparkCluster) *corev1.ConfigMap {
16 | data := map[string]string{}
17 | if sc.Spec.Master.DefaultConfiguration != nil {
18 | data[string(ComponentMaster)] = generateSparkDefaults(sc.Spec.Master.DefaultConfiguration)
19 | }
20 | if sc.Spec.Worker.DefaultConfiguration != nil {
21 | data[string(ComponentWorker)] = generateSparkDefaults(sc.Spec.Worker.DefaultConfiguration)
22 | }
23 | if len(data) == 0 {
24 | return nil
25 | }
26 | return &corev1.ConfigMap{
27 | ObjectMeta: metav1.ObjectMeta{
28 | Name: FrameworkConfigMapName(sc.Name, ComponentNone),
29 | Namespace: sc.Namespace,
30 | Labels: AddGlobalLabels(MetadataLabels(sc), sc.Spec.GlobalLabels),
31 | },
32 | Data: data,
33 | }
34 | }
35 |
36 | // NewKeyTabConfigMap generates a ConfigMap containing the Kerberos keytab contents from the provided configuration.
37 | func NewKeyTabConfigMap(sc *dcv1alpha1.SparkCluster) *corev1.ConfigMap {
38 | binaryData := map[string][]byte{}
39 |
40 | if sc.Spec.KerberosKeytab != nil {
41 | binaryData["keytab"] = sc.Spec.KerberosKeytab.Contents
42 | }
43 |
44 | if len(binaryData) == 0 {
45 | return nil
46 | }
47 |
48 | return &corev1.ConfigMap{
49 | ObjectMeta: metav1.ObjectMeta{
50 | Name: KeyTabConfigMapName(sc.Name, ComponentNone),
51 | Namespace: sc.Namespace,
52 | Labels: AddGlobalLabels(MetadataLabels(sc), sc.Spec.GlobalLabels),
53 | },
54 | BinaryData: binaryData,
55 | }
56 | }
57 |
58 | // generateSparkDefaults emits the config with keys in sorted order; map iteration
59 | // order is not stable in Go, and we want to provide a stable interface.
60 | func generateSparkDefaults(defaults map[string]string) string {
61 | var keys []string
62 | for k := range defaults {
63 | keys = append(keys, k)
64 | }
65 | sort.Strings(keys)
66 | b := strings.Builder{}
67 | for _, k := range keys {
68 | b.WriteString(fmt.Sprintf("%s %s\n", k, defaults[k]))
69 | }
70 | return b.String()
71 | }
72 |
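A worked example of the sorted output (keys and values are illustrative):

package spark

import "fmt"

// exampleDefaults shows that keys are emitted in sorted order, one
// "key value" pair per line.
func exampleDefaults() {
	out := generateSparkDefaults(map[string]string{
		"spark.executor.memory": "2g",
		"spark.driver.memory":   "1g",
	})
	fmt.Print(out)
	// spark.driver.memory 1g
	// spark.executor.memory 2g
}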
--------------------------------------------------------------------------------
/pkg/resources/spark/envoyfilter.go:
--------------------------------------------------------------------------------
1 | package spark
2 |
3 | import (
4 | "fmt"
5 |
6 | spb "google.golang.org/protobuf/types/known/structpb"
7 | networkingv1alpha3 "istio.io/api/networking/v1alpha3"
8 | apinetworkingv1alpha3 "istio.io/client-go/pkg/apis/networking/v1alpha3"
9 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10 |
11 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
12 | )
13 |
14 | const filterName = "envoy.filters.network.tcp_proxy"
15 |
16 | // NewEnvoyFilter creates an EnvoyFilter resource that sets the TCP proxy idle_timeout to "0s" (disabling it) for Istio-enabled deployments.
17 | func NewEnvoyFilter(sc *dcv1alpha1.SparkCluster) *apinetworkingv1alpha3.EnvoyFilter {
18 | match := networkingv1alpha3.EnvoyFilter_EnvoyConfigObjectMatch{
19 | Context: networkingv1alpha3.EnvoyFilter_ANY,
20 | ObjectTypes: &networkingv1alpha3.EnvoyFilter_EnvoyConfigObjectMatch_Listener{
21 | Listener: &networkingv1alpha3.EnvoyFilter_ListenerMatch{
22 | FilterChain: &networkingv1alpha3.EnvoyFilter_ListenerMatch_FilterChainMatch{
23 | Filter: &networkingv1alpha3.EnvoyFilter_ListenerMatch_FilterMatch{
24 | Name: filterName,
25 | },
26 | },
27 | },
28 | },
29 | }
30 |
31 | patch := networkingv1alpha3.EnvoyFilter_Patch{
32 | Operation: networkingv1alpha3.EnvoyFilter_Patch_MERGE,
33 | Value: &spb.Struct{
34 | Fields: map[string]*spb.Value{
35 | "name": {
36 | Kind: &spb.Value_StringValue{
37 | StringValue: "envoy.filters.network.tcp_proxy",
38 | },
39 | },
40 | "typed_config": {
41 | Kind: &spb.Value_StructValue{
42 | StructValue: &spb.Struct{
43 | Fields: map[string]*spb.Value{
44 | "@type": {
45 | Kind: &spb.Value_StringValue{
46 | StringValue: "type.googleapis.com/envoy.extensions.filters.network.tcp_proxy.v3.TcpProxy",
47 | },
48 | },
49 | "idle_timeout": {
50 | Kind: &spb.Value_StringValue{
51 | StringValue: "0s",
52 | },
53 | },
54 | },
55 | },
56 | },
57 | },
58 | },
59 | },
60 | }
61 |
62 | configPatches := []*networkingv1alpha3.EnvoyFilter_EnvoyConfigObjectPatch{
63 | {
64 | ApplyTo: networkingv1alpha3.EnvoyFilter_NETWORK_FILTER,
65 | Match: &match,
66 | Patch: &patch,
67 | },
68 | }
69 |
70 | workloadSelector := networkingv1alpha3.WorkloadSelector{
71 | Labels: sc.Spec.EnvoyFilterLabels,
72 | }
73 |
74 | envoyFilter := &apinetworkingv1alpha3.EnvoyFilter{
75 | TypeMeta: metav1.TypeMeta{},
76 | ObjectMeta: metav1.ObjectMeta{
77 | Name: fmt.Sprintf("%s-%s", InstanceObjectName(sc.Name, ComponentNone), "envoyfilter"),
78 | Namespace: sc.Namespace,
79 | Labels: AddGlobalLabels(MetadataLabels(sc), sc.Labels),
80 | },
81 | Spec: networkingv1alpha3.EnvoyFilter{
82 | WorkloadSelector: &workloadSelector,
83 | ConfigPatches: configPatches,
84 | },
85 | }
86 |
87 | return envoyFilter
88 | }
89 |
--------------------------------------------------------------------------------
/pkg/resources/spark/envoyfilter_test.go:
--------------------------------------------------------------------------------
1 | package spark
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | spb "google.golang.org/protobuf/types/known/structpb"
8 | networkingv1alpha3 "istio.io/api/networking/v1alpha3"
9 | apinetworkingv1alpha3 "istio.io/client-go/pkg/apis/networking/v1alpha3"
10 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
11 | )
12 |
13 | func TestNewEnvoyFilter(t *testing.T) {
14 | t.Run("default", func(t *testing.T) {
15 | sc := sparkClusterFixture()
16 | actual := NewEnvoyFilter(sc)
17 |
18 | patch := networkingv1alpha3.EnvoyFilter_Patch{
19 | Operation: networkingv1alpha3.EnvoyFilter_Patch_MERGE,
20 | Value: &spb.Struct{
21 | Fields: map[string]*spb.Value{
22 | "name": {
23 | Kind: &spb.Value_StringValue{
24 | StringValue: "envoy.filters.network.tcp_proxy",
25 | },
26 | },
27 | "typed_config": {
28 | Kind: &spb.Value_StructValue{
29 | StructValue: &spb.Struct{
30 | Fields: map[string]*spb.Value{
31 | "@type": {
32 | Kind: &spb.Value_StringValue{
33 | StringValue: "type.googleapis.com/envoy.extensions.filters.network.tcp_proxy.v3.TcpProxy",
34 | },
35 | },
36 | "idle_timeout": {
37 | Kind: &spb.Value_StringValue{
38 | StringValue: "0s",
39 | },
40 | },
41 | },
42 | },
43 | },
44 | },
45 | },
46 | },
47 | }
48 |
49 | configPatches := []*networkingv1alpha3.EnvoyFilter_EnvoyConfigObjectPatch{
50 | {
51 | ApplyTo: networkingv1alpha3.EnvoyFilter_NETWORK_FILTER,
52 | Match: &networkingv1alpha3.EnvoyFilter_EnvoyConfigObjectMatch{
53 | Context: networkingv1alpha3.EnvoyFilter_ANY,
54 | ObjectTypes: &networkingv1alpha3.EnvoyFilter_EnvoyConfigObjectMatch_Listener{
55 | Listener: &networkingv1alpha3.EnvoyFilter_ListenerMatch{
56 | FilterChain: &networkingv1alpha3.EnvoyFilter_ListenerMatch_FilterChainMatch{
57 | Filter: &networkingv1alpha3.EnvoyFilter_ListenerMatch_FilterMatch{
58 | Name: "envoy.filters.network.tcp_proxy",
59 | },
60 | },
61 | },
62 | },
63 | },
64 | Patch: &patch,
65 | },
66 | }
67 |
68 | workloadSelector := networkingv1alpha3.WorkloadSelector{
69 | Labels: sc.Spec.EnvoyFilterLabels,
70 | }
71 |
72 | expected := &apinetworkingv1alpha3.EnvoyFilter{
73 | TypeMeta: metav1.TypeMeta{},
74 | ObjectMeta: metav1.ObjectMeta{
75 | Name: "test-id-spark-envoyfilter",
76 | Namespace: sc.Namespace,
77 | Labels: MetadataLabels(sc),
78 | },
79 | Spec: networkingv1alpha3.EnvoyFilter{
80 | WorkloadSelector: &workloadSelector,
81 | ConfigPatches: configPatches,
82 | },
83 | }
84 |
85 | assert.Equal(t, expected, actual, "Istio EnvoyFilter not correctly generated")
86 | })
87 | }
88 |
--------------------------------------------------------------------------------
/pkg/resources/spark/helpers_test.go:
--------------------------------------------------------------------------------
1 | package spark
2 |
3 | import (
4 | corev1 "k8s.io/api/core/v1"
5 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
6 | "k8s.io/utils/pointer"
7 |
8 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
9 | )
10 |
11 | // sparkClusterFixture should be used for all spark unit testing.
12 | func sparkClusterFixture() *dcv1alpha1.SparkCluster {
13 | return &dcv1alpha1.SparkCluster{
14 | TypeMeta: metav1.TypeMeta{
15 | Kind: "SparkCluster",
16 | APIVersion: "distributed-compute.dominodatalab.com/v1test1",
17 | },
18 | ObjectMeta: metav1.ObjectMeta{
19 | Name: "test-id",
20 | Namespace: "fake-ns",
21 | },
22 | Spec: dcv1alpha1.SparkClusterSpec{
23 | ScalableClusterConfig: dcv1alpha1.ScalableClusterConfig{
24 | ClusterConfig: dcv1alpha1.ClusterConfig{
25 | Image: &dcv1alpha1.OCIImageDefinition{
26 | Registry: "fake-reg",
27 | Repository: "fake-repo",
28 | Tag: "fake-tag",
29 | PullPolicy: corev1.PullIfNotPresent,
30 | },
31 | },
32 | },
33 | ClusterPort: 7077,
34 | MasterWebPort: 8080,
35 | WorkerWebPort: 8081,
36 | WorkerMemoryLimit: "4505m",
37 | Worker: dcv1alpha1.SparkClusterWorker{
38 | Replicas: pointer.Int32(5),
39 | },
40 | },
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/pkg/resources/spark/horizontalpodautoscaler.go:
--------------------------------------------------------------------------------
1 | package spark
2 |
3 | import (
4 | "fmt"
5 |
6 | autoscalingv2 "k8s.io/api/autoscaling/v2"
7 | corev1 "k8s.io/api/core/v1"
8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9 |
10 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
11 | )
12 |
13 | // NewHorizontalPodAutoscaler generates an HPA that targets a SparkCluster resource.
14 | //
15 | // The metrics-server needs to be launched separately and the worker deployment
16 | // requires cpu resource requests in order for this object to have any effect.
17 | func NewHorizontalPodAutoscaler(sc *dcv1alpha1.SparkCluster) (*autoscalingv2.HorizontalPodAutoscaler, error) {
18 | autoscaling := sc.Spec.Autoscaling
19 | if autoscaling == nil {
20 | return nil, fmt.Errorf("cannot build HPA without autoscaling config")
21 | }
22 |
23 | var behavior *autoscalingv2.HorizontalPodAutoscalerBehavior
24 | if autoscaling.ScaleDownStabilizationWindowSeconds != nil {
25 | behavior = &autoscalingv2.HorizontalPodAutoscalerBehavior{
26 | ScaleDown: &autoscalingv2.HPAScalingRules{
27 | StabilizationWindowSeconds: autoscaling.ScaleDownStabilizationWindowSeconds,
28 | },
29 | }
30 | }
31 |
32 | var metrics []autoscalingv2.MetricSpec
33 | if autoscaling.AverageCPUUtilization != nil {
34 | metrics = append(metrics, autoscalingv2.MetricSpec{
35 | Type: autoscalingv2.ResourceMetricSourceType,
36 | Resource: &autoscalingv2.ResourceMetricSource{
37 | Name: corev1.ResourceCPU,
38 | Target: autoscalingv2.MetricTarget{
39 | Type: autoscalingv2.UtilizationMetricType,
40 | AverageUtilization: autoscaling.AverageCPUUtilization,
41 | },
42 | },
43 | })
44 | }
45 | if autoscaling.AverageMemoryUtilization != nil {
46 | metrics = append(metrics, autoscalingv2.MetricSpec{
47 | Type: autoscalingv2.ResourceMetricSourceType,
48 | Resource: &autoscalingv2.ResourceMetricSource{
49 | Name: corev1.ResourceMemory,
50 | Target: autoscalingv2.MetricTarget{
51 | Type: autoscalingv2.UtilizationMetricType,
52 | AverageUtilization: autoscaling.AverageMemoryUtilization,
53 | },
54 | },
55 | })
56 | }
57 |
58 | hpa := &autoscalingv2.HorizontalPodAutoscaler{
59 | ObjectMeta: HorizontalPodAutoscalerObjectMeta(sc),
60 | Spec: autoscalingv2.HorizontalPodAutoscalerSpec{
61 | ScaleTargetRef: autoscalingv2.CrossVersionObjectReference{
62 | APIVersion: sc.APIVersion,
63 | Kind: sc.Kind,
64 | Name: sc.Name,
65 | },
66 | MinReplicas: autoscaling.MinReplicas,
67 | MaxReplicas: autoscaling.MaxReplicas,
68 | Metrics: metrics,
69 | Behavior: behavior,
70 | },
71 | }
72 |
73 | return hpa, nil
74 | }
75 |
76 | // HorizontalPodAutoscalerObjectMeta returns the ObjectMeta object used to identify new HPA objects.
77 | func HorizontalPodAutoscalerObjectMeta(sc *dcv1alpha1.SparkCluster) metav1.ObjectMeta {
78 | return metav1.ObjectMeta{
79 | Name: InstanceObjectName(sc.Name, ComponentNone),
80 | Namespace: sc.Namespace,
81 | Labels: AddGlobalLabels(MetadataLabels(sc), sc.Spec.GlobalLabels),
82 | }
83 | }
84 |
--------------------------------------------------------------------------------
/pkg/resources/spark/podsecuritypolicy.go:
--------------------------------------------------------------------------------
1 | package spark
2 |
3 | import (
4 | rbacv1 "k8s.io/api/rbac/v1"
5 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
6 |
7 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
8 | )
9 |
10 | var (
11 | policyAPIGroups = []string{"policy"}
12 | podSecurityPolicyResources = []string{"podsecuritypolicies"}
13 | useVerbs = []string{"use"}
14 | )
15 |
16 | // NewPodSecurityPolicyRBAC generates the role and role binding required to use a pod security policy.
17 | // The role is bound to the service account used by the spark cluster pods.
18 | func NewPodSecurityPolicyRBAC(sc *dcv1alpha1.SparkCluster) (*rbacv1.Role, *rbacv1.RoleBinding) {
19 | name := InstanceObjectName(sc.Name, ComponentNone)
20 |
21 | role := &rbacv1.Role{
22 | ObjectMeta: metav1.ObjectMeta{
23 | Name: name,
24 | Namespace: sc.Namespace,
25 | Labels: AddGlobalLabels(MetadataLabels(sc), sc.Spec.GlobalLabels),
26 | },
27 | Rules: []rbacv1.PolicyRule{
28 | {
29 | APIGroups: policyAPIGroups,
30 | Resources: podSecurityPolicyResources,
31 | Verbs: useVerbs,
32 | ResourceNames: []string{sc.Spec.PodSecurityPolicy},
33 | },
34 | },
35 | }
36 |
37 | binding := &rbacv1.RoleBinding{
38 | ObjectMeta: metav1.ObjectMeta{
39 | Name: name,
40 | Namespace: sc.Namespace,
41 | Labels: AddGlobalLabels(MetadataLabels(sc), sc.Spec.GlobalLabels),
42 | },
43 | RoleRef: rbacv1.RoleRef{
44 | APIGroup: rbacv1.GroupName,
45 | Kind: "Role",
46 | Name: role.Name,
47 | },
48 | Subjects: []rbacv1.Subject{
49 | {
50 | Kind: rbacv1.ServiceAccountKind,
51 | Name: InstanceObjectName(sc.Name, ComponentNone),
52 | Namespace: sc.Namespace,
53 | },
54 | },
55 | }
56 |
57 | return role, binding
58 | }
59 |
--------------------------------------------------------------------------------
/pkg/resources/spark/podsecuritypolicy_test.go:
--------------------------------------------------------------------------------
1 | package spark
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | rbacv1 "k8s.io/api/rbac/v1"
8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9 | )
10 |
11 | func TestNewPodSecurityPolicyRBAC(t *testing.T) {
12 | rc := sparkClusterFixture()
13 | rc.Spec.PodSecurityPolicy = "test-psp"
14 | role, roleBinding := NewPodSecurityPolicyRBAC(rc)
15 |
16 | t.Run("role", func(t *testing.T) {
17 | expected := &rbacv1.Role{
18 | ObjectMeta: metav1.ObjectMeta{
19 | Name: "test-id-spark",
20 | Namespace: "fake-ns",
21 | Labels: map[string]string{
22 | "app.kubernetes.io/name": "spark",
23 | "app.kubernetes.io/instance": "test-id",
24 | "app.kubernetes.io/version": "fake-tag",
25 | "app.kubernetes.io/managed-by": "distributed-compute-operator",
26 | },
27 | },
28 | Rules: []rbacv1.PolicyRule{
29 | {
30 | APIGroups: []string{"policy"},
31 | Resources: []string{"podsecuritypolicies"},
32 | Verbs: []string{"use"},
33 | ResourceNames: []string{"test-psp"},
34 | },
35 | },
36 | }
37 | assert.Equal(t, expected, role)
38 | })
39 |
40 | t.Run("role_binding", func(t *testing.T) {
41 | expected := &rbacv1.RoleBinding{
42 | ObjectMeta: metav1.ObjectMeta{
43 | Name: "test-id-spark",
44 | Namespace: "fake-ns",
45 | Labels: map[string]string{
46 | "app.kubernetes.io/name": "spark",
47 | "app.kubernetes.io/instance": "test-id",
48 | "app.kubernetes.io/version": "fake-tag",
49 | "app.kubernetes.io/managed-by": "distributed-compute-operator",
50 | },
51 | },
52 | RoleRef: rbacv1.RoleRef{
53 | APIGroup: "rbac.authorization.k8s.io",
54 | Kind: "Role",
55 | Name: "test-id-spark",
56 | },
57 | Subjects: []rbacv1.Subject{
58 | {
59 | Kind: "ServiceAccount",
60 | Name: "test-id-spark",
61 | Namespace: "fake-ns",
62 | },
63 | },
64 | }
65 | assert.Equal(t, expected, roleBinding)
66 | })
67 | }
68 |
--------------------------------------------------------------------------------
/pkg/resources/spark/serviceaccount.go:
--------------------------------------------------------------------------------
1 | package spark
2 |
3 | import (
4 | corev1 "k8s.io/api/core/v1"
5 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
6 | "k8s.io/utils/pointer"
7 |
8 | dcv1alpha1 "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
9 | )
10 |
11 | // NewServiceAccount generates a service account resource without API access.
12 | func NewServiceAccount(sc *dcv1alpha1.SparkCluster) *corev1.ServiceAccount {
13 | return &corev1.ServiceAccount{
14 | ObjectMeta: metav1.ObjectMeta{
15 | Name: InstanceObjectName(sc.Name, ComponentNone),
16 | Namespace: sc.Namespace,
17 | Labels: AddGlobalLabels(MetadataLabels(sc), sc.Spec.GlobalLabels),
18 | },
19 | AutomountServiceAccountToken: pointer.Bool(false),
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/pkg/resources/spark/serviceaccount_test.go:
--------------------------------------------------------------------------------
1 | package spark
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | corev1 "k8s.io/api/core/v1"
8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9 | "k8s.io/utils/pointer"
10 | )
11 |
12 | func TestNewServiceAccount(t *testing.T) {
13 | rc := sparkClusterFixture()
14 | sa := NewServiceAccount(rc)
15 |
16 | expected := &corev1.ServiceAccount{
17 | ObjectMeta: metav1.ObjectMeta{
18 | Name: "test-id-spark",
19 | Namespace: "fake-ns",
20 | Labels: map[string]string{
21 | "app.kubernetes.io/name": "spark",
22 | "app.kubernetes.io/instance": "test-id",
23 | "app.kubernetes.io/version": "fake-tag",
24 | "app.kubernetes.io/managed-by": "distributed-compute-operator",
25 | },
26 | },
27 | AutomountServiceAccountToken: pointer.Bool(false),
28 | }
29 | assert.Equal(t, expected, sa)
30 | }
31 |
--------------------------------------------------------------------------------
/pkg/resources/spark/spark_test.go:
--------------------------------------------------------------------------------
1 | package spark
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | )
8 |
9 | func TestHeadServiceName(t *testing.T) {
10 | actual := MasterServiceName("steve-o")
11 | assert.Equal(t, "steve-o-spark-master", actual)
12 | }
13 |
14 | func TestInstanceObjectName(t *testing.T) {
15 | t.Run("with_component", func(t *testing.T) {
16 | comp := Component("test")
17 | actual := InstanceObjectName("steve-o", comp)
18 | assert.Equal(t, "steve-o-spark-test", actual)
19 | })
20 |
21 | t.Run("component_none", func(t *testing.T) {
22 | actual := InstanceObjectName("steve-o", ComponentNone)
23 | assert.Equal(t, "steve-o-spark", actual)
24 | })
25 | }
26 |
27 | func TestMetadataLabels(t *testing.T) {
28 | rc := sparkClusterFixture()
29 | actual := MetadataLabels(rc)
30 |
31 | expected := map[string]string{
32 | "app.kubernetes.io/name": "spark",
33 | "app.kubernetes.io/instance": "test-id",
34 | "app.kubernetes.io/version": "fake-tag",
35 | "app.kubernetes.io/managed-by": "distributed-compute-operator",
36 | }
37 | assert.Equal(t, expected, actual)
38 | }
39 |
40 | func TestMetadataLabelsWithComponent(t *testing.T) {
41 | rc := sparkClusterFixture()
42 | actual := MetadataLabelsWithComponent(rc, Component("something"))
43 |
44 | expected := map[string]string{
45 | "app.kubernetes.io/name": "spark",
46 | "app.kubernetes.io/instance": "test-id",
47 | "app.kubernetes.io/version": "fake-tag",
48 | "app.kubernetes.io/managed-by": "distributed-compute-operator",
49 | "app.kubernetes.io/component": "something",
50 | }
51 | assert.Equal(t, expected, actual)
52 | }
53 |
54 | func TestSelectorLabels(t *testing.T) {
55 | rc := sparkClusterFixture()
56 | actual := SelectorLabels(rc)
57 |
58 | expected := map[string]string{
59 | "app.kubernetes.io/name": "spark",
60 | "app.kubernetes.io/instance": "test-id",
61 | }
62 | assert.Equal(t, expected, actual)
63 | }
64 |
65 | func TestSelectorLabelsWithComponent(t *testing.T) {
66 | rc := sparkClusterFixture()
67 | actual := SelectorLabelsWithComponent(rc, Component("something"))
68 |
69 | expected := map[string]string{
70 | "app.kubernetes.io/name": "spark",
71 | "app.kubernetes.io/instance": "test-id",
72 | "app.kubernetes.io/component": "something",
73 | }
74 | assert.Equal(t, expected, actual)
75 | }
76 |
77 | func TestFrameworkConfigMapName(t *testing.T) {
78 | rc := sparkClusterFixture()
79 | actual := FrameworkConfigMapName(rc.Name, Component("something"))
80 |
81 | expected := "test-id-framework-spark-something"
82 |
83 | assert.Equal(t, expected, actual)
84 | }
85 |
86 | func TestKeyTabConfigMapName(t *testing.T) {
87 | rc := sparkClusterFixture()
88 | actual := KeyTabConfigMapName(rc.Name, Component("something"))
89 |
90 | expected := "test-id-keytab-spark-something"
91 |
92 | assert.Equal(t, expected, actual)
93 | }
94 |
--------------------------------------------------------------------------------
/pkg/util/util.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import (
4 | "fmt"
5 | "strconv"
6 |
7 | "github.com/distribution/reference"
8 |
9 | "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
10 | )
11 |
12 | // IntsToStrings converts an integer slice into a string slice.
13 | func IntsToStrings(is []int32) (ss []string) {
14 | for _, i := range is {
15 | ss = append(ss, strconv.Itoa(int(i)))
16 | }
17 | return
18 | }
19 |
20 | // MergeStringMaps copies every key/value pair from src into dst, overwriting duplicate keys, and returns the modified dst map.
21 | func MergeStringMaps(src, dst map[string]string) map[string]string {
22 | for k, v := range src {
23 | dst[k] = v
24 | }
25 | return dst
26 | }
27 |
28 | // ParseImageDefinition generates a fully-qualified image reference to an OCI image. The reference is
29 | // normalized (a missing tag defaults to "latest"), and an error is returned when the definition is invalid.
30 | func ParseImageDefinition(def *v1alpha1.OCIImageDefinition) (string, error) {
31 | ref := def.Repository
32 |
33 | if def.Registry != "" {
34 | ref = fmt.Sprintf("%s/%s", def.Registry, ref)
35 | }
36 | if def.Tag != "" {
37 | ref = fmt.Sprintf("%s:%s", ref, def.Tag)
38 | }
39 |
40 | named, err := reference.ParseNormalizedNamed(ref)
41 | if err != nil {
42 | return "", fmt.Errorf("invalid OCIImageDefinition: %w", err)
43 | }
44 | named = reference.TagNameOnly(named)
45 |
46 | return named.String(), nil
47 | }
48 |
49 | // BoolPtrIsTrue returns true when the bool pointer is non-nil and points to true;
50 | // it returns false when the pointer is nil or points to false.
51 | func BoolPtrIsTrue(ptr *bool) bool {
52 | return ptr != nil && *ptr
53 | }
54 |
55 | // BoolPtrIsNilOrFalse returns true when the bool pointer is nil or points to false;
56 | // otherwise it returns false.
57 | func BoolPtrIsNilOrFalse(ptr *bool) bool {
58 | return ptr == nil || !*ptr
59 | }
60 |
61 | // GetIndexFromSlice returns the index of a specific string in a slice or -1 if the value is not present.
62 | func GetIndexFromSlice(s []string, match string) int {
63 | for index, val := range s {
64 | if val == match {
65 | return index
66 | }
67 | }
68 | return -1
69 | }
70 |
71 | // RemoveFromSlice removes index i from slice s. Does not maintain order of the original slice.
72 | // https://stackoverflow.com/a/37335777/13979167
73 | func RemoveFromSlice(s []string, i int) []string {
74 | if i >= 0 && i < len(s) {
75 | s[len(s)-1], s[i] = s[i], s[len(s)-1]
76 | return s[:len(s)-1]
77 | }
78 | return s
79 | }
80 |
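As a quick illustration of how ParseImageDefinition normalizes its input, here is a minimal Go sketch (not part of the repository); the expected output assumes the Docker-style defaulting performed by github.com/distribution/reference, where a bare repository resolves to the default registry and an untagged reference receives a "latest" tag:

package util_test

import (
    "fmt"

    "github.com/dominodatalab/distributed-compute-operator/api/v1alpha1"
    "github.com/dominodatalab/distributed-compute-operator/pkg/util"
)

func ExampleParseImageDefinition() {
    // A bare repository picks up the default registry and a "latest" tag.
    ref, _ := util.ParseImageDefinition(&v1alpha1.OCIImageDefinition{
        Repository: "spark",
    })
    fmt.Println(ref)

    // Explicit registry and tag values are used verbatim.
    ref, _ = util.ParseImageDefinition(&v1alpha1.OCIImageDefinition{
        Registry:   "quay.io",
        Repository: "domino/spark",
        Tag:        "v1.0.0",
    })
    fmt.Println(ref)

    // Output:
    // docker.io/library/spark:latest
    // quay.io/domino/spark:v1.0.0
}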
--------------------------------------------------------------------------------
/scripts/hotpatch.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Builds a development image of the operator and hot-patches it into the current cluster via Helm.
4 |
5 | set -euo pipefail
6 |
7 | IMAGE_NAME=${IMAGE_NAME:-"quay.io/domino/distributed-compute-operator"}
8 | IMAGE_TAG_PREFIX=${IMAGE_TAG_PREFIX:-"dev-"}
9 | latest_tag="$IMAGE_TAG_PREFIX$(date +%s)"
10 | image="$IMAGE_NAME:$latest_tag"
11 |
12 | make manifests generate docker-build IMG="$image"
13 |
14 | # target the namespace whose name ends with "-compute"
15 | COMPUTE_NAMESPACE=$(kubectl get namespaces -ojson | jq -rc '.items[] | select(.metadata.name | endswith("-compute")) | .metadata.name')
16 | readonly COMPUTE_NAMESPACE
17 |
18 | docker push "$image"
19 |
20 | # derive the chart's registry/repository values from IMAGE_NAME so overrides stay consistent
21 | helm upgrade \
22 | distributed-compute-operator \
23 | deploy/helm/distributed-compute-operator \
24 | --install \
25 | -n "$COMPUTE_NAMESPACE" \
26 | --set image.registry="${IMAGE_NAME%%/*}" \
27 | --set image.repository="${IMAGE_NAME#*/}" \
28 | --set image.tag="$latest_tag" \
29 | --set config.logDevelopmentMode=true \
30 | --set istio.enabled=true \
31 | --set istio.cniPluginInstalled=true \
32 | --set networkPolicy.enabled=true
33 |
--------------------------------------------------------------------------------
/scripts/release/before-hook.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Performs actions required prior to a release.
4 |
5 | set -eu
6 |
7 | # ensure dependencies are in-sync prior to builds
8 | go mod tidy
9 |
10 | # ensure crds are up-to-date
11 | make manifests
12 |
13 | # copy crds into a known location for goreleaser
14 | dir=custom-resource-definitions
15 | mkdir -p $dir
16 | cp config/crd/bases/*.yaml $dir
17 |
--------------------------------------------------------------------------------
/scripts/release/helm.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Functions used to log into Helm registries and to package/push the project chart.
4 |
5 | set -euo pipefail
6 |
7 | HELM_BIN=${HELM_BIN:-helm}
8 |
9 | function dco::helm::login() {
10 | local registry="$1"
11 | local username="$2"
12 | local password="$3"
13 | local namespace="$4"
14 |
15 | echo "$password" | $HELM_BIN registry login "$registry" \
16 | --namespace "$namespace" \
17 | --username "$username" \
18 | --password-stdin
19 | }
20 |
21 | function dco::helm::push() {
22 | local registry=$1
23 | local version=$2
24 | local semantic_version
25 | local chart_path
26 |
27 | if [[ $version =~ ^(pr-[[:digit:]]+|main)$ ]]; then
28 | semantic_version="0.0.0-$version"
29 | else
30 | semantic_version=$version
31 | fi
32 |
33 | $HELM_BIN package deploy/helm/distributed-compute-operator \
34 | --destination chart-archives \
35 | --app-version "$version" \
36 | --version "$semantic_version"
37 |
38 | chart_path="chart-archives/distributed-compute-operator-$semantic_version.tgz"
39 |
40 | $HELM_BIN push "$chart_path" oci://"$registry"
41 |
42 | rm -rf chart-archives/
43 | }
44 |
45 | function dco::helm::main() {
46 | local command=$1
47 | shift
48 |
49 | case $command in
50 | login)
51 | local host=""
52 | local username=""
53 | local password=""
54 | local namespace=""
55 | local usage
56 |
57 | usage="usage: $(basename "$0") login -h HOST -u USERNAME -p PASSWORD [-n NAMESPACE]"
58 | while getopts h:u:p:n: opt; do
59 | case $opt in
60 | h)
61 | host=$OPTARG
62 | ;;
63 | u)
64 | username=$OPTARG
65 | ;;
66 | p)
67 | password=$OPTARG
68 | ;;
69 | n)
70 | namespace=$OPTARG
71 | ;;
72 | *)
73 | echo "$usage"
74 | exit 1
75 | esac
76 | done
77 | shift $((OPTIND -1))
78 |
79 | if [[ -z $host ]] || [[ -z $username ]] || [[ -z $password ]]; then
80 | echo "$usage"
81 | exit 1
82 | fi
83 |
84 | dco::helm::login "$host" "$username" "$password" "$namespace"
85 | ;;
86 | push)
87 | local registry=""
88 | local version=""
89 | local usage
90 |
91 | usage="usage: $(basename "$0") push -r REGISTRY -v VERSION"
92 | while getopts r:v: opt; do
93 | case $opt in
94 | r)
95 | registry=$OPTARG
96 | ;;
97 | v)
98 | version=$OPTARG
99 | ;;
100 | *)
101 | echo "$usage"
102 | exit 1
103 | esac
104 | done
105 | shift $((OPTIND -1))
106 |
107 | if [[ -z $registry ]] || [[ -z $version ]]; then
108 | echo "$usage"
109 | exit 1
110 | fi
111 |
112 | dco::helm::push "$registry" "$version"
113 | ;;
114 | ""|help)
115 | echo
116 | echo "Usage: $(basename "$0") COMMAND ARGS"
117 | echo
118 | echo "Commands:"
119 | echo " login Authenticate with remote registry"
120 | echo " push Build and upload chart to a remote registry"
121 | echo " help Display usage"
122 | exit 1
123 | esac
124 | }
125 |
126 | if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
127 | dco::helm::main "${@:-""}"
128 | fi
129 |
--------------------------------------------------------------------------------
/test/test.go:
--------------------------------------------------------------------------------
1 | package test
2 |
3 | import (
4 | "path/filepath"
5 | "runtime"
6 | )
7 |
8 | // MissingAssetsWarning is a hint as to why an envtest environment will not start.
9 | const MissingAssetsWarning = "Ensure required testing binaries are present by running `make test-assets`"
10 |
11 | // KubebuilderBinaryAssetsDir returns a path where control plane binaries required by envtest should be installed.
12 | // TODO: figure out whether to remove this or update it; it no longer works.
13 | func KubebuilderBinaryAssetsDir() string {
14 | _, b, _, _ := runtime.Caller(0)
15 | return filepath.Join(filepath.Dir(b), "..", "testbin", "bin")
16 | }
17 |
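For orientation, a minimal sketch (not taken from the repository, and hypothetical in its package and file placement) of how a test suite might consume these helpers with controller-runtime's envtest via the conventional KUBEBUILDER_ASSETS environment variable; note the TODO above, which warns that KubebuilderBinaryAssetsDir may be stale:

package controllers_test

import (
    "fmt"
    "os"
    "testing"

    "sigs.k8s.io/controller-runtime/pkg/envtest"

    "github.com/dominodatalab/distributed-compute-operator/test"
)

func TestMain(m *testing.M) {
    // Point envtest at locally installed control-plane binaries unless the
    // caller has already provided a location.
    if os.Getenv("KUBEBUILDER_ASSETS") == "" {
        _ = os.Setenv("KUBEBUILDER_ASSETS", test.KubebuilderBinaryAssetsDir())
    }

    env := &envtest.Environment{}
    if _, err := env.Start(); err != nil {
        // Surface the hint explaining why the environment failed to start.
        fmt.Fprintf(os.Stderr, "envtest failed to start: %v. %s\n", err, test.MissingAssetsWarning)
        os.Exit(1)
    }

    code := m.Run()
    _ = env.Stop()
    os.Exit(code)
}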
--------------------------------------------------------------------------------