├── test ├── test_helper.exs ├── ballast_test.exs ├── ballast │ ├── config_test.exs │ ├── kube │ │ ├── node_test.exs │ │ └── eviction_test.exs │ ├── evictor_test.exs │ ├── node_pool_test.exs │ ├── pool_policy │ │ ├── changeset_test.exs │ │ └── managed_pool_test.exs │ ├── controllers │ │ └── v1 │ │ │ ├── eviction_policy_test.exs │ │ │ └── pool_policy_test.exs │ ├── pool_policy_test.exs │ └── node_pool │ │ └── adapters │ │ └── gke_test.exs └── support │ ├── resource.yaml │ ├── node_pool_list.json │ └── docker-for-desktop.yaml ├── manifests ├── base │ ├── service_account.yaml │ ├── service.yaml │ ├── cluster_role_binding.yaml │ ├── kustomization.yaml │ ├── cluster_role.yaml │ ├── custom_resource_definition.yaml │ └── deployment.yaml └── ingress.yaml ├── .formatter.exs ├── config ├── prod.exs ├── dev.exs ├── test.exs └── config.exs ├── coveralls.json ├── lib ├── ballast │ ├── kube.ex │ ├── application.ex │ ├── node_pool │ │ ├── adapter.ex │ │ └── adapters │ │ │ ├── mock.ex │ │ │ └── gke.ex │ ├── sys │ │ ├── logger.ex │ │ ├── metrics.ex │ │ └── instrumentation.ex │ ├── controllers │ │ └── v1 │ │ │ ├── eviction_policy.ex │ │ │ └── pool_policy.ex │ ├── pool_policy │ │ ├── cooldown_cache.ex │ │ ├── managed_pool.ex │ │ └── changeset.ex │ ├── kube │ │ ├── eviction.ex │ │ └── node.ex │ ├── config.ex │ ├── pool_policy.ex │ ├── evictor.ex │ └── node_pool.ex └── ballast.ex ├── .dialyzer_ignore.exs ├── .dockerignore ├── examples ├── evictionpolicy-all.yaml └── evictionpolicy-unpreferred.yaml ├── terraform ├── ballast-poolpolicy.tpl.yaml ├── variables.tf ├── output.tf └── main.tf ├── Dockerfile ├── .gitignore ├── test-scale-up.yaml ├── mix.exs ├── Makefile ├── .credo.exs ├── README.md └── mix.lock /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | ExUnit.start() 2 | -------------------------------------------------------------------------------- /manifests/base/service_account.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: ballast 5 | -------------------------------------------------------------------------------- /test/ballast_test.exs: -------------------------------------------------------------------------------- 1 | defmodule BallastTest do 2 | use ExUnit.Case, async: true 3 | doctest Ballast 4 | end 5 | -------------------------------------------------------------------------------- /test/ballast/config_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Ballast.ConfigTest do 2 | use ExUnit.Case, async: true 3 | doctest Ballast.Config 4 | end 5 | -------------------------------------------------------------------------------- /test/ballast/kube/node_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Ballast.Kube.NodeTest do 2 | use ExUnit.Case, async: true 3 | doctest Ballast.Kube.Node 4 | end 5 | -------------------------------------------------------------------------------- /.formatter.exs: -------------------------------------------------------------------------------- 1 | # Used by "mix format" 2 | [ 3 | inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"], 4 | line_length: 120 5 | ] 6 | -------------------------------------------------------------------------------- /test/ballast/evictor_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Ballast.EvictorTest 
do 2 | @moduledoc false 3 | use ExUnit.Case, async: true 4 | doctest Ballast.Evictor 5 | end 6 | -------------------------------------------------------------------------------- /test/ballast/node_pool_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Ballast.NodePoolTest do 2 | @moduledoc false 3 | use ExUnit.Case, async: true 4 | doctest Ballast.NodePool 5 | end 6 | -------------------------------------------------------------------------------- /config/prod.exs: -------------------------------------------------------------------------------- 1 | use Mix.Config 2 | 3 | config :k8s, 4 | clusters: %{ 5 | # An empty config defaults to using pod.spec.serviceAccountName 6 | default: %{} 7 | } 8 | -------------------------------------------------------------------------------- /test/ballast/kube/eviction_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Ballast.Kube.EvictionTest do 2 | @moduledoc false 3 | use ExUnit.Case, async: true 4 | doctest Ballast.Kube.Eviction 5 | end 6 | -------------------------------------------------------------------------------- /config/dev.exs: -------------------------------------------------------------------------------- 1 | use Mix.Config 2 | 3 | config :logger, level: :debug 4 | 5 | config :k8s, 6 | clusters: %{ 7 | default: %{ 8 | conf: "~/.kube/config" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /coveralls.json: -------------------------------------------------------------------------------- 1 | { 2 | "skip_files": [ 3 | "lib/ballast/node_pool/adapters/mock.ex" 4 | ], 5 | "default_stop_words": [ 6 | "defevent", 7 | "defdelegate" 8 | ] 9 | } 10 | -------------------------------------------------------------------------------- /lib/ballast/kube.ex: -------------------------------------------------------------------------------- 1 | defmodule Ballast.Kube do 2 | @moduledoc """ 3 | Abstractions around the kubernetes resources and the [`k8s`](https://hexdocs.pm/k8s/readme.html) library. 
4 | """ 5 | end 6 | -------------------------------------------------------------------------------- /test/ballast/pool_policy/changeset_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Ballast.PoolPolicy.ChangesetTest do 2 | @moduledoc false 3 | use ExUnit.Case, async: true 4 | doctest Ballast.PoolPolicy.Changeset 5 | end 6 | -------------------------------------------------------------------------------- /manifests/base/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: ballast-metrics 5 | spec: 6 | type: NodePort 7 | ports: 8 | - port: 9323 9 | targetPort: metrics 10 | -------------------------------------------------------------------------------- /.dialyzer_ignore.exs: -------------------------------------------------------------------------------- 1 | [ 2 | {"lib/ballast/controllers/v1/pool_policy.ex"}, 3 | {"lib/ballast/controllers/v1/eviction_policy.ex"}, 4 | ~r/ballast\.ex.*pattern_match_cov.*pattern variable _error@1 can never match/, 5 | ] 6 | -------------------------------------------------------------------------------- /manifests/ingress.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Ingress 3 | metadata: 4 | name: ballast-metrics 5 | namespace: default 6 | spec: 7 | backend: 8 | serviceName: ballast-metrics 9 | servicePort: 9323 10 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | _build 2 | .elixir_ls 3 | cover 4 | config/dev.exs 5 | config/test.exs 6 | examples 7 | manifests 8 | terraform 9 | deps 10 | doc 11 | test 12 | .* 13 | coveralls.json 14 | Makefile 15 | local.Makefile 16 | erl_crash.dump 17 | *.md 18 | *.yaml 19 | -------------------------------------------------------------------------------- /config/test.exs: -------------------------------------------------------------------------------- 1 | use Mix.Config 2 | 3 | config :ballast, node_pool_adapter: Ballast.NodePool.Adapters.Mock 4 | 5 | config :k8s, 6 | clusters: %{ 7 | default: %{ 8 | conf: "test/support/docker-for-desktop.yaml", 9 | conf_opts: [context: "docker-for-desktop"] 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /examples/evictionpolicy-all.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: ballast.bonny.run/v1 2 | kind: EvictionPolicy 3 | metadata: 4 | name: all-nginx 5 | spec: 6 | mode: all # unpreferred; evict off all nodes or only unpreferred nodes 7 | maxLifetime: 30 # in seconds 8 | selector: 9 | matchLabels: 10 | app: nginx -------------------------------------------------------------------------------- /manifests/base/cluster_role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: ballast 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: ballast 9 | subjects: 10 | - kind: ServiceAccount 11 | name: ballast 12 | -------------------------------------------------------------------------------- /manifests/base/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 
kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: default 4 | commonLabels: 5 | k8s-app: ballast 6 | resources: 7 | - custom_resource_definition.yaml 8 | - cluster_role.yaml 9 | - cluster_role_binding.yaml 10 | - service_account.yaml 11 | - deployment.yaml 12 | - service.yaml 13 | -------------------------------------------------------------------------------- /test/support/resource.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: ballast.bonny.run/v1 2 | kind: "PoolPolicy" 3 | metadata: 4 | name: example-policy 5 | spec: 6 | projectId: my-project 7 | location: my-source-region-or-zone 8 | clusterName: my-cluster 9 | poolName: my-source-pool 10 | cooldownSeconds: 60 11 | managedPools: 12 | - poolName: my-managed-pool 13 | minimumInstances: 1 14 | minimumPercent: 30 15 | location: my-managed-pool-region-or-zone 16 | -------------------------------------------------------------------------------- /config/config.exs: -------------------------------------------------------------------------------- 1 | # This file is responsible for configuring your application 2 | # and its dependencies with the aid of the Mix.Config module. 3 | use Mix.Config 4 | 5 | config :logger, level: :info 6 | 7 | config :bonny, 8 | reconcile_every: 30 * 1000, 9 | reconcile_batch_size: 10, 10 | watch_timeout: 60 * 1000, 11 | controllers: [Ballast.Controller.V1.PoolPolicy, Ballast.Controller.V1.EvictionPolicy] 12 | 13 | import_config "#{Mix.env()}.exs" 14 | -------------------------------------------------------------------------------- /examples/evictionpolicy-unpreferred.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: ballast.bonny.run/v1 2 | kind: EvictionPolicy 3 | metadata: 4 | name: unpreferred-nodes-nginx 5 | spec: 6 | mode: unpreferred # unpreferred; evict off all nodes or only unpreferred nodes 7 | maxLifetime: 30 # in seconds 8 | selector: 9 | matchLabels: 10 | app: nginx 11 | # matchExpressions: 12 | # - {key: tier, operator: In, values: [cache]} 13 | # - {key: environment, operator: NotIn, values: [dev]} -------------------------------------------------------------------------------- /terraform/ballast-poolpolicy.tpl.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: ballast.bonny.run/v1 2 | kind: PoolPolicy 3 | metadata: 4 | name: ballast-example 5 | spec: 6 | projectId: ${project} 7 | location: ${location} 8 | clusterName: ${cluster} 9 | poolName: ${source_pool} 10 | cooldownSeconds: 30 11 | managedPools: 12 | - poolName: ${managed_pool_1} 13 | minimumInstances: 1 14 | minimumPercent: 25 15 | location: ${location} 16 | - poolName: ${managed_pool_2} 17 | minimumInstances: 1 18 | minimumPercent: 25 19 | location: ${location} 20 | -------------------------------------------------------------------------------- /terraform/variables.tf: -------------------------------------------------------------------------------- 1 | variable "gcp_location" { 2 | description = "GCP Region or Zone" 3 | default = "us-central1" 4 | #default = "us-central1-a" 5 | } 6 | 7 | variable "gcp_project" { 8 | description = "GCP Project ID" 9 | } 10 | 11 | variable "gke_cluster_name" { 12 | description = "GKE Cluster Name" 13 | default = "ballast" 14 | } 15 | 16 | variable "gcp_credentials_path" { 17 | description = "Path to GCP credentials JSON" 18 | } 19 | 20 | variable "gcp_on_demand_max_nodes" { 21 | default = 5 22 | } 23 | 24 | variable 
"gcp_preemptible_max_nodes" { 25 | default = 5 26 | } 27 | -------------------------------------------------------------------------------- /lib/ballast/application.ex: -------------------------------------------------------------------------------- 1 | defmodule Ballast.Application do 2 | @moduledoc false 3 | 4 | use Application 5 | 6 | @spec start(any(), any()) :: {:error, any()} | {:ok, pid()} 7 | def start(_type, _args) do 8 | metrics = Ballast.Sys.Metrics.setup() 9 | TelemetryMetricsPrometheus.init(metrics, port: Ballast.Config.metrics_port()) 10 | 11 | enable_debugging = Ballast.Config.debugging_enabled?() 12 | Ballast.Sys.Logger.attach(enable_debugging) 13 | 14 | children = [ 15 | {Ballast.PoolPolicy.CooldownCache, []} 16 | ] 17 | 18 | opts = [strategy: :one_for_one, name: Ballast.Supervisor] 19 | Supervisor.start_link(children, opts) 20 | end 21 | end 22 | -------------------------------------------------------------------------------- /terraform/output.tf: -------------------------------------------------------------------------------- 1 | output "gke_auth_command" { 2 | value = "gcloud container clusters get-credentials ${google_container_cluster.main.name} --location ${google_container_cluster.main.location}" 3 | } 4 | 5 | output "gke_cluster_name" { 6 | value = "${google_container_cluster.main.name}" 7 | } 8 | 9 | output "gke_preemptible_pool" { 10 | value = "${google_container_node_pool.pvm-n1-1.name}" 11 | } 12 | 13 | output "gke_on_demand_pool_autoscaling" { 14 | value = "${google_container_node_pool.od-n1-1.name}" 15 | } 16 | 17 | output "gke_on_demand_pool_fixed" { 18 | value = "${google_container_node_pool.pvm-n1-2.name}" 19 | } 20 | 21 | output "example_poolpolicy_yaml" { 22 | value = "${local_file.poolpolicy-yaml.filename}" 23 | } -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ######################### 2 | ###### Build Image ###### 3 | ######################### 4 | 5 | FROM bitwalker/alpine-elixir:1.9.1 as builder 6 | 7 | ENV MIX_ENV=prod \ 8 | MIX_HOME=/opt/mix \ 9 | HEX_HOME=/opt/hex 10 | 11 | RUN mix local.hex --force && \ 12 | mix local.rebar --force 13 | 14 | WORKDIR /app 15 | 16 | COPY . . 
17 | 18 | RUN mix deps.get --only prod && mix release 19 | 20 | ######################### 21 | ##### Release Image ##### 22 | ######################### 23 | 24 | FROM alpine:3.10 25 | 26 | RUN apk add --update openssl ncurses 27 | 28 | WORKDIR /app 29 | COPY --from=builder /app/_build/prod/rel/ballast ./ 30 | RUN chown -R nobody: /app 31 | 32 | ENTRYPOINT ["/app/bin/ballast"] 33 | CMD ["start"] 34 | -------------------------------------------------------------------------------- /manifests/base/cluster_role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: ballast 5 | rules: 6 | - apiGroups: 7 | - apiextensions.k8s.io 8 | resources: 9 | - customresourcedefinitions 10 | verbs: 11 | - "*" 12 | - apiGroups: 13 | - ballast.bonny.run 14 | resources: 15 | - poolpolicies 16 | verbs: 17 | - "*" 18 | - apiGroups: 19 | - ballast.bonny.run 20 | resources: 21 | - evictionpolicies 22 | verbs: 23 | - "*" 24 | - apiGroups: 25 | - '' 26 | resources: 27 | - nodes 28 | verbs: 29 | - list 30 | - apiGroups: 31 | - '' 32 | resources: 33 | - pods 34 | verbs: 35 | - list 36 | - apiGroups: 37 | - '' 38 | resources: 39 | - pods/eviction 40 | verbs: 41 | - create 42 | -------------------------------------------------------------------------------- /manifests/base/custom_resource_definition.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apiextensions.k8s.io/v1beta1 3 | kind: CustomResourceDefinition 4 | metadata: 5 | name: poolpolicies.ballast.bonny.run 6 | spec: 7 | group: ballast.bonny.run 8 | names: 9 | kind: PoolPolicy 10 | plural: poolpolicies 11 | shortNames: 12 | - pp 13 | singular: poolpolicy 14 | scope: Cluster 15 | version: v1 16 | --- 17 | apiVersion: apiextensions.k8s.io/v1beta1 18 | kind: CustomResourceDefinition 19 | metadata: 20 | name: evictionpolicies.ballast.bonny.run 21 | spec: 22 | group: ballast.bonny.run 23 | names: 24 | kind: EvictionPolicy 25 | plural: evictionpolicies 26 | shortNames: 27 | - evp 28 | singular: evictionpolicy 29 | scope: Cluster 30 | version: v1 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build/ 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover/ 6 | 7 | # The directory Mix downloads your dependencies sources to. 8 | /deps/ 9 | 10 | # Where third-party dependencies like ExDoc output generated docs. 11 | /doc/ 12 | 13 | # Ignore .fetch files in case you like to edit your project deps locally. 14 | /.fetch 15 | 16 | # If the VM crashes, it generates a dump, let's ignore it too. 17 | erl_crash.dump 18 | 19 | # Also ignore archive artifacts (built via "mix archive.build"). 20 | *.ez 21 | 22 | # Ignore package tarball (built via "mix hex.build"). 
23 | ballast-*.tar 24 | 25 | .elixir_ls 26 | 27 | # Terraform 28 | .terraform 29 | terraform.tfstate 30 | terraform.tfstate.backup 31 | terraform.tfvars 32 | .terraform.tfstate.lock.info 33 | terraform/ballast-poolpolicy.yaml 34 | 35 | # Ballast 36 | manifest.yaml 37 | env 38 | .iex.exs 39 | -------------------------------------------------------------------------------- /lib/ballast.ex: -------------------------------------------------------------------------------- 1 | defmodule Ballast do 2 | @moduledoc """ 3 | Documentation for Ballast. 4 | """ 5 | 6 | @scopes [ 7 | "https://www.googleapis.com/auth/compute", 8 | "https://www.googleapis.com/auth/cloud-platform" 9 | ] 10 | 11 | @scopes Enum.join(@scopes, " ") 12 | alias Ballast.Sys.Instrumentation, as: Inst 13 | 14 | @spec conn() :: {:ok, Tesla.Client.t()} | {:error, any()} 15 | @doc false 16 | def conn() do 17 | {duration, response} = :timer.tc(Goth.Token, :for_scope, [@scopes]) 18 | measurements = %{duration: duration} 19 | metadata = %{provider: "gke"} 20 | 21 | case response do 22 | {:ok, tkn} -> 23 | Inst.provider_authentication_succeeded(measurements, metadata) 24 | {:ok, GoogleApi.Container.V1.Connection.new(tkn.token)} 25 | 26 | {:error, error} -> 27 | Inst.provider_authentication_failed(measurements, metadata) 28 | {:error, error} 29 | end 30 | end 31 | end 32 | -------------------------------------------------------------------------------- /test/ballast/controllers/v1/eviction_policy_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Ballast.Controller.V1.EvictionPolicyTest do 2 | @moduledoc false 3 | use ExUnit.Case, async: false 4 | alias Ballast.Controller.V1.EvictionPolicy 5 | 6 | describe "add/1" do 7 | test "returns :ok" do 8 | event = %{} 9 | result = EvictionPolicy.add(event) 10 | assert result == :ok 11 | end 12 | end 13 | 14 | describe "modify/1" do 15 | test "returns :ok" do 16 | event = %{} 17 | result = EvictionPolicy.modify(event) 18 | assert result == :ok 19 | end 20 | end 21 | 22 | describe "delete/1" do 23 | test "returns :ok" do 24 | event = %{} 25 | result = EvictionPolicy.delete(event) 26 | assert result == :ok 27 | end 28 | end 29 | 30 | describe "reconcile/1" do 31 | test "returns :ok" do 32 | event = %{} 33 | result = EvictionPolicy.reconcile(event) 34 | assert result == :ok 35 | end 36 | end 37 | end 38 | -------------------------------------------------------------------------------- /test/support/node_pool_list.json: -------------------------------------------------------------------------------- 1 | { 2 | "nodePools": [ 3 | { 4 | "autoscaling": null, 5 | "initialNodeCount": 1, 6 | "instanceGroupUrls": [ 7 | "https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-a/instanceGroupManagers/gke-demo-demo-on-demand" 8 | ], 9 | "name": "demo-on-demand", 10 | "selfLink": "https://container.googleapis.com/v1/projects/my-project/zones/us-central1-a/clusters/demo/nodePools/demo-on-demand", 11 | "status": "RUNNING" 12 | }, 13 | { 14 | "autoscaling": { 15 | "enabled": true, 16 | "maxNodeCount": 5, 17 | "minNodeCount": 3 18 | }, 19 | "initialNodeCount": 1, 20 | "instanceGroupUrls": [ 21 | "https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-a/instanceGroupManagers/gke-demo-demo-preemptible" 22 | ], 23 | "name": "demo-preemptible", 24 | "selfLink": "https://container.googleapis.com/v1/projects/my-project/zones/us-central1-a/clusters/demo/nodePools/demo-preemptible", 25 | "status": "RUNNING" 26 | } 27 | ] 28 | } 29 | 
-------------------------------------------------------------------------------- /test/ballast/controllers/v1/pool_policy_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Ballast.Controller.V1.PoolPolicyTest do 2 | @moduledoc false 3 | use ExUnit.Case, async: true 4 | alias Ballast.Controller.V1.PoolPolicy 5 | 6 | @spec make_resource() :: map() 7 | defp make_resource() do 8 | YamlElixir.read_from_file!("test/support/resource.yaml") 9 | end 10 | 11 | describe "add/1" do 12 | test "returns :ok" do 13 | event = make_resource() 14 | result = PoolPolicy.add(event) 15 | assert result == :ok 16 | end 17 | end 18 | 19 | describe "modify/1" do 20 | test "returns :ok" do 21 | event = make_resource() 22 | result = PoolPolicy.modify(event) 23 | assert result == :ok 24 | end 25 | end 26 | 27 | describe "delete/1" do 28 | test "returns :ok" do 29 | event = make_resource() 30 | result = PoolPolicy.delete(event) 31 | assert result == :ok 32 | end 33 | end 34 | 35 | describe "reconcile/1" do 36 | test "returns :ok" do 37 | event = make_resource() 38 | result = PoolPolicy.reconcile(event) 39 | assert result == :ok 40 | end 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /test-scale-up.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: nginx-deployment 5 | spec: 6 | selector: 7 | matchLabels: 8 | app: nginx 9 | replicas: 10 10 | template: 11 | metadata: 12 | labels: 13 | app: nginx 14 | spec: 15 | affinity: 16 | nodeAffinity: 17 | requiredDuringSchedulingIgnoredDuringExecution: 18 | nodeSelectorTerms: 19 | - matchExpressions: 20 | - key: node-group 21 | operator: In 22 | values: 23 | - ballast-example-group 24 | preferredDuringSchedulingIgnoredDuringExecution: 25 | - weight: 1 26 | preference: 27 | matchExpressions: 28 | - key: node-type 29 | operator: In 30 | values: 31 | - preemptible 32 | containers: 33 | - name: nginx 34 | image: nginx:1.7.9 35 | ports: 36 | - containerPort: 80 37 | resources: 38 | limits: 39 | cpu: 50m 40 | memory: 50Mi 41 | requests: 42 | cpu: 25m 43 | memory: 25Mi 44 | -------------------------------------------------------------------------------- /lib/ballast/node_pool/adapter.ex: -------------------------------------------------------------------------------- 1 | defmodule Ballast.NodePool.Adapters do 2 | @moduledoc """ 3 | `NodePool` adapter for getting node pool metadata. 4 | """ 5 | 6 | @doc """ 7 | Returns the cloud provider specific unique ID for the node pool 8 | """ 9 | @callback id(Ballast.NodePool.t()) :: binary 10 | 11 | @doc """ 12 | Populates a `NodePool` with the current instance count and the cloud providers HTTP response (`data` field). 13 | """ 14 | @callback get(Ballast.NodePool.t(), Tesla.Client.t()) :: {:ok, Ballast.NodePool.t()} | {:error, Tesla.Env.t()} 15 | 16 | @doc """ 17 | Scale the minimum count. 18 | """ 19 | @callback scale(Ballast.PoolPolicy.Changeset.t(), Tesla.Client.t()) :: {:ok, map} | {:error, Tesla.Env.t()} 20 | 21 | @doc """ 22 | Determine if autoscaling is enabled on the pool. 
23 | """ 24 | @callback autoscaling_enabled?(Ballast.NodePool.t()) :: boolean() 25 | 26 | @doc """ 27 | The label selector to find all nodes of a specific cloud provider pool via the Kubernetes API 28 | """ 29 | @callback label_selector() :: binary 30 | 31 | @doc """ 32 | The label selector to nodes of a specific cloud provider pool via the Kubernetes API 33 | """ 34 | @callback label_selector(Ballast.NodePool.t()) :: binary 35 | end 36 | -------------------------------------------------------------------------------- /lib/ballast/node_pool/adapters/mock.ex: -------------------------------------------------------------------------------- 1 | # credo:disable-for-this-file 2 | defmodule Ballast.NodePool.Adapters.Mock do 3 | @moduledoc false 4 | 5 | @behaviour Ballast.NodePool.Adapters 6 | @list_json "test/support/node_pool_list.json" 7 | 8 | alias Ballast.NodePool 9 | 10 | @impl true 11 | def label_selector(), do: "" 12 | 13 | @impl true 14 | def label_selector(_), do: "" 15 | 16 | @impl true 17 | def id(%NodePool{} = pool) do 18 | "#{pool.project}/#{pool.location}/#{pool.cluster}/#{pool.name}" 19 | end 20 | 21 | @impl true 22 | def get(%NodePool{name: "invalid-pool"}, _conn) do 23 | {:error, %Tesla.Env{status: 403}} 24 | end 25 | 26 | @impl true 27 | def get(%NodePool{name: "pool-without-autoscaling"} = pool, _conn) do 28 | {:ok, pool} = get(pool, nil) 29 | pool_without_autoscaling = Map.delete(pool, :autoscaling) 30 | {:ok, pool_without_autoscaling} 31 | end 32 | 33 | @impl true 34 | def get(pool, _conn) do 35 | data = 36 | @list_json 37 | |> File.read!() 38 | |> Jason.decode!(keys: :atoms) 39 | |> Map.get(:nodePools) 40 | |> List.last() 41 | 42 | pool = %NodePool{pool | instance_count: 10, data: data} 43 | 44 | {:ok, pool} 45 | end 46 | 47 | @impl true 48 | def scale(_, _), do: {:ok, %{}} 49 | 50 | @impl true 51 | def autoscaling_enabled?(_), do: true 52 | end 53 | -------------------------------------------------------------------------------- /lib/ballast/sys/logger.ex: -------------------------------------------------------------------------------- 1 | defmodule Ballast.Sys.Logger do 2 | @moduledoc """ 3 | Attaches telemetry events to the Elixir Logger 4 | """ 5 | 6 | require Logger 7 | 8 | @spec attach(boolean) :: :ok 9 | @doc """ 10 | Attaches telemetry events to the Elixir Logger 11 | 12 | Set `BALLAST_DEBUG=true` to enable debug logging. 
13 | """ 14 | def attach(enable_debugging) 15 | 16 | def attach(true) do 17 | attach_ballast() 18 | attach_bonny() 19 | end 20 | 21 | def attach(_) do 22 | attach_ballast() 23 | end 24 | 25 | @doc false 26 | @spec log_handler(keyword, map | integer, map, atom) :: :ok 27 | def log_handler(event, measurements, metadata, preferred_level) do 28 | event_name = Enum.join(event, ".") 29 | 30 | level = 31 | case Regex.match?(~r/fail|error/, event_name) do 32 | true -> :error 33 | _ -> preferred_level 34 | end 35 | 36 | Logger.log(level, "[#{event_name}] #{inspect(measurements)} #{inspect(metadata)}") 37 | end 38 | 39 | @spec attach_bonny() :: :ok 40 | defp attach_bonny() do 41 | events = Bonny.Telemetry.events() 42 | :telemetry.attach_many("bonny-instrumentation-logger", events, &log_handler/4, :debug) 43 | end 44 | 45 | @spec attach_ballast() :: :ok 46 | defp attach_ballast() do 47 | events = Ballast.Sys.Instrumentation.events() 48 | :telemetry.attach_many("ballast-instrumentation-logger", events, &log_handler/4, :info) 49 | end 50 | end 51 | -------------------------------------------------------------------------------- /lib/ballast/controllers/v1/eviction_policy.ex: -------------------------------------------------------------------------------- 1 | defmodule Ballast.Controller.V1.EvictionPolicy do 2 | @moduledoc """ 3 | Ballast: EvictionPolicy CRD. 4 | """ 5 | 6 | use Bonny.Controller 7 | 8 | @scope :cluster 9 | @group "ballast.bonny.run" 10 | 11 | @names %{ 12 | plural: "evictionpolicies", 13 | singular: "evictionpolicy", 14 | kind: "EvictionPolicy", 15 | shortNames: ["evp"] 16 | } 17 | 18 | @rule {"", ["nodes"], ["list"]} 19 | @rule {"", ["pods"], ["list"]} 20 | @rule {"", ["pods/eviction"], ["create"]} 21 | 22 | @doc """ 23 | Handles an `ADDED` event 24 | """ 25 | @spec add(map()) :: :ok | :error 26 | @impl Bonny.Controller 27 | def add(payload), do: reconcile(payload) 28 | 29 | @doc """ 30 | Handles a `MODIFIED` event 31 | """ 32 | @spec modify(map()) :: :ok | :error 33 | @impl Bonny.Controller 34 | def modify(payload), do: reconcile(payload) 35 | 36 | @doc """ 37 | Handles a `DELETED` event 38 | """ 39 | @spec delete(map()) :: :ok | :error 40 | @impl Bonny.Controller 41 | def delete(_), do: :ok 42 | 43 | @doc """ 44 | Called periodically for each existing CustomResource to allow for reconciliation. 
45 | """ 46 | @spec reconcile(map()) :: :ok | :error 47 | @impl Bonny.Controller 48 | def reconcile(payload) do 49 | handle_eviction(payload) 50 | end 51 | 52 | @spec handle_eviction(map()) :: :ok | :error 53 | defp handle_eviction(%{} = policy) do 54 | with {:ok, pods} <- Ballast.Evictor.evictable(policy) do 55 | Enum.each(pods, &Ballast.Kube.Eviction.create/1) 56 | :ok 57 | end 58 | end 59 | 60 | defp handle_eviction(_) do 61 | :ok 62 | end 63 | end 64 | -------------------------------------------------------------------------------- /lib/ballast/sys/metrics.ex: -------------------------------------------------------------------------------- 1 | defmodule Ballast.Sys.Metrics do 2 | @moduledoc """ 3 | Prometheus Telemetry integration 4 | """ 5 | 6 | import Telemetry.Metrics 7 | 8 | @doc false 9 | @spec setup() :: list(Telemetry.Metrics.t()) 10 | def setup() do 11 | [ 12 | counter("ballast.pool_policy.backed_off.count", description: nil), 13 | counter("ballast.pool_policy.applied.count", description: nil), 14 | counter("ballast.pool_policy.reconciled.count", description: nil), 15 | counter("ballast.pool_policy.deleted.count", description: nil), 16 | counter("ballast.pool_policy.modified.count", description: nil), 17 | counter("ballast.pool_policy.added.count", description: nil), 18 | counter("ballast.nodes.list.succeeded.count", description: nil), 19 | counter("ballast.nodes.list.failed.count", description: nil), 20 | counter("ballast.pod.eviction.failed.count", description: nil), 21 | counter("ballast.pod.eviction.succeeded.count", description: nil), 22 | counter("ballast.get_eviction_candidates.failed.count", description: nil), 23 | counter("ballast.get_eviction_candidates.succeeded.count", description: nil), 24 | counter("ballast.provider.get_pool.failed.count", description: nil), 25 | counter("ballast.provider.get_pool.succeeded.count", description: nil), 26 | counter("ballast.provider.get_pool_size.failed.count", description: nil), 27 | counter("ballast.provider.get_pool_size.succeeded.count", description: nil), 28 | counter("ballast.provider.scale_pool.failed.count", description: nil), 29 | counter("ballast.provider.scale_pool.succeeded.count", description: nil) 30 | ] 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /lib/ballast/pool_policy/cooldown_cache.ex: -------------------------------------------------------------------------------- 1 | defmodule Ballast.PoolPolicy.CooldownCache do 2 | @moduledoc """ 3 | Cooldown tracking for `Ballast.PoolPolicy` 4 | """ 5 | 6 | use GenServer 7 | alias Ballast.PoolPolicy 8 | 9 | # Client 10 | 11 | @spec start_link(any()) :: {:error, any()} | {:ok, pid()} 12 | def start_link(_) do 13 | GenServer.start_link(__MODULE__, %{}, name: __MODULE__) 14 | end 15 | 16 | @spec ran(PoolPolicy.t()) :: :ok 17 | def ran(%PoolPolicy{name: name, cooldown_seconds: cooldown_seconds}) do 18 | GenServer.cast(__MODULE__, {:ran, name, cooldown_seconds}) 19 | end 20 | 21 | @spec ready?(PoolPolicy.t()) :: :ok | {:error, :cooling_down} 22 | def ready?(%PoolPolicy{} = policy) do 23 | GenServer.call(__MODULE__, {:ready?, policy}) 24 | end 25 | 26 | # Server 27 | 28 | @impl true 29 | @spec init(map) :: {:ok, map} 30 | def init(state) do 31 | {:ok, state} 32 | end 33 | 34 | @impl true 35 | def handle_call({:ready?, policy}, _from, state) do 36 | entry = Map.get(state, policy.name) 37 | is_ready = is_ready?(entry) 38 | 39 | {:reply, is_ready, state} 40 | end 41 | 42 | @impl true 43 | def handle_cast({:ran, name, 
cooldown_seconds}, state) do 44 | cooldown_ms = cooldown_seconds * 1000 45 | new_state = Map.put(state, name, now() + cooldown_ms) 46 | {:noreply, new_state} 47 | end 48 | 49 | @spec is_ready?(nil | pos_integer) :: :ok | {:error, :cooling_down} 50 | defp is_ready?(nil), do: :ok 51 | 52 | defp is_ready?(until) do 53 | case now() >= until do 54 | true -> 55 | :ok 56 | 57 | false -> 58 | {:error, :cooling_down} 59 | end 60 | end 61 | 62 | @spec now() :: pos_integer 63 | defp now do 64 | :os.system_time(:millisecond) 65 | end 66 | end 67 | -------------------------------------------------------------------------------- /test/ballast/pool_policy/managed_pool_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Ballast.PoolPolicy.ManagedPoolTest do 2 | @moduledoc false 3 | use ExUnit.Case, async: true 4 | doctest Ballast.PoolPolicy.ManagedPool 5 | alias Ballast.PoolPolicy.ManagedPool 6 | 7 | describe "new/3" do 8 | test "gets pool data" do 9 | spec = %{ 10 | "minimumPercent" => "30", 11 | "minimumInstances" => "2", 12 | "poolName" => "managed-pool", 13 | "location" => "us-central1-a" 14 | } 15 | 16 | {:ok, managed_pool} = ManagedPool.new(spec, "my-project", "my-cluster") 17 | assert %Ballast.NodePool{} = managed_pool.pool 18 | end 19 | 20 | test "formats minimum_percent" do 21 | spec = %{ 22 | "minimumPercent" => "30", 23 | "minimumInstances" => "2", 24 | "poolName" => "managed-pool", 25 | "location" => "us-central1-a" 26 | } 27 | 28 | {:ok, managed_pool} = ManagedPool.new(spec, "my-project", "my-cluster") 29 | assert managed_pool.minimum_percent == 30 30 | end 31 | 32 | test "formats minimum_instances" do 33 | spec = %{ 34 | "minimumPercent" => "30", 35 | "minimumInstances" => "2", 36 | "poolName" => "managed-pool", 37 | "location" => "us-central1-a" 38 | } 39 | 40 | {:ok, managed_pool} = ManagedPool.new(spec, "my-project", "my-cluster") 41 | assert managed_pool.minimum_instances == 2 42 | end 43 | 44 | test "returns nil when the pool cannot be found" do 45 | spec = %{ 46 | "poolName" => "invalid-pool", 47 | "minimumPercent" => "30", 48 | "minimumInstances" => "2", 49 | "location" => "us-central1-a" 50 | } 51 | 52 | assert {:error, pool_not_found} = ManagedPool.new(spec, "my-project", "my-cluster") 53 | end 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /lib/ballast/pool_policy/managed_pool.ex: -------------------------------------------------------------------------------- 1 | defmodule Ballast.PoolPolicy.ManagedPool do 2 | @moduledoc """ 3 | A managed pool 4 | """ 5 | alias Ballast.NodePool 6 | 7 | defstruct [:pool, :minimum_instances, :minimum_percent] 8 | 9 | @type t :: %__MODULE__{ 10 | pool: NodePool.t(), 11 | minimum_percent: pos_integer, 12 | minimum_instances: pos_integer 13 | } 14 | 15 | @doc """ 16 | Parse resource `managed_pool` spec and annotate with `NodePool` data from API. 
17 | """ 18 | @spec new(map(), binary(), binary()) :: {:ok, t()} | {:error, atom} 19 | def new(managed_pool_spec, project, cluster) do 20 | %{ 21 | "minimumPercent" => mp, 22 | "minimumInstances" => mi, 23 | "poolName" => name, 24 | "location" => location 25 | } = managed_pool_spec 26 | 27 | pool = NodePool.new(project, location, cluster, name) 28 | 29 | with {:ok, conn} <- Ballast.conn(), {:ok, pool} <- NodePool.get(pool, conn) do 30 | {:ok, 31 | %__MODULE__{ 32 | pool: pool, 33 | minimum_percent: cast_minimum_percent(mp), 34 | minimum_instances: cast_minimum_instances(mi) 35 | }} 36 | else 37 | {:error, _} -> 38 | {:error, :pool_not_found} 39 | end 40 | end 41 | 42 | @spec cast_minimum_percent(String.t() | pos_integer) :: pos_integer 43 | defp cast_minimum_percent(mp) when is_integer(mp), do: mp 44 | defp cast_minimum_percent(mp) when is_binary(mp), do: String.to_integer(mp) 45 | defp cast_minimum_percent(_), do: Ballast.Config.default_minimum_percent() 46 | 47 | @spec cast_minimum_instances(String.t() | pos_integer) :: pos_integer 48 | defp cast_minimum_instances(mi) when is_integer(mi), do: mi 49 | defp cast_minimum_instances(mi) when is_binary(mi), do: String.to_integer(mi) 50 | defp cast_minimum_instances(_), do: Ballast.Config.default_minimum_instances() 51 | end 52 | -------------------------------------------------------------------------------- /lib/ballast/kube/eviction.ex: -------------------------------------------------------------------------------- 1 | defmodule Ballast.Kube.Eviction do 2 | @moduledoc """ 3 | Encapsulates a Kubernetes [`Eviction` resource](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.14/#create-eviction-pod-v1-core) 4 | 5 | ## Links 6 | 7 | * [Eviction API](https://kubernetes.io/docs/tasks/administer-cluster/safely-drain-node/#the-eviction-api) 8 | """ 9 | 10 | @api_version "policy/v1beta1" 11 | 12 | alias __MODULE__ 13 | alias Ballast.Sys.Instrumentation, as: Inst 14 | 15 | @doc """ 16 | Returns an `Eviction` Kubernetes manifest 17 | 18 | ## Examples 19 | iex> Ballast.Kube.Eviction.manifest("default", "aged-nginx") 20 | %{"apiVersion" => "policy/v1beta1", "kind" => "Eviction", "metadata" => %{"name" => "aged-nginx", "namespace" => "default"}} 21 | """ 22 | @spec manifest(binary, binary) :: map 23 | def manifest(namespace, name) do 24 | %{ 25 | "apiVersion" => @api_version, 26 | "kind" => "Eviction", 27 | "metadata" => %{ 28 | "namespace" => namespace, 29 | "name" => name 30 | } 31 | } 32 | end 33 | 34 | @doc "Creates a pod eviction." 
35 | @spec create(map) :: {:ok, HTTPoison.Response.t()} | {:error, HTTPoison.Error.t()} 36 | def create(%{"metadata" => %{"name" => name, "namespace" => ns}} = pod) do 37 | eviction = Eviction.manifest(ns, name) 38 | operation = K8s.Client.create("v1", "pods/eviction", [namespace: ns, name: name], eviction) 39 | 40 | {duration, response} = :timer.tc(K8s.Client, :run, [operation, :default]) 41 | 42 | measurements = %{duration: duration} 43 | metadata = %{node: pod["spec"]["nodeName"], pod: name} 44 | 45 | case response do 46 | {:ok, _} = resp -> 47 | Inst.pod_eviction_succeeded(measurements, metadata) 48 | resp 49 | 50 | error -> 51 | Inst.pod_eviction_failed(measurements, metadata) 52 | error 53 | end 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule Ballast.MixProject do 2 | use Mix.Project 3 | 4 | def project do 5 | [ 6 | app: :ballast, 7 | version: "0.1.0", 8 | elixir: "~> 1.9", 9 | start_permanent: Mix.env() == :prod, 10 | deps: deps(), 11 | elixirc_paths: elixirc_paths(Mix.env()), 12 | test_coverage: [tool: ExCoveralls], 13 | preferred_cli_env: [coveralls: :test, "coveralls.travis": :test, "coveralls.html": :test], 14 | aliases: aliases(), 15 | docs: [ 16 | extras: ["README.md"], 17 | main: "readme" 18 | ], 19 | dialyzer: [ 20 | ignore_warnings: ".dialyzer_ignore.exs", 21 | list_unused_filters: true 22 | ] 23 | ] 24 | end 25 | 26 | # Run "mix help compile.app" to learn about applications. 27 | def application do 28 | [ 29 | extra_applications: [:logger], 30 | mod: {Ballast.Application, []} 31 | ] 32 | end 33 | 34 | defp aliases do 35 | [lint: ["format", "credo", "dialyzer"]] 36 | end 37 | 38 | defp elixirc_paths(:test), do: ["lib", "test/support"] 39 | defp elixirc_paths(_), do: ["lib"] 40 | 41 | # Run "mix help deps" to learn about dependencies. 
42 | defp deps do 43 | [ 44 | {:bonny, "~> 0.3"}, 45 | {:k8s, "~> 0.3"}, 46 | {:jason, "~> 1.1"}, 47 | {:notion, "~> 0.2"}, 48 | 49 | # Metrics 50 | {:telemetry, "~> 0.4"}, 51 | {:telemetry_metrics, "~> 0.3"}, 52 | {:telemetry_metrics_prometheus, "~> 0.1"}, 53 | {:telemetry_poller, "~> 0.3.0"}, 54 | 55 | # GoogleApi requires Poison 3, but doesn't include it in its deps :( 56 | # https://github.com/googleapis/elixir-google-api/issues/1232 57 | {:poison, "~> 3.1"}, 58 | {:goth, "~> 1.1"}, 59 | {:google_api_container, "~> 0.5"}, 60 | {:google_api_compute, "~> 0.6"}, 61 | 62 | # Dev deps 63 | {:mix_test_watch, "~> 0.8", only: :dev, runtime: false}, 64 | {:dialyxir, "~> 1.0.0-rc.4", only: :dev, runtime: false}, 65 | {:ex_doc, "~> 0.20", only: :dev}, 66 | {:credo, "~> 1.0.0", only: [:dev, :test], runtime: false}, 67 | 68 | # Test Deps 69 | {:excoveralls, "~> 0.10", only: :test} 70 | ] 71 | end 72 | end 73 | -------------------------------------------------------------------------------- /lib/ballast/sys/instrumentation.ex: -------------------------------------------------------------------------------- 1 | defmodule Ballast.Sys.Instrumentation do 2 | @moduledoc false 3 | use Notion, name: :ballast, metadata: %{} 4 | 5 | @doc "Get nodes succeeded" 6 | defevent([:nodes, :list, :succeeded]) 7 | 8 | @doc "Get nodes failed" 9 | defevent([:nodes, :list, :failed]) 10 | 11 | @doc "Pod eviction succeeded" 12 | defevent([:pod, :eviction, :succeeded]) 13 | 14 | @doc "Pod eviction failed" 15 | defevent([:pod, :eviction, :failed]) 16 | 17 | @doc "Provider generated an authentication token" 18 | defevent([:provider, :authentication, :succeeded]) 19 | 20 | @doc "Provider failed to generate an authentication token" 21 | defevent([:provider, :authentication, :failed]) 22 | 23 | @doc "Scaling pool minimum size was skipped" 24 | defevent([:provider, :scale_pool, :skipped]) 25 | 26 | @doc "Scaling pool minimum size from the provider API succeeded" 27 | defevent([:provider, :scale_pool, :succeeded]) 28 | 29 | @doc "Scaling pool minimum size from the provider API failed" 30 | defevent([:provider, :scale_pool, :failed]) 31 | 32 | @doc "Getting pool size from the provider API succeeded" 33 | defevent([:provider, :get_pool_size, :succeeded]) 34 | 35 | @doc "Getting pool size from the provider API failed" 36 | defevent([:provider, :get_pool_size, :failed]) 37 | 38 | @doc "Getting the pool from the provider API succeeded" 39 | defevent([:provider, :get_pool, :succeeded]) 40 | 41 | @doc "Getting the pool from the provider API failed" 42 | defevent([:provider, :get_pool, :failed]) 43 | 44 | @doc "Getting a list of eviction candidates from the k8s API succeeded" 45 | defevent([:get_eviction_candidates, :succeeded]) 46 | 47 | @doc "Getting a list of eviction candidates from the k8s API failed" 48 | defevent([:get_eviction_candidates, :failed]) 49 | 50 | @doc "A new PoolPolicy resource was added" 51 | defevent([:pool_policy, :added]) 52 | 53 | @doc "A PoolPolicy was modified" 54 | defevent([:pool_policy, :modified]) 55 | 56 | @doc "A PoolPolicy was deleted" 57 | defevent([:pool_policy, :deleted]) 58 | 59 | @doc "A PoolPolicy was reconciled" 60 | defevent([:pool_policy, :reconciled]) 61 | 62 | @doc "A PoolPolicy was applied successfully" 63 | defevent([:pool_policy, :applied]) 64 | 65 | @doc "A PoolPolicy was in cooldown and backed off" 66 | defevent([:pool_policy, :backed_off]) 67 | end 68 | -------------------------------------------------------------------------------- /manifests/base/deployment.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: ballast 5 | spec: 6 | replicas: 1 7 | strategy: 8 | type: Recreate 9 | template: 10 | spec: 11 | volumes: 12 | - name: ballast-operator-sa-keys 13 | secret: 14 | secretName: ballast-operator-sa-keys 15 | containers: 16 | - name: ballast 17 | image: quay.io/coryodaniel/ballast 18 | env: 19 | - name: BALLAST_METRICS_PORT 20 | value: "9323" 21 | - name: GOOGLE_APPLICATION_CREDENTIALS 22 | value: /credentials/gcp.json 23 | - name: MIX_ENV 24 | value: prod 25 | - name: BONNY_POD_NAME 26 | valueFrom: 27 | fieldRef: 28 | fieldPath: metadata.name 29 | - name: BONNY_POD_NAMESPACE 30 | valueFrom: 31 | fieldRef: 32 | fieldPath: metadata.namespace 33 | - name: BONNY_POD_IP 34 | valueFrom: 35 | fieldRef: 36 | fieldPath: status.podIP 37 | - name: BONNY_POD_SERVICE_ACCOUNT 38 | valueFrom: 39 | fieldRef: 40 | fieldPath: spec.serviceAccountName 41 | readinessProbe: 42 | httpGet: 43 | path: /metrics 44 | port: metrics 45 | initialDelaySeconds: 5 46 | timeoutSeconds: 1 47 | periodSeconds: 15 48 | livenessProbe: 49 | httpGet: 50 | path: /metrics 51 | port: metrics 52 | initialDelaySeconds: 5 53 | periodSeconds: 15 54 | ports: 55 | - containerPort: 9323 56 | name: metrics 57 | volumeMounts: 58 | - name: ballast-operator-sa-keys 59 | mountPath: /credentials 60 | readOnly: true 61 | resources: 62 | requests: 63 | cpu: 50m 64 | memory: 100Mi 65 | limits: 66 | cpu: 500m 67 | memory: 500Mi 68 | securityContext: 69 | allowPrivilegeEscalation: false 70 | readOnlyRootFilesystem: true 71 | runAsNonRoot: true 72 | runAsUser: 65534 73 | serviceAccountName: ballast 74 | -------------------------------------------------------------------------------- /test/ballast/pool_policy_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Ballast.PoolPolicyTest do 2 | @moduledoc false 3 | use ExUnit.Case, async: true 4 | doctest Ballast.PoolPolicy 5 | alias Ballast.{NodePool, PoolPolicy} 6 | 7 | describe "from_resource/1" do 8 | test "parses a valid resource" do 9 | resource = make_resource() 10 | {:ok, policy} = PoolPolicy.from_resource(resource) 11 | 12 | expected = %PoolPolicy{ 13 | cooldown_seconds: 60, 14 | name: "example-policy", 15 | pool: %NodePool{ 16 | instance_count: 10, 17 | cluster: "my-cluster", 18 | project: "my-project", 19 | location: "my-source-region-or-zone", 20 | name: "my-source-pool", 21 | under_pressure: false, 22 | data: mock_data_response() 23 | }, 24 | changesets: [], 25 | managed_pools: [ 26 | %PoolPolicy.ManagedPool{ 27 | pool: %NodePool{ 28 | instance_count: 10, 29 | cluster: "my-cluster", 30 | project: "my-project", 31 | location: "my-managed-pool-region-or-zone", 32 | name: "my-managed-pool", 33 | data: mock_data_response() 34 | }, 35 | minimum_percent: 30, 36 | minimum_instances: 1 37 | } 38 | ] 39 | } 40 | 41 | assert policy == expected 42 | end 43 | 44 | test "returns an error when it fails to GET the node pool" do 45 | resource = make_resource("invalid-pool") 46 | 47 | assert {:error, _} = PoolPolicy.from_resource(resource) 48 | end 49 | end 50 | 51 | @spec make_resource() :: map() 52 | defp make_resource() do 53 | YamlElixir.read_from_file!("test/support/resource.yaml") 54 | end 55 | 56 | @spec make_resource() :: map() 57 | defp make_resource(source_pool_name) do 58 | make_resource() 59 | |> put_in(["spec", "poolName"], source_pool_name) 60 | end 61 | 62 | @spec mock_data_response() :: map() 63 | 
def mock_data_response() do 64 | %{ 65 | autoscaling: %{enabled: true, maxNodeCount: 5, minNodeCount: 3}, 66 | initialNodeCount: 1, 67 | instanceGroupUrls: [ 68 | "https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-a/instanceGroupManagers/gke-demo-demo-preemptible" 69 | ], 70 | name: "demo-preemptible", 71 | selfLink: 72 | "https://container.googleapis.com/v1/projects/my-project/zones/us-central1-a/clusters/demo/nodePools/demo-preemptible", 73 | status: "RUNNING" 74 | } 75 | end 76 | end 77 | -------------------------------------------------------------------------------- /lib/ballast/config.ex: -------------------------------------------------------------------------------- 1 | defmodule Ballast.Config do 2 | @moduledoc """ 3 | Configuration interface 4 | """ 5 | 6 | @sys_env_key_metrics "BALLAST_METRICS_PORT" 7 | @sys_env_key_debugging "BALLAST_DEBUG" 8 | 9 | @default_metrics_port 9323 10 | @default_minimum_percent 50 11 | @default_minimum_instances 1 12 | 13 | @doc "Is debugging enabled" 14 | @spec debugging_enabled?() :: boolean() 15 | def debugging_enabled?() do 16 | @sys_env_key_debugging |> System.get_env() |> parse_boolean 17 | end 18 | 19 | @doc "Prometheus metrics port" 20 | @spec metrics_port :: pos_integer 21 | def metrics_port do 22 | port = 23 | @sys_env_key_metrics 24 | |> System.get_env() 25 | |> string_to_integer() 26 | 27 | port || @default_metrics_port 28 | end 29 | 30 | @doc """ 31 | Get the default minimum percent for managed pools. 32 | 33 | ## Example 34 | iex> Ballast.Config.default_minimum_percent() 35 | 50 36 | """ 37 | @spec default_minimum_percent() :: pos_integer 38 | def default_minimum_percent() do 39 | get_config_value(:default_minimum_percent, @default_minimum_percent) 40 | end 41 | 42 | @doc """ 43 | Get the default minimum instances for managed pools. 44 | 45 | ## Example 46 | iex> Ballast.Config.default_minimum_instances() 47 | 1 48 | """ 49 | @spec default_minimum_instances() :: pos_integer 50 | def default_minimum_instances() do 51 | get_config_value(:default_minimum_instances, @default_minimum_instances) 52 | end 53 | 54 | @spec get_config_value(atom, any()) :: any() 55 | defp get_config_value(name, default) do 56 | env_var_name = name |> Atom.to_string() |> String.upcase() 57 | from_env = System.get_env(env_var_name) 58 | from_app = Application.get_env(:ballast, name, default) 59 | 60 | from_env || from_app 61 | end 62 | 63 | @doc """ 64 | Parses an integer from a string 65 | 66 | ## Examples 67 | iex> Ballast.Config.string_to_integer("300") 68 | 300 69 | 70 | iex> Ballast.Config.string_to_integer("nonsense") 71 | nil 72 | 73 | iex> Ballast.Config.string_to_integer(300) 74 | 300 75 | 76 | """ 77 | @spec string_to_integer(any) :: integer() | nil 78 | def string_to_integer(str) when is_binary(str), do: str |> Integer.parse() |> string_to_integer 79 | def string_to_integer({int, _}), do: int 80 | def string_to_integer(int) when is_number(int), do: int 81 | def string_to_integer(_), do: nil 82 | 83 | @doc false 84 | @spec parse_boolean(any) :: boolean() 85 | def parse_boolean("true"), do: true 86 | def parse_boolean(true), do: true 87 | def parse_boolean(_), do: false 88 | end 89 | -------------------------------------------------------------------------------- /lib/ballast/pool_policy.ex: -------------------------------------------------------------------------------- 1 | defmodule Ballast.PoolPolicy do 2 | @moduledoc """ 3 | Internal representation of `Ballast.Controller.V1.PoolPolicy` custom resource. 
4 | """ 5 | @default_cooldown_seconds 300 6 | 7 | alias Ballast.{NodePool, PoolPolicy} 8 | alias PoolPolicy.Changeset 9 | 10 | defstruct name: nil, pool: nil, managed_pools: [], changesets: [], cooldown_seconds: nil 11 | 12 | @typedoc "PoolPolicy" 13 | @type t :: %__MODULE__{ 14 | name: nil | String.t(), 15 | pool: NodePool.t(), 16 | cooldown_seconds: pos_integer, 17 | managed_pools: list(PoolPolicy.ManagedPool.t()), 18 | changesets: list(Changeset.t()) 19 | } 20 | 21 | @doc """ 22 | Converts a `Ballast.Controller.V1.PoolPolicy` resource to a `Ballast.PoolPolicy` and populates managed pool's `NodePool`s data. 23 | """ 24 | @spec from_resource(map) :: {:ok, t} | {:error, Tesla.Env.t()} 25 | def from_resource(%{"metadata" => %{"name" => name}} = resource) do 26 | pool = NodePool.new(resource) 27 | 28 | with {:ok, conn} <- Ballast.conn(), 29 | {:ok, pool} <- NodePool.get(pool, conn), 30 | pool <- NodePool.set_pressure_status(pool) do 31 | managed_pools = make_managed_pools(resource) 32 | cooldown_seconds = get_in(resource, ["spec", "cooldownSeconds"]) || @default_cooldown_seconds 33 | 34 | policy = %PoolPolicy{ 35 | pool: pool, 36 | managed_pools: managed_pools, 37 | name: name, 38 | cooldown_seconds: cooldown_seconds 39 | } 40 | 41 | {:ok, policy} 42 | end 43 | end 44 | 45 | @doc """ 46 | Applies all changesets. Returns a tuple of two lists: `{succeeded, failed}` 47 | """ 48 | @spec apply(t) :: {list, list} 49 | def apply(%__MODULE__{} = policy) do 50 | {:ok, conn} = Ballast.conn() 51 | 52 | Enum.split_with(policy.changesets, fn changeset -> 53 | :ok == NodePool.scale(changeset, conn) 54 | end) 55 | end 56 | 57 | @doc """ 58 | Generates changesets for managed pools. 59 | """ 60 | @spec changesets(t) :: {:ok, t} | {:error, any()} 61 | def changesets(%PoolPolicy{managed_pools: managed_pools} = policy) do 62 | changesets = Enum.map(managed_pools, fn mp -> Changeset.new(mp, policy) end) 63 | 64 | {:ok, %PoolPolicy{policy | changesets: changesets}} 65 | end 66 | 67 | # make_managed_pools/1 removes managed_pools that encountered errors in `Ballast.NodePool.Adapter/g2` 68 | @spec make_managed_pools(map) :: list(PoolPolicy.ManagedPool.t()) 69 | defp make_managed_pools(%{"spec" => %{"managedPools" => managed_pools}} = resource) do 70 | %{"spec" => %{"projectId" => project, "clusterName" => cluster}} = resource 71 | 72 | managed_pools 73 | |> Enum.map(fn managed_pool -> PoolPolicy.ManagedPool.new(managed_pool, project, cluster) end) 74 | |> Enum.reject(fn {status, _} -> status == :error end) 75 | |> Enum.map(fn {:ok, managed_pool} -> managed_pool end) 76 | end 77 | 78 | defp make_managed_pools(_), do: [] 79 | end 80 | -------------------------------------------------------------------------------- /lib/ballast/controllers/v1/pool_policy.ex: -------------------------------------------------------------------------------- 1 | defmodule Ballast.Controller.V1.PoolPolicy do 2 | @moduledoc """ 3 | Ballast: PoolPolicy CRD. 
4 | """ 5 | 6 | use Bonny.Controller 7 | alias Ballast.{PoolPolicy} 8 | alias Ballast.Sys.Instrumentation, as: Inst 9 | 10 | @scope :cluster 11 | @group "ballast.bonny.run" 12 | 13 | @names %{ 14 | plural: "poolpolicies", 15 | singular: "poolpolicy", 16 | kind: "PoolPolicy", 17 | shortNames: ["pp"] 18 | } 19 | 20 | @rule {"", ["nodes"], ["list"]} 21 | 22 | @doc """ 23 | Handles an `ADDED` event 24 | """ 25 | @spec add(map()) :: :ok | :error 26 | @impl Bonny.Controller 27 | def add(payload) do 28 | dispatch(payload, :added) 29 | do_apply(payload) 30 | end 31 | 32 | @spec dispatch(map | binary, atom, map | nil) :: :ok 33 | def dispatch(policy_or_name, action, measurements \\ %{}) 34 | def dispatch(%{"metadata" => %{"name" => name}}, event, measurements), do: dispatch(name, event, measurements) 35 | 36 | def dispatch(name, event, measurements) do 37 | metadata = %{name: name} 38 | 39 | case event do 40 | :added -> Inst.pool_policy_added(measurements, metadata) 41 | :deleted -> Inst.pool_policy_deleted(measurements, metadata) 42 | :modified -> Inst.pool_policy_modified(measurements, metadata) 43 | :reconciled -> Inst.pool_policy_reconciled(measurements, metadata) 44 | :applied -> Inst.pool_policy_applied(measurements, metadata) 45 | :backed_off -> Inst.pool_policy_backed_off(measurements, metadata) 46 | end 47 | end 48 | 49 | @doc """ 50 | Handles a `MODIFIED` event 51 | """ 52 | @spec modify(map()) :: :ok | :error 53 | @impl Bonny.Controller 54 | def modify(payload) do 55 | dispatch(payload, :modified) 56 | do_apply(payload) 57 | end 58 | 59 | @doc """ 60 | Handles a `DELETED` event. This handler is a *no-op*. 61 | """ 62 | @spec delete(map()) :: :ok | :error 63 | @impl Bonny.Controller 64 | def delete(payload) do 65 | dispatch(payload, :deleted) 66 | end 67 | 68 | @doc """ 69 | Called periodically for each existing CustomResource to allow for reconciliation. 
70 | """ 71 | @spec reconcile(map()) :: :ok | :error 72 | @impl Bonny.Controller 73 | def reconcile(payload) do 74 | dispatch(payload, :reconciled) 75 | do_apply(payload) 76 | end 77 | 78 | @spec do_apply(map) :: :ok | :error 79 | defp do_apply(payload) do 80 | with {:ok, policy} <- PoolPolicy.from_resource(payload) do 81 | handle_policy(policy) 82 | end 83 | end 84 | 85 | @spec handle_policy(Ballast.PoolPolicy.t()) :: :ok | :error 86 | defp handle_policy(%Ballast.PoolPolicy{} = policy) do 87 | with :ok <- PoolPolicy.CooldownCache.ready?(policy), 88 | {:ok, policy} <- PoolPolicy.changesets(policy), 89 | {succeeded, failed} <- PoolPolicy.apply(policy) do 90 | PoolPolicy.CooldownCache.ran(policy) 91 | dispatch(policy.name, :applied, %{succeeded: length(succeeded), failed: length(failed)}) 92 | :ok 93 | else 94 | {:error, :cooling_down} -> 95 | dispatch(policy.name, :backed_off) 96 | :ok 97 | 98 | :error -> 99 | :error 100 | end 101 | end 102 | end 103 | -------------------------------------------------------------------------------- /test/ballast/node_pool/adapters/gke_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Ballast.NodePool.Adapters.GKETest do 2 | use ExUnit.Case, async: true 3 | alias Ballast.NodePool.Adapters.GKE 4 | alias Ballast.NodePool 5 | doctest Ballast.NodePool.Adapters.GKE 6 | 7 | @moduletag :external 8 | 9 | defp config() do 10 | gcp_project = System.get_env("GCP_PROJECT") 11 | {gcp_project, "us-central1", "ballast", "ballast-pvm-n1-1"} 12 | end 13 | 14 | describe "autoscaling_enabled?/1" do 15 | test "returns false when disabled" do 16 | pool = %NodePool{ 17 | data: %{ 18 | autoscaling: %{enabled: false} 19 | } 20 | } 21 | 22 | refute GKE.autoscaling_enabled?(pool) 23 | end 24 | 25 | test "returns true when enabled" do 26 | pool = %NodePool{ 27 | data: %{ 28 | autoscaling: %{enabled: true} 29 | } 30 | } 31 | 32 | assert GKE.autoscaling_enabled?(pool) 33 | end 34 | end 35 | 36 | describe "scale/1" do 37 | test "when autoscaling is disabled" do 38 | {:ok, conn} = Ballast.conn() 39 | {project, location, cluster, _} = config() 40 | pool = "ballast-pvm-n1-2" 41 | 42 | data = %{} 43 | node_pool = NodePool.new(project, location, cluster, pool, data) 44 | 45 | managed_pool = %Ballast.PoolPolicy.ManagedPool{pool: node_pool, minimum_percent: 10, minimum_instances: 1} 46 | source_pool = %Ballast.NodePool{instance_count: 10} 47 | policy = %Ballast.PoolPolicy{pool: source_pool} 48 | changeset = Ballast.PoolPolicy.Changeset.new(managed_pool, policy) 49 | 50 | refute GKE.autoscaling_enabled?(changeset.managed_pool.pool) 51 | assert {:ok, _} = GKE.scale(changeset, conn) 52 | end 53 | 54 | test "when autoscaling is enabled" do 55 | {:ok, conn} = Ballast.conn() 56 | {project, location, cluster, pool} = config() 57 | 58 | data = %{autoscaling: %{enabled: true, maxNodeCount: 3}} 59 | node_pool = NodePool.new(project, location, cluster, pool, data) 60 | 61 | managed_pool = %Ballast.PoolPolicy.ManagedPool{pool: node_pool, minimum_percent: 10, minimum_instances: 1} 62 | source_pool = %Ballast.NodePool{instance_count: 10} 63 | policy = %Ballast.PoolPolicy{pool: source_pool} 64 | changeset = Ballast.PoolPolicy.Changeset.new(managed_pool, policy) 65 | 66 | assert GKE.autoscaling_enabled?(changeset.managed_pool.pool) 67 | assert {:ok, _} = GKE.scale(changeset, conn) 68 | end 69 | end 70 | 71 | describe "get/2" do 72 | test "returns a node pool" do 73 | {:ok, conn} = Ballast.conn() 74 | {project, location, cluster, pool} = config() 75 | 
node_pool = NodePool.new(project, location, cluster, pool) 76 | 77 | {:ok, response} = GKE.get(node_pool, conn) 78 | 79 | assert %NodePool{} = response 80 | end 81 | 82 | test "gets the current instance count" do 83 | {:ok, conn} = Ballast.conn() 84 | {project, location, cluster, pool} = config() 85 | node_pool = NodePool.new(project, location, cluster, pool) 86 | 87 | {:ok, response} = GKE.get(node_pool, conn) 88 | %NodePool{instance_count: instance_count} = response 89 | assert instance_count 90 | end 91 | 92 | test "captures the response in `data`" do 93 | {:ok, conn} = Ballast.conn() 94 | {project, location, cluster, pool} = config() 95 | node_pool = NodePool.new(project, location, cluster, pool) 96 | 97 | {:ok, response} = GKE.get(node_pool, conn) 98 | assert match?(%NodePool{data: %{autoscaling: _, instanceGroupUrls: _, name: _}}, response) 99 | end 100 | end 101 | end 102 | -------------------------------------------------------------------------------- /test/support/docker-for-desktop.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | clusters: 3 | - cluster: 4 | insecure-skip-tls-verify: true 5 | server: https://localhost:6443 6 | name: docker-for-desktop 7 | contexts: 8 | - context: 9 | cluster: docker-for-desktop 10 | user: docker-for-desktop 11 | name: docker-for-desktop 12 | current-context: docker-for-desktop 13 | kind: Config 14 | users: 15 | - name: docker-for-desktop 16 | user: 17 | client-certificate-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUM5RENDQWR5Z0F3SUJBZ0lJQzV2dStpZE5UMll3RFFZSktvWklodmNOQVFFTEJRQXdGVEVUTUJFR0ExVUUKQXhNS2EzVmlaWEp1WlhSbGN6QWVGdzB4T1RBMU1qTXlNRFEyTkRSYUZ3MHlNREExTXpBeU1UQXlNalphTURZeApGekFWQmdOVkJBb1REbk41YzNSbGJUcHRZWE4wWlhKek1Sc3dHUVlEVlFRREV4SmtiMk5yWlhJdFptOXlMV1JsCmMydDBiM0F3Z2dFaU1BMEdDU3FHU0liM0RRRUJBUVVBQTRJQkR3QXdnZ0VLQW9JQkFRQzJaSm4yNmJaNDc2K2UKWS91S3BlV3VxK0RNUmVZNGJyY3pMaTBzVThLL2hlc3RYdHB2a3FhbGw2SWZ3Uk1hZXAzRzhiekxqL3poaXBtaQpYbVpWRDdoYmpTYURodUI5c056Ylcrdjh1YitKUHpkMmR3cytMakpKUVFCNURCMjdHaWUvSEFWMWJUY0RQUm01CnVGbVVlVklhck4xMlBJRXcwSGlaZzZPUmJjdDlUUzVXQWNGZUE4aVdEazRhcjltM2FMRnpxcXhKaEhMWUt3cHYKS2pCMzJjZmtDL3RlTkd3Y1hndGJDYTMxQTVRU3NpektNb1hmMmNlUXYwMDdKdllCZmYwVGE1U1lLOW00ZUZjOQp1VHZkZ3lpUk54UzJBbW1HYUY5L2pHSUEyS25FRnYvTjVXRlZJUUROTENjOVNIZUM3K2pQRjZzdW05RENyQkFlCllsSzVTWVlIQWdNQkFBR2pKekFsTUE0R0ExVWREd0VCL3dRRUF3SUZvREFUQmdOVkhTVUVEREFLQmdnckJnRUYKQlFjREFqQU5CZ2txaGtpRzl3MEJBUXNGQUFPQ0FRRUFOVE04Q3RPSnRNTGpPSUZOZGJpY0FIdlpnQzlxemJjOApmdVY3S2hMakR4am5XRUROMlltTzhWS2o1Ly9mek5RemxJU3pwNnBPSjZ6QjY2SFVxbXExTThmaDErVTBuNEUzCjRycWdHTU4welBib3owV2hhUFVJa0YwTVBlS1hvQ1dPTnVQTk5jVmlHNGcySEg3TXM5V1ZHalgxMXdEK2xpVGsKSlJhVmlIWlBwZ2p6ZmN6N1YzcjEvNklHNVUwZTlDSjBwMDdubEp2OGk4ZEpnTkJQU2grWkVPNlpocmVkeWNLbwpoakxQNHpHanFVek1aV2ZwZjRNcG9zMjRlOHVFMDM1TFRyUFl2d3J0Ynp5Z3ZTbnFSNHc4cUpyakNBTXFldnBHCndYZU5qKzVKckcwQW9PaHQ4S3hoOXNJMUNia1Q3OCs1K2JJblFkSCtsS1piZFhzRXhVV1pZdz09Ci0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K 18 | client-key-data: 
LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFb3dJQkFBS0NBUUVBdG1TWjl1bTJlTyt2bm1QN2lxWGxycXZnekVYbU9HNjNNeTR0TEZQQ3Y0WHJMVjdhCmI1S21wWmVpSDhFVEducWR4dkc4eTQvODRZcVpvbDVtVlErNFc0MG1nNGJnZmJEYzIxdnIvTG0vaVQ4M2RuY0wKUGk0eVNVRUFlUXdkdXhvbnZ4d0ZkVzAzQXowWnViaFpsSGxTR3F6ZGRqeUJNTkI0bVlPamtXM0xmVTB1VmdIQgpYZ1BJbGc1T0dxL1p0Mml4YzZxc1NZUnkyQ3NLYnlvd2Q5bkg1QXY3WGpSc0hGNExXd210OVFPVUVySXN5aktGCjM5bkhrTDlOT3liMkFYMzlFMnVVbUN2WnVIaFhQYms3M1lNb2tUY1V0Z0pwaG1oZmY0eGlBTmlweEJiL3plVmgKVlNFQXpTd25QVWgzZ3Uvb3p4ZXJMcHZRd3F3UUhtSlN1VW1HQndJREFRQUJBb0lCQUJCMk5mczJEdFVOZFFGTApydGxOcmRROUNSWUd2UWVjM29ZTkJlQzVVcHRzbjlSQmVsK1ZrZHJpTzZ5NEJWeGZjcndROTJROEE2dnJrWUE1Ckd1MVVkT1EyY2lKdTJ2WWRMMkhSQXhTVlF6ZEU4R0dOendMMnNtMHdJN1hKSlNuTTY0ZHhKUUQ3TVhybVdkc00KUzNGK0hhM3dxakJRdDZ4TE5JZE9mVVhUN2dqSjluMXZMVVR3MXBXN2l3MzFXeGJhdEtTWkF0bUlKTzZGdjhacQpjYjU1L2lEa3V1U2lOWkhsVjRrVjVtUHc4Tis3VnVacUZUd1pPb2hwc2x4UG9NOXYwMHM2STBCM3NKalpmcXd5Ckg5ZDFmYkJGTDJyaWJUWSt0ekhGOVFndEhQeS9maU5nWXF2U2t6UGd4ZVdqQmNDY2pQVWY4WThYL0cvWHRxWVoKTHMybDdoa0NnWUVBM3RpYkVlMkVnUUlxemI2REpiVFV4dFZ1VHlVV3VSeEpYVVE0UlVNYTBxWmRFOEVia25hNgpvVjV4WmF1TU1zQ1U4Qk5yTlRvL1pyYlI4YmVjbklqdHJyYkVmdVpXTEhkRFFNdnlrWkVVQUNKQ09ycGo0MlkwCmI0TGIyWXRTUUtNK0UrRFppcDdmdTBSNWRZdUxhb29vTnozWWxQU0lsTGI4WnZNcS9Ud0c3TlVDZ1lFQTBZZEkKaURrTkZiSFZMMXlpQ0ZweDFYRHYwMmNKVnZESWhKdUJtY3pYc0JXeTVVM1k2c2dCblk3R2NQb1RxK2Z3aTRtQwpObDVDQkZXbmVHeWxXZEI3RVQraTg5TGNGRkxub3pRbmhoZG03YXQza2V0RUpzdHJ2OWE2QlE1TDQ2alB5MktkCll2a0dhUFhrM1huSCs4NDVEK05hUEtoZkt1b05IN2htOGl0aTVXc0NnWUFDd1JhWmtCLzFJRHU3TTBHWHM3WjEKaHFuSVhLTDlRclk5YXR4dkxyY2tKMTQ3SWtYZTk0eXk5ZUhjNmRQZnhMV1NpZ3JuSzRNaEY5MWNhWEtNRkwwcgpUYUw4Rm9pMkgvczZ6OHA5dGhkVE1SazBkSjFMY0RYeHN1Sll1L29XUm00Ny9LbHpweWNJdjMzbzBBSzNIR0lHCjNNTFNoSHN6cTFkUFJtNGZseUJXeVFLQmdHT1JQUGQyM08vdFJOZ3YxZFBRWHYzRm9FRytXZUxWb3dQWS9kby8KVEZPaHpCUkpUUVlzcXcvYk5CT2wwc2xDKzd6alNGeGJxRlYwK3BNd1ZkYjdTakFsSkdOblZTdlhSYm41WTdyRgp6aVNzOGJJR3Nha1p5NGNyRzlTaE9aRGl2SFB5MWNXZGZ0MUwxRWttd05DNDY4YlFEQ1o5QUZSZFNvVkRWTEJKCnk4SW5Bb0dCQUtRUndNVGVXcnhZZkRoMitZYzlsQVNqM1R1SE4zQlBNVExpRlEzdWNzMDliQTJPRVlYejkrUCsKcC9DSXNYVk12RWo0bnlXN1NnR1R1dmw5eVpIOEo5cFgwMWM0UWdiZmRSWFRrcXhIdTM3cTA5NHBsb3NxVkEwcgpWUWI2eHBKbGh4YVNTN3dOWTBmeWpsU1U4cUtOUmJoWHFKSlVqU3UxSEhUWDFkVHFOeDl5Ci0tLS0tRU5EIFJTQSBQUklWQVRFIEtFWS0tLS0tCg== 19 | -------------------------------------------------------------------------------- /lib/ballast/evictor.ex: -------------------------------------------------------------------------------- 1 | defmodule Ballast.Evictor do 2 | @moduledoc """ 3 | Finds pods that are candidates for eviction. 4 | """ 5 | 6 | @default_max_lifetime 600 7 | 8 | alias K8s.{Client, Operation, Selector} 9 | alias Ballast.Sys.Instrumentation, as: Inst 10 | 11 | @doc """ 12 | Gets all pods with eviction enabled. 13 | """ 14 | @spec candidates(map()) :: {:ok, Enumerable.t()} | {:error, HTTPoison.Response.t()} 15 | def candidates(%{} = policy) do 16 | op = Client.list("v1", :pods, namespace: :all) 17 | selector = Selector.parse(policy) 18 | op_w_selector = %Operation{op | label_selector: selector} 19 | 20 | response = Client.stream(op_w_selector, :default) 21 | 22 | case response do 23 | {:ok, stream} -> 24 | Inst.get_eviction_candidates_succeeded(%{}, %{}) 25 | {:ok, stream} 26 | 27 | {:error, _any} = error -> 28 | Inst.get_eviction_candidates_failed(%{}, %{}) 29 | error 30 | end 31 | end 32 | 33 | @doc """ 34 | Get a list of evictable pods on the given node pool. 
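Accepts the full `EvictionPolicy` resource as a map and needs a reachable cluster via the configured `:k8s` connection. A rough sketch of a call, reusing the spec fields from the README's `EvictionPolicy` example (values are illustrative):

    policy = %{
      "spec" => %{
        "mode" => "unpreferred",
        "maxLifetime" => 600,
        "selector" => %{"matchLabels" => %{"app" => "nginx"}}
      }
    }

    {:ok, pods} = Ballast.Evictor.evictable(policy)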
35 | 36 | Filters `candidates/1` by `pod_started_before/1` and optionally `on_unpreferred_node/N` 37 | """ 38 | @spec evictable(map) :: {:ok, Enumerable.t()} | {:error, HTTPoison.Response.t()} 39 | def evictable(%{} = policy) do 40 | with {:ok, nodes} <- Ballast.Kube.Node.list(), 41 | {:ok, stream} <- candidates(policy) do 42 | max_lifetime = max_lifetime(policy) 43 | started_before = pods_started_before(stream, max_lifetime) 44 | 45 | ready_for_eviction = 46 | case mode(policy) do 47 | :all -> started_before 48 | :unpreferred -> pods_on_unpreferred_node(started_before, nodes) 49 | end 50 | 51 | {:ok, ready_for_eviction} 52 | end 53 | end 54 | 55 | @spec pods_on_unpreferred_node(Enumerable.t(), list(map)) :: Enumerable.t() 56 | defp pods_on_unpreferred_node(pods, nodes) do 57 | Stream.filter(pods, fn pod -> pod_on_unpreferred_node(pod, nodes) end) 58 | end 59 | 60 | @doc false 61 | @spec pods_started_before(Enumerable.t(), pos_integer) :: Enumerable.t() 62 | def pods_started_before(pods, max_lifetime) do 63 | Stream.filter(pods, fn pod -> pod_started_before(pod, max_lifetime) end) 64 | end 65 | 66 | @spec pod_on_unpreferred_node(map, list(map)) :: boolean 67 | def pod_on_unpreferred_node( 68 | %{ 69 | "spec" => %{ 70 | "nodeName" => node_name, 71 | "affinity" => %{"nodeAffinity" => %{"preferredDuringSchedulingIgnoredDuringExecution" => affinity}} 72 | } 73 | }, 74 | nodes 75 | ) do 76 | prefs = Enum.map(affinity, fn a -> Map.get(a, "preference") end) 77 | 78 | preferred = 79 | nodes 80 | |> find_node_by_name(node_name) 81 | |> Ballast.Kube.Node.matches_preferences?(prefs) 82 | 83 | !preferred 84 | end 85 | 86 | def pod_on_unpreferred_node(_pod_with_no_affinity, _nodes), do: false 87 | 88 | @spec find_node_by_name(list(map), binary()) :: map() | nil 89 | defp find_node_by_name(nodes, node_name) do 90 | Enum.find(nodes, fn %{"metadata" => %{"name" => name}} -> name == node_name end) 91 | end 92 | 93 | @doc """ 94 | Check if a pod started before a given time 95 | 96 | ## Examples 97 | iex> start_time = DateTime.utc_now |> DateTime.add(-61, :second) |> DateTime.to_string 98 | ...> Ballast.Evictor.pod_started_before(%{"status" => %{"startTime" => start_time}}, 60) 99 | true 100 | 101 | iex> start_time = DateTime.utc_now |> DateTime.to_string 102 | ...> Ballast.Evictor.pod_started_before(%{"status" => %{"startTime" => start_time}}, 60) 103 | false 104 | """ 105 | @spec pod_started_before(map, pos_integer) :: boolean 106 | def pod_started_before(%{"status" => %{"startTime" => start_time}}, seconds) do 107 | seconds_ago = -parse_seconds(seconds) 108 | cutoff_time = DateTime.utc_now() |> DateTime.add(seconds_ago, :second) 109 | 110 | with {:ok, start_time, _} <- DateTime.from_iso8601(start_time), 111 | :lt <- DateTime.compare(start_time, cutoff_time) do 112 | true 113 | else 114 | _ -> false 115 | end 116 | end 117 | 118 | def pod_started_before(_, _), do: false 119 | 120 | @spec max_lifetime(map()) :: pos_integer() 121 | defp max_lifetime(%{"spec" => %{"maxLifetime" => sec}}), do: parse_seconds(sec) 122 | defp max_lifetime(_), do: @default_max_lifetime 123 | 124 | @spec mode(map()) :: :all | :unpreferred 125 | defp mode(%{"spec" => %{"mode" => "unpreferred"}}), do: :unpreferred 126 | defp mode(_), do: :all 127 | 128 | @spec parse_seconds(binary() | pos_integer() | {pos_integer(), term()}) :: pos_integer() 129 | defp parse_seconds(sec) when is_binary(sec), do: sec |> Integer.parse() |> parse_seconds 130 | defp parse_seconds(sec) when is_integer(sec), do: sec 131 | defp parse_seconds({sec, _}), do: 
sec 132 | defp parse_seconds(_), do: 0 133 | end 134 | -------------------------------------------------------------------------------- /terraform/main.tf: -------------------------------------------------------------------------------- 1 | provider "google" { 2 | project = "${var.gcp_project}" 3 | credentials = "${file(var.gcp_credentials_path)}" 4 | } 5 | 6 | locals { 7 | node_group = "ballast-example-group" 8 | } 9 | 10 | resource "google_project_service" "container" { 11 | service = "container.googleapis.com" 12 | 13 | disable_dependent_services = false 14 | disable_on_destroy = false 15 | } 16 | 17 | resource "google_container_cluster" "main" { 18 | depends_on = ["google_project_service.container"] 19 | name = "${var.gke_cluster_name}" 20 | location = "${var.gcp_location}" 21 | min_master_version = "1.13" 22 | remove_default_node_pool = true 23 | initial_node_count = 1 24 | 25 | master_auth { 26 | username = "" 27 | password = "" 28 | 29 | client_certificate_config { 30 | issue_client_certificate = false 31 | } 32 | } 33 | } 34 | 35 | resource "google_container_node_pool" "od-n1-1" { 36 | name = "${var.gke_cluster_name}-od-n1-1" 37 | location = "${var.gcp_location}" 38 | cluster = "${google_container_cluster.main.name}" 39 | initial_node_count = 1 40 | 41 | autoscaling { 42 | min_node_count = 1 43 | max_node_count = var.gcp_on_demand_max_nodes 44 | } 45 | 46 | management { 47 | auto_repair = true 48 | auto_upgrade = true 49 | } 50 | 51 | node_config { 52 | preemptible = false 53 | machine_type = "n1-standard-1" 54 | 55 | metadata = { 56 | disable-legacy-endpoints = "true" 57 | } 58 | 59 | labels = { 60 | node-group = "${local.node_group}" 61 | node-type = "on-demand" 62 | } 63 | } 64 | 65 | lifecycle { 66 | ignore_changes = [ 67 | autoscaling 68 | ] 69 | } 70 | } 71 | 72 | resource "google_container_node_pool" "pvm-n1-1" { 73 | name = "${var.gke_cluster_name}-pvm-n1-1" 74 | location = "${var.gcp_location}" 75 | cluster = "${google_container_cluster.main.name}" 76 | initial_node_count = 1 77 | 78 | autoscaling { 79 | min_node_count = 1 80 | max_node_count = var.gcp_preemptible_max_nodes 81 | } 82 | 83 | management { 84 | auto_repair = true 85 | auto_upgrade = true 86 | } 87 | 88 | node_config { 89 | preemptible = true 90 | machine_type = "n1-standard-1" 91 | 92 | metadata = { 93 | disable-legacy-endpoints = "true" 94 | } 95 | 96 | labels = { 97 | node-group = "${local.node_group}" 98 | node-type = "preemptible" 99 | } 100 | } 101 | 102 | lifecycle { 103 | ignore_changes = [ 104 | autoscaling 105 | ] 106 | } 107 | } 108 | 109 | resource "google_container_node_pool" "pvm-n1-2" { 110 | name = "${var.gke_cluster_name}-pvm-n1-2" 111 | location = "${var.gcp_location}" 112 | cluster = "${google_container_cluster.main.name}" 113 | initial_node_count = 1 114 | 115 | autoscaling { 116 | min_node_count = 1 117 | max_node_count = var.gcp_preemptible_max_nodes 118 | } 119 | 120 | management { 121 | auto_repair = true 122 | auto_upgrade = true 123 | } 124 | 125 | node_config { 126 | preemptible = true 127 | machine_type = "n1-standard-2" 128 | 129 | metadata = { 130 | disable-legacy-endpoints = "true" 131 | } 132 | 133 | labels = { 134 | node-group = "${local.node_group}" 135 | node-type = "preemptible" 136 | } 137 | } 138 | 139 | lifecycle { 140 | ignore_changes = [ 141 | autoscaling 142 | ] 143 | } 144 | } 145 | 146 | resource "google_container_node_pool" "other" { 147 | name = "${var.gke_cluster_name}-other" 148 | location = "${var.gcp_location}" 149 | cluster = 
"${google_container_cluster.main.name}" 150 | initial_node_count = 1 151 | 152 | autoscaling { 153 | min_node_count = 1 154 | max_node_count = 2 155 | } 156 | 157 | management { 158 | auto_repair = true 159 | auto_upgrade = true 160 | } 161 | 162 | node_config { 163 | machine_type = "n1-standard-1" 164 | 165 | metadata = { 166 | disable-legacy-endpoints = "true" 167 | } 168 | } 169 | } 170 | 171 | data "template_file" "poolpolicy-yaml" { 172 | template = "${file("${path.module}/ballast-poolpolicy.tpl.yaml")}" 173 | 174 | vars = { 175 | project = "${var.gcp_project}" 176 | location = "${var.gcp_location}" 177 | cluster = "${google_container_cluster.main.name}" 178 | source_pool = "${google_container_node_pool.od-n1-1.name}" 179 | managed_pool_1 = "${google_container_node_pool.pvm-n1-1.name}" 180 | managed_pool_2 = "${google_container_node_pool.pvm-n1-2.name}" 181 | } 182 | } 183 | 184 | resource "local_file" "poolpolicy-yaml" { 185 | content = "${data.template_file.poolpolicy-yaml.rendered}" 186 | filename = "${path.module}/ballast-poolpolicy.yaml" 187 | } 188 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all build clean compile help integration lint push test setup 2 | .PHONY: dev.cluster.apply dev.cluster.delete 3 | .PHONY: operator.apply operator.delete 4 | .PHONY: dev.policy.apply dev.policy.delete 5 | .PHONY: dev.scale.down dev.scale.start dev.scale.totals dev.scale.up dev.scale.where 6 | .PHONY: dev.start.iex dev.start.in-cluster 7 | .PHONY: pools.roll.od-n1-1 pools.roll.pvm-n1-2 pools.roll.pvm-n1-2 8 | .PHONY: pools.sourcepool.enable pools.sourcepool.disable 9 | 10 | IMAGE=quay.io/coryodaniel/ballast 11 | 12 | help: ## Show this help 13 | help: 14 | @grep -E '^[a-zA-Z0-9._%-]+:.*?## .*$$' Makefile | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 15 | 16 | guard-%: # Creates an environment variable requirement by setting a prereq of guard-YOUR_ENV_VAR 17 | @ if [ -z '${${*}}' ]; then \ 18 | echo "Environment variable $* not set"; \ 19 | exit 1;\ 20 | fi 21 | 22 | lazy: 23 | lazy: ## Find places where I've been lazy 24 | grep -R --include="*.ex" -E ":ok[^=].* = " ./lib | grep -v '\->' 25 | 26 | all: ## Lints, tests, compiles, and pushes "latest" docker tag. 27 | all: lint test compile build push 28 | 29 | clean: ## Clean builds, dependencies, coverage reports, and docs 30 | rm -rf _build 31 | rm -rf deps 32 | rm -rf cover 33 | rm -rf doc 34 | 35 | setup: ## Setup 36 | 37 | test: ## Run unit tests with coverage 38 | mix test --exclude external:true --cover 39 | 40 | integration: ## Run integration tests with coverage. 41 | mix test --cover 42 | 43 | lint: ## run format, credo, and dialyzer 44 | mix lint 45 | 46 | compile: ## Compile ballast 47 | mix deps.get 48 | mix compile 49 | 50 | build: ## Build docker image 51 | build: guard-IMAGE compile 52 | docker build -t ${IMAGE} . 
53 | 54 | push: ## Release 'latest' docker image 55 | push: guard-IMAGE 56 | docker push ${IMAGE}:latest 57 | 58 | .PHONY: debug 59 | debug: guard-IMAGE compile build 60 | debug: ## Builds a debug image 61 | docker tag ${IMAGE} ${IMAGE}:debug 62 | docker push ${IMAGE}:debug 63 | 64 | shell: guard-IMAGE 65 | docker run -it --entrypoint "/bin/sh" quay.io/coryodaniel/ballast:latest 66 | 67 | tag: guard-DOCKER_LABEL 68 | tag: ## Tag a release 69 | docker tag ${IMAGE}:latest ${IMAGE}:${DOCKER_LABEL} 70 | docker push ${IMAGE}:${DOCKER_LABEL} 71 | 72 | dev.cluster.apply: ## Create / Update development cluster 73 | dev.cluster.apply: 74 | cd terraform && terraform init && \ 75 | terraform apply -var-file=terraform.tfvars 76 | 77 | dev.cluster.delete: ## Delete development cluster 78 | cd terraform && terraform destroy -var-file=terraform.tfvars 79 | 80 | operator.apply: ## Apply kustomization base in kubectl current context using the latest docker image 81 | -@kubectl delete -k ./manifests/base/ 82 | kubectl apply -k ./manifests/base/ 83 | 84 | operator.delete: ## Delete the operator in kubectl current context 85 | kubectl delete -k ./manifests/base/ 86 | 87 | dev.policy.apply: ## Create / Update example PoolPolicy 88 | dev.policy.apply: 89 | -@kubectl delete -f ./terraform/ballast-poolpolicy.yaml 90 | kubectl apply -f ./terraform/ballast-poolpolicy.yaml 91 | 92 | dev.policy.delete: ## Delete example PoolPolicy 93 | dev.policy.delete: 94 | kubectl delete -f ./terraform/ballast-poolpolicy.yaml 95 | 96 | dev.scale.start: ## Start an nginx deployment 97 | kubectl apply -f ./test-scale-up.yaml 98 | 99 | dev.scale.50: ## Scale nginx to 50 replicas 100 | REPLICAS=50 $(MAKE) dev.scale.up 101 | dev.scale.500: ## Scale nginx to 500 replicas 102 | REPLICAS=500 $(MAKE) dev.scale.up 103 | dev.scale.1000: ## Scale nginx to 1000 replicas 104 | REPLICAS=1000 $(MAKE) dev.scale.up 105 | 106 | dev.scale.up: ## Scale nginx deployment to a lot 107 | dev.scale.up: guard-REPLICAS 108 | echo "Scaling to ${REPLICAS} replicas" 109 | kubectl scale --replicas=${REPLICAS} -f ./test-scale-up.yaml 110 | 111 | dev.scale.down: ## Destroy nginx deployment 112 | kubectl delete -f ./test-scale-up.yaml 113 | 114 | dev.scale.where: ## Show which nodes scaled nginx test is on 115 | kubectl get pods -o wide --sort-by="{.spec.nodeName}" --chunk-size=0 116 | 117 | dev.scale.totals: ## Show count of pods on node pools 118 | $(MAKE) dev.scale.where | grep -Fo -e other -e od-n1-1 -e pvm-n1-1 -e pvm-n1-2 | uniq -c 119 | 120 | dev.start.iex: ## Deploys CRD and RBAC to kubectl current context, but runs ballast in iex 121 | - rm manifest.yaml 122 | mix bonny.gen.manifest 123 | kubectl apply -f ./manifest.yaml 124 | iex --dot-iex .iex.exs -S mix 125 | 126 | dev.start.in-cluster: ## Deploys "latest" docker image into kubectl current context w/ a newly generated manifest.yaml 127 | - rm manifest.yaml 128 | mix bonny.gen.manifest --image ${IMAGE} 129 | kubectl apply -f ./manifest.yaml 130 | 131 | dev.svc-metrics.forward: ## Forward the remote k8s ballast-metrics service to localhost 132 | kubectl port-forward service/ballast-metrics 9323:9323 133 | 134 | ## Managed Pools 135 | 136 | SOURCE_POOL=$(shell kubectl get nodes | grep od-n | awk '{print $$1}') 137 | pools.sourcepool.disable: ## Disable the source pool 138 | for node in ${SOURCE_POOL} ; do (kubectl drain $$node --force --ignore-daemonsets &); done 139 | 140 | pools.sourcepool.enable: ## Enabled the source pool 141 | for node in ${SOURCE_POOL} ; do (kubectl uncordon $$node &); done 142 | 143 
| pools.nodes.current: ## Show number of nodes in pool 144 | kubectl get nodes | grep -Fo -e other -e od-n1-1 -e pvm-n1-1 -e pvm-n1-2 | uniq -c 145 | 146 | pools.roll.od-n1-1: ## Rolling replace the od-n1-1 managed node pool 147 | pools.roll.od-n1-1: _roll_pool.od-n1-1 148 | pools.roll.pvm-n1-1: ## Rolling replace the pvm-n1-1 managed node pool 149 | pools.roll.pvm-n1-1: _roll_pool.pvm-n1-1 150 | pools.roll.pvm-n1-2: ## Rolling replace the pvm-n1-2 managed node pool 151 | pools.roll.pvm-n1-2: _roll_pool.pvm-n1-2 152 | 153 | _roll_pool.%: 154 | gcloud compute instance-groups managed list |\ 155 | grep gke-ballast-ballast-$* |\ 156 | awk '{print $$1, $$2}' |\ 157 | xargs -n 2 bash -c 'gcloud compute instance-groups managed rolling-action replace $$0 --zone $$1 --max-unavailable 100 --max-surge 1' 158 | 159 | -------------------------------------------------------------------------------- /.credo.exs: -------------------------------------------------------------------------------- 1 | # This file contains the configuration for Credo and you are probably reading 2 | # this after creating it with `mix credo.gen.config`. 3 | # 4 | # If you find anything wrong or unclear in this file, please report an 5 | # issue on GitHub: https://github.com/rrrene/credo/issues 6 | # 7 | %{ 8 | # 9 | # You can have as many configs as you like in the `configs:` field. 10 | configs: [ 11 | %{ 12 | # 13 | # Run any exec using `mix credo -C `. If no exec name is given 14 | # "default" is used. 15 | # 16 | name: "default", 17 | # 18 | # These are the files included in the analysis: 19 | files: %{ 20 | # 21 | # You can give explicit globs or simply directories. 22 | # In the latter case `**/*.{ex,exs}` will be used. 23 | # 24 | included: ["lib/", "src/", "test/", "web/", "apps/"], 25 | excluded: [~r"/_build/", ~r"/deps/", ~r"/node_modules/"] 26 | }, 27 | # 28 | # If you create your own checks, you must specify the source files for 29 | # them here, so they can be loaded by Credo before running the analysis. 30 | # 31 | requires: [], 32 | # 33 | # If you want to enforce a style guide and need a more traditional linting 34 | # experience, you can change `strict` to `true` below: 35 | # 36 | strict: false, 37 | # 38 | # If you want to use uncolored output by default, you can change `color` 39 | # to `false` below: 40 | # 41 | color: true, 42 | # 43 | # You can customize the parameters of any check by adding a second element 44 | # to the tuple. 45 | # 46 | # To disable a check put `false` as second element: 47 | # 48 | # {Credo.Check.Design.DuplicatedCode, false} 49 | # 50 | checks: [ 51 | # 52 | ## Consistency Checks 53 | # 54 | {Credo.Check.Consistency.ExceptionNames, []}, 55 | {Credo.Check.Consistency.LineEndings, []}, 56 | {Credo.Check.Consistency.ParameterPatternMatching, []}, 57 | {Credo.Check.Consistency.SpaceAroundOperators, []}, 58 | {Credo.Check.Consistency.SpaceInParentheses, []}, 59 | {Credo.Check.Consistency.TabsOrSpaces, []}, 60 | 61 | # 62 | ## Design Checks 63 | # 64 | # You can customize the priority of any check 65 | # Priority values are: `low, normal, high, higher` 66 | # 67 | {Credo.Check.Design.AliasUsage, [priority: :low, if_nested_deeper_than: 2, if_called_more_often_than: 0]}, 68 | # You can also customize the exit_status of each check. 69 | # If you don't want TODO comments to cause `mix credo` to fail, just 70 | # set this value to 0 (zero). 
71 | # 72 | {Credo.Check.Design.TagTODO, [exit_status: 2]}, 73 | {Credo.Check.Design.TagFIXME, []}, 74 | 75 | # 76 | ## Readability Checks 77 | # 78 | {Credo.Check.Readability.AliasOrder, []}, 79 | {Credo.Check.Readability.FunctionNames, []}, 80 | {Credo.Check.Readability.LargeNumbers, []}, 81 | {Credo.Check.Readability.MaxLineLength, [priority: :low, max_length: 120]}, 82 | {Credo.Check.Readability.ModuleAttributeNames, []}, 83 | {Credo.Check.Readability.ModuleDoc, []}, 84 | {Credo.Check.Readability.ModuleNames, []}, 85 | {Credo.Check.Readability.ParenthesesInCondition, []}, 86 | {Credo.Check.Readability.ParenthesesOnZeroArityDefs, []}, 87 | {Credo.Check.Readability.PredicateFunctionNames, []}, 88 | {Credo.Check.Readability.PreferImplicitTry, []}, 89 | {Credo.Check.Readability.RedundantBlankLines, []}, 90 | {Credo.Check.Readability.Semicolons, []}, 91 | {Credo.Check.Readability.SpaceAfterCommas, []}, 92 | {Credo.Check.Readability.StringSigils, []}, 93 | {Credo.Check.Readability.TrailingBlankLine, []}, 94 | {Credo.Check.Readability.TrailingWhiteSpace, []}, 95 | {Credo.Check.Readability.VariableNames, []}, 96 | 97 | # 98 | ## Refactoring Opportunities 99 | # 100 | {Credo.Check.Refactor.CondStatements, []}, 101 | {Credo.Check.Refactor.CyclomaticComplexity, []}, 102 | {Credo.Check.Refactor.FunctionArity, []}, 103 | {Credo.Check.Refactor.LongQuoteBlocks, []}, 104 | {Credo.Check.Refactor.MapInto, []}, 105 | {Credo.Check.Refactor.MatchInCondition, []}, 106 | {Credo.Check.Refactor.NegatedConditionsInUnless, []}, 107 | {Credo.Check.Refactor.NegatedConditionsWithElse, []}, 108 | {Credo.Check.Refactor.Nesting, []}, 109 | {Credo.Check.Refactor.PipeChainStart, 110 | [excluded_argument_types: [:atom, :binary, :fn, :keyword], excluded_functions: []]}, 111 | {Credo.Check.Refactor.UnlessWithElse, []}, 112 | 113 | # 114 | ## Warnings 115 | # 116 | {Credo.Check.Warning.BoolOperationOnSameValues, []}, 117 | {Credo.Check.Warning.ExpensiveEmptyEnumCheck, []}, 118 | {Credo.Check.Warning.IExPry, []}, 119 | {Credo.Check.Warning.IoInspect, []}, 120 | {Credo.Check.Warning.LazyLogging, []}, 121 | {Credo.Check.Warning.OperationOnSameValues, []}, 122 | {Credo.Check.Warning.OperationWithConstantResult, []}, 123 | {Credo.Check.Warning.RaiseInsideRescue, []}, 124 | {Credo.Check.Warning.UnusedEnumOperation, []}, 125 | {Credo.Check.Warning.UnusedFileOperation, []}, 126 | {Credo.Check.Warning.UnusedKeywordOperation, []}, 127 | {Credo.Check.Warning.UnusedListOperation, []}, 128 | {Credo.Check.Warning.UnusedPathOperation, []}, 129 | {Credo.Check.Warning.UnusedRegexOperation, []}, 130 | {Credo.Check.Warning.UnusedStringOperation, []}, 131 | {Credo.Check.Warning.UnusedTupleOperation, []}, 132 | 133 | # 134 | # Controversial and experimental checks (opt-in, just remove `, false`) 135 | # 136 | {Credo.Check.Consistency.MultiAliasImportRequireUse}, 137 | {Credo.Check.Design.DuplicatedCode}, 138 | {Credo.Check.Readability.Specs} 139 | ] 140 | } 141 | ] 142 | } 143 | -------------------------------------------------------------------------------- /lib/ballast/node_pool.ex: -------------------------------------------------------------------------------- 1 | defmodule Ballast.NodePool do 2 | @moduledoc """ 3 | Interface for interacting with Kubernetes NodePools. 
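The provider adapter is resolved at compile time from the `:node_pool_adapter` application environment key and defaults to GKE. As a sketch (adjust to your own config layout), the bundled mock adapter could be selected in test config like so:

    # e.g. config/test.exs
    config :ballast, node_pool_adapter: Ballast.NodePool.Adapters.Mock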
4 | """ 5 | 6 | @adapter Application.get_env(:ballast, :node_pool_adapter, Ballast.NodePool.Adapters.GKE) 7 | @node_pool_pressure_percent 90 8 | @node_pool_pressure_threshold @node_pool_pressure_percent / 100 9 | 10 | alias Ballast.NodePool 11 | alias Ballast.PoolPolicy.{Changeset, ManagedPool} 12 | alias Ballast.Sys.Instrumentation, as: Inst 13 | 14 | defstruct [ 15 | :cluster, 16 | :instance_count, 17 | :minimum_count, 18 | :maximum_count, 19 | :project, 20 | :location, 21 | :name, 22 | :data, 23 | :under_pressure, 24 | :zone_count 25 | ] 26 | 27 | @typedoc "Node pool metadata" 28 | @type t :: %__MODULE__{ 29 | cluster: String.t(), 30 | project: String.t(), 31 | location: String.t(), 32 | instance_count: integer | nil, 33 | minimum_count: integer | nil, 34 | maximum_count: integer | nil, 35 | zone_count: integer | nil, 36 | name: String.t(), 37 | data: map | nil, 38 | under_pressure: boolean | nil 39 | } 40 | 41 | @doc """ 42 | Creates a `Ballast.NodePool` struct from a CRD resource's `spec` attribute 43 | 44 | ## Example 45 | iex> resource = %{"spec" => %{"clusterName" => "foo", "projectId" => "bar", "location" => "baz", "poolName" => "qux"}} 46 | ...> Ballast.NodePool.new(resource) 47 | %Ballast.NodePool{cluster: "foo", project: "bar", location: "baz", name: "qux", data: %{}} 48 | """ 49 | @spec new(map) :: t 50 | def new(%{"spec" => spec}), do: new(spec) 51 | 52 | def new(%{"projectId" => p, "location" => l, "clusterName" => c, "poolName" => n}) do 53 | new(p, l, c, n) 54 | end 55 | 56 | def new(_invalid) do 57 | # TODO: emit formatting error (should ship a openapi spec w/ bonny) 58 | # TODO: how to handle return value here ...? 59 | %__MODULE__{} 60 | end 61 | 62 | @doc """ 63 | Creates a `Ballast.NodePool` struct with or without metadata. Used for `get` queries and responses. 64 | 65 | ## Example 66 | NodePool without response data 67 | iex> Ballast.NodePool.new("project", "location", "cluster", "name") 68 | %Ballast.NodePool{cluster: "cluster", project: "project", location: "location", name: "name", data: %{}} 69 | 70 | NodePool with response data 71 | iex> Ballast.NodePool.new("project", "location", "cluster", "name", %{"foo" => "bar"}) 72 | %Ballast.NodePool{cluster: "cluster", project: "project", location: "location", name: "name", data: %{"foo" => "bar"}} 73 | """ 74 | @spec new(String.t(), String.t(), String.t(), String.t(), map | nil) :: t 75 | def new(project, location, cluster, name, data \\ %{}) do 76 | %NodePool{cluster: cluster, project: project, location: location, name: name, data: data} 77 | end 78 | 79 | @doc """ 80 | Updates a `NodePool`'s `:under_pressure` field based on `under_pressure?/1` 81 | """ 82 | @spec set_pressure_status(NodePool.t()) :: NodePool.t() 83 | def set_pressure_status(pool) do 84 | under_pressure = NodePool.under_pressure?(pool) 85 | %NodePool{pool | under_pressure: under_pressure} 86 | end 87 | 88 | @doc """ 89 | Gets a node pool. 
90 | 91 | ## Example 92 | iex> node_pool = Ballast.NodePool.new("my-project", "us-central1-a", "my-cluster", "my-pool") 93 | ...> {:ok, conn} = Ballast.conn() 94 | ...> Ballast.NodePool.get(node_pool, conn) 95 | {:ok, %Ballast.NodePool{cluster: "my-cluster", location: "us-central1-a", name: "my-pool", project: "my-project", instance_count: 10, data: %{autoscaling: %{enabled: true, maxNodeCount: 5, minNodeCount: 3}, instanceGroupUrls: ["https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-a/instanceGroupManagers/gke-demo-demo-preemptible"], name: "demo-preemptible", selfLink: "https://container.googleapis.com/v1/projects/my-project/zones/us-central1-a/clusters/demo/nodePools/demo-preemptible", status: "RUNNING", initialNodeCount: 1}}} 96 | """ 97 | @spec get(t, Tesla.Client.t()) :: {:ok, t} | {:error, Tesla.Env.t()} 98 | def get(pool, conn) do 99 | {duration, response} = :timer.tc(adapter_for(pool), :get, [pool, conn]) 100 | measurements = %{duration: duration} 101 | 102 | case response do 103 | {:ok, pool_w_instance_count} -> 104 | Inst.provider_get_pool_succeeded(measurements, %{pool: pool_w_instance_count.name}) 105 | {:ok, pool_w_instance_count} 106 | 107 | {:error, %Tesla.Env{status: status}} = error -> 108 | Inst.provider_get_pool_failed(measurements, %{status: status, pool: pool.name}) 109 | error 110 | end 111 | end 112 | 113 | @doc """ 114 | Scales a `NodePool` 115 | 116 | ## Examples 117 | iex> node_pool = Ballast.NodePool.new("my-proj", "my-loc", "my-cluster", "my-pool") 118 | ...> managed_pool = %Ballast.PoolPolicy.ManagedPool{pool: node_pool, minimum_percent: 30, minimum_instances: 1} 119 | ...> source_pool = %Ballast.NodePool{instance_count: 10} 120 | ...> policy = %Ballast.PoolPolicy{name: "gke-scale-nodepool-test", pool: source_pool, managed_pools: [managed_pool]} 121 | ...> changeset = Ballast.PoolPolicy.Changeset.new(managed_pool, policy) 122 | ...> Ballast.NodePool.scale(changeset, Ballast.conn()) 123 | :ok 124 | """ 125 | @spec scale(Changeset.t(), Tesla.Client.t()) :: :ok | {:error, Tesla.Env.t()} 126 | def scale(%Changeset{strategy: :nothing} = changeset, _) do 127 | {measurements, metadata} = Changeset.measurements_and_metadata(changeset) 128 | Inst.provider_scale_pool_skipped(measurements, metadata) 129 | :ok 130 | end 131 | 132 | def scale( 133 | %Changeset{ 134 | minimum_count: desired_minimum, 135 | managed_pool: %ManagedPool{pool: %NodePool{instance_count: current_minimum}} 136 | } = changeset, 137 | _ 138 | ) 139 | when desired_minimum == current_minimum do 140 | {measurements, metadata} = Changeset.measurements_and_metadata(changeset) 141 | Inst.provider_scale_pool_skipped(measurements, metadata) 142 | :ok 143 | end 144 | 145 | def scale(%Changeset{} = changeset, conn) do 146 | adapter = adapter_for(changeset.managed_pool.pool) 147 | {duration, response} = :timer.tc(adapter, :scale, [changeset, conn]) 148 | {measurements, metadata} = Changeset.measurements_and_metadata(changeset) 149 | 150 | measurements = Map.put(measurements, :duration, duration) 151 | 152 | case response do 153 | {:ok, _} -> 154 | Inst.provider_scale_pool_succeeded(measurements, metadata) 155 | :ok 156 | 157 | {:error, %Tesla.Env{status: status}} = error -> 158 | metadata = Map.put(metadata, :status, status) 159 | Inst.provider_scale_pool_failed(measurements, metadata) 160 | error 161 | end 162 | end 163 | 164 | @doc """ 165 | Determines if autoscaling is enabled for a pool 166 | """ 167 | @spec autoscaling_enabled?(Ballast.NodePool.t()) :: boolean() 168 | def 
autoscaling_enabled?(pool), do: adapter_for(pool).autoscaling_enabled?(pool) 169 | 170 | @doc """ 171 | Determine if a pool is under pressure. 172 | 173 | A pool is considered under pressure when more than #{@node_pool_pressure_percent} percent of its nodes are under pressure. 174 | 175 | Notes: 176 | * Assumes there is pressure if there is an error from the k8s API. 177 | * Considers no nodes returned as under pressure. 178 | """ 179 | @spec under_pressure?(Ballast.NodePool.t()) :: boolean() 180 | def under_pressure?(%Ballast.NodePool{} = pool) do 181 | label_selector = adapter_for(pool).label_selector(pool) 182 | params = %{labelSelector: label_selector} 183 | 184 | with {:ok, [_h | _t] = nodes} <- Ballast.Kube.Node.list(params) do 185 | nodes_under_pressure = Enum.filter(nodes, fn node -> node_under_pressure?(node) end) 186 | 187 | percent_under_pressure = length(nodes_under_pressure) / length(nodes) 188 | percent_under_pressure >= @node_pool_pressure_threshold 189 | else 190 | _error -> true 191 | end 192 | end 193 | 194 | @spec node_under_pressure?(map) :: boolean() 195 | defp node_under_pressure?(node) do 196 | !Ballast.Kube.Node.ready?(node) || Ballast.Kube.Node.resources_constrained?(node) 197 | end 198 | 199 | # Mocking out for multi-provider. Should take a NodePool or PoolPolicy and determine which cloud provider to use. 200 | @spec adapter_for(Ballast.NodePool.t()) :: module() 201 | defp adapter_for(_), do: @adapter 202 | end 203 | -------------------------------------------------------------------------------- /lib/ballast/node_pool/adapters/gke.ex: -------------------------------------------------------------------------------- 1 | defmodule Ballast.NodePool.Adapters.GKE do 2 | @moduledoc """ 3 | GKE `Ballast.NodePool` implementation. 4 | 5 | Note: `@spec`s are added to `@impl` here because credo has an [open proposal](https://github.com/rrrene/credo/issues/427) to solve the issue. 6 | """ 7 | @behaviour Ballast.NodePool.Adapters 8 | @instance_group_manager_pattern ~r{projects/(?[^/]+)/zones/(?[^/]+)/instanceGroupManagers/(?[^/]+)} 9 | 10 | alias Ballast.NodePool 11 | alias GoogleApi.Container.V1.Api.Projects, as: Container 12 | alias GoogleApi.Compute.V1.Api.InstanceGroups 13 | 14 | # HACK: container_projects_locations_clusters_node_pools_get is in master, but there is a syntax error. 15 | # Manually including function here. 16 | alias GoogleApi.Container.V1.Connection 17 | alias GoogleApi.Gax.{Request, Response} 18 | 19 | @impl true 20 | @spec label_selector() :: binary 21 | @doc """ 22 | Returns the label selector to get all GKE nodes in the cluster. 23 | 24 | ## Example 25 | iex> Ballast.NodePool.Adapters.GKE.label_selector() 26 | "cloud.google.com/gke-nodepool" 27 | """ 28 | def label_selector(), do: "cloud.google.com/gke-nodepool" 29 | 30 | @impl true 31 | @spec label_selector(Ballast.NodePool.t()) :: binary 32 | @doc """ 33 | Returns the label selector to get all the given pool's nodes in the cluster. 
34 | 35 | ## Example 36 | NodePool without response data 37 | iex> pool = Ballast.NodePool.new("foo", "bar", "baz", "qux") 38 | ...> Ballast.NodePool.Adapters.GKE.label_selector(pool) 39 | "cloud.google.com/gke-nodepool=qux" 40 | """ 41 | def label_selector(%NodePool{name: name}) do 42 | "#{label_selector()}=#{name}" 43 | end 44 | 45 | @doc """ 46 | Generates the URL identifier for the GKE API 47 | 48 | ## Example 49 | NodePool without response data 50 | iex> pool = Ballast.NodePool.new("foo", "bar", "baz", "qux") 51 | ...> Ballast.NodePool.Adapters.GKE.id(pool) 52 | "projects/foo/locations/bar/clusters/baz/nodePools/qux" 53 | """ 54 | @impl true 55 | @spec id(Ballast.NodePool.t()) :: String.t() 56 | def id(%NodePool{} = pool) do 57 | "projects/#{pool.project}/locations/#{pool.location}/clusters/#{pool.cluster}/nodePools/#{pool.name}" 58 | end 59 | 60 | @impl true 61 | @spec scale(Ballast.PoolPolicy.Changeset.t(), Tesla.Client.t()) :: {:ok, map} | {:error, Tesla.Env.t()} 62 | def scale(%Ballast.PoolPolicy.Changeset{} = changeset, conn) do 63 | case autoscaling_enabled?(changeset.managed_pool.pool) do 64 | true -> set_autoscaling(changeset.managed_pool.pool, changeset.minimum_count, conn) 65 | false -> set_size(changeset.managed_pool.pool, changeset.minimum_count, conn) 66 | end 67 | end 68 | 69 | @impl true 70 | @spec get(Ballast.NodePool.t(), Tesla.Client.t()) :: {:ok, Ballast.NodePool.t()} | {:error, Tesla.Env.t()} 71 | def get(%NodePool{} = pool, conn) do 72 | id = id(pool) 73 | response = container_projects_locations_clusters_node_pools_get(conn, id) 74 | 75 | with {:ok, data} <- response, 76 | pool_with_data <- %NodePool{pool | data: data}, 77 | pool_with_min_max <- set_min_max_counts(pool_with_data), 78 | pool_with_all_counts <- set_instance_count(pool_with_min_max, conn) do 79 | {:ok, pool_with_all_counts} 80 | end 81 | end 82 | 83 | @spec set_instance_count(Ballast.NodePool.t(), Tesla.Client.t()) :: Ballast.NodePool.t() 84 | defp set_instance_count(%NodePool{data: %{instanceGroupUrls: urls}} = pool, conn) do 85 | instance_count = Enum.reduce(urls, 0, fn url, agg -> agg + get_instance_group_size(url, conn) end) 86 | %NodePool{pool | instance_count: instance_count, zone_count: length(urls)} 87 | end 88 | 89 | @spec get_instance_group_size(String.t(), Tesla.Client.t()) :: integer 90 | defp get_instance_group_size(url, conn) do 91 | with {:ok, project, zone, name} <- parse_instance_group_manager_params(url), 92 | {:ok, %{size: size}} <- InstanceGroups.compute_instance_groups_get(conn, project, zone, name) do 93 | Ballast.Sys.Instrumentation.provider_get_pool_size_succeeded(%{size: size}, %{name: name}) 94 | size 95 | else 96 | _ -> 97 | Ballast.Sys.Instrumentation.provider_get_pool_size_failed(%{}, %{url: url}) 98 | 0 99 | end 100 | end 101 | 102 | @spec set_min_max_counts(Ballast.NodePool.t()) :: Ballast.NodePool.t() 103 | defp set_min_max_counts( 104 | %NodePool{data: %{autoscaling: %{maxNodeCount: max, minNodeCount: min, enabled: true}}} = pool 105 | ) do 106 | %NodePool{pool | minimum_count: min, maximum_count: max} 107 | end 108 | 109 | defp set_min_max_counts(pool), do: pool 110 | 111 | @spec set_autoscaling(Ballast.NodePool.t(), pos_integer, Tesla.Client.t()) :: :ok | {:error, Tesla.Env.t()} 112 | defp set_autoscaling(pool, minimum_count, conn) do 113 | id = id(pool) 114 | old_autoscaling = pool.data.autoscaling 115 | new_autoscaling = Map.put(old_autoscaling, :minNodeCount, minimum_count) 116 | body = %{autoscaling: new_autoscaling} 117 | 
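    # Note: only :minNodeCount is updated; the pool's current maxNodeCount/enabled autoscaling settings are carried over unchanged.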
Container.container_projects_locations_clusters_node_pools_set_autoscaling(conn, id, body: body) 118 | end 119 | 120 | @spec set_size(Ballast.NodePool.t(), pos_integer, Tesla.Client.t()) :: :ok | {:error, Tesla.Env.t()} 121 | defp set_size(pool, minimum_count, conn) do 122 | id = id(pool) 123 | body = %{nodeCount: minimum_count} 124 | Container.container_projects_locations_clusters_node_pools_set_size(conn, id, body: body) 125 | end 126 | 127 | @doc """ 128 | Parses a Google API `instanceGroupUrl` into arguments for `GoogleApi.Compute.V1.Api.InstanceGroups`. 129 | 130 | *Note:* The URL expected is for `instanceGroupManagers`, but `instanceGroups` use the same ID and provide a `size` response. 131 | 132 | ## Examples 133 | iex> Ballast.NodePool.Adapters.GKE.parse_instance_group_manager_params("/projects/my-project/zones/my-zone/instanceGroupManagers/my-igm") 134 | {:ok, "my-project", "my-zone", "my-igm"} 135 | 136 | iex> Ballast.NodePool.Adapters.GKE.parse_instance_group_manager_params("projects/zones/instanceGroupManagers") 137 | {:error, :invalid_instance_group_url} 138 | """ 139 | @spec parse_instance_group_manager_params(String.t()) :: {:ok, String.t(), String.t(), String.t()} | {:error, atom} 140 | def parse_instance_group_manager_params(url) do 141 | @instance_group_manager_pattern 142 | |> Regex.named_captures(url) 143 | |> validate_instance_group_manager_params 144 | end 145 | 146 | @impl Ballast.NodePool.Adapters 147 | @spec autoscaling_enabled?(Ballast.NodePool.t()) :: boolean() 148 | def autoscaling_enabled?(%NodePool{data: %{autoscaling: %{enabled: true}}}), do: true 149 | def autoscaling_enabled?(_), do: false 150 | 151 | @spec validate_instance_group_manager_params(map) :: 152 | {:ok, binary, binary, binary} | {:error, :invalid_instance_group_url} 153 | defp validate_instance_group_manager_params(%{"project" => p, "zone" => z, "name" => n}), do: {:ok, p, z, n} 154 | defp validate_instance_group_manager_params(_), do: {:error, :invalid_instance_group_url} 155 | 156 | # HACK 157 | @spec container_projects_locations_clusters_node_pools_get( 158 | Tesla.Client.t(), 159 | String.t(), 160 | Keyword.t() | nil, 161 | Keyword.t() | nil 162 | ) :: {:ok, %GoogleApi.Container.V1.Model.NodePool{}} | {:error, any()} 163 | def container_projects_locations_clusters_node_pools_get( 164 | connection, 165 | name, 166 | optional_params \\ [], 167 | opts \\ [] 168 | ) do 169 | optional_params_config = %{ 170 | :"$.xgafv" => :query, 171 | :access_token => :query, 172 | :alt => :query, 173 | :callback => :query, 174 | :fields => :query, 175 | :key => :query, 176 | :oauth_token => :query, 177 | :prettyPrint => :query, 178 | :quotaUser => :query, 179 | :uploadType => :query, 180 | :upload_protocol => :query, 181 | :clusterId => :query, 182 | :nodePoolId => :query, 183 | :projectId => :query, 184 | :zone => :query 185 | } 186 | 187 | request = 188 | Request.new() 189 | |> Request.method(:get) 190 | |> Request.url("/v1/{+name}", %{"name" => URI.encode(name, &URI.char_unreserved?/1)}) 191 | |> Request.add_optional_params(optional_params_config, optional_params) 192 | 193 | connection 194 | |> Connection.execute(request) 195 | |> Response.decode(opts ++ [struct: %GoogleApi.Container.V1.Model.NodePool{}]) 196 | end 197 | end 198 | -------------------------------------------------------------------------------- /lib/ballast/pool_policy/changeset.ex: -------------------------------------------------------------------------------- 1 | defmodule Ballast.PoolPolicy.Changeset do 2 | @moduledoc """ 3 | Changes to 
apply to a managed pool 4 | """ 5 | 6 | alias Ballast.{NodePool, PoolPolicy} 7 | alias Ballast.PoolPolicy.{Changeset, ManagedPool} 8 | 9 | defstruct [:managed_pool, :minimum_count, :policy, :strategy] 10 | 11 | @typedoc """ 12 | * `managed_pool` - the managed pool changeset will be applied to 13 | * `minimum_count` - the new minimum count for the autoscaler or cluster 14 | * `strategy` - what ballast thinks is happening to the source pool - poor name. 15 | * `policy` - the policy being applied 16 | """ 17 | @type t :: %__MODULE__{ 18 | managed_pool: ManagedPool.t(), 19 | minimum_count: integer, 20 | strategy: :nothing | :scale_up | :scale_down, 21 | policy: PoolPolicy.t() 22 | } 23 | 24 | @doc """ 25 | Creates a new `Changeset` given a `Ballast.PoolPolicy.ManagedPool` and a current source `NodePool`. 26 | 27 | ## Examples 28 | iex> managed_pool = %Ballast.PoolPolicy.ManagedPool{pool: %Ballast.NodePool{name: "managed-pool"}, minimum_percent: 30, minimum_instances: 1} 29 | ...> source_pool = %Ballast.NodePool{instance_count: 10} 30 | ...> policy = %Ballast.PoolPolicy{pool: source_pool, managed_pools: [managed_pool]} 31 | ...> Ballast.PoolPolicy.Changeset.new(managed_pool, policy) 32 | %Ballast.PoolPolicy.Changeset{ 33 | managed_pool: %Ballast.PoolPolicy.ManagedPool{ 34 | minimum_instances: 1, 35 | minimum_percent: 30, 36 | pool: %Ballast.NodePool{ 37 | cluster: nil, 38 | data: nil, 39 | instance_count: nil, 40 | location: nil, 41 | maximum_count: nil, 42 | minimum_count: nil, 43 | name: "managed-pool", 44 | project: nil, 45 | under_pressure: nil, 46 | zone_count: nil 47 | } 48 | }, 49 | minimum_count: 3, 50 | policy: %Ballast.PoolPolicy{ 51 | changesets: [], 52 | cooldown_seconds: nil, 53 | managed_pools: [ 54 | %Ballast.PoolPolicy.ManagedPool{ 55 | minimum_instances: 1, 56 | minimum_percent: 30, 57 | pool: %Ballast.NodePool{ 58 | cluster: nil, 59 | data: nil, 60 | instance_count: nil, 61 | location: nil, 62 | maximum_count: nil, 63 | minimum_count: nil, 64 | name: "managed-pool", 65 | project: nil, 66 | under_pressure: nil, 67 | zone_count: nil 68 | } 69 | } 70 | ], 71 | name: nil, 72 | pool: %Ballast.NodePool{ 73 | cluster: nil, 74 | data: nil, 75 | instance_count: 10, 76 | location: nil, 77 | maximum_count: nil, 78 | minimum_count: nil, 79 | name: nil, 80 | project: nil, 81 | under_pressure: nil, 82 | zone_count: nil 83 | } 84 | }, 85 | strategy: :scale_down 86 | } 87 | """ 88 | @spec new(ManagedPool.t(), PoolPolicy.t()) :: t 89 | def new(%ManagedPool{} = managed_pool, %PoolPolicy{} = policy) do 90 | changeset = %Changeset{ 91 | managed_pool: managed_pool, 92 | minimum_count: managed_pool.minimum_instances, 93 | strategy: strategy(managed_pool.pool, policy.pool), 94 | policy: policy 95 | } 96 | 97 | calculate_minimum_and_update(changeset) 98 | end 99 | 100 | @spec calculate_minimum_and_update(Changeset.t()) :: Changeset.t() 101 | def calculate_minimum_and_update(%Changeset{} = changeset) do 102 | calculated_minimum = 103 | calc_new_minimum_count( 104 | changeset.policy.pool.instance_count, 105 | changeset.policy.pool.zone_count, 106 | changeset.managed_pool.minimum_percent, 107 | changeset.managed_pool.minimum_instances, 108 | changeset.managed_pool.pool.maximum_count 109 | ) 110 | 111 | %Changeset{changeset | minimum_count: calculated_minimum} 112 | end 113 | 114 | @doc "Metrics/logging metadata and measurements" 115 | @spec measurements_and_metadata(Changeset.t()) :: {map, map} 116 | def measurements_and_metadata(%Changeset{} = changeset), do: {measurements(changeset), 
metadata(changeset)} 117 | 118 | @spec measurements(Changeset.t()) :: map 119 | defp measurements(%Changeset{} = changeset) do 120 | %{ 121 | source_pool_current_instance_count: changeset.policy.pool.instance_count, 122 | source_pool_zone_count: changeset.policy.pool.zone_count, 123 | managed_pool_current_instance_count: changeset.managed_pool.pool.instance_count, 124 | managed_pool_current_autoscaling_minimum: changeset.managed_pool.pool.minimum_count, 125 | managed_pool_current_autoscaling_maximum: changeset.managed_pool.pool.maximum_count, 126 | managed_pool_conf_minimum_percent: changeset.managed_pool.minimum_percent, 127 | managed_pool_conf_minimum_instances: changeset.managed_pool.minimum_instances, 128 | managed_pool_new_autoscaling_minimum: changeset.minimum_count 129 | } 130 | end 131 | 132 | @spec metadata(Changeset.t()) :: map 133 | defp metadata(%Changeset{} = changeset) do 134 | %{pool: changeset.managed_pool.pool.name, strategy: changeset.strategy, policy: changeset.policy.name} 135 | end 136 | 137 | @doc """ 138 | Rules: 139 | * If source pool is zero, assume scale to 0 and :scale_down 140 | * NOTE: this is possibly *not* true for Preemptible source pools 141 | * If the source pool has more nodes 142 | * calculate and scale UP that managed pool's minimum count. `:scale_up` 143 | * Else; source is lower because its scaling down, or preempted/stockedout. 144 | * If source is under pressure 145 | * `:nothing` Nothing to do, autoscaler should be adding nodes to source and managed pools 146 | * Else 147 | * `:scale_down` calculate and scale DOWN that managed pool's minimum count. 148 | * Note: There is a case when the source pools count is 0, the managed pool will be scaled down. This isn't optimal, but we dont know _why_ the source pool is zero. To mitigate scaling managed pools to zero, set the `minimumInstances`. 
149 | 150 | 151 | ## Examples 152 | When the source pool instance count is zero 153 | iex> managed_pool = %Ballast.NodePool{instance_count: 5} 154 | ...> source_pool = %Ballast.NodePool{instance_count: 0} 155 | ...> Ballast.PoolPolicy.Changeset.strategy(managed_pool, source_pool) 156 | :scale_down 157 | 158 | When the source pool instance count is greater 159 | iex> managed_pool = %Ballast.NodePool{instance_count: 5} 160 | ...> source_pool = %Ballast.NodePool{instance_count: 10, under_pressure: false} 161 | ...> Ballast.PoolPolicy.Changeset.strategy(managed_pool, source_pool) 162 | :scale_up 163 | 164 | When the source pool instance count is lower and the source pool is under pressure 165 | iex> managed_pool = %Ballast.NodePool{instance_count: 5} 166 | ...> source_pool = %Ballast.NodePool{instance_count: 1, under_pressure: true} 167 | ...> Ballast.PoolPolicy.Changeset.strategy(managed_pool, source_pool) 168 | :nothing 169 | 170 | When the source pool instance count is lower and the source pool is not under pressure 171 | iex> managed_pool = %Ballast.NodePool{instance_count: 5} 172 | ...> source_pool = %Ballast.NodePool{instance_count: 1, under_pressure: false} 173 | ...> Ballast.PoolPolicy.Changeset.strategy(managed_pool, source_pool) 174 | :scale_down 175 | """ 176 | @spec strategy(NodePool.t(), NodePool.t()) :: :nothing | :scale_up | :scale_down 177 | def strategy(_, %NodePool{instance_count: 0}), do: :scale_down 178 | def strategy(%NodePool{instance_count: mic}, %NodePool{instance_count: sic}) when sic >= mic, do: :scale_up 179 | def strategy(_, %NodePool{under_pressure: true}), do: :nothing 180 | def strategy(_, _), do: :scale_down 181 | 182 | @doc """ 183 | Calculates the managed pool's new minimum instance count. 184 | 185 | ## Examples 186 | When the calculated count is less than the minimum count, return minimum 187 | iex> {current_source_instance_count, source_zone_count, minimum_percent, minimum_count, maximum_count} = {10, 1, 10, 2, 100} 188 | ...> Ballast.PoolPolicy.Changeset.calc_new_minimum_count(current_source_instance_count, source_zone_count, minimum_percent, minimum_count, maximum_count) 189 | 2 190 | 191 | When the calculated count is greater than minimum count, return calculated 192 | iex> {current_source_instance_count, source_zone_count, minimum_percent, minimum_count, maximum_count} = {10, 1, 50, 2, 100} 193 | ...> Ballast.PoolPolicy.Changeset.calc_new_minimum_count(current_source_instance_count, source_zone_count, minimum_percent, minimum_count, maximum_count) 194 | 5 195 | 196 | When the calculated count is greater than maximum count, return maximum 197 | iex> {current_source_instance_count, source_zone_count, minimum_percent, minimum_count, maximum_count} = {200, 1, 100, 2, 33} 198 | ...> Ballast.PoolPolicy.Changeset.calc_new_minimum_count(current_source_instance_count, source_zone_count, minimum_percent, minimum_count, maximum_count) 199 | 33 200 | 201 | For a regional cluster when the calculated count is greater than minimum count, return calculated 202 | iex> {current_source_instance_count, source_zone_count, minimum_percent, minimum_count, maximum_count} = {10, 3, 50, 2, 100} 203 | ...> Ballast.PoolPolicy.Changeset.calc_new_minimum_count(current_source_instance_count, source_zone_count, minimum_percent, minimum_count, maximum_count) 204 | 2 205 | """ 206 | @spec calc_new_minimum_count(integer, integer, integer, integer, integer) :: integer 207 | def calc_new_minimum_count( 208 | source_pool_current_count, 209 | source_pool_zone_count, 210 | minimum_percent, 
211 | minimum_instances, 212 | managed_pool_max_count 213 | ) do 214 | source_pool_zone_count = source_pool_zone_count || 1 215 | minimum_instances_for_cluster = source_pool_current_count * (minimum_percent / 100) 216 | new_minimum_count = round(minimum_instances_for_cluster / source_pool_zone_count) 217 | 218 | do_calc_new_minimum_count(new_minimum_count, minimum_instances, managed_pool_max_count) 219 | end 220 | 221 | defp do_calc_new_minimum_count(new_minimum_count, _, managed_pool_max_count) 222 | when is_integer(managed_pool_max_count) and new_minimum_count >= managed_pool_max_count, 223 | do: managed_pool_max_count 224 | 225 | defp do_calc_new_minimum_count(new_minimum_count, minimum_instances, _) 226 | when new_minimum_count > minimum_instances, 227 | do: new_minimum_count 228 | 229 | defp do_calc_new_minimum_count(_, minimum_instances, _), do: minimum_instances 230 | end 231 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ballast 2 | 3 | Ballast manages kubernetes node pools to give you the cost of preemptible nodes with the confidence of on demand nodes. 4 | 5 | - [Ballast](#Ballast) 6 | - [Getting Started](#Getting-Started) 7 | - [Create a GCP Service Account](#Create-a-GCP-Service-Account) 8 | - [Create a Kubernetes Secret w/ the GCP Service Account Keys](#Create-a-Kubernetes-Secret-w-the-GCP-Service-Account-Keys) 9 | - [Deploy the operator](#Deploy-the-operator) 10 | - [Environment Variables](#Environment-Variables) 11 | - [Managing Ballast PoolPolicies](#Managing-Ballast-PoolPolicies) 12 | - [Example `PoolPolicy`](#Example-PoolPolicy) 13 | - [Optimizing costs with preemptible pools and node affinity](#Optimizing-costs-with-preemptible-pools-and-node-affinity) 14 | - [Contributing](#Contributing) 15 | - [Setting up a development/test cluster](#Setting-up-a-developmenttest-cluster) 16 | - [Using docker-desktop](#Using-docker-desktop) 17 | - [Using terraform and GKE](#Using-terraform-and-GKE) 18 | - [Deploying operator CRDs to test against](#Deploying-operator-CRDs-to-test-against) 19 | - [Testing](#Testing) 20 | - [Developing](#Developing) 21 | - [Links](#Links) 22 | 23 | ## Getting Started 24 | 25 | There are 3 steps to deploy the **ballast-operator**: 26 | 27 | 1. Create a GCP Service Account 28 | 2. Create a Kubernetes Secret w/ the GCP Service Account Keys 29 | 3. Deploy the operator 30 | 31 | ### Create a GCP Service Account 32 | 33 | The ballast `Deployment` will need to run as a _GCP service account_ with access to your clusters' node pools. 34 | 35 | The following script will create a GCP service account with permissions to view and manage cluster pool sizes. 36 | 37 | ```shell 38 | export GCP_PROJECT=my-project-id 39 | export SERVICE_ACCOUNT=ballast-operator 40 | 41 | gcloud iam service-accounts create ${SERVICE_ACCOUNT} 42 | 43 | gcloud projects add-iam-policy-binding ${GCP_PROJECT} \ 44 | --member serviceAccount:${SERVICE_ACCOUNT}@${GCP_PROJECT}.iam.gserviceaccount.com \ 45 | --role roles/container.admin 46 | 47 | gcloud projects add-iam-policy-binding ${GCP_PROJECT} \ 48 | --member serviceAccount:${SERVICE_ACCOUNT}@${GCP_PROJECT}.iam.gserviceaccount.com \ 49 | --role roles/compute.viewer 50 | ``` 51 | 52 | *Note:* ballast only needs a few permissions. 
Security-minded users may prefer to create a custom role with the following permissions instead: 53 | 54 | - container.clusters.get 55 | - container.clusters.update 56 | - compute.instanceGroups.get 57 | 58 | ### Create a Kubernetes Secret w/ the GCP Service Account Keys 59 | 60 | The following script will create a secret named `ballast-operator-sa-keys` that contains the GCP service account JSON keys. 61 | 62 | ```shell 63 | gcloud iam service-accounts keys create /tmp/ballast-keys.json \ 64 | --iam-account ${SERVICE_ACCOUNT}@${GCP_PROJECT}.iam.gserviceaccount.com 65 | 66 | kubectl create secret generic ballast-operator-sa-keys --from-file=gcp.json=/tmp/ballast-keys.json 67 | 68 | rm /tmp/ballast-keys.json 69 | ``` 70 | 71 | ### Deploy the operator 72 | 73 | A kustomization [`base`](./manifests/base/kustomization.yaml) is included that deploys: 74 | 75 | - [`ClusterRole`](./manifests/base/cluster_role.yaml) 76 | - [`ClusterRoleBinding`](./manifests/base/cluster_role_binding.yaml) 77 | - [`CustomResourceDefinition`](./manifests/base/custom_resource_definition.yaml) 78 | - [`Deployment`](./manifests/base/deployment.yaml) 79 | - [`PodDisruptionBudget`](./manifests/base/pod_disruption_budget.yaml) 80 | - [`ServiceAccount`](./manifests/base/service_account.yaml) 81 | - [`Service`](./manifests/base/service.yaml) 82 | 83 | The kustomization file expects `secret/ballast-operator-sa-keys` (created above) to exist in the same namespace the operator is deployed in. 84 | 85 | ```shell 86 | kubectl apply -k ./manifests/base/ 87 | ``` 88 | 89 | The operator exposes prometheus metrics on port 9323 at `/metrics`. 90 | 91 | #### Environment Variables 92 | 93 | - `BALLAST_METRICS_PORT`=9323 94 | - `BALLAST_DEBUG`=true 95 | - `GOOGLE_APPLICATION_CREDENTIALS`=/abs/path/to/creds.json 96 | 97 | ## Managing Ballast CRDs 98 | 99 | ### Example `PoolPolicy` 100 | 101 | Ballast requires that all node-pools be created in advance. Ballast only scales *managed* pools' _minimum count_ (or _current size_ when autoscaling is disabled) to match the required minimums of the *source* pool. 102 | 103 | ```yaml 104 | apiVersion: ballast.bonny.run/v1 105 | kind: PoolPolicy 106 | metadata: 107 | name: ballast-example 108 | spec: 109 | projectId: gcp-project-id-here 110 | location: us-central1-a # zone that the main/source pool of preemptible nodes exists in 111 | clusterName: your-cluster-name 112 | poolName: my-main-pool # name of the main/source pool 113 | cooldownSeconds: 300 114 | managedPools: # list of pools to scale relative to main pool 115 | - poolName: pool-b 116 | minimumInstances: 1 117 | minimumPercent: 25 118 | location: us-central1-a 119 | - poolName: pool-c 120 | minimumInstances: 5 121 | minimumPercent: 50 122 | location: us-central1-a 123 | ``` 124 | 125 | Multiple managed pools can be specified. A mix of autoscaling and fixed-size pools can be used, as well as pools of different instance types/sizes. 126 | 127 | ### Optimizing costs with preemptible pools and node affinity 128 | 129 | The following steps will cause Kubernetes to *prefer* scheduling workloads on your preemptible nodes, but schedule workloads on your on-demand pools when it must. 130 | 131 | 1. Add the label `node-group:a-good-name-for-your-node-group` to **_all_** of your node pools that will be referenced in your `PoolPolicy`. 132 | 2. Add the following affinity to your `Pod`, `Deployment`, or other workload:
154 | 
155 | ### Example `EvictionPolicy`
156 | 
157 | Ballast also supports a CRD called an `EvictionPolicy`. Eviction policies allow you to specify rules for evicting pods from nodes. This can be useful for evicting pods off of unpreferred nodes, effectively approximating `preferredDuringSchedulingPreferredDuringExecution`.
158 | 
159 | The schema is:
160 | 
161 | - `mode` (*all*, *unpreferred*) evict pods off all nodes, or only off unpreferred nodes based on `preferredDuringSchedulingIgnoredDuringExecution`; Default: *all*
162 | - `maxLifetime` maximum lifetime of a pod matching `selector`; Default: *600* seconds
163 | - `selector` matchLabels and matchExpressions for selecting pods to evict
164 | 
165 | ```yaml
166 | apiVersion: ballast.bonny.run/v1
167 | kind: EvictionPolicy
168 | metadata:
169 |   name: unpreferred-nodes-nginx
170 | spec:
171 |   mode: unpreferred
172 |   maxLifetime: 600
173 |   selector:
174 |     matchLabels:
175 |       app: nginx
176 |     matchExpressions:
177 |       - {key: tier, operator: In, values: [frontend]}
178 |       - {key: environment, operator: NotIn, values: [dev]}
179 | ```
180 | 
181 | ## Contributing
182 | 
183 | Ballast is built with the [bonny operator framework](https://github.com/coryodaniel/bonny) and Elixir.
184 | 
185 | [Terraform](https://terraform.io) is used to provision test clusters.
186 | 
187 | A number of make commands exist to aid in development and testing:
188 | 
189 | ```shell
190 | make help
191 | ```
192 | 
193 | ### Setting up a development/test cluster
194 | 
195 | #### Using docker-desktop
196 | 
197 | Two test suites are provided; both require a functioning kubernetes server. [Docker Desktop](https://www.docker.com/products/docker-desktop) ships with a version of kubernetes so you can get started locally quickly.
198 | 
199 | Alternatively you can use [terraform](https://www.terraform.io/downloads.html) to provision a cluster on GKE with `make dev.cluster.apply`. You will be charged for resources when using this approach.
200 | 
201 | #### Using terraform and GKE
202 | 
203 | First, you will need to configure terraform with your GCP project and credentials:
204 | 
205 | ```shell
206 | touch ./terraform/terraform.tfvars
207 | echo 'gcp_project = "my-project-id"' >> ./terraform/terraform.tfvars
208 | echo 'gcp_credentials_path = "path/to/my/gcp-credentials.json"' >> ./terraform/terraform.tfvars
209 | ```
210 | 
211 | Now create the cluster (this can take a while):
212 | 
213 | ```shell
214 | make dev.cluster.apply
215 | ```
216 | 
217 | When you are done, destroy the cluster with:
218 | 
219 | ```shell
220 | make dev.cluster.delete
221 | ```
222 | 
223 | ### Deploying operator CRDs to test against
224 | 
225 | After setting up your test cluster, you'll need to deploy the operator CRDs so that the cluster has the features the test suite will exercise.
226 | 227 | ```shell 228 | make dev.start.in-cluster 229 | ``` 230 | 231 | ### Testing 232 | 233 | Two test suites exist: 234 | 235 | - `make test` - elixir unit test suite on underlying controller code 236 | - `make integration` - scales node pools on GKE 237 | 238 | Two environment variables must be exported to run the full integration tests. 239 | 240 | ```shell 241 | export GOOGLE_APPLICATION_CREDENTIALS=/abs/path/to/creds.json 242 | export GCP_PROJECT=your-project-id 243 | ``` 244 | 245 | Additionally `make lint` will run the mix code formatter, credo, and dialyzer. 246 | 247 | ### Developing 248 | 249 | You'll need a function cluster to connect to. Ballast will use your `current-context` in `~/.kube/config`. This can be changed in `config/dev.exs`. 250 | 251 | GOOGLE_APPLICATION_CREDENTIALS must be set to start the application. 252 | 253 | ```shell 254 | export GOOGLE_APPLICATION_CREDENTIALS=/abs/path/to/creds.json 255 | ``` 256 | 257 | Then run the following to generate a development manifest, apply it to your cluster, and start `iex`: 258 | 259 | ```shell 260 | make dev.start.iex 261 | ``` 262 | 263 | ## Links 264 | 265 | - GKE Docs 266 | - [Instance Manager Groups REST API](https://cloud.google.com/compute/docs/reference/rest/v1/instanceGroupManagers) 267 | - [Node Pools REST API](https://cloud.google.com/kubernetes-engine/docs/reference/rest/v1/projects.locations.clusters.nodePools) 268 | - [Elixir Container Docs](https://hexdocs.pm/google_api_container/GoogleApi.Container.V1.Api.Projects.html) 269 | - [Elixir Compute Docs](https://hexdocs.pm/google_api_compute) 270 | - GKE API Explorer 271 | - [setAutoscaling](https://cloud.google.com/kubernetes-engine/docs/reference/rest/v1/projects.locations.clusters.nodePools/setAutoscaling) 272 | - [setSize](https://cloud.google.com/kubernetes-engine/docs/reference/rest/v1/projects.locations.clusters.nodePools/setSize) 273 | - [nodePools get](https://cloud.google.com/kubernetes-engine/docs/reference/rest/v1/projects.locations.clusters.nodePools/get) 274 | - [instanceGroups get](https://cloud.google.com/compute/docs/reference/rest/v1/instanceGroups/get) 275 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "accept": {:hex, :accept, "0.3.5", "b33b127abca7cc948bbe6caa4c263369abf1347cfa9d8e699c6d214660f10cd1", [:rebar3], [], "hexpm"}, 3 | "base64url": {:hex, :base64url, "0.0.1", "36a90125f5948e3afd7be97662a1504b934dd5dac78451ca6e9abf85a10286be", [:rebar], [], "hexpm"}, 4 | "bonny": {:hex, :bonny, "0.3.2", "962f8c8c387182a1ccde8d6f6c6348d45e790b59de5a55a6aa7a7d150a2b79d9", [:make, :mix], [{:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:k8s, "~> 0.2", [hex: :k8s, repo: "hexpm", optional: false]}, {:telemetry, ">= 0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm"}, 5 | "bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm"}, 6 | "certifi": {:hex, :certifi, "2.5.1", "867ce347f7c7d78563450a18a6a28a8090331e77fa02380b4a21962a65d36ee5", [:rebar3], [{:parse_trans, "~>3.3", [hex: :parse_trans, repo: "hexpm", optional: false]}], "hexpm"}, 7 | "cowboy": {:hex, :cowboy, "2.6.3", "99aa50e94e685557cad82e704457336a453d4abcb77839ad22dbe71f311fcc06", [:rebar3], [{:cowlib, "~> 2.7.3", [hex: :cowlib, repo: "hexpm", optional: false]}, {:ranch, "~> 1.7.1", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm"}, 8 | "cowlib": 
{:hex, :cowlib, "2.7.3", "a7ffcd0917e6d50b4d5fb28e9e2085a0ceb3c97dea310505f7460ff5ed764ce9", [:rebar3], [], "hexpm"}, 9 | "credo": {:hex, :credo, "1.0.5", "fdea745579f8845315fe6a3b43e2f9f8866839cfbc8562bb72778e9fdaa94214", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm"}, 10 | "dialyxir": {:hex, :dialyxir, "1.0.0-rc.6", "78e97d9c0ff1b5521dd68041193891aebebce52fc3b93463c0a6806874557d7d", [:mix], [{:erlex, "~> 0.2.1", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm"}, 11 | "earmark": {:hex, :earmark, "1.3.2", "b840562ea3d67795ffbb5bd88940b1bed0ed9fa32834915125ea7d02e35888a5", [:mix], [], "hexpm"}, 12 | "erlex": {:hex, :erlex, "0.2.2", "cb0e6878fdf86dc63509eaf2233a71fa73fc383c8362c8ff8e8b6f0c2bb7017c", [:mix], [], "hexpm"}, 13 | "ex_doc": {:hex, :ex_doc, "0.20.2", "1bd0dfb0304bade58beb77f20f21ee3558cc3c753743ae0ddbb0fd7ba2912331", [:mix], [{:earmark, "~> 1.3", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.10", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm"}, 14 | "excoveralls": {:hex, :excoveralls, "0.11.1", "dd677fbdd49114fdbdbf445540ec735808250d56b011077798316505064edb2c", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm"}, 15 | "file_system": {:hex, :file_system, "0.2.7", "e6f7f155970975789f26e77b8b8d8ab084c59844d8ecfaf58cbda31c494d14aa", [:mix], [], "hexpm"}, 16 | "google_api_compute": {:hex, :google_api_compute, "0.7.0", "08761a283c26a5cf114a5dc1d1917aa029cdb1cf873fd56fd6f20a5d7c8c0fca", [:mix], [{:google_gax, "~> 0.1.0", [hex: :google_gax, repo: "hexpm", optional: false]}], "hexpm"}, 17 | "google_api_container": {:hex, :google_api_container, "0.8.0", "6b9b6c238b3966d7e60bca746fd8d14fbbb0b729ecf1b4189259c1c37ab2a5b3", [:mix], [{:google_gax, "~> 0.1.0", [hex: :google_gax, repo: "hexpm", optional: false]}], "hexpm"}, 18 | "google_gax": {:hex, :google_gax, "0.1.3", "3455b58188803a554cc07447268af1da1cf35ca280267e297c677d005d1b2117", [:mix], [{:poison, ">= 1.0.0 and < 4.0.0", [hex: :poison, repo: "hexpm", optional: false]}, {:tesla, "~> 1.0", [hex: :tesla, repo: "hexpm", optional: false]}], "hexpm"}, 19 | "goth": {:hex, :goth, "1.1.0", "85977656822e54217bc0472666f1ce15dc3921495ef5f4f0774ef15503bae207", [:mix], [{:httpoison, "~> 0.11 or ~> 1.0", [hex: :httpoison, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:joken, "~> 2.0", [hex: :joken, repo: "hexpm", optional: false]}], "hexpm"}, 20 | "hackney": {:hex, :hackney, "1.15.1", "9f8f471c844b8ce395f7b6d8398139e26ddca9ebc171a8b91342ee15a19963f4", [:rebar3], [{:certifi, "2.5.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.4", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"}, 21 | "httpoison": {:hex, :httpoison, "1.5.1", "0f55b5b673b03c5c327dac7015a67cb571b99b631acc0bc1b0b98dcd6b9f2104", [:mix], [{:hackney, "~> 1.8", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"}, 22 | "idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], 
"hexpm"}, 23 | "jason": {:hex, :jason, "1.1.2", "b03dedea67a99223a2eaf9f1264ce37154564de899fd3d8b9a21b1a6fd64afe7", [:mix], [{:decimal, "~> 1.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm"}, 24 | "joken": {:hex, :joken, "2.1.0", "bf21a73105d82649f617c5e59a7f8919aa47013d2519ebcc39d998d8d12adda9", [:mix], [{:jose, "~> 1.9", [hex: :jose, repo: "hexpm", optional: false]}], "hexpm"}, 25 | "jose": {:hex, :jose, "1.9.0", "4167c5f6d06ffaebffd15cdb8da61a108445ef5e85ab8f5a7ad926fdf3ada154", [:mix, :rebar3], [{:base64url, "~> 0.0.1", [hex: :base64url, repo: "hexpm", optional: false]}], "hexpm"}, 26 | "k8s": {:hex, :k8s, "0.3.1", "e47284e042896d8f84c0f67e96570fef2b45dca63d9863e103ba12a76f9ebfe3", [:make, :mix], [{:httpoison, "~> 1.0", [hex: :httpoison, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:notion, "~> 0.2", [hex: :notion, repo: "hexpm", optional: false]}, {:telemetry, ">= 0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:yaml_elixir, "~> 2.4", [hex: :yaml_elixir, repo: "hexpm", optional: false]}], "hexpm"}, 27 | "makeup": {:hex, :makeup, "0.8.0", "9cf32aea71c7fe0a4b2e9246c2c4978f9070257e5c9ce6d4a28ec450a839b55f", [:mix], [{:nimble_parsec, "~> 0.5.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm"}, 28 | "makeup_elixir": {:hex, :makeup_elixir, "0.13.0", "be7a477997dcac2e48a9d695ec730b2d22418292675c75aa2d34ba0909dcdeda", [:mix], [{:makeup, "~> 0.8", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm"}, 29 | "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm"}, 30 | "mime": {:hex, :mime, "1.3.1", "30ce04ab3175b6ad0bdce0035cba77bba68b813d523d1aac73d9781b4d193cf8", [:mix], [], "hexpm"}, 31 | "mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm"}, 32 | "mix_test_watch": {:hex, :mix_test_watch, "0.9.0", "c72132a6071261893518fa08e121e911c9358713f62794a90c95db59042af375", [:mix], [{:file_system, "~> 0.2.1 or ~> 0.3", [hex: :file_system, repo: "hexpm", optional: false]}], "hexpm"}, 33 | "nimble_parsec": {:hex, :nimble_parsec, "0.5.0", "90e2eca3d0266e5c53f8fbe0079694740b9c91b6747f2b7e3c5d21966bba8300", [:mix], [], "hexpm"}, 34 | "notion": {:hex, :notion, "0.2.0", "0facad61b5a071dc27fff3afb8630868178f3b6369d047b5eba1c3cc86d44e37", [:mix], [{:telemetry, "~> 0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm"}, 35 | "parse_trans": {:hex, :parse_trans, "3.3.0", "09765507a3c7590a784615cfd421d101aec25098d50b89d7aa1d66646bc571c1", [:rebar3], [], "hexpm"}, 36 | "plug": {:hex, :plug, "1.8.2", "0bcce1daa420f189a6491f3940cc77ea7fb1919761175c9c3b59800d897440fc", [:mix], [{:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm"}, 37 | "plug_cowboy": {:hex, :plug_cowboy, "2.0.2", "6055f16868cc4882b24b6e1d63d2bada94fb4978413377a3b32ac16c18dffba2", [:mix], [{:cowboy, "~> 2.5", [hex: :cowboy, repo: "hexpm", optional: false]}, {:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"}, 38 | "plug_crypto": {:hex, :plug_crypto, "1.0.0", "18e49317d3fa343f24620ed22795ec29d4a5e602d52d1513ccea0b07d8ea7d4d", [:mix], [], "hexpm"}, 39 | "poison": {:hex, :poison, "3.1.0", "d9eb636610e096f86f25d9a46f35a9facac35609a7591b3be3326e99a0484665", [:mix], [], "hexpm"}, 40 | "prometheus": 
{:hex, :prometheus, "4.4.0", "6dcb11fc80faf873cb2297664720414aed16b0a4fc3a1a15ae538d66f84ccc34", [:mix, :rebar3], [], "hexpm"}, 41 | "prometheus_ex": {:hex, :prometheus_ex, "3.0.5", "fa58cfd983487fc5ead331e9a3e0aa622c67232b3ec71710ced122c4c453a02f", [:mix], [{:prometheus, "~> 4.0", [hex: :prometheus, repo: "hexpm", optional: false]}], "hexpm"}, 42 | "prometheus_plugs": {:hex, :prometheus_plugs, "1.1.5", "25933d48f8af3a5941dd7b621c889749894d8a1082a6ff7c67cc99dec26377c5", [:mix], [{:accept, "~> 0.1", [hex: :accept, repo: "hexpm", optional: false]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: false]}, {:prometheus_ex, "~> 1.1 or ~> 2.0 or ~> 3.0", [hex: :prometheus_ex, repo: "hexpm", optional: false]}, {:prometheus_process_collector, "~> 1.1", [hex: :prometheus_process_collector, repo: "hexpm", optional: true]}], "hexpm"}, 43 | "ranch": {:hex, :ranch, "1.7.1", "6b1fab51b49196860b733a49c07604465a47bdb78aa10c1c16a3d199f7f8c881", [:rebar3], [], "hexpm"}, 44 | "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.4", "f0eafff810d2041e93f915ef59899c923f4568f4585904d010387ed74988e77b", [:make, :mix, :rebar3], [], "hexpm"}, 45 | "telemetry": {:hex, :telemetry, "0.4.0", "8339bee3fa8b91cb84d14c2935f8ecf399ccd87301ad6da6b71c09553834b2ab", [:rebar3], [], "hexpm"}, 46 | "telemetry_metrics": {:hex, :telemetry_metrics, "0.3.0", "5de4037d058faf6355835c0ec65ff19605258ee696fa9f93304a389d2d497445", [:mix], [{:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm"}, 47 | "telemetry_metrics_prometheus": {:hex, :telemetry_metrics_prometheus, "0.1.2", "85728b7f7801eb01dce3cf4308f6914775f39c3954900e6a5a3e59f89da90028", [:mix], [{:plug_cowboy, "~> 2.0", [hex: :plug_cowboy, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}, {:telemetry_metrics, "~> 0.2", [hex: :telemetry_metrics, repo: "hexpm", optional: false]}, {:telemetry_poller, "~> 0.3", [hex: :telemetry_poller, repo: "hexpm", optional: false]}], "hexpm"}, 48 | "telemetry_poller": {:hex, :telemetry_poller, "0.3.0", "e67c0a0bcb9d457985e7b8715abe7e98b7c055532b069bcac9a112ce2367c409", [:mix], [{:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm"}, 49 | "tesla": {:hex, :tesla, "1.2.1", "864783cc27f71dd8c8969163704752476cec0f3a51eb3b06393b3971dc9733ff", [:mix], [{:exjsx, ">= 3.0.0", [hex: :exjsx, repo: "hexpm", optional: true]}, {:fuse, "~> 2.4", [hex: :fuse, repo: "hexpm", optional: true]}, {:hackney, "~> 1.6", [hex: :hackney, repo: "hexpm", optional: true]}, {:ibrowse, "~> 4.4.0", [hex: :ibrowse, repo: "hexpm", optional: true]}, {:jason, ">= 1.0.0", [hex: :jason, repo: "hexpm", optional: true]}, {:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]}, {:poison, ">= 1.0.0", [hex: :poison, repo: "hexpm", optional: true]}], "hexpm"}, 50 | "unicode_util_compat": {:hex, :unicode_util_compat, "0.4.1", "d869e4c68901dd9531385bb0c8c40444ebf624e60b6962d95952775cac5e90cd", [:rebar3], [], "hexpm"}, 51 | "yamerl": {:hex, :yamerl, "0.7.0", "e51dba652dce74c20a88294130b48051ebbbb0be7d76f22de064f0f3ccf0aaf5", [:rebar3], [], "hexpm"}, 52 | "yaml_elixir": {:hex, :yaml_elixir, "2.4.0", "2f444abc3c994c902851fde56b6a9cb82895c291c05a0490a289035c2e62ae71", [:mix], [{:yamerl, "~> 0.7", [hex: :yamerl, repo: "hexpm", optional: false]}], "hexpm"}, 53 | } 54 | -------------------------------------------------------------------------------- /lib/ballast/kube/node.ex: -------------------------------------------------------------------------------- 1 | defmodule 
Ballast.Kube.Node do 2 | @moduledoc """ 3 | Encapsulates a Kubernetes [`Node` resource](https://kubernetes.io/docs/concepts/architecture/nodes/). 4 | """ 5 | 6 | @kind "Node" 7 | 8 | @resources_constrainted_conditions [ 9 | # "NetworkUnavailable", 10 | "OutOfDisk", 11 | "MemoryPressure", 12 | "PIDPressure", 13 | "DiskPressure" 14 | ] 15 | 16 | alias K8s.{Client, Resource} 17 | alias Ballast.Sys.Instrumentation, as: Inst 18 | 19 | @doc """ 20 | List kubernetes nodes. 21 | """ 22 | @spec list(map()) :: {:ok, list(map)} | :error 23 | def list(params \\ %{}) do 24 | op = Client.list("v1", :nodes) 25 | 26 | with {:ok, stream} <- Client.stream(op, :default, params: params) do 27 | {duration, nodes} = :timer.tc(Enum, :into, [stream, []]) 28 | measurements = %{duration: duration, count: length(nodes)} 29 | Inst.nodes_list_succeeded(measurements, %{}) 30 | 31 | {:ok, nodes} 32 | else 33 | _error -> 34 | Inst.nodes_list_failed(%{}, %{}) 35 | :error 36 | end 37 | end 38 | 39 | @doc """ 40 | Checks if `status.conditions` are present and node is `Ready` 41 | 42 | [Node Status](https://kubernetes.io/docs/concepts/architecture/nodes/#node-status) 43 | 44 | ## Examples 45 | When `status.conditions` is present, and node is `Ready` 46 | 47 | iex> node = %{ 48 | ...> "kind" => "#{@kind}", 49 | ...> "status" => %{ 50 | ...> "conditions" => [ 51 | ...> %{"type" => "MemoryPressure", "status" => "False"}, 52 | ...> %{"type" => "Ready", "status" => "True"} 53 | ...> ] 54 | ...> } 55 | ...> } 56 | ...> Ballast.Kube.Node.ready?(node) 57 | true 58 | 59 | When `status.conditions` is present, and node is not `Ready` 60 | 61 | iex> node = %{ 62 | ...> "kind" => "#{@kind}", 63 | ...> "status" => %{ 64 | ...> "conditions" => [%{"type" => "Ready", "status" => "False"}] 65 | ...> } 66 | ...> } 67 | ...> Ballast.Kube.Node.ready?(node) 68 | false 69 | 70 | When `status.conditions` is missing: 71 | 72 | iex> node = %{"kind" => "#{@kind}"} 73 | ...> Ballast.Kube.Node.ready?(node) 74 | false 75 | """ 76 | @spec ready?(map()) :: boolean() 77 | def ready?(%{"status" => %{"conditions" => conditions}} = _node) do 78 | conditions 79 | |> find_condition_by_type("Ready") 80 | |> condition_has_status?("True") 81 | end 82 | 83 | def ready?(_), do: false 84 | 85 | # Currently only supports mNatchExpressions (not matchFields) 86 | # Also ignores weight of preferences 87 | @doc false 88 | @spec matches_preferences?(map, list(map)) :: boolean 89 | def matches_preferences?(node, prefs) do 90 | Enum.any?(prefs, fn pref -> 91 | exprs = Map.get(pref, "matchExpressions", %{}) 92 | match_expressions?(node, exprs) 93 | end) 94 | end 95 | 96 | @doc """ 97 | Checks if a node matches all `matchExpressions` logical `AND` 98 | 99 | ## Examples 100 | Matching all expressions: 101 | 102 | iex> node = %{"kind" => "Node", "metadata" => %{"labels" => %{"env" => "prod", "tier" => "frontend"}}} 103 | ...> expr1 = %{"operator" => "In", "key" => "env", "values" => ["prod", "qa"]} 104 | ...> expr2 = %{"operator" => "Exists", "key" => "tier"} 105 | ...> Ballast.Kube.Node.match_expressions?(node, [expr1, expr2]) 106 | true 107 | 108 | Matching some expressions: 109 | 110 | iex> node = %{"kind" => "Node", "metadata" => %{"labels" => %{"env" => "prod", "tier" => "frontend"}}} 111 | ...> expr1 = %{"operator" => "In", "key" => "env", "values" => ["prod", "qa"]} 112 | ...> expr2 = %{"operator" => "Exists", "key" => "foo"} 113 | ...> Ballast.Kube.Node.match_expressions?(node, [expr1, expr2]) 114 | false 115 | """ 116 | @spec match_expressions?(map, list(map)) :: boolean 
117 |   def match_expressions?(node, exprs) do
118 |     Enum.all?(exprs, fn expr -> match_expression?(node, expr) end)
119 |   end
120 | 
121 |   @doc """
122 |   Checks whether a node matches a selector `matchExpression`
123 | 
124 |   ## Examples
125 |     When an `In` expression matches
126 |       iex> node = %{"kind" => "Node", "metadata" => %{"labels" => %{"env" => "prod"}}}
127 |       ...> expr = %{"operator" => "In", "key" => "env", "values" => ["prod", "qa"]}
128 |       ...> Ballast.Kube.Node.match_expression?(node, expr)
129 |       true
130 | 
131 |     When an `In` expression doesn't match
132 |       iex> node = %{"kind" => "Node", "metadata" => %{"labels" => %{"env" => "dev"}}}
133 |       ...> expr = %{"operator" => "In", "key" => "env", "values" => ["prod", "qa"]}
134 |       ...> Ballast.Kube.Node.match_expression?(node, expr)
135 |       false
136 | 
137 |     When a `NotIn` expression matches
138 |       iex> node = %{"kind" => "Node", "metadata" => %{"labels" => %{"env" => "dev"}}}
139 |       ...> expr = %{"operator" => "NotIn", "key" => "env", "values" => ["prod"]}
140 |       ...> Ballast.Kube.Node.match_expression?(node, expr)
141 |       true
142 | 
143 |     When a `NotIn` expression doesn't match
144 |       iex> node = %{"kind" => "Node", "metadata" => %{"labels" => %{"env" => "dev"}}}
145 |       ...> expr = %{"operator" => "NotIn", "key" => "env", "values" => ["dev"]}
146 |       ...> Ballast.Kube.Node.match_expression?(node, expr)
147 |       false
148 | 
149 |     When an `Exists` expression matches
150 |       iex> node = %{"kind" => "Node", "metadata" => %{"labels" => %{"env" => "dev"}}}
151 |       ...> expr = %{"operator" => "Exists", "key" => "env"}
152 |       ...> Ballast.Kube.Node.match_expression?(node, expr)
153 |       true
154 | 
155 |     When an `Exists` expression doesn't match
156 |       iex> node = %{"kind" => "Node", "metadata" => %{"labels" => %{"env" => "dev"}}}
157 |       ...> expr = %{"operator" => "Exists", "key" => "tier"}
158 |       ...> Ballast.Kube.Node.match_expression?(node, expr)
159 |       false
160 | 
161 |     When a `DoesNotExist` expression matches
162 |       iex> node = %{"kind" => "Node", "metadata" => %{"labels" => %{"env" => "dev"}}}
163 |       ...> expr = %{"operator" => "DoesNotExist", "key" => "tier"}
164 |       ...> Ballast.Kube.Node.match_expression?(node, expr)
165 |       true
166 | 
167 |     When a `DoesNotExist` expression doesn't match
168 |       iex> node = %{"kind" => "Node", "metadata" => %{"labels" => %{"env" => "dev"}}}
169 |       ...> expr = %{"operator" => "DoesNotExist", "key" => "env"}
170 |       ...> Ballast.Kube.Node.match_expression?(node, expr)
171 |       false
172 |   """
173 |   @spec match_expression?(map(), map()) :: boolean()
174 |   def match_expression?(%{} = node, %{"operator" => "In", "key" => k, "values" => v}) do
175 |     label = Resource.label(node, k)
176 |     Enum.member?(v, label)
177 |   end
178 | 
179 |   def match_expression?(%{} = node, %{"operator" => "NotIn", "key" => k, "values" => v}) do
180 |     label = Resource.label(node, k)
181 |     !Enum.member?(v, label)
182 |   end
183 | 
184 |   def match_expression?(%{} = node, %{"operator" => "Exists", "key" => k}) do
185 |     Resource.has_label?(node, k)
186 |   end
187 | 
188 |   def match_expression?(%{} = node, %{"operator" => "DoesNotExist", "key" => k}) do
189 |     !Resource.has_label?(node, k)
190 |   end
191 | 
192 |   def match_expression?(_, _), do: false
193 | 
194 |   @doc """
195 |   Percent CPU available
196 | 
197 |   ## Examples
198 |       iex> node = %{"status" => %{"allocatable" => %{"cpu" => "940m"}, "capacity" => %{"cpu" => "1"}}}
199 |       ...> Ballast.Kube.Node.percent_cpu_available(node)
200 |       0.94
201 |   """
202 |   @spec percent_cpu_available(map) :: float
203 |   def percent_cpu_available(node) do
204 | {allocatable, capacity} = get_field_status(node, "cpu") 205 | Resource.cpu(allocatable) / Resource.cpu(capacity) 206 | end 207 | 208 | @doc """ 209 | Percent memory available 210 | 211 | ## Examples 212 | iex> node = %{"status" => %{"allocatable" => %{"memory" => "8Gi"}, "capacity" => %{"memory" => "16Gi"}}} 213 | ...> Ballast.Kube.Node.percent_memory_available(node) 214 | 0.50 215 | """ 216 | @spec percent_memory_available(map) :: float 217 | def percent_memory_available(node) do 218 | {allocatable, capacity} = get_field_status(node, "memory") 219 | Resource.memory(allocatable) / Resource.memory(capacity) 220 | end 221 | 222 | @doc """ 223 | Percent pods available 224 | 225 | ## Examples 226 | iex> node = %{"status" => %{"allocatable" => %{"pods" => "20"}, "capacity" => %{"pods" => "100"}}} 227 | ...> Ballast.Kube.Node.percent_pods_available(node) 228 | 0.20 229 | """ 230 | @spec percent_pods_available(map) :: float 231 | def percent_pods_available(node) do 232 | {allocatable, capacity} = get_field_status(node, "pods") 233 | String.to_integer(allocatable) / String.to_integer(capacity) 234 | end 235 | 236 | defp get_field_status(node, field) do 237 | allocatable = get_in(node, ["status", "allocatable", field]) 238 | capacity = get_in(node, ["status", "capacity", field]) 239 | 240 | {allocatable, capacity} 241 | end 242 | 243 | @doc """ 244 | Finds the node with the most CPU 245 | 246 | ## Examples 247 | iex> node1 = %{"metadata" => %{"name" => "foo"}, "status" => %{"allocatable" => %{"cpu" => "940m"}}} 248 | ...> node2 = %{"metadata" => %{"name" => "bar"}, "status" => %{"allocatable" => %{"cpu" => "1"}}} 249 | ...> Ballast.Kube.Node.with_most_cpu([node1, node2]) 250 | %{"metadata" => %{"name" => "bar"},"status" => %{"allocatable" => %{"cpu" => "1"}}} 251 | """ 252 | @spec with_most_cpu(list(map)) :: map 253 | def with_most_cpu(nodes) do 254 | initial = {0, nil} 255 | 256 | {_highest, node} = 257 | Enum.reduce(nodes, initial, fn node, {highest, _} = acc -> 258 | cpu = 259 | node 260 | |> get_in(["status", "allocatable", "cpu"]) 261 | |> Resource.cpu() 262 | 263 | case cpu > highest do 264 | true -> 265 | {cpu, node} 266 | 267 | false -> 268 | acc 269 | end 270 | end) 271 | 272 | node 273 | end 274 | 275 | @doc """ 276 | Finds the node with the most memory 277 | 278 | ## Examples 279 | iex> node1 = %{"metadata" => %{"name" => "foo"}, "status" => %{"allocatable" => %{"memory" => "10Gi"}}} 280 | ...> node2 = %{"metadata" => %{"name" => "bar"}, "status" => %{"allocatable" => %{"memory" => "3Gi"}}} 281 | ...> Ballast.Kube.Node.with_most_memory([node1, node2]) 282 | %{"metadata" => %{"name" => "foo"}, "status" => %{"allocatable" => %{"memory" => "10Gi"}}} 283 | """ 284 | @spec with_most_memory(list(map)) :: map 285 | def with_most_memory(nodes) do 286 | initial = {0, nil} 287 | 288 | {_highest, node} = 289 | Enum.reduce(nodes, initial, fn node, {highest, _} = acc -> 290 | memory = 291 | node 292 | |> get_in(["status", "allocatable", "memory"]) 293 | |> Resource.memory() 294 | 295 | case memory > highest do 296 | true -> 297 | {memory, node} 298 | 299 | false -> 300 | acc 301 | end 302 | end) 303 | 304 | node 305 | end 306 | 307 | @doc """ 308 | Check the node's conditions to see if they are contrained, under pressure, or insufficient. 
309 | 310 | [Node Conditions](https://kubernetes.io/docs/concepts/architecture/nodes/#condition) 311 | 312 | The checked conditions are: `#{inspect(@resources_constrainted_conditions)}` 313 | 314 | ## Examples 315 | Is constrained if any of the conditions listed above are constrained 316 | 317 | iex> node = %{ 318 | ...> "kind" => "#{@kind}", 319 | ...> "status" => %{ 320 | ...> "conditions" => [ 321 | ...> %{"type" => "PIDPressure", "status" => "False"}, 322 | ...> %{"type" => "MemoryPressure", "status" => "True"} 323 | ...> ] 324 | ...> } 325 | ...> } 326 | ...> Ballast.Kube.Node.resources_constrained?(node) 327 | true 328 | 329 | Is constrained if the status is "Unknown" 330 | 331 | Is not constrained when an unknown condition is constrained 332 | """ 333 | @spec resources_constrained?(map) :: boolean 334 | def resources_constrained?(node), do: !!first_constrained_condition(node) 335 | 336 | @doc """ 337 | Returns the first constrained condition 338 | 339 | The checked conditions are: `#{inspect(@resources_constrainted_conditions)}` 340 | 341 | ## Examples 342 | iex> node = %{ 343 | ...> "kind" => "#{@kind}", 344 | ...> "status" => %{ 345 | ...> "conditions" => [ 346 | ...> %{"type" => "PIDPressure", "status" => "False"}, 347 | ...> %{"type" => "MemoryPressure", "status" => "True"} 348 | ...> ] 349 | ...> } 350 | ...> } 351 | ...> Ballast.Kube.Node.first_constrained_condition(node) 352 | %{"type" => "MemoryPressure", "status" => "True"} 353 | """ 354 | @spec first_constrained_condition(map) :: map | nil 355 | def first_constrained_condition(%{"status" => %{"conditions" => conditions}} = _node) do 356 | Enum.find(conditions, fn %{"type" => type} = condition -> 357 | case condition_has_status?(condition, "True") do 358 | true -> 359 | Enum.member?(@resources_constrainted_conditions, type) 360 | 361 | _ -> 362 | false 363 | end 364 | end) 365 | end 366 | 367 | def first_constrained_condition(_), do: nil 368 | 369 | @spec find_condition_by_type(list(map()), binary()) :: map() 370 | defp find_condition_by_type([], _), do: nil 371 | 372 | defp find_condition_by_type(conditions, type) do 373 | Enum.find(conditions, fn condition -> 374 | condition["type"] == type 375 | end) 376 | end 377 | 378 | @spec condition_has_status?(map | nil, binary) :: boolean() 379 | defp condition_has_status?(%{"status" => status}, status), do: true 380 | defp condition_has_status?(_, _), do: false 381 | end 382 | --------------------------------------------------------------------------------