├── .gitignore ├── generate_load.sh ├── images ├── contino.png ├── spot-pricing.png ├── eks-architecture.png ├── fleet-target-capacity.png └── spot-pricing-history.png ├── tf-spot-workers ├── _provider.tf ├── userdata.tpl ├── _data.tf ├── _variables.tf └── spot-workers.tf ├── test_url.sh ├── k8s-tools ├── tiller-rbac.yaml ├── spot-interrupt-handler │ ├── iam.yaml │ └── spot-interrupt-handler.yml ├── monte-carlo.yaml └── cluster-autoscaler │ ├── cluster_autoscaler.yml │ └── rbac.yaml ├── eksctl-example.yml └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .terraform 2 | terraform.tfstate* 3 | -------------------------------------------------------------------------------- /generate_load.sh: -------------------------------------------------------------------------------- 1 | url=$1 # target URL, taken from the first argument 2 | while true 3 | do 4 | wget -q -O - "$url" # quoted so '?', '*', '&' or spaces in the URL are not globbed or word-split 5 | done 6 | -------------------------------------------------------------------------------- /images/contino.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/contino/k8s-spot/HEAD/images/contino.png -------------------------------------------------------------------------------- /tf-spot-workers/_provider.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = "ap-southeast-2" 3 | } 4 | -------------------------------------------------------------------------------- /images/spot-pricing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/contino/k8s-spot/HEAD/images/spot-pricing.png -------------------------------------------------------------------------------- /images/eks-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/contino/k8s-spot/HEAD/images/eks-architecture.png 
-------------------------------------------------------------------------------- /images/fleet-target-capacity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/contino/k8s-spot/HEAD/images/fleet-target-capacity.png -------------------------------------------------------------------------------- /images/spot-pricing-history.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/contino/k8s-spot/HEAD/images/spot-pricing-history.png -------------------------------------------------------------------------------- /tf-spot-workers/userdata.tpl: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o xtrace 3 | /etc/eks/bootstrap.sh ${cluster_name} --kubelet-extra-args --node-labels=lifecycle=Ec2Spot -------------------------------------------------------------------------------- /test_url.sh: -------------------------------------------------------------------------------- 1 | i=0 2 | url=$1 # target URL, taken from the first argument 3 | while true 4 | do 5 | http_status=$(curl -s -o /dev/null -w "%{http_code}" "$url" --connect-timeout 0 --max-time 1 --retry 0) # body is discarded; only the HTTP status code is captured 6 | if [ "$http_status" -eq 200 ];then 7 | echo request $i - $http_status - OK 8 | else 9 | echo request $i - ERROR 10 | fi 11 | sleep 0.5 12 | i=$((i + 1)) 13 | done 14 | -------------------------------------------------------------------------------- /k8s-tools/tiller-rbac.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: tiller 6 | namespace: kube-system 7 | --- 8 | apiVersion: rbac.authorization.k8s.io/v1beta1 9 | kind: ClusterRoleBinding 10 | metadata: 11 | name: tiller 12 | roleRef: 13 | apiGroup: rbac.authorization.k8s.io 14 | kind: ClusterRole 15 | name: cluster-admin 16 | subjects: 17 | - kind: ServiceAccount 18 | name: tiller 19 | namespace: kube-system 20 
| -------------------------------------------------------------------------------- /tf-spot-workers/_data.tf: -------------------------------------------------------------------------------- 1 | 2 | data "template_file" "userdata" { 3 | template = file("${path.module}/userdata.tpl") 4 | 5 | vars = { 6 | cluster_name = var.cluster_name 7 | } 8 | } 9 | 10 | data "aws_vpc" "selected" { 11 | filter { 12 | name = "tag:Name" 13 | values = [var.vpc_name] 14 | } 15 | } 16 | 17 | data "aws_iam_instance_profile" "eks_worker" { 18 | name = var.instance_profile_name 19 | } 20 | 21 | data "aws_iam_role" "fleet_role" { 22 | name = var.fleet_role_name 23 | } 24 | 25 | data "aws_security_group" "selected" { 26 | filter { 27 | name = "tag:Name" 28 | values = [var.eks_sg_name] 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /eksctl-example.yml: -------------------------------------------------------------------------------- 1 | apiVersion: eksctl.io/v1alpha5 2 | kind: ClusterConfig 3 | 4 | metadata: 5 | name: caio-eks-test 6 | region: ap-southeast-2 7 | 8 | nodeGroups: 9 | - name: spot-instances 10 | minSize: 2 11 | maxSize: 10 12 | instancesDistribution: 13 | instanceTypes: ["t3.medium", "m4.large", "m5.large"] 14 | onDemandBaseCapacity: 0 15 | onDemandPercentageAboveBaseCapacity: 0 16 | spotInstancePools: 3 17 | labels: 18 | lifecycle: Ec2Spot 19 | intent: apps 20 | taints: 21 | spotInstance: "true:PreferNoSchedule" 22 | iam: 23 | withAddonPolicies: 24 | autoScaler: true 25 | cloudWatch: true 26 | albIngress: true 27 | -------------------------------------------------------------------------------- /k8s-tools/spot-interrupt-handler/iam.yaml: -------------------------------------------------------------------------------- 1 | 2 | --- 3 | kind: ClusterRole 4 | apiVersion: rbac.authorization.k8s.io/v1 5 | metadata: 6 | name: spot-interrupt-handler 7 | namespace: default 8 | rules: 9 | - apiGroups: 10 | - "*" 11 | resources: 12 | - 
"*" 13 | verbs: 14 | - "*" 15 | --- 16 | apiVersion: v1 17 | kind: ServiceAccount 18 | metadata: 19 | name: spot-interrupt-handler 20 | --- 21 | kind: ClusterRoleBinding 22 | apiVersion: rbac.authorization.k8s.io/v1 23 | metadata: 24 | name: spot-interrupt-handler 25 | namespace: default 26 | subjects: 27 | - kind: ServiceAccount 28 | name: spot-interrupt-handler 29 | namespace: default 30 | roleRef: 31 | kind: ClusterRole 32 | name: spot-interrupt-handler 33 | apiGroup: rbac.authorization.k8s.io 34 | -------------------------------------------------------------------------------- /tf-spot-workers/_variables.tf: -------------------------------------------------------------------------------- 1 | variable "cluster_name" { 2 | default = "caio-eks-test" 3 | } 4 | 5 | variable "instance_ami_id" { 6 | default = "ami-09f2d86f2d8c4f77d" 7 | } 8 | 9 | variable "instance_profile_name" { 10 | default = "eksctl-caio-eks-test-nodegroup-spot-instances-NodeInstanceProfile-C865U0J2XYEK" 11 | } 12 | 13 | variable "fleet_role_name" { 14 | default = "aws-ec2-spot-fleet-tagging-role" 15 | } 16 | 17 | variable "vpc_name" { 18 | default = "eksctl-caio-eks-test-cluster/VPC" 19 | } 20 | 21 | variable "eks_sg_name" { 22 | default = "eksctl-caio-eks-test-nodegroup-spot-instances/SG" 23 | } 24 | 25 | variable "spot_instances" { 26 | default = [ 27 | { 28 | instance_type = "t3.micro", 29 | availability_zone = "ap-southeast-2a", 30 | subnet_id = "subnet-0cc51247eb7768c9b" 31 | }, 32 | { 33 | instance_type = "t3.micro", 34 | availability_zone = "ap-southeast-2b", 35 | subnet_id = "subnet-0af0e3bf78fd8751b" 36 | } 37 | ] 38 | } 39 | -------------------------------------------------------------------------------- /tf-spot-workers/spot-workers.tf: -------------------------------------------------------------------------------- 1 | resource "aws_spot_fleet_request" "sport-workers" { 2 | iam_fleet_role = data.aws_iam_role.fleet_role.arn 3 | target_capacity = 1 4 | allocation_strategy = 
"lowestPrice" 5 | fleet_type = "maintain" 6 | instance_pools_to_use_count = "2" 7 | 8 | dynamic "launch_specification" { 9 | for_each = var.spot_instances 10 | content { 11 | instance_type = launch_specification.value["instance_type"] 12 | subnet_id = launch_specification.value["subnet_id"] 13 | availability_zone = launch_specification.value["availability_zone"] 14 | associate_public_ip_address = true 15 | ami = var.instance_ami_id 16 | iam_instance_profile_arn = data.aws_iam_instance_profile.eks_worker.arn 17 | vpc_security_group_ids = [data.aws_security_group.selected.id] 18 | tags = { 19 | Name = "${var.cluster_name}-spot-instances-Node", 20 | "alpha.eksctl.io/cluster-name" = "${var.cluster_name}", 21 | "alpha.eksctl.io/nodegroup-name" = "spot-instances", 22 | "eksctl.cluster.k8s.io/v1alpha1/cluster-name" = "${var.cluster_name}", 23 | "eksctl.io/v1alpha2/nodegroup-name" = "spot-instances", 24 | "k8s.io/cluster-autoscaler/${var.cluster_name}" = "owned", 25 | "k8s.io/cluster-autoscaler/enabled" = "true", 26 | "kubernetes.io/cluster/${var.cluster_name}" = "owned" 27 | } 28 | user_data = base64encode(data.template_file.userdata.rendered) 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /k8s-tools/spot-interrupt-handler/spot-interrupt-handler.yml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: ClusterRole 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | metadata: 5 | name: spot-interrupt-handler 6 | namespace: default 7 | rules: 8 | - apiGroups: 9 | - "*" 10 | resources: 11 | - "*" 12 | verbs: 13 | - "*" 14 | --- 15 | apiVersion: v1 16 | kind: ServiceAccount 17 | metadata: 18 | name: spot-interrupt-handler 19 | --- 20 | kind: ClusterRoleBinding 21 | apiVersion: rbac.authorization.k8s.io/v1 22 | metadata: 23 | name: spot-interrupt-handler 24 | namespace: default 25 | subjects: 26 | - kind: ServiceAccount 27 | name: spot-interrupt-handler 28 | namespace: 
default 29 | roleRef: 30 | kind: ClusterRole 31 | name: spot-interrupt-handler 32 | apiGroup: rbac.authorization.k8s.io 33 | 34 | --- 35 | apiVersion: apps/v1 # apps/v1beta2 is no longer served as of Kubernetes 1.16; apps/v1 is the stable DaemonSet API 36 | kind: DaemonSet 37 | metadata: 38 | name: spot-interrupt-handler 39 | namespace: default 40 | spec: 41 | selector: 42 | matchLabels: 43 | app: spot-interrupt-handler 44 | template: 45 | metadata: 46 | labels: 47 | app: spot-interrupt-handler 48 | spec: 49 | nodeSelector: 50 | lifecycle: Ec2Spot 51 | serviceAccountName: spot-interrupt-handler 52 | containers: 53 | - name: spot-interrupt-handler 54 | image: kubeaws/kube-spot-termination-notice-handler:1.13.7-1 55 | imagePullPolicy: Always 56 | env: 57 | - name: POD_NAME 58 | valueFrom: 59 | fieldRef: 60 | fieldPath: metadata.name 61 | - name: NAMESPACE 62 | valueFrom: 63 | fieldRef: 64 | fieldPath: metadata.namespace 65 | - name: SPOT_POD_IP 66 | valueFrom: 67 | fieldRef: 68 | fieldPath: status.podIP -------------------------------------------------------------------------------- /k8s-tools/monte-carlo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: monte-carlo-pi-service 5 | spec: 6 | type: LoadBalancer 7 | ports: 8 | - port: 80 9 | targetPort: 8080 10 | selector: 11 | app: monte-carlo-pi-service 12 | --- 13 | apiVersion: apps/v1 14 | kind: Deployment 15 | metadata: 16 | name: monte-carlo-pi-service 17 | labels: 18 | app: monte-carlo-pi-service 19 | spec: 20 | replicas: 2 21 | selector: 22 | matchLabels: 23 | app: monte-carlo-pi-service 24 | template: 25 | metadata: 26 | labels: 27 | app: monte-carlo-pi-service 28 | spec: 29 | affinity: 30 | nodeAffinity: 31 | preferredDuringSchedulingIgnoredDuringExecution: 32 | - weight: 1 33 | preference: 34 | matchExpressions: 35 | - key: lifecycle 36 | operator: In 37 | values: 38 | - Ec2Spot 39 | requiredDuringSchedulingIgnoredDuringExecution: 40 | nodeSelectorTerms: 41 | - matchExpressions: 42 | - key: intent 43 
| operator: In 44 | values: 45 | - apps 46 | tolerations: 47 | - key: "spotInstance" 48 | operator: "Equal" 49 | value: "true" 50 | effect: "PreferNoSchedule" 51 | containers: 52 | - name: monte-carlo-pi-service 53 | image: ruecarlo/monte-carlo-pi-service 54 | resources: 55 | requests: 56 | memory: "512Mi" 57 | cpu: "1024m" 58 | limits: 59 | memory: "512Mi" 60 | cpu: "1024m" 61 | securityContext: 62 | privileged: false 63 | readOnlyRootFilesystem: true 64 | allowPrivilegeEscalation: false 65 | ports: 66 | - containerPort: 8080 -------------------------------------------------------------------------------- /k8s-tools/cluster-autoscaler/cluster_autoscaler.yml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 # extensions/v1beta1 Deployments are no longer served as of Kubernetes 1.16; apps/v1 is the stable Deployment API 3 | kind: Deployment 4 | metadata: 5 | name: cluster-autoscaler 6 | namespace: kube-system 7 | labels: 8 | app: cluster-autoscaler 9 | spec: 10 | replicas: 1 11 | selector: 12 | matchLabels: 13 | app: cluster-autoscaler 14 | template: 15 | metadata: 16 | labels: 17 | app: cluster-autoscaler 18 | spec: 19 | serviceAccountName: cluster-autoscaler 20 | containers: 21 | - image: k8s.gcr.io/cluster-autoscaler:v1.13.7 22 | name: cluster-autoscaler 23 | resources: 24 | limits: 25 | cpu: 100m 26 | memory: 300Mi 27 | requests: 28 | cpu: 100m 29 | memory: 300Mi 30 | command: 31 | - ./cluster-autoscaler 32 | - --v=4 33 | - --stderrthreshold=info 34 | - --cloud-provider=aws 35 | - --skip-nodes-with-local-storage=false 36 | - --nodes=2:10: 37 | - --expander=random 38 | - --expendable-pods-priority-cutoff=-10 39 | - --scale-down-unneeded-time=30s 40 | - --scale-down-unready-time=30s 41 | - --scale-down-delay-after-add=30s 42 | - --scale-down-utilization-threshold=0.7 43 | - --balance-similar-node-groups 44 | - --max-total-unready-percentage=75 45 | - --ok-total-unready-count=20 46 | - --max-empty-bulk-delete=30 47 | env: 48 | - name: AWS_REGION 49 | value: ap-southeast-2 50 | volumeMounts: 51 | - name: 
ssl-certs 52 | mountPath: /etc/ssl/certs/ca-certificates.crt 53 | readOnly: true 54 | imagePullPolicy: "Always" 55 | volumes: 56 | - name: ssl-certs 57 | hostPath: 58 | path: "/etc/ssl/certs/ca-bundle.crt" 59 | -------------------------------------------------------------------------------- /k8s-tools/cluster-autoscaler/rbac.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | labels: 5 | k8s-addon: cluster-autoscaler.addons.k8s.io 6 | k8s-app: cluster-autoscaler 7 | name: cluster-autoscaler 8 | namespace: kube-system 9 | --- 10 | apiVersion: rbac.authorization.k8s.io/v1beta1 11 | kind: ClusterRole 12 | metadata: 13 | name: cluster-autoscaler 14 | labels: 15 | k8s-addon: cluster-autoscaler.addons.k8s.io 16 | k8s-app: cluster-autoscaler 17 | rules: 18 | - apiGroups: [""] 19 | resources: ["events","endpoints"] 20 | verbs: ["create", "patch"] 21 | - apiGroups: [""] 22 | resources: ["pods/eviction"] 23 | verbs: ["create"] 24 | - apiGroups: [""] 25 | resources: ["pods/status"] 26 | verbs: ["update"] 27 | - apiGroups: [""] 28 | resources: ["endpoints"] 29 | resourceNames: ["cluster-autoscaler"] 30 | verbs: ["get","update"] 31 | - apiGroups: [""] 32 | resources: ["nodes"] 33 | verbs: ["watch","list","get","update"] 34 | - apiGroups: [""] 35 | resources: ["pods","services","replicationcontrollers","persistentvolumeclaims","persistentvolumes"] 36 | verbs: ["watch","list","get"] 37 | - apiGroups: ["extensions"] 38 | resources: ["replicasets","daemonsets"] 39 | verbs: ["watch","list","get"] 40 | - apiGroups: ["policy"] 41 | resources: ["poddisruptionbudgets"] 42 | verbs: ["watch","list"] 43 | - apiGroups: ["apps"] 44 | resources: ["statefulsets","replicasets"] 45 | verbs: ["watch","list","get"] 46 | - apiGroups: ["storage.k8s.io"] 47 | resources: ["storageclasses"] 48 | verbs: ["watch","list","get"] 49 | 50 | --- 51 | apiVersion: rbac.authorization.k8s.io/v1beta1 52 | kind: Role 53 
| metadata: 54 | name: cluster-autoscaler 55 | namespace: kube-system 56 | labels: 57 | k8s-addon: cluster-autoscaler.addons.k8s.io 58 | k8s-app: cluster-autoscaler 59 | rules: 60 | - apiGroups: [""] 61 | resources: ["configmaps"] 62 | verbs: ["create"] 63 | - apiGroups: [""] 64 | resources: ["configmaps"] 65 | resourceNames: ["cluster-autoscaler-status"] 66 | verbs: ["delete","get","update"] 67 | 68 | --- 69 | apiVersion: rbac.authorization.k8s.io/v1beta1 70 | kind: ClusterRoleBinding 71 | metadata: 72 | name: cluster-autoscaler 73 | labels: 74 | k8s-addon: cluster-autoscaler.addons.k8s.io 75 | k8s-app: cluster-autoscaler 76 | roleRef: 77 | apiGroup: rbac.authorization.k8s.io 78 | kind: ClusterRole 79 | name: cluster-autoscaler 80 | subjects: 81 | - kind: ServiceAccount 82 | name: cluster-autoscaler 83 | namespace: kube-system 84 | --- 85 | apiVersion: rbac.authorization.k8s.io/v1beta1 86 | kind: RoleBinding 87 | metadata: 88 | name: cluster-autoscaler 89 | namespace: kube-system 90 | labels: 91 | k8s-addon: cluster-autoscaler.addons.k8s.io 92 | k8s-app: cluster-autoscaler 93 | roleRef: 94 | apiGroup: rbac.authorization.k8s.io 95 | kind: Role 96 | name: cluster-autoscaler 97 | subjects: 98 | - kind: ServiceAccount 99 | name: cluster-autoscaler 100 | namespace: kube-system 101 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # How to save money running EKS Workers on Spot Instances 4 | 5 | # whois 6 | 7 | *Caio Trevisan* - Cloud Engineer at Contino 8 | 9 | [www.caiotrevisan.com](https://www.caiotrevisan.com) 10 | 11 | # Agenda 12 | 13 | - [Introduction](#introduction) 14 | - [AWS Amazon Elastic Container Service for Kubernetes (EKS)](#aws-amazon-elastic-container-service-for-kubernetes-eks) 15 | - [AWS EC2 Purchase Options](#aws-ec2-purchase-options) 16 | - [AWS Spot Instances](#spot-instances) 17 | - [AWS Spot 
Fleet](#spot-fleet) 18 | - [AWS Auto Scaling Group](#aws-auto-scaling-group) 19 | - [Use Cases](#use-cases) 20 | - [peak traffic](#peak-traffic) 21 | - [batch jobs](#batch-jobs) 22 | - [failover](#failover) 23 | - [CICD private runners](#cicd-private-runners) 24 | - [intensive tasks](#intensive-tasks) 25 | - [state persistence](#state-persistence) 26 | - [How to Achieve That?](#how-to-achieve-that) 27 | - [Cluster AutoScaler (CA)](#cluster-autoscaler-ca) 28 | - [Spot Interrupt Handler](#spot-interrupt-handler) 29 | - [Horizontal Pod Autoscaler (HPA)](#horizontal-pod-autoscaler-hpa) 30 | - [Affinity/Taints/Tolerations](#affinitytaintstolerations) 31 | - [Kubernetes Operational View](#kubernetes-operational-view) 32 | - [Lab Demo](#lab-demo) 33 | - [demo 1 - spot termination notice](#demo-1---spot-termination-notice) 34 | - [demo 2 - cluster auto scaling](#demo-2---cluster-auto-scaling) 35 | - [Tips](#tips) 36 | - [Bonus CKA & CKAD](#bonus-cka--ckad) 37 | - [References](#references) 38 | 39 | # Introduction 40 | 41 | ## AWS Amazon Elastic Container Service for Kubernetes (EKS) 42 | 43 | 44 | 45 | - Multi-AZ Kubernetes control plane deployment managed by AWS 46 | - auto-healing 47 | - ondemand patching and upgrades 48 | - ~$150USD/monthly 49 | 50 | - need workers -- pay separately 51 | 52 | [AWS EKS Documentation](https://aws.amazon.com/eks/) 53 | 54 | ## AWS EC2 Purchase Options 55 | 56 | - On-Demand: pay by the hour or second 57 | - Reserved Instances: up to 75% discount, one to three-year commitment 58 | - Spot Instances: bid for spare EC2 capacity, up to 90% discount 59 | 60 | [AWS EC2 Pricing Documentation](https://aws.amazon.com/ec2/pricing/) 61 | 62 | ## Spot Instances 63 | 64 | - predictable pricing 65 | - up to 90% of savings 66 | - termination notice 67 | - ~2 minutes -- metadata warning 68 | 69 | Good use for: 70 | 71 | - Flexible start/end times 72 | - Applications that handle failure well 73 | - Large computing needs for jobs like data 
processing 74 | 75 | [AWS EC2 Spot Instances Documentation](https://aws.amazon.com/ec2/spot/) 76 | 77 | ### Pricing History 78 | 79 | Pricing of an `m5.large` instance from `Jul/19` to `Oct/19` in the Sydney (AU) region `ap-southeast-2`. 80 | 81 | |Instance Type|Price| 82 | |-|-| 83 | |On-demand|0.12| 84 | |Spot|0.0362| 85 | 86 | ![](images/spot-pricing-history.png) 87 | 88 | ### Spot Instance Advisor 89 | 90 | ![](images/spot-pricing.png) 91 | 92 | [AWS Spot Instances Advisor](https://aws.amazon.com/ec2/spot/instance-advisor/) 93 | 94 | 95 | ## Spot Fleet 96 | 97 | - collection/group/fleet of spot instances 98 | - request is fulfilled either by reaching target capacity or exceeding the maximum price 99 | 100 | ### Spot Fleet Request 101 | 102 | - one-time / maintain 103 | - launch specifications: instance types / az (up to 50) 104 | - target capacity 105 | - on-demand portion 106 | - defined price vs on-demand price 107 | 108 | [AWS Spot Fleet Documentation](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-fleet.html) 109 | 110 | # Use cases 111 | 112 | ## peak traffic 113 | 114 | - two sets of worker nodes 115 | - spot: scale up above 70% of load 116 | - ondemand: scale up above 90% of load 117 | 118 | PS.: think ahead and overprovision in case of any expected event 119 | 120 | ## batch jobs 121 | 122 | - queue requests on SQS or any other queue service 123 | - scale workers based on quantity of jobs queued 124 | 125 | ## failover 126 | 127 | - Taint spot workers with `PreferNoSchedule` so jobs run on on-demand workers first and fall back to Spot only if no resources are available there 128 | 129 | ## CICD private runners 130 | 131 | - non-critical services that can retry on failure make a good use case for savings 132 | 133 | ## intensive tasks 134 | 135 | - Western Digital has run a simulation of close to 2.5 million tasks in just 8 hours with more than 50k instances (1 million vCPU!!) costing around $140kUSD. 
Estimated at half the cost of running even on in-house infrastructure. 136 | - S3 was used to save data results and checkpoints when the instances were scheduled to terminate 137 | 138 | # How to Achieve That? 139 | 140 | ## Cluster AutoScaler (CA) 141 | 142 | - scale up/down NODES when pods are not able to be scheduled 143 | - keeps checking for pending pods 144 | - sends an API call to the ASG when scaling is needed 145 | - userdata/scripts insert the new node to the cluster 146 | - Kubernetes allocates pods to newly added nodes 147 | - CA is not based on actual load but instead on `requests/limits` 148 | - how much memory/cpu you allocate to a pod 149 | 150 | ### CA Installation 151 | 152 | - Update your ASG name so the service can trigger the scale up/down for you 153 | 154 | `sed -i '' "s//test/g" "k8s-tools/cluster-autoscaler/cluster_autoscaler.yml"` 155 | 156 | - Run the CA deployment 157 | 158 | `kubectl apply -f k8s-tools/cluster-autoscaler` 159 | 160 | - Watch logs 161 | 162 | `kubectl logs -f deployment/cluster-autoscaler -n kube-system` 163 | 164 | ## Spot Interrupt Handler 165 | 166 | - run as daemonsets 167 | - keep polling instance metadata for termination notice 168 | - drain the node -- taint as NoSchedule 169 | - node can be gracefully removed 170 | 171 | ### SIH Installation 172 | 173 | - Run the Spot Interrupt Handler Daemonset 174 | 175 | `kubectl apply -f k8s-tools/spot-interrupt-handler` 176 | 177 | ## Horizontal Pod Autoscaler (HPA) 178 | 179 | - Auto scale at pod level based on cpu utilisation 180 | - query utilisation every 15 seconds 181 | 182 | [Kubernetes HPA Documentation](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale-walkthrough/) 183 | 184 | ### HPA Installation 185 | 186 | - Install metrics server for pod load monitoring 187 | 188 | `helm install stable/metrics-server --name metrics-server --version 2.0.2 --namespace metrics` 189 | 190 | - Create a test deployment and expose it 191 | 192 | `kubectl run 
php-apache --image=k8s.gcr.io/hpa-example --requests=cpu=200m --limits=cpu=500m` 193 | 194 | - Create deployment autoscaler 195 | 196 | `kubectl autoscale deployment php-apache --cpu-percent=30 --min=1 --max=10` 197 | 198 | - Expose the service 199 | 200 | `kubectl expose deploy php-apache --target-port=80 --port=80 --type=LoadBalancer` 201 | 202 | - Increase load 203 | 204 | ```bash 205 | kubectl run -i --tty load-generator --image=busybox /bin/sh 206 | 207 | Hit enter for command prompt 208 | 209 | while true; do wget -q -O- http://php-apache.default.svc.cluster.local; done 210 | ``` 211 | 212 | - Monitor HPA and deployment 213 | 214 | `kubectl get hpa -w` 215 | 216 | `kubectl get deployment php-apache -w` 217 | 218 | ## Affinity/Taints/Tolerations 219 | 220 | - affinity attracts pods to a set of nodes 221 | - create rules based on labels to: 222 | - hard: need to match label 223 | - soft: preference to match but not required 224 | - taints allow nodes to repel pods 225 | - tolerations are applied to pods to allow (not require) schedule with matching taints 226 | 227 | [Taints and Tolerations](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/) 228 | 229 | ### Example 230 | 231 | - Node tainted with NoSchedule 232 | 233 | `kubectl taint nodes node1 key=value:NoSchedule` 234 | 235 | - A pod needs a toleration to be able to run on that node 236 | 237 | ```yaml 238 | tolerations: 239 | - key: "key" 240 | operator: "Exists" 241 | effect: "NoSchedule" 242 | ``` 243 | 244 | - Hard and soft affinities: 245 | 246 | ```yaml 247 | affinity: 248 | nodeAffinity: 249 | requiredDuringSchedulingIgnoredDuringExecution: 250 | nodeSelectorTerms: 251 | - matchExpressions: 252 | - key: jobType 253 | operator: In 254 | values: 255 | - batch 256 | preferredDuringSchedulingIgnoredDuringExecution: 257 | - weight: 1 258 | preference: 259 | matchExpressions: 260 | - key: instanceType 261 | operator: In 262 | values: 263 | - Ec2Spot 264 | ``` 265 | 266 | ## Kubernetes 
Operational View 267 | 268 | - Visual graphics of cluster working 269 | - good for learning and dashboards 270 | 271 | [Kubernetes Operational View Documentation](https://github.com/hjacobs/kube-ops-view) 272 | 273 | ### KOV Installation 274 | 275 | - Install via helm 276 | 277 | ``` 278 | helm repo update 279 | helm install stable/kube-ops-view --name kube-ops-view --set service.type=LoadBalancer --set rbac.create=True 280 | ``` 281 | 282 | - Get service url 283 | 284 | `kubectl get svc kube-ops-view | tail -n 1 | awk '{ print "Kube-ops-view URL = http://"$4 }'` 285 | 286 | # Lab Demo 287 | 288 | - cluster created with `eksctl` using all default settings 289 | 290 | `eksctl create cluster caio-eks-test` 291 | 292 | - deploy [CA](#ca-installation), [Spot Interrupt Handler](#sih-installation), [metrics server](#hpa-installation) and [Kubernetes Operational Viewer](#kov-installation) to your cluster 293 | - run `kubectl apply -f k8s-tools/monte-carlo.yaml` so it can fill existing nodes with workloads 294 | 295 | ## demo 1 - spot termination notice 296 | 297 | - spot instance workers via Spot Fleet Request using terraform 298 | 299 | ### USE AT YOUR OWN RISK - EXAMPLE POLICIES ARE VERY PERMISSIVE 300 | 301 | - go to `tf-spot-workers` folder and update variables according to your recently created EKS cluster before applying 302 | 303 | - wait until the new instance joins the cluster 304 | 305 | - run an nginx pod and expose it 306 | 307 | ``` 308 | kubectl run nginx --image=nginx 309 | kubectl expose deployment nginx --port=80 --target-port=80 --type=LoadBalancer 310 | ``` 311 | 312 | - make sure the `nginx` replica is running on the spot instance node 313 | 314 | - run `test_url.sh <service-url>` to constantly poll the URL 315 | 316 | - go to AWS console Spot Fleet Requests and modify the fleet target capacity to 0 317 | - this will trigger the termination notice 318 | 319 | ![](images/fleet-target-capacity.png) 320 | 321 | - observe: 322 | - the pod needs to be reallocated to a 
healthy node before the node is removed 323 | - service will have little to no interruption on the polling 324 | 325 | ## demo 2 - cluster auto scaling 326 | 327 | - run [HPA load test steps](#hpa-installation) to create a `php-apache` pod and generate some load 328 | 329 | - behaviour to expect/monitor through the dashboard: 330 | - HPA scales deployment replicas based on CPU load 331 | - once no nodes are available to schedule pods, CA should scale up the cluster 332 | 333 | 334 | # Tips 335 | 336 | ### state persistence 337 | 338 | - use lambda with cloudwatch events or builtin application function for: 339 | - re-assigning elastic IP 340 | - load balancer handling 341 | - update DNS entries 342 | - any environment changes 343 | 344 | ### ebs volumes 345 | 346 | - ebs volumes cannot span multiple aws availability zones 347 | - use either Affinity rules and/or taint/tolerations to force use of nodes 348 | - efs for multiaz agnostic 349 | 350 | ### Cluster Autoscaler (CA) 351 | 352 | - currently does not support multi az 353 | - one ASG per az and enable `--balance-similar-node-groups` feature 354 | - requires `/etc/ssl/certs/ca-bundle.crt` to exist in your cluster. 
355 | - tools like `kops` need customization 356 | - by default CA won't move pods on `kube-system` namespace 357 | - you can change this behaviour 358 | - you can overprovision with `pause pods` 359 | - keep pods with `requests/limits` close to real needs 360 | - avoid local storage 361 | 362 | # Bonus CKA & CKAD 363 | 364 | [Contino Ultimate Guide to Passing the CKA Exam](https://www.contino.io/insights/the-ultimate-guide-to-passing-the-cka-exam) 365 | 366 | - best content around -- Linux Academy CKA course 367 | - keep track of questions and weight as you go on the notepad 368 | - skip if it's too hard and worth less than 5% 369 | - you only need 74% and 66% to pass 370 | - bookmarks for Kubernetes documentation 371 | - no need for auto-completion as the terminal comes pre-configured 372 | - you can split view your browser with k8s documentation and the exam (only these two tabs open) 373 | - book exam in the morning so you are 100% for a 3-hour exam 374 | - basic set of aliases on `.bash_profile` first thing once the test starts 375 | 376 | ``` 377 | alias k='kubectl' 378 | alias kgp='k get po' 379 | alias kgd='k get deploy' 380 | alias kgs='k get svc' 381 | alias kcc='k config current-context' 382 | alias kuc='k config use-context' 383 | alias ka='k apply' 384 | alias kc='k create' 385 | alias kd='k delete' 386 | alias kg='k get' 387 | ``` 388 | 389 | # References 390 | 391 | - Spot Instances termination notices: 392 | https://aws.amazon.com/blogs/aws/new-ec2-spot-instance-termination-notices/ 393 | 394 | - Running EKS workloads on Spot Instances: 395 | https://aws.amazon.com/blogs/compute/run-your-kubernetes-workloads-on-amazon-ec2-spot-instances-with-amazon-eks/ 396 | 397 | - AWS Spot Instances Pricing Advisor: 398 | https://aws.amazon.com/ec2/spot/instance-advisor/ 399 | 400 | - Using Spot Instances for cost optimizations: 401 | https://d1.awsstatic.com/whitepapers/cost-optimization-leveraging-ec2-spot-instances.pdf 402 | 403 | - Purchase options types on 
ASG: 404 | https://docs.aws.amazon.com/en_pv/autoscaling/ec2/userguide/asg-purchase-options.html#asg-allocation-strategies 405 | 406 | - Using EKSCTL with existing iam and vpc: 407 | https://eksctl.io/examples/reusing-iam-and-vpc/ 408 | 409 | - Spot Instances termination notice handler: 410 | https://github.com/kube-aws/kube-spot-termination-notice-handler 411 | 412 | - Overprovisioning with Cluster Autoscaler (CA): 413 | https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#how-can-i-configure-overprovisioning-with-cluster-autoscaler 414 | 415 | - Gotchas when using Cluster Autoscaler (CA): 416 | https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler/cloudprovider/aws#common-notes-and-gotchas 417 | 418 | - Re:Invent 2018 Spot Instances with EKS: 419 | https://www.slideshare.net/AmazonWebServices/amazon-ec2-spot-with-amazon-eks-con406r1-aws-reinvent-2018 420 | 421 | - AWS Getting Started with EKS: 422 | https://aws.amazon.com/getting-started/projects/deploy-kubernetes-app-amazon-eks/ 423 | 424 | - AWS Quickstart EKS: 425 | https://s3.amazonaws.com/aws-quickstart/quickstart-amazon-eks/doc/amazon-eks-architecture.pdf 426 | 427 | - Kubernetes Docs: 428 | https://kubernetes.io/docs/ 429 | 430 | - kubectl drain: 431 | https://kubernetes.io/images/docs/kubectl_drain.svg 432 | 433 | --------------------------------------------------------------------------------