├── .drone.yml ├── .gitignore ├── Dockerfile ├── README.md ├── glide.yaml ├── helm ├── Chart.yaml ├── templates │ └── deployment.yaml ├── values-dev.yaml ├── values-prod.yaml ├── values-test.yaml └── values.yaml └── main.go /.drone.yml: -------------------------------------------------------------------------------- 1 | workspace: 2 | base: /go 3 | 4 | image-latest: &image-latest 5 | image: gcr.io/npd-shared/cd:latest 6 | 7 | pipeline: 8 | 9 | test: 10 | image: instrumentisto/glide:0.13.1-go1.9 11 | commands: 12 | - glide update 13 | - go test 14 | - GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o pod-reaper 15 | when: 16 | event: [push, pull_request, tag] 17 | 18 | publish: 19 | <<: *image-latest 20 | run: publish 21 | privileged: true 22 | environment: 23 | - GOPATH=/golang 24 | volumes: 25 | - /var/run/docker.sock:/var/run/docker.sock 26 | tag: latest 27 | when: 28 | event: [push, pull_request, tag] 29 | branch: [master] 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | pod-reaper -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13.4-alpine 2 | 3 | RUN apk --no-cache add curl git && curl https://glide.sh/get | sh && apk del curl 4 | 5 | WORKDIR /go/src/app 6 | COPY ["glide.yaml", "main.go", "/go/src/app/"] 7 | 8 | RUN glide install 9 | 10 | RUN go build -o /pod-reaper 11 | 12 | CMD /pod-reaper 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pod-reaper 2 | 3 | A kubernetes operator that reaps 4 | 5 | * pods that have reached their lifetime 6 | * evicted pods 7 | 8 | ## Configuration 9 | 10 | To give a lifetime to your pods, add the following annotation: 
11 | 12 | `pod.kubernetes.io/lifetime: $DURATION` 13 | 14 | `DURATION` has to be a [valid golang duration string](https://golang.org/pkg/time/#ParseDuration). 15 | 16 | A duration string is a possibly signed sequence of decimal numbers, each with optional fraction and a unit suffix, such as "300ms", "-1.5h" or "2h45m". Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". 17 | 18 | Example: `pod.kubernetes.io/lifetime: 720h` 19 | 20 | The above annotation will cause the pod to be reaped (killed) once it reaches the age of 30d (720h). 21 | 22 | ## Deployment Details 23 | 24 | The pod reaper needs to be deployed in a kubernetes cluster. 25 | 26 | The following environment variables can be set: 27 | 28 | | Env Variable | Description | Sample Values | Default value | Required | 29 | |------------------------|----------------------------------------------------------------------------------------------|-----------------------|---------------|----------| 30 | | REMOTE_EXEC | Should be set to true when running within the cluster, to false when running locally | true | N/A | yes | 31 | | REAPER_NAMESPACES | List of namespaces that the reaper would inspect | namespace1,namespace2 | N/A | yes | 32 | | CRON_JOB | Whether this should be run just once or in a loop.
Set to true if running this as a cron job | true | false | no | 33 | | MAX_REAP_COUNT_PER_RUN | Maximum Pods to reap in each run | 100 | 30 | no | 34 | | REAP_EVICTED_PODS | Whether or not to delete evicted pods | true | false | no | 35 | 36 | ## Todo 37 | 38 | * Support RBAC 39 | * Support Rolling Updates 40 | -------------------------------------------------------------------------------- /glide.yaml: -------------------------------------------------------------------------------- 1 | package: github.corp.ebay.com/N/pod-reaper 2 | import: 3 | - package: github.com/cloudflare/cfssl 4 | version: ^1.3.2 5 | subpackages: 6 | - log 7 | - package: k8s.io/apimachinery 8 | subpackages: 9 | - pkg/apis/meta/v1 10 | - package: k8s.io/client-go 11 | version: ^7.0.0 12 | subpackages: 13 | - kubernetes 14 | - plugin/pkg/client/auth/gcp 15 | - rest 16 | - tools/clientcmd 17 | -------------------------------------------------------------------------------- /helm/Chart.yaml: -------------------------------------------------------------------------------- 1 | name: pod-reaper 2 | version: 0.1 3 | -------------------------------------------------------------------------------- /helm/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: {{ .Chart.Name }} 5 | spec: 6 | replicas: {{ .Values.replicas }} 7 | template: 8 | metadata: 9 | labels: 10 | app: {{ .Chart.Name }} 11 | tier: {{ .Values.tier }} 12 | team: {{ .Values.team }} 13 | spec: 14 | containers: 15 | - name: {{ .Chart.Name }} 16 | image: {{ .Values.docker_image }} 17 | env: 18 | - name: REMOTE_EXEC 19 | value: "true" 20 | - name: APP 21 | value: {{ .Chart.Name }} 22 | - name: SUMO_ACCESS_ID 23 | value: {{ .Values.sumoAccessId }} 24 | - name: SUMO_ACCESS_KEY 25 | value: {{ .Values.sumoAccessKey }} 26 | - name: SUMO_RECEIVER_URL 27 | value: {{ .Values.sumoReceiverURL }} 28 | - name: 
COMMIT_AUTHOR 29 | value: {{ .Values.author }} 30 | - name: REAPER_NAMESPACES 31 | value: {{ .Release.Namespace }} 32 | - name: MAX_REAP_COUNT_PER_RUN 33 | value: {{ .Values.maxReapCountPerRun | default 30 | quote}} 34 | - name: REAP_EVICTED_PODS 35 | value: {{ .Values.reapEvictedPods | default false | quote}} 36 | - name: CRON_JOB 37 | value: {{ .Values.cronJob | default false | quote}} 38 | resources: 39 | limits: 40 | cpu: 1000m 41 | memory: 1Gi 42 | requests: 43 | cpu: 50m 44 | memory: 128Mi -------------------------------------------------------------------------------- /helm/values-dev.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ptagr/pod-reaper/0dd904b0ce9d9e0840e3b8e9899da9dcf62642d3/helm/values-dev.yaml -------------------------------------------------------------------------------- /helm/values-prod.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ptagr/pod-reaper/0dd904b0ce9d9e0840e3b8e9899da9dcf62642d3/helm/values-prod.yaml -------------------------------------------------------------------------------- /helm/values-test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ptagr/pod-reaper/0dd904b0ce9d9e0840e3b8e9899da9dcf62642d3/helm/values-test.yaml -------------------------------------------------------------------------------- /helm/values.yaml: -------------------------------------------------------------------------------- 1 | owner: punagrawal 2 | replicas: 1 3 | tier: infra 4 | team: shared 5 | maxReapCountPerRun: 30 -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "os" 6 | "path/filepath" 7 | "strconv" 8 | "strings" 9 | "time" 10 | 11 | 
"github.com/cloudflare/cfssl/log" 12 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 13 | "k8s.io/client-go/kubernetes" 14 | _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" 15 | "k8s.io/client-go/rest" 16 | "k8s.io/client-go/tools/clientcmd" 17 | ) 18 | 19 | const ( 20 | lifetimeAnnotation string = "pod.kubernetes.io/lifetime" 21 | ) 22 | 23 | func main() { 24 | 25 | log.Level = log.LevelDebug 26 | 27 | log.Infof("Hello from pod reaper! Hide all the pods!\n") 28 | 29 | var config *rest.Config 30 | var err error 31 | var maxReaperCount = maxReaperCountPerRun() 32 | var ( 33 | reapEvicted = reapEvictedPods() 34 | runAsCronJob = cronJob() 35 | ) 36 | 37 | if !reapEvicted { 38 | log.Debugf("REAP_EVICTED_PODS not set. Not reaping evicted pods.") 39 | } 40 | 41 | if remoteExec() { 42 | log.Debug("Loading kubeconfig from in cluster config") 43 | config, err = rest.InClusterConfig() 44 | } else { 45 | var kubeconfig *string 46 | if home := homeDir(); home != "" { 47 | kubeconfig = flag.String("kubeconfig", filepath.Join(home, ".kube", "config"), "(optional) absolute path to the kubeconfig file") 48 | } else { 49 | kubeconfig = flag.String("kubeconfig", "", "absolute path to the kubeconfig file") 50 | } 51 | 52 | flag.Parse() 53 | log.Infof("Loading kubeconfig from %s\n", *kubeconfig) 54 | 55 | // use the current context in kubeconfig 56 | config, err = clientcmd.BuildConfigFromFlags("", *kubeconfig) 57 | } 58 | 59 | if err != nil { 60 | panic(err.Error()) 61 | } 62 | 63 | // create the clientset 64 | clientset, err := kubernetes.NewForConfig(config) 65 | if err != nil { 66 | panic(err.Error()) 67 | } 68 | 69 | for { 70 | reaperNamespaces := namespaces() 71 | if len(reaperNamespaces) == 0 { 72 | panic("No namespace specified. 
Exiting.") 73 | } 74 | for _, ns := range reaperNamespaces { 75 | pods, err := clientset.CoreV1().Pods(ns).List(metav1.ListOptions{}) 76 | if err != nil { 77 | panic(err.Error()) 78 | } 79 | 80 | log.Infof("Checking %d pods in namespace %s\n", len(pods.Items), ns) 81 | killedPods := 0 82 | for _, v := range pods.Items { 83 | if val, ok := v.Annotations[lifetimeAnnotation]; ok { 84 | log.Debugf("pod %s : Found annotation %s with value %s\n", v.Name, lifetimeAnnotation, val) 85 | lifetime, _ := time.ParseDuration(val) 86 | if lifetime == 0 { 87 | log.Debugf("pod %s : provided value %s is incorrect\n", v.Name, val) 88 | } else if killedPods < maxReaperCount { 89 | log.Debugf("pod %s : %s\n", v.Name, v.CreationTimestamp) 90 | currentLifetime := time.Now().Sub(v.CreationTimestamp.Time) 91 | if currentLifetime > lifetime { 92 | log.Infof("pod %s : pod is past its lifetime and will be killed.\n", v.Name) 93 | err := clientset.CoreV1().Pods(v.Namespace).Delete(v.Name, &metav1.DeleteOptions{}) 94 | if err != nil { 95 | panic(err.Error()) 96 | } 97 | log.Infof("pod %s : pod killed.\n", v.Name) 98 | killedPods++ 99 | } 100 | } else { 101 | log.Debugf("pod %s : max %d pods killed\n", v.Name, maxReaperCount) 102 | } 103 | } 104 | 105 | if reapEvicted && strings.Contains(v.Status.Reason, "Evicted") { 106 | log.Debugf("pod %s : pod is evicted and needs to be deleted", v.Name) 107 | err := clientset.CoreV1().Pods(v.Namespace).Delete(v.Name, &metav1.DeleteOptions{}) 108 | if err != nil { 109 | panic(err.Error()) 110 | } 111 | log.Infof("pod %s : pod killed.\n", v.Name) 112 | killedPods++ 113 | } 114 | 115 | } 116 | 117 | log.Infof("Killed %d Old/Evicted Pods.", killedPods) 118 | } 119 | if !runAsCronJob { 120 | log.Infof("Now sleeping for %d seconds", int(sleepDuration().Seconds())) 121 | time.Sleep(sleepDuration()) 122 | } else { 123 | break 124 | } 125 | } 126 | 127 | } 128 | 129 | func remoteExec() bool { 130 | if val, ok := os.LookupEnv("REMOTE_EXEC"); ok { 131 | boolVal, err 
:= strconv.ParseBool(val) 132 | if err == nil { 133 | return boolVal 134 | } else { 135 | panic("REMOTE_EXEC var incorrectly set") 136 | } 137 | } 138 | panic("REMOTE_EXEC var not set") 139 | } 140 | 141 | func maxReaperCountPerRun() int { 142 | i, err := strconv.Atoi(os.Getenv("MAX_REAPER_COUNT_PER_RUN")) 143 | if err != nil { 144 | i = 30 145 | } 146 | return i 147 | } 148 | 149 | func reapEvictedPods() bool { 150 | if val, ok := os.LookupEnv("REAP_EVICTED_PODS"); ok { 151 | boolVal, err := strconv.ParseBool(val) 152 | if err == nil { 153 | return boolVal 154 | } 155 | } 156 | return false 157 | } 158 | 159 | func cronJob() bool { 160 | if val, ok := os.LookupEnv("CRON_JOB"); ok { 161 | boolVal, err := strconv.ParseBool(val) 162 | if err == nil { 163 | return boolVal 164 | } 165 | } 166 | return false 167 | } 168 | 169 | func sleepDuration() time.Duration { 170 | if h := os.Getenv("REAPER_INTERVAL_IN_SEC"); h != "" { 171 | s, _ := strconv.Atoi(h) 172 | return time.Duration(s) * time.Second 173 | } 174 | return 60 * time.Second 175 | } 176 | 177 | func namespaces() []string { 178 | if h := os.Getenv("REAPER_NAMESPACES"); h != "" { 179 | namespaces := strings.Split(h, ",") 180 | if len(namespaces) == 1 && strings.ToLower(namespaces[0]) == "all" { 181 | return []string{metav1.NamespaceAll} 182 | } 183 | return namespaces 184 | } 185 | return []string{} 186 | } 187 | 188 | func homeDir() string { 189 | if h := os.Getenv("HOME"); h != "" { 190 | return h 191 | } 192 | return os.Getenv("USERPROFILE") // windows 193 | } 194 | --------------------------------------------------------------------------------