├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE ├── README.md ├── cmd └── draino │ └── draino.go ├── go.mod ├── go.sum ├── helm └── draino │ ├── .helmignore │ ├── Chart.yaml │ ├── templates │ ├── _helpers.tpl │ ├── clusterrole.yaml │ ├── clusterrolebinding.yaml │ ├── deployment.yaml │ └── serviceaccount.yaml │ └── values.yaml ├── internal └── kubernetes │ ├── drainSchedule.go │ ├── drainSchedule_test.go │ ├── drainer.go │ ├── drainer_test.go │ ├── eventhandler.go │ ├── eventhandler_test.go │ ├── nodefilters.go │ ├── nodefilters_test.go │ ├── podfilters.go │ ├── podfilters_test.go │ ├── util.go │ ├── watch.go │ └── watch_test.go ├── manifest.yml └── scripts ├── build.sh ├── push.sh ├── run.sh └── test.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | draino 9 | 10 | # Test binary, build with `go test -c` 11 | *.test 12 | 13 | # Output of the go coverage tool, specifically when used with LiteIDE 14 | *.out 15 | 16 | # Go vendor directory 17 | vendor/ 18 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | services: 4 | - docker 5 | 6 | go: 7 | - 1.13.x 8 | 9 | jobs: 10 | include: 11 | - stage: test 12 | name: go tests 13 | script: 14 | - ./scripts/test.sh 15 | after_success: 16 | - bash <(curl -s https://codecov.io/bash) 17 | - stage: test 18 | name: go lint 19 | install: 20 | - curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s v1.24.0 21 | script: 22 | - GOGC=10 ./bin/golangci-lint run -j 4 --timeout 5m --exclude "\.pb.*\.go" --exclude "_strings\.go" --exclude "_test\.go" --exclude "not checked.+Close" ./... 23 | - stage: test 24 | name: helm check 25 | install: 26 | - curl https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 | bash 27 | - curl -LO https://github.com/instrumenta/kubeval/releases/latest/download/kubeval-linux-amd64.tar.gz 28 | - tar xf kubeval-linux-amd64.tar.gz 29 | script: 30 | - helm template ./helm/draino/ | ./kubeval --strict 31 | - stage: push 32 | install: skip 33 | script: 34 | - ./scripts/build.sh 35 | - docker login -u "$DOCKER_USERNAME" -p "$DOCKER_PASSWORD" 36 | - ./scripts/push.sh 37 | 38 | stages: 39 | - name: test 40 | - name: push 41 | if: branch = master and not type = pull_request 42 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13.15-alpine3.11 AS build 2 | 3 | RUN apk update && apk add git && apk add curl 4 | 5 | WORKDIR /go/src/github.com/planetlabs/draino 6 | COPY . . 7 | 8 | RUN go build -o /draino ./cmd/draino 9 | 10 | FROM alpine:3.11 11 | 12 | RUN apk update && apk add ca-certificates 13 | RUN addgroup -S user && adduser -S user -G user 14 | USER user 15 | COPY --from=build /draino /draino 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # draino [![Docker Pulls](https://img.shields.io/docker/pulls/planetlabs/draino.svg)](https://hub.docker.com/r/planetlabs/draino/) [![Godoc](https://img.shields.io/badge/godoc-reference-blue.svg)](https://godoc.org/github.com/planetlabs/draino) [![Travis](https://img.shields.io/travis/com/planetlabs/draino.svg?maxAge=300)](https://travis-ci.com/planetlabs/draino/) [![Codecov](https://img.shields.io/codecov/c/github/planetlabs/draino.svg?maxAge=3600)](https://codecov.io/gh/planetlabs/draino/) 2 | Draino automatically drains Kubernetes nodes based on labels and node 3 | conditions. Nodes that match _all_ of the supplied labels and _any_ of the 4 | supplied node conditions will be cordoned immediately and drained after a 5 | configurable `drain-buffer` time. 6 | 7 | Draino is intended for use alongside the Kubernetes [Node Problem Detector](https://github.com/kubernetes/node-problem-detector) 8 | and [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler). 9 | The Node Problem Detector can set a node condition when it detects something 10 | wrong with a node - for instance by watching node logs or running a script. The 11 | Cluster Autoscaler can be configured to delete nodes that are underutilised. 12 | Adding Draino to the mix enables autoremediation: 13 | 14 | 1. The Node Problem Detector detects a permanent node problem and sets the 15 | corresponding node condition. 16 | 2. Draino notices the node condition. It immediately cordons the node to prevent 17 | new pods being scheduled there, and schedules a drain of the node. 18 | 3. Once the node has been drained the Cluster Autoscaler will consider it 19 | underutilised. It will be eligible for scale down (i.e. termination) by the 20 | Autoscaler after a configurable period of time. 21 | 22 | ## Usage 23 | ``` 24 | $ docker run planetlabs/draino /draino --help 25 | usage: draino [] ... 26 | 27 | Automatically cordons and drains nodes that match the supplied conditions. 28 | 29 | Flags: 30 | --help Show context-sensitive help (also try --help-long and --help-man). 31 | -d, --debug Run with debug logging. 
32 | --listen=":10002" Address at which to expose /metrics and /healthz. 33 | --kubeconfig=KUBECONFIG Path to kubeconfig file. Leave unset to use in-cluster config. 34 | --master=MASTER Address of Kubernetes API server. Leave unset to use in-cluster config. 35 | --dry-run Emit an event without cordoning or draining matching nodes. 36 | --max-grace-period=8m0s Maximum time evicted pods will be given to terminate gracefully. 37 | --eviction-headroom=30s Additional time to wait after a pod's termination grace period for it to have been deleted. 38 | --drain-buffer=10m0s Minimum time between starting each drain. Nodes are always cordoned immediately. 39 | --node-label="foo=bar" (DEPRECATED) Only nodes with this label will be eligible for cordoning and draining. May be specified multiple times. 40 | --node-label-expr="metadata.labels.foo == 'bar'" 41 | This is an expr string https://github.com/antonmedv/expr that must return true or false. See `nodefilters_test.go` for examples 42 | --namespace="kube-system" Namespace used to create leader election lock object. 43 | --leader-election-lease-duration=15s 44 | Lease duration for leader election. 45 | --leader-election-renew-deadline=10s 46 | Leader election renew deadline. 47 | --leader-election-retry-period=2s 48 | Leader election retry period. 49 | --skip-drain Whether to skip draining nodes after cordoning. 50 | --evict-daemonset-pods Evict pods that were created by an extant DaemonSet. 51 | --evict-emptydir-pods Evict pods with local storage, i.e. with emptyDir volumes. 52 | --evict-unreplicated-pods Evict pods that were not created by a replication controller. 53 | --protected-pod-annotation=KEY[=VALUE] ... 54 | Protect pods with this annotation from eviction. May be specified multiple times. 55 | 56 | Args: 57 | Nodes for which any of these conditions are true will be cordoned and drained. 58 | ``` 59 | 60 | ### Labels and Label Expressions 61 | 62 | Draino allows filtering the elligible set of nodes using `--node-label` and `--node-label-expr`. 63 | The original flag `--node-label` is limited to the boolean AND of the specified labels. To express more complex predicates, the new `--node-label-expr` 64 | flag allows for mixed OR/AND/NOT logic via https://github.com/antonmedv/expr. 65 | 66 | An example of `--node-label-expr`: 67 | 68 | ``` 69 | (metadata.labels.region == 'us-west-1' && metadata.labels.app == 'nginx') || (metadata.labels.region == 'us-west-2' && metadata.labels.app == 'nginx') 70 | ``` 71 | 72 | ## Considerations 73 | Keep the following in mind before deploying Draino: 74 | 75 | * Always run Draino in `--dry-run` mode first to ensure it would drain the nodes 76 | you expect it to. In dry run mode Draino will emit logs, metrics, and events 77 | but will not actually cordon or drain nodes. 78 | * Draino immediately cordons nodes that match its configured labels and node 79 | conditions, but will wait a configurable amount of time (10 minutes by default) 80 | between draining nodes. i.e. If two nodes begin exhibiting a node condition 81 | simultaneously one node will be drained immediately and the other in 10 minutes. 82 | * Draino considers a drain to have failed if at least one pod eviction triggered 83 | by that drain fails. If Draino fails to evict two of five pods it will consider 84 | the Drain to have failed, but the remaining three pods will always be evicted. 85 | * Pods that can't be evicted by the cluster-autoscaler won't be evicted by draino. 
86 | See the annotation `"cluster-autoscaler.kubernetes.io/safe-to-evict": "false"` in the 87 | [cluster-autoscaler documentation](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#what-types-of-pods-can-prevent-ca-from-removing-a-node). 88 | 89 | ## Deployment 90 | 91 | Draino is automatically built from master and pushed to the [Docker Hub](https://hub.docker.com/r/planetlabs/draino/). 92 | Builds are tagged `planetlabs/draino:$(git rev-parse --short HEAD)`. 93 | 94 | **Note:** As of September 2020 we no longer publish `planetlabs/draino:latest` 95 | in order to encourage explicit and pinned releases. 96 | 97 | An [example Kubernetes deployment manifest](manifest.yml) is provided. 98 | 99 | ## Monitoring 100 | 101 | ### Metrics 102 | Draino provides a simple healthcheck endpoint at `/healthz` and Prometheus 103 | metrics at `/metrics`. The following metrics exist: 104 | 105 | ```bash 106 | $ kubectl -n kube-system exec -it ${DRAINO_POD} -- apk add curl 107 | $ kubectl -n kube-system exec -it ${DRAINO_POD} -- curl http://localhost:10002/metrics 108 | # HELP draino_cordoned_nodes_total Number of nodes cordoned. 109 | # TYPE draino_cordoned_nodes_total counter 110 | draino_cordoned_nodes_total{result="succeeded"} 2 111 | draino_cordoned_nodes_total{result="failed"} 1 112 | # HELP draino_drained_nodes_total Number of nodes drained. 113 | # TYPE draino_drained_nodes_total counter 114 | draino_drained_nodes_total{result="succeeded"} 1 115 | draino_drained_nodes_total{result="failed"} 1 116 | ``` 117 | 118 | ### Events 119 | Draino generates an event for every relevant step of the eviction process. Here is an example that ends with the reason `DrainFailed`. When everything goes well, the last event for a given node will have the reason `DrainSucceeded`. 120 | ``` 121 | > kubectl get events -n default | grep -E '(^LAST|draino)' 122 | 123 | LAST SEEN FIRST SEEN COUNT NAME KIND TYPE REASON SOURCE MESSAGE 124 | 5m 5m 1 node-demo.15fe0c35f0b4bd10 Node Warning CordonStarting draino Cordoning node 125 | 5m 5m 1 node-demo.15fe0c35fe3386d8 Node Warning CordonSucceeded draino Cordoned node 126 | 5m 5m 1 node-demo.15fe0c360bd516f8 Node Warning DrainScheduled draino Will drain node after 2020-03-20T16:19:14.91905+01:00 127 | 5m 5m 1 node-demo.15fe0c3852986fe8 Node Warning DrainStarting draino Draining node 128 | 4m 4m 1 node-demo.15fe0c48d010ecb0 Node Warning DrainFailed draino Draining failed: timed out waiting for evictions to complete: timed out 129 | ``` 130 | 131 | ### Conditions 132 | When a drain is scheduled, in addition to the event, a condition is added to the status of the node. This condition holds information about the beginning and the end of the drain procedure. You can see it by describing the node resource: 133 | 134 | ``` 135 | > kubectl describe node {node-name} 136 | ......
137 | Unschedulable: true 138 | Conditions: 139 | Type Status LastHeartbeatTime LastTransitionTime Reason Message 140 | ---- ------ ----------------- ------------------ ------ ------- 141 | OutOfDisk False Fri, 20 Mar 2020 15:52:41 +0100 Fri, 20 Mar 2020 14:01:59 +0100 KubeletHasSufficientDisk kubelet has sufficient disk space available 142 | MemoryPressure False Fri, 20 Mar 2020 15:52:41 +0100 Fri, 20 Mar 2020 14:01:59 +0100 KubeletHasSufficientMemory kubelet has sufficient memory available 143 | DiskPressure False Fri, 20 Mar 2020 15:52:41 +0100 Fri, 20 Mar 2020 14:01:59 +0100 KubeletHasNoDiskPressure kubelet has no disk pressure 144 | PIDPressure False Fri, 20 Mar 2020 15:52:41 +0100 Fri, 20 Mar 2020 14:01:59 +0100 KubeletHasSufficientPID kubelet has sufficient PID available 145 | Ready True Fri, 20 Mar 2020 15:52:41 +0100 Fri, 20 Mar 2020 14:02:09 +0100 KubeletReady kubelet is posting ready status. AppArmor enabled 146 | ec2-host-retirement True Fri, 20 Mar 2020 15:23:26 +0100 Fri, 20 Mar 2020 15:23:26 +0100 NodeProblemDetector Condition added with tooling 147 | DrainScheduled True Fri, 20 Mar 2020 15:50:50 +0100 Fri, 20 Mar 2020 15:23:26 +0100 Draino Drain activity scheduled 2020-03-20T15:50:34+01:00 148 | ``` 149 | 150 | Later, when the drain activity has completed, the condition is amended to let you know whether it succeeded or failed: 151 | 152 | ``` 153 | > kubectl describe node {node-name} 154 | ...... 155 | Unschedulable: true 156 | Conditions: 157 | Type Status LastHeartbeatTime LastTransitionTime Reason Message 158 | ---- ------ ----------------- ------------------ ------ ------- 159 | OutOfDisk False Fri, 20 Mar 2020 15:52:41 +0100 Fri, 20 Mar 2020 14:01:59 +0100 KubeletHasSufficientDisk kubelet has sufficient disk space available 160 | MemoryPressure False Fri, 20 Mar 2020 15:52:41 +0100 Fri, 20 Mar 2020 14:01:59 +0100 KubeletHasSufficientMemory kubelet has sufficient memory available 161 | DiskPressure False Fri, 20 Mar 2020 15:52:41 +0100 Fri, 20 Mar 2020 14:01:59 +0100 KubeletHasNoDiskPressure kubelet has no disk pressure 162 | PIDPressure False Fri, 20 Mar 2020 15:52:41 +0100 Fri, 20 Mar 2020 14:01:59 +0100 KubeletHasSufficientPID kubelet has sufficient PID available 163 | Ready True Fri, 20 Mar 2020 15:52:41 +0100 Fri, 20 Mar 2020 14:02:09 +0100 KubeletReady kubelet is posting ready status. AppArmor enabled 164 | ec2-host-retirement True Fri, 20 Mar 2020 15:23:26 +0100 Fri, 20 Mar 2020 15:23:26 +0100 NodeProblemDetector Condition added with tooling 165 | DrainScheduled True Fri, 20 Mar 2020 15:50:50 +0100 Fri, 20 Mar 2020 15:23:26 +0100 Draino Drain activity scheduled 2020-03-20T15:50:34+01:00 | Completed: 2020-03-20T15:50:50+01:00 166 | ``` 167 | 168 | If the drain had failed, the condition line would look like: 169 | ``` 170 | DrainScheduled True Fri, 20 Mar 2020 15:50:50 +0100 Fri, 20 Mar 2020 15:23:26 +0100 Draino Drain activity scheduled 2020-03-20T15:50:34+01:00| Failed:2020-03-20T15:55:50+01:00 171 | ``` 172 | 173 | ## Retrying drain 174 | 175 | In some cases the drain activity may fail because of a restrictive Pod Disruption Budget or some other reason external to Draino. The node remains cordoned and the drain condition 176 | is marked as `Failed`. If you want to schedule another drain attempt on that node, add the annotation: `draino/drain-retry: true`. A new drain schedule will be created. Note that the annotation is not removed, so it will keep triggering retries if the drain fails again.
177 | 178 | ``` 179 | kubectl annotate node {node-name} draino/drain-retry=true 180 | ``` 181 | ## Modes 182 | 183 | ### Dry Run 184 | Draino can be run in dry run mode using the `--dry-run` flag. 185 | 186 | ### Cordon Only 187 | Draino can also optionally be run in a mode where the nodes are only cordoned, and not drained. This can be achieved by using the `--skip-drain` flag. 188 | -------------------------------------------------------------------------------- /cmd/draino/draino.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2018 Planet Labs Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 13 | implied. See the License for the specific language governing permissions 14 | and limitations under the License. 15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "context" 21 | "net/http" 22 | "os" 23 | "path/filepath" 24 | "time" 25 | 26 | "contrib.go.opencensus.io/exporter/prometheus" 27 | "github.com/julienschmidt/httprouter" 28 | "github.com/oklog/run" 29 | "go.opencensus.io/stats/view" 30 | "go.opencensus.io/tag" 31 | "go.uber.org/zap" 32 | "gopkg.in/alecthomas/kingpin.v2" 33 | client "k8s.io/client-go/kubernetes" 34 | "k8s.io/client-go/tools/cache" 35 | "k8s.io/client-go/tools/leaderelection" 36 | "k8s.io/client-go/tools/leaderelection/resourcelock" 37 | "k8s.io/klog" 38 | 39 | "github.com/planetlabs/draino/internal/kubernetes" 40 | ) 41 | 42 | // Default leader election settings. 43 | const ( 44 | DefaultLeaderElectionLeaseDuration time.Duration = 15 * time.Second 45 | DefaultLeaderElectionRenewDeadline time.Duration = 10 * time.Second 46 | DefaultLeaderElectionRetryPeriod time.Duration = 2 * time.Second 47 | ) 48 | 49 | func main() { 50 | var ( 51 | app = kingpin.New(filepath.Base(os.Args[0]), "Automatically cordons and drains nodes that match the supplied conditions.").DefaultEnvars() 52 | 53 | debug = app.Flag("debug", "Run with debug logging.").Short('d').Bool() 54 | listen = app.Flag("listen", "Address at which to expose /metrics and /healthz.").Default(":10002").String() 55 | kubecfg = app.Flag("kubeconfig", "Path to kubeconfig file. Leave unset to use in-cluster config.").String() 56 | apiserver = app.Flag("master", "Address of Kubernetes API server. Leave unset to use in-cluster config.").String() 57 | dryRun = app.Flag("dry-run", "Emit an event without cordoning or draining matching nodes.").Bool() 58 | maxGracePeriod = app.Flag("max-grace-period", "Maximum time evicted pods will be given to terminate gracefully.").Default(kubernetes.DefaultMaxGracePeriod.String()).Duration() 59 | evictionHeadroom = app.Flag("eviction-headroom", "Additional time to wait after a pod's termination grace period for it to have been deleted.").Default(kubernetes.DefaultEvictionOverhead.String()).Duration() 60 | drainBuffer = app.Flag("drain-buffer", "Minimum time between starting each drain. Nodes are always cordoned immediately.").Default(kubernetes.DefaultDrainBuffer.String()).Duration() 61 | nodeLabels = app.Flag("node-label", "(Deprecated) Nodes with this label will be eligible for cordoning and draining. 
May be specified multiple times").Strings() 62 | nodeLabelsExpr = app.Flag("node-label-expr", "Nodes that match this expression will be eligible for cordoning and draining.").String() 63 | namespace = app.Flag("namespace", "Namespace used to create leader election lock object.").Default("kube-system").String() 64 | 65 | leaderElectionLeaseDuration = app.Flag("leader-election-lease-duration", "Lease duration for leader election.").Default(DefaultLeaderElectionLeaseDuration.String()).Duration() 66 | leaderElectionRenewDeadline = app.Flag("leader-election-renew-deadline", "Leader election renew deadline.").Default(DefaultLeaderElectionRenewDeadline.String()).Duration() 67 | leaderElectionRetryPeriod = app.Flag("leader-election-retry-period", "Leader election retry period.").Default(DefaultLeaderElectionRetryPeriod.String()).Duration() 68 | leaderElectionTokenName = app.Flag("leader-election-token-name", "Leader election token name.").Default(kubernetes.Component).String() 69 | 70 | skipDrain = app.Flag("skip-drain", "Whether to skip draining nodes after cordoning.").Default("false").Bool() 71 | evictDaemonSetPods = app.Flag("evict-daemonset-pods", "Evict pods that were created by an extant DaemonSet.").Bool() 72 | evictStatefulSetPods = app.Flag("evict-statefulset-pods", "Evict pods that were created by an extant StatefulSet.").Bool() 73 | evictLocalStoragePods = app.Flag("evict-emptydir-pods", "Evict pods with local storage, i.e. with emptyDir volumes.").Bool() 74 | evictUnreplicatedPods = app.Flag("evict-unreplicated-pods", "Evict pods that were not created by a replication controller.").Bool() 75 | 76 | protectedPodAnnotations = app.Flag("protected-pod-annotation", "Protect pods with this annotation from eviction. May be specified multiple times.").PlaceHolder("KEY[=VALUE]").Strings() 77 | 78 | conditions = app.Arg("node-conditions", "Nodes for which any of these conditions are true will be cordoned and drained.").Required().Strings() 79 | ) 80 | kingpin.MustParse(app.Parse(os.Args[1:])) 81 | 82 | // this is required to make all packages using klog write to stderr instead of tmp files 83 | klog.InitFlags(nil) 84 | 85 | var ( 86 | nodesCordoned = &view.View{ 87 | Name: "cordoned_nodes_total", 88 | Measure: kubernetes.MeasureNodesCordoned, 89 | Description: "Number of nodes cordoned.", 90 | Aggregation: view.Count(), 91 | TagKeys: []tag.Key{kubernetes.TagResult}, 92 | } 93 | nodesUncordoned = &view.View{ 94 | Name: "uncordoned_nodes_total", 95 | Measure: kubernetes.MeasureNodesUncordoned, 96 | Description: "Number of nodes uncordoned.", 97 | Aggregation: view.Count(), 98 | TagKeys: []tag.Key{kubernetes.TagResult}, 99 | } 100 | nodesDrained = &view.View{ 101 | Name: "drained_nodes_total", 102 | Measure: kubernetes.MeasureNodesDrained, 103 | Description: "Number of nodes drained.", 104 | Aggregation: view.Count(), 105 | TagKeys: []tag.Key{kubernetes.TagResult}, 106 | } 107 | nodesDrainScheduled = &view.View{ 108 | Name: "drain_scheduled_nodes_total", 109 | Measure: kubernetes.MeasureNodesDrainScheduled, 110 | Description: "Number of nodes scheduled for drain.", 111 | Aggregation: view.Count(), 112 | TagKeys: []tag.Key{kubernetes.TagResult}, 113 | } 114 | ) 115 | 116 | kingpin.FatalIfError(view.Register(nodesCordoned, nodesUncordoned, nodesDrained, nodesDrainScheduled), "cannot create metrics") 117 | p, err := prometheus.NewExporter(prometheus.Options{Namespace: kubernetes.Component}) 118 | kingpin.FatalIfError(err, "cannot export metrics") 119 | view.RegisterExporter(p) 120 | 121 | web := 
&httpRunner{l: *listen, h: map[string]http.Handler{ 122 | "/metrics": p, 123 | "/healthz": http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { r.Body.Close() }), // nolint:errcheck 124 | }} 125 | 126 | log, err := zap.NewProduction() 127 | if *debug { 128 | log, err = zap.NewDevelopment() 129 | } 130 | kingpin.FatalIfError(err, "cannot create log") 131 | defer log.Sync() // nolint:errcheck 132 | 133 | go func() { 134 | log.Info("web server is running", zap.String("listen", *listen)) 135 | kingpin.FatalIfError(await(web), "error serving") 136 | }() 137 | 138 | c, err := kubernetes.BuildConfigFromFlags(*apiserver, *kubecfg) 139 | kingpin.FatalIfError(err, "cannot create Kubernetes client configuration") 140 | 141 | cs, err := client.NewForConfig(c) 142 | kingpin.FatalIfError(err, "cannot create Kubernetes client") 143 | 144 | pf := []kubernetes.PodFilterFunc{kubernetes.MirrorPodFilter} 145 | if !*evictLocalStoragePods { 146 | pf = append(pf, kubernetes.LocalStoragePodFilter) 147 | } 148 | if !*evictUnreplicatedPods { 149 | pf = append(pf, kubernetes.UnreplicatedPodFilter) 150 | } 151 | if !*evictDaemonSetPods { 152 | pf = append(pf, kubernetes.NewDaemonSetPodFilter(cs)) 153 | } 154 | if !*evictStatefulSetPods { 155 | pf = append(pf, kubernetes.NewStatefulSetPodFilter(cs)) 156 | } 157 | systemKnownAnnotations := []string{ 158 | "cluster-autoscaler.kubernetes.io/safe-to-evict=false", // https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#what-types-of-pods-can-prevent-ca-from-removing-a-node 159 | } 160 | pf = append(pf, kubernetes.UnprotectedPodFilter(append(systemKnownAnnotations, *protectedPodAnnotations...)...)) 161 | var h cache.ResourceEventHandler = kubernetes.NewDrainingResourceEventHandler( 162 | kubernetes.NewAPICordonDrainer(cs, 163 | kubernetes.MaxGracePeriod(*maxGracePeriod), 164 | kubernetes.EvictionHeadroom(*evictionHeadroom), 165 | kubernetes.WithSkipDrain(*skipDrain), 166 | kubernetes.WithPodFilter(kubernetes.NewPodFilters(pf...)), 167 | kubernetes.WithAPICordonDrainerLogger(log), 168 | ), 169 | kubernetes.NewEventRecorder(cs), 170 | kubernetes.WithLogger(log), 171 | kubernetes.WithDrainBuffer(*drainBuffer), 172 | kubernetes.WithConditionsFilter(*conditions)) 173 | 174 | if *dryRun { 175 | h = cache.FilteringResourceEventHandler{ 176 | FilterFunc: kubernetes.NewNodeProcessed().Filter, 177 | Handler: kubernetes.NewDrainingResourceEventHandler( 178 | &kubernetes.NoopCordonDrainer{}, 179 | kubernetes.NewEventRecorder(cs), 180 | kubernetes.WithLogger(log), 181 | kubernetes.WithDrainBuffer(*drainBuffer), 182 | kubernetes.WithConditionsFilter(*conditions)), 183 | } 184 | } 185 | 186 | if len(*nodeLabels) > 0 { 187 | log.Debug("node labels", zap.Any("labels", nodeLabels)) 188 | if *nodeLabelsExpr != "" { 189 | kingpin.Fatalf("nodeLabels and NodeLabelsExpr cannot both be set") 190 | } 191 | if nodeLabelsExpr, err = kubernetes.ConvertLabelsToFilterExpr(*nodeLabels); err != nil { 192 | kingpin.Fatalf(err.Error()) 193 | } 194 | } 195 | 196 | var nodeLabelFilter cache.ResourceEventHandler 197 | log.Debug("label expression", zap.Any("expr", nodeLabelsExpr)) 198 | 199 | nodeLabelFilterFunc, err := kubernetes.NewNodeLabelFilter(nodeLabelsExpr, log) 200 | if err != nil { 201 | log.Sugar().Fatalf("Failed to parse node label expression: %v", err) 202 | } 203 | 204 | nodeLabelFilter = cache.FilteringResourceEventHandler{FilterFunc: nodeLabelFilterFunc, Handler: h} 205 | 206 | nodes := kubernetes.NewNodeWatch(cs, nodeLabelFilter) 207 | 208 | id, err := 
os.Hostname() 209 | kingpin.FatalIfError(err, "cannot get hostname") 210 | 211 | // use a Go context so we can tell the leaderelection code when we 212 | // want to step down 213 | ctx, cancel := context.WithCancel(context.Background()) 214 | defer cancel() 215 | 216 | lock, err := resourcelock.New( 217 | resourcelock.EndpointsResourceLock, 218 | *namespace, 219 | *leaderElectionTokenName, 220 | cs.CoreV1(), 221 | cs.CoordinationV1(), 222 | resourcelock.ResourceLockConfig{ 223 | Identity: id, 224 | EventRecorder: kubernetes.NewEventRecorder(cs), 225 | }, 226 | ) 227 | kingpin.FatalIfError(err, "cannot create lock") 228 | 229 | leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{ 230 | Lock: lock, 231 | LeaseDuration: *leaderElectionLeaseDuration, 232 | RenewDeadline: *leaderElectionRenewDeadline, 233 | RetryPeriod: *leaderElectionRetryPeriod, 234 | Callbacks: leaderelection.LeaderCallbacks{ 235 | OnStartedLeading: func(ctx context.Context) { 236 | log.Info("node watcher is running") 237 | kingpin.FatalIfError(await(nodes), "error watching") 238 | }, 239 | OnStoppedLeading: func() { 240 | kingpin.Fatalf("lost leader election") 241 | }, 242 | }, 243 | }) 244 | } 245 | 246 | type runner interface { 247 | Run(stop <-chan struct{}) 248 | } 249 | 250 | func await(rs ...runner) error { 251 | stop := make(chan struct{}) 252 | g := &run.Group{} 253 | for i := range rs { 254 | r := rs[i] // https://golang.org/doc/faq#closures_and_goroutines 255 | g.Add(func() error { r.Run(stop); return nil }, func(err error) { close(stop) }) 256 | } 257 | return g.Run() 258 | } 259 | 260 | type httpRunner struct { 261 | l string 262 | h map[string]http.Handler 263 | } 264 | 265 | func (r *httpRunner) Run(stop <-chan struct{}) { 266 | rt := httprouter.New() 267 | for path, handler := range r.h { 268 | rt.Handler("GET", path, handler) 269 | } 270 | 271 | s := &http.Server{Addr: r.l, Handler: rt} 272 | ctx, cancel := context.WithTimeout(context.Background(), 0*time.Second) 273 | go func() { 274 | <-stop 275 | s.Shutdown(ctx) // nolint:errcheck 276 | }() 277 | s.ListenAndServe() // nolint:errcheck 278 | cancel() 279 | } 280 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/planetlabs/draino 2 | 3 | go 1.13 4 | 5 | // Kube 1.15.3 6 | replace k8s.io/client-go => k8s.io/client-go v0.0.0-20190819141724-e14f31a72a77 7 | 8 | // Kube 1.15.3 9 | replace k8s.io/api => k8s.io/api v0.0.0-20190819141258-3544db3b9e44 10 | 11 | // Kube 1.15.3 12 | replace k8s.io/apimachinery => k8s.io/apimachinery v0.0.0-20190817020851-f2f3a405f61d 13 | 14 | require ( 15 | contrib.go.opencensus.io/exporter/prometheus v0.1.0 16 | github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc // indirect 17 | github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf // indirect 18 | github.com/antonmedv/expr v1.8.8 19 | github.com/go-test/deep v1.0.1 20 | github.com/julienschmidt/httprouter v1.1.0 21 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect 22 | github.com/oklog/run v1.0.0 23 | github.com/onsi/ginkgo v1.12.0 // indirect 24 | github.com/onsi/gomega v1.9.0 // indirect 25 | github.com/pkg/errors v0.8.0 26 | github.com/stretchr/testify v1.5.1 27 | go.opencensus.io v0.21.0 28 | go.uber.org/atomic v1.3.2 // indirect 29 | go.uber.org/multierr v1.1.0 // indirect 30 | go.uber.org/zap v1.9.1 31 | gopkg.in/alecthomas/kingpin.v2 v2.2.6 32 | gopkg.in/check.v1 
v1.0.0-20200227125254-8fa46927fb4f // indirect 33 | gotest.tools v2.2.0+incompatible 34 | k8s.io/api v0.0.0-20190819141258-3544db3b9e44 35 | k8s.io/apimachinery v0.0.0-20190817020851-f2f3a405f61d 36 | k8s.io/client-go v8.0.0+incompatible 37 | k8s.io/klog v0.3.1 38 | ) 39 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 2 | cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 3 | contrib.go.opencensus.io/exporter/prometheus v0.1.0 h1:SByaIoWwNgMdPSgl5sMqM2KDE5H/ukPWBRo314xiDvg= 4 | contrib.go.opencensus.io/exporter/prometheus v0.1.0/go.mod h1:cGFniUXGZlKRjzOyuZJ6mgB+PgBcCIa79kEKR8YCW+A= 5 | github.com/Azure/go-autorest v11.1.2+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= 6 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 7 | github.com/DATA-DOG/go-sqlmock v1.3.3/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= 8 | github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc h1:cAKDfWh5VpdgMhJosfJnn5/FoN2SRZ4p7fJNX58YPaU= 9 | github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= 10 | github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf h1:qet1QNfXsQxTZqLG4oE62mJzwPIB8+Tee4RNCL9ulrY= 11 | github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= 12 | github.com/antonmedv/expr v1.8.8 h1:uVwIkIBNO2yn4vY2u2DQUqXTmv9jEEMCEcHa19G5weY= 13 | github.com/antonmedv/expr v1.8.8/go.mod h1:5qsM3oLGDND7sDmQGDXHkYfkjYMUX14qsgqmHhwGEk8= 14 | github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973 h1:xJ4a3vCFaGF/jqvzLMYoU8P317H5OQ+Via4RmuPwCS0= 15 | github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= 16 | github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= 17 | github.com/davecgh/go-spew v0.0.0-20161028175848-04cdfd42973b/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 18 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 19 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 20 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 21 | github.com/dgrijalva/jwt-go v0.0.0-20160705203006-01aeca54ebda/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= 22 | github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM= 23 | github.com/elazarl/goproxy v0.0.0-20170405201442-c4fc26588b6e/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc= 24 | github.com/evanphx/json-patch v0.0.0-20190203023257-5858425f7550 h1:mV9jbLoSW/8m4VK16ZkHTozJa8sesK5u5kTMFysTYac= 25 | github.com/evanphx/json-patch v0.0.0-20190203023257-5858425f7550/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= 26 | github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= 27 | github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= 28 | github.com/gdamore/encoding v1.0.0/go.mod h1:alR0ol34c49FCSBLjhosxzcPHQbf2trDkoo5dl+VrEg= 29 | github.com/gdamore/tcell v1.3.0/go.mod h1:Hjvr+Ofd+gLglo7RYKxxnzCBmev3BzsS67MebKS4zMM= 30 | github.com/go-test/deep v1.0.1 
h1:UQhStjbkDClarlmv0am7OXXO4/GaPdCGiUiMTvi28sg= 31 | github.com/go-test/deep v1.0.1/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA= 32 | github.com/gogo/protobuf v0.0.0-20171007142547-342cbe0a0415 h1:WSBJMqJbLxsn+bTCPyPYZfqHdJmc8MK4wrBjMft6BAM= 33 | github.com/gogo/protobuf v0.0.0-20171007142547-342cbe0a0415/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= 34 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58= 35 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= 36 | github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903 h1:LbsanbbD6LieFkXbj9YNNBupiGHJgFeLpO0j0Fza1h8= 37 | github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= 38 | github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= 39 | github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM= 40 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 41 | github.com/google/btree v0.0.0-20160524151835-7d79101e329e h1:JHB7F/4TJCrYBW8+GZO8VkWDj1jxcWuCl6uxKODiyi4= 42 | github.com/google/btree v0.0.0-20160524151835-7d79101e329e/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= 43 | github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ= 44 | github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= 45 | github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY= 46 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 47 | github.com/google/gofuzz v0.0.0-20170612174753-24818f796faf h1:+RRA9JqSOZFfKrOeqr2z77+8R2RKyh8PG66dcu1V0ck= 48 | github.com/google/gofuzz v0.0.0-20170612174753-24818f796faf/go.mod h1:HP5RmnzzSNb993RKQDq4+1A4ia9nllfqcQFTQJedwGI= 49 | github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 50 | github.com/googleapis/gnostic v0.0.0-20170729233727-0c5108395e2d h1:7XGaL1e6bYS1yIonGp9761ExpPPV1ui0SAC59Yube9k= 51 | github.com/googleapis/gnostic v0.0.0-20170729233727-0c5108395e2d/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY= 52 | github.com/gophercloud/gophercloud v0.0.0-20190126172459-c818fa66e4c8/go.mod h1:3WdhXV3rUYy9p6AUW8d94kr+HS62Y4VL9mBnFxsD8q4= 53 | github.com/gregjones/httpcache v0.0.0-20170728041850-787624de3eb7 h1:6TSoaYExHper8PYsJu23GWVNOyYRCSnIFyxKgLSZ54w= 54 | github.com/gregjones/httpcache v0.0.0-20170728041850-787624de3eb7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= 55 | github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo= 56 | github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= 57 | github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= 58 | github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= 59 | github.com/imdario/mergo v0.3.5 h1:JboBksRwiiAJWvIYJVo46AfV+IAIKZpfrSzVKj42R4Q= 60 | github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= 61 | github.com/json-iterator/go v0.0.0-20180701071628-ab8a2e0c74be h1:AHimNtVIpiBjPUhEF5KNCkrUyqTSA5zWUl8sQ2bfGBE= 62 | github.com/json-iterator/go v0.0.0-20180701071628-ab8a2e0c74be/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= 63 | github.com/julienschmidt/httprouter v1.1.0 h1:7wLdtIiIpzOkC9u6sXOozpBauPdskj3ru4EI5MABq68= 64 | 
github.com/julienschmidt/httprouter v1.1.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= 65 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 66 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= 67 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 68 | github.com/lucasb-eyer/go-colorful v1.0.2/go.mod h1:0MS4r+7BZKSJ5mw4/S5MPN+qHFF1fYclkSPilDOKW0s= 69 | github.com/lucasb-eyer/go-colorful v1.0.3/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= 70 | github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= 71 | github.com/mattn/go-runewidth v0.0.8/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= 72 | github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= 73 | github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= 74 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 75 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 76 | github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI= 77 | github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= 78 | github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= 79 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= 80 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= 81 | github.com/oklog/run v1.0.0 h1:Ru7dDtJNOyC66gQ5dQmaCa0qIsAUFY3sFpK1Xk8igrw= 82 | github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA= 83 | github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= 84 | github.com/onsi/ginkgo v1.12.0 h1:Iw5WCbBcaAAd0fpRb1c9r5YCylv4XDoCSigm1zLevwU= 85 | github.com/onsi/ginkgo v1.12.0/go.mod h1:oUhWkIvk5aDxtKvDDuw8gItl8pKl42LzjC9KZE0HfGg= 86 | github.com/onsi/gomega v0.0.0-20190113212917-5533ce8a0da3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= 87 | github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= 88 | github.com/onsi/gomega v1.9.0 h1:R1uwffexN6Pr340GtYRIdZmAiN4J+iw6WG4wog1DUXg= 89 | github.com/onsi/gomega v1.9.0/go.mod h1:Ho0h+IUsWyvy1OpqCwxlQ/21gkhVunqlU8fDGcoTdcA= 90 | github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+vxiaj6gdUUzhl4XmI= 91 | github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= 92 | github.com/pkg/errors v0.8.0 h1:WdK/asTD0HN+q6hsWO3/vpuAkAr+tw6aNJNDFFf0+qw= 93 | github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 94 | github.com/pmezard/go-difflib v0.0.0-20151028094244-d8ed2627bdf0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 95 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 96 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 97 | github.com/prometheus/client_golang v0.9.2 h1:awm861/B8OKDd2I/6o1dy3ra4BamzKhYOiGItCeZ740= 98 | github.com/prometheus/client_golang v0.9.2/go.mod h1:OsXs2jCmiKlQ1lTBmv21f2mNfw4xf/QclQDMrYNZzcM= 99 | github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910 
h1:idejC8f05m9MGOsuEi1ATq9shN03HrxNkD/luQvxCv8= 100 | github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= 101 | github.com/prometheus/common v0.0.0-20181126121408-4724e9255275 h1:PnBWHBf+6L0jOqq0gIVUe6Yk0/QMZ640k6NvkxcBf+8= 102 | github.com/prometheus/common v0.0.0-20181126121408-4724e9255275/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= 103 | github.com/prometheus/procfs v0.0.0-20181204211112-1dc9a6cbc91a h1:9a8MnZMP0X2nLJdBg+pBmGgkJlSaKC2KaQmTCk1XDtE= 104 | github.com/prometheus/procfs v0.0.0-20181204211112-1dc9a6cbc91a/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= 105 | github.com/rivo/tview v0.0.0-20200219210816-cd38d7432498/go.mod h1:6lkG1x+13OShEf0EaOCaTQYyB7d5nSbb181KtjlS+84= 106 | github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 107 | github.com/sanity-io/litter v1.2.0/go.mod h1:JF6pZUFgu2Q0sBZ+HSV35P8TVPI1TTzEwyu9FXAw2W4= 108 | github.com/spf13/pflag v1.0.1 h1:aCvUg6QPl3ibpQUxyLkrEkCHtPqYJL4x9AuhqVqFis4= 109 | github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= 110 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 111 | github.com/stretchr/testify v0.0.0-20161117074351-18a02ba4a312/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 112 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 113 | github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4= 114 | github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= 115 | go.opencensus.io v0.21.0 h1:mU6zScU4U1YAFPHEHYk+3JC4SY7JxgkqS10ZOSyksNg= 116 | go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= 117 | go.uber.org/atomic v1.3.2 h1:2Oa65PReHzfn29GpvgsYwloV9AVFHPDk8tYxt2c2tr4= 118 | go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= 119 | go.uber.org/multierr v1.1.0 h1:HoEmRHQPVSqub6w2z2d2EOVs2fjyFRGyofhKuyDq0QI= 120 | go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= 121 | go.uber.org/zap v1.9.1 h1:XCJQEf3W6eZaVwhRBof6ImoYGJSITeKWsyeh3HFu/5o= 122 | go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= 123 | golang.org/x/crypto v0.0.0-20181025213731-e84da0312774 h1:a4tQYYYuK9QdeO/+kEvNYyuR21S+7ve5EANok6hABhI= 124 | golang.org/x/crypto v0.0.0-20181025213731-e84da0312774/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= 125 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90PveolxSbWFaJdECFbxSq0Mqo2M= 126 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 127 | golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 128 | golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= 129 | golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= 130 | golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 131 | golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 132 | golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 133 | golang.org/x/net v0.0.0-20181201002055-351d144fa1fc h1:a3CU5tJYVj92DY2LaA1kUkrsqD5/3mLDhx2NcNqyW+0= 134 | 
golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 135 | golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 136 | golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 137 | golang.org/x/net v0.0.0-20190311183353-d8887717615a h1:oWX7TPOiFAMXLq8o0ikBYfCJVlRHBcsciT5bXOrH628= 138 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 139 | golang.org/x/net v0.0.0-20190812203447-cdfb69ac37fc h1:gkKoSkUmnU6bpS/VhkuO27bzQeSA51uaEfbOW5dNb68= 140 | golang.org/x/net v0.0.0-20190812203447-cdfb69ac37fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 141 | golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= 142 | golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a h1:tImsplftrFpALCYumobsd0K86vlAs/eXGFms2txfJfA= 143 | golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= 144 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 145 | golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 146 | golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 147 | golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6 h1:bjcUS9ztw9kFmmIxJInhon/0Is3p+EHBKNgquIzo1OI= 148 | golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 149 | golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 150 | golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 151 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 152 | golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 153 | golang.org/x/sys v0.0.0-20190626150813-e07cf5db2756/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 154 | golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e h1:N7DeIrjYszNmSW409R3frPPwglRwMkXSBzwVbkOjLLA= 155 | golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 156 | golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4 h1:sfkvUWPNGwSV+8/fNqctR5lS2AqCSqYwXdrjCxp/dXo= 157 | golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 158 | golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= 159 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 160 | golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db h1:6/JqlYfC1CCaLnGceQTI+sDGhC9UBSPAsBqI0Gun6kU= 161 | golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 162 | golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= 163 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 164 | golang.org/x/time v0.0.0-20161028155119-f51c12702a4d h1:TnM+PKb3ylGmZvyPXmo9m/wktg7Jn/a/fNmr33HSj8g= 165 | golang.org/x/time v0.0.0-20161028155119-f51c12702a4d/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= 166 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod 
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 167 | golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 168 | golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= 169 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7 h1:9zdDQZ7Thm29KFXgAX/+yaf3eVbP7djjWp/dXAppNCc= 170 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 171 | google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= 172 | google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 173 | google.golang.org/appengine v1.5.0 h1:KxkO13IPW4Lslp2bz+KHP2E3gtFlrIGNThxkZQ3g+4c= 174 | google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 175 | google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= 176 | google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= 177 | google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= 178 | gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc= 179 | gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= 180 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 181 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= 182 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 183 | gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= 184 | gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= 185 | gopkg.in/inf.v0 v0.9.0 h1:3zYtXIO92bvsdS3ggAdA8Gb4Azj0YU+TVY1uGYNFA8o= 186 | gopkg.in/inf.v0 v0.9.0/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= 187 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= 188 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= 189 | gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 190 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 191 | gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I= 192 | gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 193 | gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= 194 | gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= 195 | honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 196 | k8s.io/api v0.0.0-20190819141258-3544db3b9e44 h1:7Gz7/nQ7X2qmPXMyN0bNq7Zm9Uip+UnFuMZTd2l3vms= 197 | k8s.io/api v0.0.0-20190819141258-3544db3b9e44/go.mod h1:AOxZTnaXR/xiarlQL0JUfwQPxjmKDvVYoRp58cA7lUo= 198 | k8s.io/apimachinery v0.0.0-20190817020851-f2f3a405f61d h1:7Kns6qqhMAQWvGkxYOLSLRZ5hJO0/5pcE5lPGP2fxUw= 199 | k8s.io/apimachinery v0.0.0-20190817020851-f2f3a405f61d/go.mod h1:3jediapYqJ2w1BFw7lAZPCx7scubsTfosqHkhXCWJKw= 200 | k8s.io/client-go v0.0.0-20190819141724-e14f31a72a77 h1:w1BoabVnPpPqQCY3sHK4qVwa12Lk8ip1pKMR1C+qbdo= 201 | k8s.io/client-go v0.0.0-20190819141724-e14f31a72a77/go.mod 
h1:DmkJD5UDP87MVqUQ5VJ6Tj9Oen8WzXPhk3la4qpyG4g= 202 | k8s.io/klog v0.3.1 h1:RVgyDHY/kFKtLqh67NvEWIgkMneNoIrdkN0CxDSQc68= 203 | k8s.io/klog v0.3.1/go.mod h1:Gq+BEi5rUBO/HRz0bTSXDUcqjScdoY3a9IHpCEIOOfk= 204 | k8s.io/kube-openapi v0.0.0-20190228160746-b3a7cee44a30 h1:TRb4wNWoBVrH9plmkp2q86FIDppkbrEXdXlxU3a3BMI= 205 | k8s.io/kube-openapi v0.0.0-20190228160746-b3a7cee44a30/go.mod h1:BXM9ceUBTj2QnfH2MK1odQs778ajze1RxcmP6S8RVVc= 206 | k8s.io/utils v0.0.0-20190221042446-c2654d5206da h1:ElyM7RPonbKnQqOcw7dG2IK5uvQQn3b/WPHqD5mBvP4= 207 | k8s.io/utils v0.0.0-20190221042446-c2654d5206da/go.mod h1:8k8uAuAQ0rXslZKaEWd0c3oVhZz7sSzSiPnVZayjIX0= 208 | sigs.k8s.io/yaml v1.1.0 h1:4A07+ZFc2wgJwo8YNlQpr1rVlgUDlxXHhPJciaPY5gs= 209 | sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= 210 | -------------------------------------------------------------------------------- /helm/draino/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | .vscode/ 23 | -------------------------------------------------------------------------------- /helm/draino/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | description: draino chart 3 | name: draino 4 | version: 0.1.0 5 | -------------------------------------------------------------------------------- /helm/draino/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "draino.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 13 | */}} 14 | {{- define "draino.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "draino.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | -------------------------------------------------------------------------------- /helm/draino/templates/clusterrole.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.rbac.create -}} 2 | apiVersion: rbac.authorization.k8s.io/v1beta1 3 | kind: ClusterRole 4 | metadata: 5 | name: {{ include "draino.fullname" . }} 6 | labels: 7 | app.kubernetes.io/name: {{ include "draino.name" . 
}} 8 | helm.sh/chart: {{ include "draino.chart" . }} 9 | app.kubernetes.io/instance: {{ .Release.Name }} 10 | app.kubernetes.io/managed-by: {{ .Release.Service }} 11 | rules: 12 | - apiGroups: [''] 13 | resources: [events] 14 | verbs: [create, patch, update] 15 | - apiGroups: [''] 16 | resources: [nodes] 17 | verbs: [get, watch, list, update] 18 | - apiGroups: [''] 19 | resources: [nodes/status] 20 | verbs: [patch, update] 21 | - apiGroups: [''] 22 | resources: [pods] 23 | verbs: [get, watch, list] 24 | - apiGroups: [''] 25 | resources: [pods/eviction] 26 | verbs: [create] 27 | - apiGroups: [apps] 28 | resources: [daemonsets] 29 | verbs: [get, watch, list] 30 | - apiGroups: ['*'] 31 | resources: [statefulsets] 32 | verbs: [get] 33 | - apiGroups: [''] 34 | resources: [endpoints] 35 | verbs: [get, create, update] 36 | 37 | {{- end -}} 38 | -------------------------------------------------------------------------------- /helm/draino/templates/clusterrolebinding.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.rbac.create -}} 2 | apiVersion: rbac.authorization.k8s.io/v1beta1 3 | kind: ClusterRoleBinding 4 | metadata: 5 | name: {{ include "draino.fullname" . }} 6 | labels: 7 | app.kubernetes.io/name: {{ include "draino.name" . }} 8 | helm.sh/chart: {{ include "draino.chart" . }} 9 | app.kubernetes.io/instance: {{ .Release.Name }} 10 | app.kubernetes.io/managed-by: {{ .Release.Service }} 11 | roleRef: 12 | apiGroup: rbac.authorization.k8s.io 13 | kind: ClusterRole 14 | name: {{ template "draino.fullname" . }} 15 | subjects: 16 | - kind: ServiceAccount 17 | name: {{ template "draino.fullname" . }} 18 | namespace: {{ .Release.Namespace }} 19 | {{- end -}} 20 | -------------------------------------------------------------------------------- /helm/draino/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ include "draino.fullname" . }} 5 | labels: 6 | app.kubernetes.io/name: {{ include "draino.name" . }} 7 | helm.sh/chart: {{ include "draino.chart" . }} 8 | app.kubernetes.io/instance: {{ .Release.Name }} 9 | app.kubernetes.io/managed-by: {{ .Release.Service }} 10 | spec: 11 | replicas: {{ .Values.replicaCount }} 12 | selector: 13 | matchLabels: 14 | app.kubernetes.io/name: {{ include "draino.name" . }} 15 | app.kubernetes.io/instance: {{ .Release.Name }} 16 | template: 17 | metadata: 18 | labels: 19 | app.kubernetes.io/name: {{ include "draino.name" . }} 20 | app.kubernetes.io/instance: {{ .Release.Name }} 21 | {{- with .Values.podLabels }} 22 | {{ toYaml . | indent 8 }} 23 | {{- end }} 24 | {{- with .Values.podAnnotations }} 25 | annotations: 26 | {{ toYaml . | indent 8 }} 27 | {{- end }} 28 | spec: 29 | containers: 30 | - name: {{ .Chart.Name }} 31 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 32 | imagePullPolicy: {{ .Values.image.pullPolicy }} 33 | command: 34 | - /draino 35 | {{- if .Values.dryRun }} 36 | - --dry-run 37 | {{ end }} 38 | {{- range $key, $value := .Values.extraArgs }} 39 | - --{{ $key }}{{ if $value }}={{ $value }}{{ end }} 40 | {{- end }} 41 | {{- range .Values.conditions }} 42 | - {{ . }} 43 | {{- end }} 44 | livenessProbe: 45 | initialDelaySeconds: 30 46 | httpGet: 47 | path: /healthz 48 | port: 10002 49 | resources: 50 | {{- toYaml .Values.resources | nindent 12 }} 51 | {{- with .Values.containerSecurityContext }} 52 | securityContext: 53 | {{- toYaml . 
| nindent 12 }} 54 | {{- end }} 55 | serviceAccountName: {{ if .Values.rbac.create }}{{ template "draino.fullname" . }}{{ else }}"{{ .Values.rbac.serviceAccountName }}"{{ end }} 56 | {{- with .Values.securityContext }} 57 | securityContext: 58 | {{- toYaml . | nindent 8 }} 59 | {{- end }} 60 | {{- with .Values.nodeSelector }} 61 | nodeSelector: 62 | {{- toYaml . | nindent 8 }} 63 | {{- end }} 64 | {{- with .Values.affinity }} 65 | affinity: 66 | {{- toYaml . | nindent 8 }} 67 | {{- end }} 68 | {{- with .Values.tolerations }} 69 | tolerations: 70 | {{- toYaml . | nindent 8 }} 71 | {{- end }} 72 | -------------------------------------------------------------------------------- /helm/draino/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.rbac.create -}} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "draino.fullname" . }} 6 | labels: 7 | app.kubernetes.io/name: {{ include "draino.name" . }} 8 | helm.sh/chart: {{ include "draino.chart" . }} 9 | app.kubernetes.io/instance: {{ .Release.Name }} 10 | app.kubernetes.io/managed-by: {{ .Release.Service }} 11 | {{- end -}} 12 | -------------------------------------------------------------------------------- /helm/draino/values.yaml: -------------------------------------------------------------------------------- 1 | dryRun: false 2 | 3 | extraArgs: {} 4 | 5 | conditions: {} 6 | 7 | replicaCount: 1 8 | 9 | image: 10 | repository: planetlabs/draino 11 | tag: 450a853 12 | pullPolicy: IfNotPresent 13 | 14 | resources: 15 | limits: 16 | cpu: 100m 17 | memory: 128Mi 18 | requests: 19 | cpu: 100m 20 | memory: 128Mi 21 | 22 | # Add these annotations to all pods 23 | podAnnotations: {} 24 | # name: value 25 | # prometheus.io/scrape: 'true' 26 | # prometheus.io/port: '9102' 27 | 28 | # Add these labels to all pods 29 | podLabels: {} 30 | # name: value 31 | 32 | rbac: 33 | create: true 34 | serviceAccountName: 35 | 36 | nodeSelector: {} 37 | 38 | tolerations: [] 39 | 40 | affinity: {} 41 | 42 | # Security Context policies for pods 43 | # ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ 44 | securityContext: 45 | fsGroup: 101 46 | runAsGroup: 101 47 | runAsNonRoot: true 48 | runAsUser: 100 49 | 50 | containerSecurityContext: 51 | privileged: false 52 | readOnlyRootFilesystem: true 53 | -------------------------------------------------------------------------------- /internal/kubernetes/drainSchedule.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "sync" 7 | "sync/atomic" 8 | "time" 9 | 10 | "go.opencensus.io/stats" 11 | "go.opencensus.io/tag" 12 | "go.uber.org/zap" 13 | core "k8s.io/api/core/v1" 14 | v1 "k8s.io/api/core/v1" 15 | "k8s.io/apimachinery/pkg/types" 16 | "k8s.io/client-go/tools/record" 17 | ) 18 | 19 | const ( 20 | SetConditionTimeout = 10 * time.Second 21 | SetConditionRetryPeriod = 50 * time.Millisecond 22 | ) 23 | 24 | type DrainScheduler interface { 25 | HasSchedule(name string) (has, failed bool) 26 | Schedule(node *v1.Node) (time.Time, error) 27 | DeleteSchedule(name string) 28 | } 29 | 30 | type DrainSchedules struct { 31 | sync.Mutex 32 | schedules map[string]*schedule 33 | 34 | lastDrainScheduledFor time.Time 35 | period time.Duration 36 | 37 | logger *zap.Logger 38 | drainer Drainer 39 | eventRecorder record.EventRecorder 40 | } 41 | 42 | func NewDrainSchedules(drainer Drainer, eventRecorder 
record.EventRecorder, period time.Duration, logger *zap.Logger) DrainScheduler { 43 | return &DrainSchedules{ 44 | schedules: map[string]*schedule{}, 45 | period: period, 46 | logger: logger, 47 | drainer: drainer, 48 | eventRecorder: eventRecorder, 49 | } 50 | } 51 | 52 | func (d *DrainSchedules) HasSchedule(name string) (has, failed bool) { 53 | d.Lock() 54 | defer d.Unlock() 55 | sched, ok := d.schedules[name] 56 | if !ok { 57 | return false, false 58 | } 59 | return true, sched.isFailed() 60 | } 61 | 62 | func (d *DrainSchedules) DeleteSchedule(name string) { 63 | d.Lock() 64 | defer d.Unlock() 65 | if s, ok := d.schedules[name]; ok { 66 | s.timer.Stop() 67 | } else { 68 | d.logger.Error("Failed schedule deletion", zap.String("key", name)) 69 | } 70 | delete(d.schedules, name) 71 | } 72 | 73 | func (d *DrainSchedules) WhenNextSchedule() time.Time { 74 | // compute drain schedule time 75 | sooner := time.Now().Add(SetConditionTimeout + time.Second) 76 | when := d.lastDrainScheduledFor.Add(d.period) 77 | if when.Before(sooner) { 78 | when = sooner 79 | } 80 | return when 81 | } 82 | 83 | func (d *DrainSchedules) Schedule(node *v1.Node) (time.Time, error) { 84 | d.Lock() 85 | if sched, ok := d.schedules[node.GetName()]; ok { 86 | d.Unlock() 87 | return sched.when, NewAlreadyScheduledError() // we already have a schedule planned 88 | } 89 | 90 | // compute drain schedule time 91 | when := d.WhenNextSchedule() 92 | d.lastDrainScheduledFor = when 93 | d.schedules[node.GetName()] = d.newSchedule(node, when) 94 | d.Unlock() 95 | 96 | // Mark the node with the condition stating that drain is scheduled 97 | if err := RetryWithTimeout( 98 | func() error { 99 | return d.drainer.MarkDrain(node, when, time.Time{}, false) 100 | }, 101 | SetConditionRetryPeriod, 102 | SetConditionTimeout, 103 | ); err != nil { 104 | // if we cannot mark the node, let's remove the schedule 105 | d.DeleteSchedule(node.GetName()) 106 | return time.Time{}, err 107 | } 108 | return when, nil 109 | } 110 | 111 | type schedule struct { 112 | when time.Time 113 | failed int32 114 | finish time.Time 115 | timer *time.Timer 116 | } 117 | 118 | func (s *schedule) setFailed() { 119 | atomic.StoreInt32(&s.failed, 1) 120 | } 121 | 122 | func (s *schedule) isFailed() bool { 123 | return atomic.LoadInt32(&s.failed) == 1 124 | } 125 | 126 | func (d *DrainSchedules) newSchedule(node *v1.Node, when time.Time) *schedule { 127 | sched := &schedule{ 128 | when: when, 129 | } 130 | sched.timer = time.AfterFunc(time.Until(when), func() { 131 | log := d.logger.With(zap.String("node", node.GetName())) 132 | nr := &core.ObjectReference{Kind: "Node", Name: node.GetName(), UID: types.UID(node.GetName())} 133 | tags, _ := tag.New(context.Background(), tag.Upsert(TagNodeName, node.GetName())) // nolint:gosec 134 | d.eventRecorder.Event(nr, core.EventTypeWarning, eventReasonDrainStarting, "Draining node") 135 | if err := d.drainer.Drain(node); err != nil { 136 | sched.finish = time.Now() 137 | sched.setFailed() 138 | log.Info("Failed to drain", zap.Error(err)) 139 | tags, _ = tag.New(tags, tag.Upsert(TagResult, tagResultFailed)) // nolint:gosec 140 | stats.Record(tags, MeasureNodesDrained.M(1)) 141 | d.eventRecorder.Eventf(nr, core.EventTypeWarning, eventReasonDrainFailed, "Draining failed: %v", err) 142 | if err := RetryWithTimeout( 143 | func() error { 144 | return d.drainer.MarkDrain(node, when, sched.finish, true) 145 | }, 146 | SetConditionRetryPeriod, 147 | SetConditionTimeout, 148 | ); err != nil { 149 | log.Error("Failed to place condition 
following drain failure") 150 | } 151 | return 152 | } 153 | sched.finish = time.Now() 154 | log.Info("Drained") 155 | tags, _ = tag.New(tags, tag.Upsert(TagResult, tagResultSucceeded)) // nolint:gosec 156 | stats.Record(tags, MeasureNodesDrained.M(1)) 157 | d.eventRecorder.Event(nr, core.EventTypeWarning, eventReasonDrainSucceeded, "Drained node") 158 | if err := RetryWithTimeout( 159 | func() error { 160 | return d.drainer.MarkDrain(node, when, sched.finish, false) 161 | }, 162 | SetConditionRetryPeriod, 163 | SetConditionTimeout, 164 | ); err != nil { 165 | d.eventRecorder.Eventf(nr, core.EventTypeWarning, eventReasonDrainFailed, "Failed to place drain condition: %v", err) 166 | log.Error(fmt.Sprintf("Failed to place condition following drain success : %v", err)) 167 | } 168 | }) 169 | return sched 170 | } 171 | 172 | type AlreadyScheduledError struct { 173 | error 174 | } 175 | 176 | func NewAlreadyScheduledError() error { 177 | return &AlreadyScheduledError{ 178 | fmt.Errorf("drain schedule is already planned for that node"), 179 | } 180 | } 181 | func IsAlreadyScheduledError(err error) bool { 182 | _, ok := err.(*AlreadyScheduledError) 183 | return ok 184 | } 185 | -------------------------------------------------------------------------------- /internal/kubernetes/drainSchedule_test.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | 8 | "github.com/pkg/errors" 9 | "go.uber.org/zap" 10 | v1 "k8s.io/api/core/v1" 11 | meta "k8s.io/apimachinery/pkg/apis/meta/v1" 12 | "k8s.io/client-go/tools/record" 13 | ) 14 | 15 | func TestDrainSchedules_Schedule(t *testing.T) { 16 | fmt.Println("Now: " + time.Now().Format(time.RFC3339)) 17 | period := time.Minute 18 | scheduler := NewDrainSchedules(&NoopCordonDrainer{}, &record.FakeRecorder{}, period, zap.NewNop()) 19 | whenFirstSched := scheduler.(*DrainSchedules).WhenNextSchedule() 20 | 21 | type timeWindow struct { 22 | from, to time.Time 23 | } 24 | 25 | tests := []struct { 26 | name string 27 | node *v1.Node 28 | window timeWindow 29 | wantErr bool 30 | }{ 31 | { 32 | name: "first schedule", 33 | node: &v1.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 34 | window: timeWindow{ 35 | from: whenFirstSched, 36 | to: whenFirstSched.Add(2 * time.Second), 37 | }, 38 | }, 39 | { 40 | name: "second schedule", 41 | node: &v1.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName + "2"}}, 42 | window: timeWindow{ 43 | from: whenFirstSched.Add(period - 2*time.Second), 44 | to: whenFirstSched.Add(period + 2*time.Second), 45 | }, 46 | }, 47 | { 48 | name: "third schedule", 49 | node: &v1.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName + "3"}}, 50 | window: timeWindow{ 51 | from: whenFirstSched.Add(2*period - 2*time.Second), 52 | to: whenFirstSched.Add(2*period + 2*time.Second), 53 | }, 54 | }, 55 | } 56 | for _, tt := range tests { 57 | t.Run(tt.name, func(t *testing.T) { 58 | // Check that node is not yet scheduled for drain 59 | hasSchedule, _ := scheduler.HasSchedule(tt.node.Name) 60 | if hasSchedule { 61 | t.Errorf("Node %v should not have any schedule", tt.node.Name) 62 | } 63 | 64 | when, err := scheduler.Schedule(tt.node) 65 | if (err != nil) != tt.wantErr { 66 | t.Errorf("DrainSchedules.Schedule() error = %v, wantErr %v", err, tt.wantErr) 67 | return 68 | } 69 | // Check that node is scheduled for drain 70 | hasSchedule, _ = scheduler.HasSchedule(tt.node.Name) 71 | if !hasSchedule { 72 | t.Errorf("Missing schedule record for node %v", 
tt.node.Name) 73 | } 74 | // Check that scheduled are place in the goog time window 75 | if when.Before(tt.window.from) || when.After(tt.window.to) { 76 | t.Errorf("Schedule out of timeWindow") 77 | } 78 | // Deleting schedule 79 | scheduler.DeleteSchedule(tt.node.Name) 80 | // Check that node is no more scheduled for drain 81 | hasSchedule, _ = scheduler.HasSchedule(tt.node.Name) 82 | if hasSchedule { 83 | t.Errorf("Node %v should not been scheduled anymore", tt.node.Name) 84 | } 85 | }) 86 | } 87 | } 88 | 89 | type failDrainer struct { 90 | NoopCordonDrainer 91 | } 92 | 93 | func (d *failDrainer) Drain(n *v1.Node) error { return errors.New("myerr") } 94 | 95 | // Test to ensure there are no races when calling HasSchedule while the 96 | // scheduler is draining a node. 97 | func TestDrainSchedules_HasSchedule_Polling(t *testing.T) { 98 | scheduler := NewDrainSchedules(&failDrainer{}, &record.FakeRecorder{}, 0, zap.NewNop()) 99 | node := &v1.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}} 100 | 101 | when, err := scheduler.Schedule(node) 102 | if err != nil { 103 | t.Fatalf("DrainSchedules.Schedule() error = %v", err) 104 | } 105 | 106 | timeout := time.After(time.Until(when) + time.Minute) 107 | for { 108 | hasSchedule, failed := scheduler.HasSchedule(node.Name) 109 | if !hasSchedule { 110 | t.Fatalf("Missing schedule record for node %v", node.Name) 111 | } 112 | if failed { 113 | // Having `failed` as true is the expected result here since this 114 | // test is using the `failDrainer{}` drainer. It means that 115 | // HasSchedule was successfully called during or after the draining 116 | // function was scheduled and the test can complete successfully. 117 | break 118 | } 119 | select { 120 | case <-time.After(time.Second): 121 | // Small sleep to ensure we're not running the CPU hot while 122 | // polling `HasSchedule`. 123 | case <-timeout: 124 | // This timeout prevents this test from running forever in case 125 | // some bug caused the draining function never to be scheduled. 126 | t.Fatalf("timeout waiting for HasSchedule to fail") 127 | } 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /internal/kubernetes/drainer.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2018 Planet Labs Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 13 | implied. See the License for the specific language governing permissions 14 | and limitations under the License. 15 | */ 16 | 17 | package kubernetes 18 | 19 | import ( 20 | "fmt" 21 | "time" 22 | 23 | "github.com/pkg/errors" 24 | "go.uber.org/zap" 25 | core "k8s.io/api/core/v1" 26 | policy "k8s.io/api/policy/v1beta1" 27 | apierrors "k8s.io/apimachinery/pkg/api/errors" 28 | meta "k8s.io/apimachinery/pkg/apis/meta/v1" 29 | "k8s.io/apimachinery/pkg/fields" 30 | "k8s.io/apimachinery/pkg/util/wait" 31 | "k8s.io/client-go/kubernetes" 32 | ) 33 | 34 | // Default pod eviction settings. 
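// Together these defaults bound how long draining waits for evictions:
// both Drain's overall deadline and awaitDeletion's per-pod wait are
// maxGracePeriod + evictionHeadroom (see deleteTimeout below), i.e. up
// to eight minutes of pod termination grace plus thirty seconds for the
// API server to confirm the deletion.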
35 | const ( 36 | DefaultMaxGracePeriod time.Duration = 8 * time.Minute 37 | DefaultEvictionOverhead time.Duration = 30 * time.Second 38 | 39 | kindDaemonSet = "DaemonSet" 40 | kindStatefulSet = "StatefulSet" 41 | 42 | ConditionDrainedScheduled = "DrainScheduled" 43 | DefaultSkipDrain = false 44 | ) 45 | 46 | type nodeMutatorFn func(*core.Node) 47 | 48 | type errTimeout struct{} 49 | 50 | func (e errTimeout) Error() string { 51 | return "timed out" 52 | } 53 | 54 | func (e errTimeout) Timeout() {} 55 | 56 | // IsTimeout returns true if the supplied error was caused by a timeout. 57 | func IsTimeout(err error) bool { 58 | err = errors.Cause(err) 59 | _, ok := err.(interface { 60 | Timeout() 61 | }) 62 | return ok 63 | } 64 | 65 | // A Cordoner cordons nodes. 66 | type Cordoner interface { 67 | // Cordon the supplied node. Marks it unschedulable for new pods. 68 | Cordon(n *core.Node, mutators ...nodeMutatorFn) error 69 | 70 | // Uncordon the supplied node. Marks it schedulable for new pods. 71 | Uncordon(n *core.Node, mutators ...nodeMutatorFn) error 72 | } 73 | 74 | // A Drainer drains nodes. 75 | type Drainer interface { 76 | // Drain the supplied node. Evicts the node of all but mirror and DaemonSet pods. 77 | Drain(n *core.Node) error 78 | MarkDrain(n *core.Node, when, finish time.Time, failed bool) error 79 | } 80 | 81 | // A CordonDrainer both cordons and drains nodes! 82 | type CordonDrainer interface { 83 | Cordoner 84 | Drainer 85 | } 86 | 87 | // A NoopCordonDrainer does nothing. 88 | type NoopCordonDrainer struct{} 89 | 90 | // Cordon does nothing. 91 | func (d *NoopCordonDrainer) Cordon(n *core.Node, mutators ...nodeMutatorFn) error { return nil } 92 | 93 | // Uncordon does nothing. 94 | func (d *NoopCordonDrainer) Uncordon(n *core.Node, mutators ...nodeMutatorFn) error { return nil } 95 | 96 | // Drain does nothing. 97 | func (d *NoopCordonDrainer) Drain(n *core.Node) error { return nil } 98 | 99 | // MarkDrain does nothing. 100 | func (d *NoopCordonDrainer) MarkDrain(n *core.Node, when, finish time.Time, failed bool) error { 101 | return nil 102 | } 103 | 104 | // APICordonDrainer drains Kubernetes nodes via the Kubernetes API. 105 | type APICordonDrainer struct { 106 | c kubernetes.Interface 107 | l *zap.Logger 108 | 109 | filter PodFilterFunc 110 | 111 | maxGracePeriod time.Duration 112 | evictionHeadroom time.Duration 113 | skipDrain bool 114 | } 115 | 116 | // SuppliedCondition defines the condition will be watched. 117 | type SuppliedCondition struct { 118 | Type core.NodeConditionType 119 | Status core.ConditionStatus 120 | MinimumDuration time.Duration 121 | } 122 | 123 | // APICordonDrainerOption configures an APICordonDrainer. 124 | type APICordonDrainerOption func(d *APICordonDrainer) 125 | 126 | // MaxGracePeriod configures the maximum time to wait for a pod eviction. Pod 127 | // containers will be allowed this much time to shutdown once they receive a 128 | // SIGTERM before they are sent a SIGKILL. 129 | func MaxGracePeriod(m time.Duration) APICordonDrainerOption { 130 | return func(d *APICordonDrainer) { 131 | d.maxGracePeriod = m 132 | } 133 | } 134 | 135 | // EvictionHeadroom configures an amount of time to wait in addition to the 136 | // MaxGracePeriod for the API server to report a pod deleted. 
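// As a rough sketch (the client and durations here are hypothetical,
// not project defaults), a caller could combine it with MaxGracePeriod
// when building the drainer:
//
//	d := NewAPICordonDrainer(client,
//		MaxGracePeriod(10*time.Minute),
//		EvictionHeadroom(time.Minute),
//	)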
137 | func EvictionHeadroom(h time.Duration) APICordonDrainerOption { 138 | return func(d *APICordonDrainer) { 139 | d.evictionHeadroom = h 140 | } 141 | } 142 | 143 | // WithPodFilter configures a filter that may be used to exclude certain pods 144 | // from eviction when draining. 145 | func WithPodFilter(f PodFilterFunc) APICordonDrainerOption { 146 | return func(d *APICordonDrainer) { 147 | d.filter = f 148 | } 149 | } 150 | 151 | // WithDrain determines if we're actually going to drain nodes 152 | func WithSkipDrain(b bool) APICordonDrainerOption { 153 | return func(d *APICordonDrainer) { 154 | d.skipDrain = b 155 | } 156 | } 157 | 158 | // WithAPICordonDrainerLogger configures a APICordonDrainer to use the supplied 159 | // logger. 160 | func WithAPICordonDrainerLogger(l *zap.Logger) APICordonDrainerOption { 161 | return func(d *APICordonDrainer) { 162 | d.l = l 163 | } 164 | } 165 | 166 | // NewAPICordonDrainer returns a CordonDrainer that cordons and drains nodes via 167 | // the Kubernetes API. 168 | func NewAPICordonDrainer(c kubernetes.Interface, ao ...APICordonDrainerOption) *APICordonDrainer { 169 | d := &APICordonDrainer{ 170 | c: c, 171 | l: zap.NewNop(), 172 | filter: NewPodFilters(), 173 | maxGracePeriod: DefaultMaxGracePeriod, 174 | evictionHeadroom: DefaultEvictionOverhead, 175 | skipDrain: DefaultSkipDrain, 176 | } 177 | for _, o := range ao { 178 | o(d) 179 | } 180 | return d 181 | } 182 | 183 | func (d *APICordonDrainer) deleteTimeout() time.Duration { 184 | return d.maxGracePeriod + d.evictionHeadroom 185 | } 186 | 187 | // Cordon the supplied node. Marks it unschedulable for new pods. 188 | func (d *APICordonDrainer) Cordon(n *core.Node, mutators ...nodeMutatorFn) error { 189 | fresh, err := d.c.CoreV1().Nodes().Get(n.GetName(), meta.GetOptions{}) 190 | if err != nil { 191 | return errors.Wrapf(err, "cannot get node %s", n.GetName()) 192 | } 193 | if fresh.Spec.Unschedulable { 194 | return nil 195 | } 196 | fresh.Spec.Unschedulable = true 197 | for _, m := range mutators { 198 | m(fresh) 199 | } 200 | if _, err := d.c.CoreV1().Nodes().Update(fresh); err != nil { 201 | return errors.Wrapf(err, "cannot cordon node %s", fresh.GetName()) 202 | } 203 | return nil 204 | } 205 | 206 | // Uncordon the supplied node. Marks it schedulable for new pods. 207 | func (d *APICordonDrainer) Uncordon(n *core.Node, mutators ...nodeMutatorFn) error { 208 | fresh, err := d.c.CoreV1().Nodes().Get(n.GetName(), meta.GetOptions{}) 209 | if err != nil { 210 | return errors.Wrapf(err, "cannot get node %s", n.GetName()) 211 | } 212 | if !fresh.Spec.Unschedulable { 213 | return nil 214 | } 215 | fresh.Spec.Unschedulable = false 216 | for _, m := range mutators { 217 | m(fresh) 218 | } 219 | if _, err := d.c.CoreV1().Nodes().Update(fresh); err != nil { 220 | return errors.Wrapf(err, "cannot uncordon node %s", fresh.GetName()) 221 | } 222 | return nil 223 | } 224 | 225 | // MarkDrain set a condition on the node to mark that that drain is scheduled. 
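// A zero finish time records that the drain is only scheduled; a
// non-zero finish marks completion (failure when failed is true) and
// flips the condition status to false. A hypothetical caller scheduling
// a drain one minute out would do:
//
//	_ = d.MarkDrain(node, time.Now().Add(time.Minute), time.Time{}, false)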
226 | func (d *APICordonDrainer) MarkDrain(n *core.Node, when, finish time.Time, failed bool) error { 227 | nodeName := n.Name 228 | // Refresh the node object 229 | freshNode, err := d.c.CoreV1().Nodes().Get(nodeName, meta.GetOptions{}) 230 | if err != nil { 231 | if !apierrors.IsNotFound(err) { 232 | return err 233 | } 234 | return nil 235 | } 236 | 237 | msgSuffix := "" 238 | conditionStatus := core.ConditionTrue 239 | if !finish.IsZero() { 240 | if failed { 241 | msgSuffix = fmt.Sprintf(" | Failed: %s", finish.Format(time.RFC3339)) 242 | } else { 243 | msgSuffix = fmt.Sprintf(" | Completed: %s", finish.Format(time.RFC3339)) 244 | } 245 | conditionStatus = core.ConditionFalse 246 | } 247 | 248 | // Create or update the condition associated to the monitor 249 | now := meta.Time{Time: time.Now()} 250 | conditionUpdated := false 251 | for i, condition := range freshNode.Status.Conditions { 252 | if string(condition.Type) == ConditionDrainedScheduled { 253 | freshNode.Status.Conditions[i].LastHeartbeatTime = now 254 | freshNode.Status.Conditions[i].Message = "Drain activity scheduled " + when.Format(time.RFC3339) + msgSuffix 255 | freshNode.Status.Conditions[i].Status = conditionStatus 256 | conditionUpdated = true 257 | break 258 | } 259 | } 260 | if !conditionUpdated { // There was no condition found, let's create one 261 | freshNode.Status.Conditions = append(freshNode.Status.Conditions, 262 | core.NodeCondition{ 263 | Type: core.NodeConditionType(ConditionDrainedScheduled), 264 | Status: conditionStatus, 265 | LastHeartbeatTime: now, 266 | LastTransitionTime: now, 267 | Reason: "Draino", 268 | Message: "Drain activity scheduled " + when.Format(time.RFC3339) + msgSuffix, 269 | }, 270 | ) 271 | } 272 | if _, err := d.c.CoreV1().Nodes().UpdateStatus(freshNode); err != nil { 273 | return err 274 | } 275 | return nil 276 | } 277 | 278 | func IsMarkedForDrain(n *core.Node) bool { 279 | for _, condition := range n.Status.Conditions { 280 | if string(condition.Type) == ConditionDrainedScheduled && condition.Status == core.ConditionTrue { 281 | return true 282 | } 283 | } 284 | return false 285 | } 286 | 287 | // Drain the supplied node. Evicts the node of all but mirror and DaemonSet pods. 288 | func (d *APICordonDrainer) Drain(n *core.Node) error { 289 | 290 | // Do nothing if draining is not enabled. 291 | if d.skipDrain { 292 | d.l.Debug("Skipping drain because draining is disabled") 293 | return nil 294 | } 295 | 296 | pods, err := d.getPods(n.GetName()) 297 | if err != nil { 298 | return errors.Wrapf(err, "cannot get pods for node %s", n.GetName()) 299 | } 300 | 301 | abort := make(chan struct{}) 302 | errs := make(chan error, 1) 303 | for _, pod := range pods { 304 | go d.evict(pod, abort, errs) 305 | } 306 | // This will _eventually_ abort evictions. Evictions may spend up to 307 | // d.deleteTimeout() in d.awaitDeletion(), or 5 seconds in backoff before 308 | // noticing they've been aborted. 
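// Each evict goroutine sends exactly one value on errs, so the loop
// below collects one result per pod and returns on the first error or
// when the overall deadline expires.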
309 | defer close(abort) 310 | 311 | deadline := time.After(d.deleteTimeout()) 312 | for range pods { 313 | select { 314 | case err := <-errs: 315 | if err != nil { 316 | return errors.Wrap(err, "cannot evict all pods") 317 | } 318 | case <-deadline: 319 | return errors.Wrap(errTimeout{}, "timed out waiting for evictions to complete") 320 | } 321 | } 322 | return nil 323 | } 324 | 325 | func (d *APICordonDrainer) getPods(node string) ([]core.Pod, error) { 326 | l, err := d.c.CoreV1().Pods(meta.NamespaceAll).List(meta.ListOptions{ 327 | FieldSelector: fields.SelectorFromSet(fields.Set{"spec.nodeName": node}).String(), 328 | }) 329 | if err != nil { 330 | return nil, errors.Wrapf(err, "cannot get pods for node %s", node) 331 | } 332 | 333 | include := make([]core.Pod, 0, len(l.Items)) 334 | for _, p := range l.Items { 335 | passes, err := d.filter(p) 336 | if err != nil { 337 | return nil, errors.Wrap(err, "cannot filter pods") 338 | } 339 | if passes { 340 | include = append(include, p) 341 | } 342 | } 343 | return include, nil 344 | } 345 | 346 | func (d *APICordonDrainer) evict(p core.Pod, abort <-chan struct{}, e chan<- error) { 347 | gracePeriod := int64(d.maxGracePeriod.Seconds()) 348 | if p.Spec.TerminationGracePeriodSeconds != nil && *p.Spec.TerminationGracePeriodSeconds < gracePeriod { 349 | gracePeriod = *p.Spec.TerminationGracePeriodSeconds 350 | } 351 | for { 352 | select { 353 | case <-abort: 354 | e <- errors.New("pod eviction aborted") 355 | return 356 | default: 357 | err := d.c.CoreV1().Pods(p.GetNamespace()).Evict(&policy.Eviction{ 358 | ObjectMeta: meta.ObjectMeta{Namespace: p.GetNamespace(), Name: p.GetName()}, 359 | DeleteOptions: &meta.DeleteOptions{GracePeriodSeconds: &gracePeriod}, 360 | }) 361 | switch { 362 | // The eviction API returns 429 Too Many Requests if a pod 363 | // cannot currently be evicted, for example due to a pod 364 | // disruption budget. 365 | case apierrors.IsTooManyRequests(err): 366 | time.Sleep(5 * time.Second) 367 | case apierrors.IsNotFound(err): 368 | e <- nil 369 | return 370 | case err != nil: 371 | e <- errors.Wrapf(err, "cannot evict pod %s/%s", p.GetNamespace(), p.GetName()) 372 | return 373 | default: 374 | e <- errors.Wrapf(d.awaitDeletion(p, d.deleteTimeout()), "cannot confirm pod %s/%s was deleted", p.GetNamespace(), p.GetName()) 375 | return 376 | } 377 | } 378 | } 379 | } 380 | 381 | func (d *APICordonDrainer) awaitDeletion(p core.Pod, timeout time.Duration) error { 382 | return wait.PollImmediate(1*time.Second, timeout, func() (bool, error) { 383 | got, err := d.c.CoreV1().Pods(p.GetNamespace()).Get(p.GetName(), meta.GetOptions{}) 384 | if apierrors.IsNotFound(err) { 385 | return true, nil 386 | } 387 | if err != nil { 388 | return false, errors.Wrapf(err, "cannot get pod %s/%s", p.GetNamespace(), p.GetName()) 389 | } 390 | if got.GetUID() != p.GetUID() { 391 | return true, nil 392 | } 393 | return false, nil 394 | }) 395 | } 396 | -------------------------------------------------------------------------------- /internal/kubernetes/drainer_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2018 Planet Labs Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 13 | implied. See the License for the specific language governing permissions 14 | and limitations under the License. 15 | */ 16 | 17 | package kubernetes 18 | 19 | import ( 20 | "reflect" 21 | "testing" 22 | "time" 23 | 24 | "github.com/pkg/errors" 25 | core "k8s.io/api/core/v1" 26 | apierrors "k8s.io/apimachinery/pkg/api/errors" 27 | meta "k8s.io/apimachinery/pkg/apis/meta/v1" 28 | "k8s.io/apimachinery/pkg/runtime" 29 | "k8s.io/apimachinery/pkg/runtime/schema" 30 | "k8s.io/client-go/kubernetes" 31 | "k8s.io/client-go/kubernetes/fake" 32 | clienttesting "k8s.io/client-go/testing" 33 | ) 34 | 35 | const ( 36 | nodeName = "coolNode" 37 | podName = "coolPod" 38 | 39 | daemonsetName = "coolDaemonSet" 40 | statefulsetName = "coolStatefulSet" 41 | deploymentName = "coolDeployment" 42 | kindDeployment = "Deployment" 43 | ) 44 | 45 | var ( 46 | _ CordonDrainer = (*APICordonDrainer)(nil) 47 | _ CordonDrainer = (*NoopCordonDrainer)(nil) 48 | ) 49 | 50 | var podGracePeriodSeconds int64 = 10 51 | var isController = true 52 | var errExploded = errors.New("kaboom") 53 | 54 | type reactor struct { 55 | verb string 56 | resource string 57 | subresource string 58 | ret runtime.Object 59 | err error 60 | } 61 | 62 | func (r reactor) Fn() clienttesting.ReactionFunc { 63 | return func(a clienttesting.Action) (bool, runtime.Object, error) { 64 | if r.subresource != "" && a.GetSubresource() != r.subresource { 65 | return true, nil, errors.Errorf("incorrect subresource: %v", a.GetSubresource()) 66 | } 67 | return true, r.ret, r.err 68 | } 69 | } 70 | 71 | func newFakeClientSet(rs ...reactor) kubernetes.Interface { 72 | cs := &fake.Clientset{} 73 | for _, r := range rs { 74 | cs.AddReactor(r.verb, r.resource, r.Fn()) 75 | } 76 | return cs 77 | } 78 | 79 | func TestCordon(t *testing.T) { 80 | cases := []struct { 81 | name string 82 | node *core.Node 83 | mutators []nodeMutatorFn 84 | expected *core.Node 85 | reactions []reactor 86 | }{ 87 | { 88 | name: "CordonSchedulableNode", 89 | node: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 90 | expected: &core.Node{ 91 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 92 | Spec: core.NodeSpec{Unschedulable: true}, 93 | }, 94 | }, 95 | { 96 | name: "CordonUnschedulableNode", 97 | node: &core.Node{ 98 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 99 | Spec: core.NodeSpec{Unschedulable: true}, 100 | }, 101 | expected: &core.Node{ 102 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 103 | Spec: core.NodeSpec{Unschedulable: true}, 104 | }, 105 | }, 106 | { 107 | name: "CordonNonExistentNode", 108 | node: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 109 | reactions: []reactor{ 110 | {verb: "get", resource: "nodes", err: errors.New("nope")}, 111 | }, 112 | }, 113 | { 114 | name: "ErrorCordoningSchedulableNode", 115 | node: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 116 | reactions: []reactor{ 117 | {verb: "update", resource: "nodes", err: errors.New("nope")}, 118 | }, 119 | }, 120 | { 121 | name: "CordonSchedulableNodeWithMutator", 122 | node: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 123 | mutators: []nodeMutatorFn{func(n *core.Node) { 124 | n.Annotations = map[string]string{"foo": "1"} 125 | }}, 126 | expected: 
&core.Node{ 127 | ObjectMeta: meta.ObjectMeta{Name: nodeName, Annotations: map[string]string{"foo": "1"}}, 128 | Spec: core.NodeSpec{Unschedulable: true}, 129 | }, 130 | }, 131 | { 132 | name: "CordonUnschedulableNodeWithMutator", 133 | node: &core.Node{ 134 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 135 | Spec: core.NodeSpec{Unschedulable: true}, 136 | }, 137 | mutators: []nodeMutatorFn{func(n *core.Node) { 138 | n.Annotations = map[string]string{"foo": "1"} 139 | }}, 140 | expected: &core.Node{ 141 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 142 | Spec: core.NodeSpec{Unschedulable: true}, 143 | }, 144 | }, 145 | } 146 | 147 | for _, tc := range cases { 148 | t.Run(tc.name, func(t *testing.T) { 149 | c := fake.NewSimpleClientset(tc.node) 150 | for _, r := range tc.reactions { 151 | c.PrependReactor(r.verb, r.resource, r.Fn()) 152 | } 153 | d := NewAPICordonDrainer(c) 154 | if err := d.Cordon(tc.node, tc.mutators...); err != nil { 155 | for _, r := range tc.reactions { 156 | if errors.Cause(err) == r.err { 157 | return 158 | } 159 | } 160 | t.Errorf("d.Cordon(%v): %v", tc.node.Name, err) 161 | } 162 | { 163 | n, err := c.CoreV1().Nodes().Get(tc.node.GetName(), meta.GetOptions{}) 164 | if err != nil { 165 | t.Errorf("node.Get(%v): %v", tc.node.Name, err) 166 | } 167 | if !reflect.DeepEqual(tc.expected, n) { 168 | t.Errorf("node.Get(%v): want %#v, got %#v", tc.node.Name, tc.expected, n) 169 | } 170 | } 171 | }) 172 | } 173 | } 174 | 175 | func TestUncordon(t *testing.T) { 176 | cases := []struct { 177 | name string 178 | node *core.Node 179 | mutators []nodeMutatorFn 180 | expected *core.Node 181 | reactions []reactor 182 | }{ 183 | { 184 | name: "UncordonSchedulableNode", 185 | node: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 186 | expected: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 187 | }, 188 | { 189 | name: "UncordonUnschedulableNode", 190 | node: &core.Node{ 191 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 192 | Spec: core.NodeSpec{Unschedulable: true}, 193 | }, 194 | expected: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 195 | }, 196 | { 197 | name: "UncordonNonExistentNode", 198 | node: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 199 | reactions: []reactor{ 200 | {verb: "get", resource: "nodes", err: errors.New("nope")}, 201 | }, 202 | }, 203 | { 204 | name: "ErrorUncordoningUnschedulableNode", 205 | node: &core.Node{ 206 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 207 | Spec: core.NodeSpec{Unschedulable: true}, 208 | }, 209 | reactions: []reactor{ 210 | {verb: "update", resource: "nodes", err: errors.New("nope")}, 211 | }, 212 | }, 213 | { 214 | name: "UncordonSchedulableNodeWithMutator", 215 | node: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 216 | mutators: []nodeMutatorFn{func(n *core.Node) { 217 | n.Annotations = map[string]string{"foo": "1"} 218 | }}, 219 | expected: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 220 | }, 221 | { 222 | name: "UncordonUnschedulableNodeWithMutator", 223 | node: &core.Node{ 224 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 225 | Spec: core.NodeSpec{Unschedulable: true}, 226 | }, 227 | mutators: []nodeMutatorFn{func(n *core.Node) { 228 | n.Annotations = map[string]string{"foo": "1"} 229 | }}, 230 | expected: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName, Annotations: map[string]string{"foo": "1"}}}, 231 | }, 232 | } 233 | 234 | for _, tc := range cases { 235 | t.Run(tc.name, func(t *testing.T) { 236 | c := fake.NewSimpleClientset(tc.node) 237 | for _, 
r := range tc.reactions { 238 | c.PrependReactor(r.verb, r.resource, r.Fn()) 239 | } 240 | d := NewAPICordonDrainer(c) 241 | if err := d.Uncordon(tc.node, tc.mutators...); err != nil { 242 | for _, r := range tc.reactions { 243 | if errors.Cause(err) == r.err { 244 | return 245 | } 246 | } 247 | t.Errorf("d.Uncordon(%v): %v", tc.node.Name, err) 248 | } 249 | { 250 | n, err := c.CoreV1().Nodes().Get(tc.node.GetName(), meta.GetOptions{}) 251 | if err != nil { 252 | t.Errorf("node.Get(%v): %v", tc.node.Name, err) 253 | } 254 | if !reflect.DeepEqual(tc.expected, n) { 255 | t.Errorf("node.Get(%v): want %#v, got %#v", tc.node.Name, tc.expected, n) 256 | } 257 | } 258 | }) 259 | } 260 | } 261 | 262 | func TestDrain(t *testing.T) { 263 | cases := []struct { 264 | name string 265 | options []APICordonDrainerOption 266 | node *core.Node 267 | reactions []reactor 268 | errFn func(err error) bool 269 | }{ 270 | { 271 | name: "EvictOnePod", 272 | node: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 273 | reactions: []reactor{ 274 | reactor{ 275 | verb: "list", 276 | resource: "pods", 277 | ret: &core.PodList{Items: []core.Pod{ 278 | core.Pod{ 279 | ObjectMeta: meta.ObjectMeta{ 280 | Name: podName, 281 | OwnerReferences: []meta.OwnerReference{meta.OwnerReference{ 282 | Controller: &isController, 283 | Kind: "Deployment", 284 | }}, 285 | }, 286 | Spec: core.PodSpec{TerminationGracePeriodSeconds: &podGracePeriodSeconds}, 287 | }, 288 | }}, 289 | }, 290 | reactor{ 291 | verb: "create", 292 | resource: "pods", 293 | subresource: "eviction", 294 | }, 295 | reactor{ 296 | verb: "get", 297 | resource: "pods", 298 | err: apierrors.NewNotFound(schema.GroupResource{Resource: "pods"}, podName), 299 | }, 300 | }, 301 | }, 302 | { 303 | name: "PodDisappearsBeforeEviction", 304 | node: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 305 | options: []APICordonDrainerOption{MaxGracePeriod(1 * time.Second), EvictionHeadroom(1 * time.Second)}, 306 | reactions: []reactor{ 307 | reactor{ 308 | verb: "list", 309 | resource: "pods", 310 | ret: &core.PodList{Items: []core.Pod{ 311 | core.Pod{ObjectMeta: meta.ObjectMeta{Name: podName}}, 312 | }}, 313 | }, 314 | reactor{ 315 | verb: "create", 316 | resource: "pods", 317 | subresource: "eviction", 318 | err: apierrors.NewNotFound(schema.GroupResource{Resource: "pods"}, podName), 319 | }, 320 | }, 321 | }, 322 | { 323 | name: "ErrorEvictingPod", 324 | node: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 325 | reactions: []reactor{ 326 | reactor{ 327 | verb: "list", 328 | resource: "pods", 329 | ret: &core.PodList{Items: []core.Pod{ 330 | core.Pod{ObjectMeta: meta.ObjectMeta{Name: podName}}, 331 | }}, 332 | }, 333 | reactor{ 334 | verb: "create", 335 | resource: "pods", 336 | subresource: "eviction", 337 | err: errors.New("nope"), 338 | }, 339 | }, 340 | }, 341 | { 342 | name: "PodEvictionNotAllowed", 343 | node: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 344 | options: []APICordonDrainerOption{MaxGracePeriod(1 * time.Second), EvictionHeadroom(1 * time.Second)}, 345 | reactions: []reactor{ 346 | reactor{ 347 | verb: "list", 348 | resource: "pods", 349 | ret: &core.PodList{Items: []core.Pod{ 350 | core.Pod{ObjectMeta: meta.ObjectMeta{Name: podName}}, 351 | }}, 352 | }, 353 | reactor{ 354 | verb: "create", 355 | resource: "pods", 356 | subresource: "eviction", 357 | err: apierrors.NewTooManyRequests("nope", 5), 358 | }, 359 | }, 360 | errFn: IsTimeout, 361 | }, 362 | { 363 | name: "EvictedPodReplacedWithDifferentUID", 364 | node: 
&core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 365 | reactions: []reactor{ 366 | reactor{ 367 | verb: "list", 368 | resource: "pods", 369 | ret: &core.PodList{Items: []core.Pod{ 370 | core.Pod{ObjectMeta: meta.ObjectMeta{Name: podName, UID: "a"}}, 371 | }}, 372 | }, 373 | reactor{ 374 | verb: "create", 375 | resource: "pods", 376 | subresource: "eviction", 377 | }, 378 | reactor{ 379 | verb: "get", 380 | resource: "pods", 381 | ret: &core.Pod{ObjectMeta: meta.ObjectMeta{Name: podName, UID: "b"}}, 382 | }, 383 | }, 384 | }, 385 | { 386 | name: "ErrorConfirmingPodDeletion", 387 | node: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 388 | reactions: []reactor{ 389 | reactor{ 390 | verb: "list", 391 | resource: "pods", 392 | ret: &core.PodList{Items: []core.Pod{ 393 | core.Pod{ObjectMeta: meta.ObjectMeta{Name: podName}}, 394 | }}, 395 | }, 396 | reactor{ 397 | verb: "create", 398 | resource: "pods", 399 | subresource: "eviction", 400 | }, 401 | reactor{ 402 | verb: "get", 403 | resource: "pods", 404 | err: errors.New("nope"), 405 | }, 406 | }, 407 | }, 408 | { 409 | name: "PodDoesNotPassFilter", 410 | node: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 411 | options: []APICordonDrainerOption{WithPodFilter(func(p core.Pod) (bool, error) { 412 | if p.GetName() == "lamePod" { 413 | // This pod does not pass the filter. 414 | return false, nil 415 | } 416 | return true, nil 417 | })}, 418 | reactions: []reactor{ 419 | reactor{ 420 | verb: "list", 421 | resource: "pods", 422 | ret: &core.PodList{Items: []core.Pod{ 423 | core.Pod{ObjectMeta: meta.ObjectMeta{Name: "lamePod"}}, 424 | core.Pod{ObjectMeta: meta.ObjectMeta{Name: podName}}, 425 | }}, 426 | }, 427 | reactor{ 428 | verb: "create", 429 | resource: "pods", 430 | subresource: "eviction", 431 | }, 432 | reactor{ 433 | verb: "get", 434 | resource: "pods", 435 | err: apierrors.NewNotFound(schema.GroupResource{Resource: "pods"}, podName), 436 | }, 437 | }, 438 | }, 439 | { 440 | name: "PodFilterErrors", 441 | node: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 442 | options: []APICordonDrainerOption{WithPodFilter(func(p core.Pod) (bool, error) { 443 | if p.GetName() == "explodeyPod" { 444 | return false, errExploded 445 | } 446 | return true, nil 447 | })}, 448 | reactions: []reactor{ 449 | reactor{ 450 | verb: "list", 451 | resource: "pods", 452 | ret: &core.PodList{Items: []core.Pod{ 453 | core.Pod{ObjectMeta: meta.ObjectMeta{Name: "explodeyPod"}}, 454 | core.Pod{ObjectMeta: meta.ObjectMeta{Name: podName}}, 455 | }}, 456 | }, 457 | reactor{ 458 | verb: "create", 459 | resource: "pods", 460 | subresource: "eviction", 461 | }, 462 | reactor{ 463 | verb: "get", 464 | resource: "pods", 465 | err: apierrors.NewNotFound(schema.GroupResource{Resource: "pods"}, podName), 466 | }, 467 | }, 468 | errFn: func(err error) bool { return errors.Cause(err) == errExploded }, 469 | }, 470 | { 471 | name: "ErrorListingPods", 472 | node: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 473 | reactions: []reactor{ 474 | reactor{ 475 | verb: "list", 476 | resource: "pods", 477 | err: errors.New("nope"), 478 | }, 479 | }, 480 | }, 481 | } 482 | 483 | for _, tc := range cases { 484 | t.Run(tc.name, func(t *testing.T) { 485 | c := newFakeClientSet(tc.reactions...) 486 | d := NewAPICordonDrainer(c, tc.options...) 
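// Errors are acceptable when they match a reactor's injected error
// (via errors.Cause) or satisfy the case's errFn; anything else is a
// test failure.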
487 | if err := d.Drain(tc.node); err != nil { 488 | for _, r := range tc.reactions { 489 | if errors.Cause(err) == r.err { 490 | return 491 | } 492 | } 493 | if tc.errFn != nil && tc.errFn(err) { 494 | return 495 | } 496 | t.Errorf("d.Drain(%v): %v", tc.node.Name, err) 497 | } 498 | }) 499 | } 500 | } 501 | 502 | func TestMarkDrain(t *testing.T) { 503 | now := meta.Time{Time: time.Now()} 504 | cases := []struct { 505 | name string 506 | node *core.Node 507 | isMarked bool 508 | }{ 509 | { 510 | name: "markDrain", 511 | node: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 512 | isMarked: false, 513 | }, 514 | { 515 | name: "markDrain again", 516 | node: &core.Node{ 517 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 518 | Status: core.NodeStatus{ 519 | Conditions: []core.NodeCondition{ 520 | { 521 | Type: core.NodeConditionType(ConditionDrainedScheduled), 522 | Status: core.ConditionTrue, 523 | LastHeartbeatTime: now, 524 | LastTransitionTime: now, 525 | Reason: "Draino", 526 | Message: "Drain activity scheduled", 527 | }, 528 | }, 529 | }, 530 | }, 531 | isMarked: true, 532 | }, 533 | } 534 | 535 | for _, tc := range cases { 536 | t.Run(tc.name, func(t *testing.T) { 537 | c := fake.NewSimpleClientset(tc.node) 538 | d := NewAPICordonDrainer(c) 539 | { 540 | n, err := c.CoreV1().Nodes().Get(tc.node.GetName(), meta.GetOptions{}) 541 | if err != nil { 542 | t.Errorf("node.Get(%v): %v", tc.node.Name, err) 543 | } 544 | if IsMarkedForDrain(n) != tc.isMarked { 545 | t.Errorf("node %v initial mark is not correct", tc.node.Name) 546 | } 547 | } 548 | if err := d.MarkDrain(tc.node, time.Now(), time.Time{}, false); err != nil { 549 | t.Errorf("d.MarkDrain(%v): %v", tc.node.Name, err) 550 | } 551 | { 552 | n, err := c.CoreV1().Nodes().Get(tc.node.GetName(), meta.GetOptions{}) 553 | if err != nil { 554 | t.Errorf("node.Get(%v): %v", tc.node.Name, err) 555 | } 556 | if !IsMarkedForDrain(n) { 557 | t.Errorf("node %v is not marked for drain", tc.node.Name) 558 | } 559 | } 560 | }) 561 | } 562 | } 563 | -------------------------------------------------------------------------------- /internal/kubernetes/eventhandler.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2018 Planet Labs Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 13 | implied. See the License for the specific language governing permissions 14 | and limitations under the License. 15 | */ 16 | 17 | package kubernetes 18 | 19 | import ( 20 | "context" 21 | "fmt" 22 | "strings" 23 | "time" 24 | 25 | "go.opencensus.io/stats" 26 | "go.opencensus.io/tag" 27 | "go.uber.org/zap" 28 | core "k8s.io/api/core/v1" 29 | "k8s.io/apimachinery/pkg/types" 30 | "k8s.io/client-go/tools/cache" 31 | "k8s.io/client-go/tools/record" 32 | ) 33 | 34 | const ( 35 | // DefaultDrainBuffer is the default minimum time between node drains. 
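// The buffer only spaces out scheduled drains; it can be overridden at
// construction time, e.g. a hypothetical operator spacing drains thirty
// minutes apart:
//
//	h := NewDrainingResourceEventHandler(drainer, recorder,
//		WithDrainBuffer(30*time.Minute),
//	)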
36 | DefaultDrainBuffer = 10 * time.Minute 37 | 38 | eventReasonCordonStarting = "CordonStarting" 39 | eventReasonCordonSucceeded = "CordonSucceeded" 40 | eventReasonCordonFailed = "CordonFailed" 41 | 42 | eventReasonUncordonStarting = "UncordonStarting" 43 | eventReasonUncordonSucceeded = "UncordonSucceeded" 44 | eventReasonUncordonFailed = "UncordonFailed" 45 | 46 | eventReasonDrainScheduled = "DrainScheduled" 47 | eventReasonDrainSchedulingFailed = "DrainSchedulingFailed" 48 | eventReasonDrainStarting = "DrainStarting" 49 | eventReasonDrainSucceeded = "DrainSucceeded" 50 | eventReasonDrainFailed = "DrainFailed" 51 | 52 | tagResultSucceeded = "succeeded" 53 | tagResultFailed = "failed" 54 | 55 | drainRetryAnnotationKey = "draino/drain-retry" 56 | drainRetryAnnotationValue = "true" 57 | 58 | drainoConditionsAnnotationKey = "draino.planet.com/conditions" 59 | ) 60 | 61 | // Opencensus measurements. 62 | var ( 63 | MeasureNodesCordoned = stats.Int64("draino/nodes_cordoned", "Number of nodes cordoned.", stats.UnitDimensionless) 64 | MeasureNodesUncordoned = stats.Int64("draino/nodes_uncordoned", "Number of nodes uncordoned.", stats.UnitDimensionless) 65 | MeasureNodesDrained = stats.Int64("draino/nodes_drained", "Number of nodes drained.", stats.UnitDimensionless) 66 | MeasureNodesDrainScheduled = stats.Int64("draino/nodes_drainScheduled", "Number of nodes drain scheduled.", stats.UnitDimensionless) 67 | 68 | TagNodeName, _ = tag.NewKey("node_name") 69 | TagResult, _ = tag.NewKey("result") 70 | ) 71 | 72 | // A DrainingResourceEventHandler cordons and drains any added or updated nodes. 73 | type DrainingResourceEventHandler struct { 74 | logger *zap.Logger 75 | cordonDrainer CordonDrainer 76 | eventRecorder record.EventRecorder 77 | drainScheduler DrainScheduler 78 | 79 | lastDrainScheduledFor time.Time 80 | buffer time.Duration 81 | 82 | conditions []SuppliedCondition 83 | } 84 | 85 | // DrainingResourceEventHandlerOption configures an DrainingResourceEventHandler. 86 | type DrainingResourceEventHandlerOption func(d *DrainingResourceEventHandler) 87 | 88 | // WithLogger configures a DrainingResourceEventHandler to use the supplied 89 | // logger. 90 | func WithLogger(l *zap.Logger) DrainingResourceEventHandlerOption { 91 | return func(h *DrainingResourceEventHandler) { 92 | h.logger = l 93 | } 94 | } 95 | 96 | // WithDrainBuffer configures the minimum time between scheduled drains. 97 | func WithDrainBuffer(d time.Duration) DrainingResourceEventHandlerOption { 98 | return func(h *DrainingResourceEventHandler) { 99 | h.buffer = d 100 | } 101 | } 102 | 103 | // WithConditionsFilter configures which conditions should be handled. 104 | func WithConditionsFilter(conditions []string) DrainingResourceEventHandlerOption { 105 | return func(h *DrainingResourceEventHandler) { 106 | h.conditions = ParseConditions(conditions) 107 | } 108 | } 109 | 110 | // NewDrainingResourceEventHandler returns a new DrainingResourceEventHandler. 111 | func NewDrainingResourceEventHandler(d CordonDrainer, e record.EventRecorder, ho ...DrainingResourceEventHandlerOption) *DrainingResourceEventHandler { 112 | h := &DrainingResourceEventHandler{ 113 | logger: zap.NewNop(), 114 | cordonDrainer: d, 115 | eventRecorder: e, 116 | lastDrainScheduledFor: time.Now(), 117 | buffer: DefaultDrainBuffer, 118 | } 119 | for _, o := range ho { 120 | o(h) 121 | } 122 | h.drainScheduler = NewDrainSchedules(d, e, h.buffer, h.logger) 123 | return h 124 | } 125 | 126 | // OnAdd cordons and drains the added node. 
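// OnAdd, OnUpdate and OnDelete follow the informer event-handler
// callback shape; OnAdd ignores anything that is not a *core.Node and
// otherwise hands the node to HandleNode.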
127 | func (h *DrainingResourceEventHandler) OnAdd(obj interface{}) {
128 | n, ok := obj.(*core.Node)
129 | if !ok {
130 | return
131 | }
132 | h.HandleNode(n)
133 | }
134 | 
135 | // OnUpdate cordons and drains the updated node.
136 | func (h *DrainingResourceEventHandler) OnUpdate(_, newObj interface{}) {
137 | h.OnAdd(newObj)
138 | }
139 | 
140 | // OnDelete does nothing. There's no point cordoning or draining deleted nodes.
141 | 
142 | func (h *DrainingResourceEventHandler) OnDelete(obj interface{}) {
143 | n, ok := obj.(*core.Node)
144 | if !ok {
145 | d, ok := obj.(cache.DeletedFinalStateUnknown)
146 | if !ok {
147 | return
148 | }
149 | h.drainScheduler.DeleteSchedule(d.Key)
150 | return
151 | }
152 | h.drainScheduler.DeleteSchedule(n.GetName())
153 | }
154 | 
155 | func (h *DrainingResourceEventHandler) HandleNode(n *core.Node) {
156 | badConditions := h.offendingConditions(n)
157 | if len(badConditions) == 0 {
158 | if shouldUncordon(n) {
159 | h.drainScheduler.DeleteSchedule(n.GetName())
160 | h.uncordon(n)
161 | }
162 | return
163 | }
164 | 
165 | // First cordon the node if it is not yet cordoned
166 | if !n.Spec.Unschedulable {
167 | h.cordon(n, badConditions)
168 | }
169 | 
170 | // Let's ensure that a drain is scheduled
171 | hasSchedule, failedDrain := h.drainScheduler.HasSchedule(n.GetName())
172 | if !hasSchedule {
173 | h.scheduleDrain(n)
174 | return
175 | }
176 | 
177 | // If there is a request to retry a failed drain activity, reschedule the drain
178 | if failedDrain && HasDrainRetryAnnotation(n) {
179 | h.drainScheduler.DeleteSchedule(n.GetName())
180 | h.scheduleDrain(n)
181 | return
182 | }
183 | }
184 | 
185 | func (h *DrainingResourceEventHandler) offendingConditions(n *core.Node) []SuppliedCondition {
186 | var conditions []SuppliedCondition
187 | for _, suppliedCondition := range h.conditions {
188 | for _, nodeCondition := range n.Status.Conditions {
189 | if suppliedCondition.Type == nodeCondition.Type &&
190 | suppliedCondition.Status == nodeCondition.Status &&
191 | time.Since(nodeCondition.LastTransitionTime.Time) >= suppliedCondition.MinimumDuration {
192 | conditions = append(conditions, suppliedCondition)
193 | }
194 | }
195 | }
196 | return conditions
197 | }
198 | 
199 | func shouldUncordon(n *core.Node) bool {
200 | if !n.Spec.Unschedulable {
201 | return false
202 | }
203 | previousConditions := parseConditionsFromAnnotation(n)
204 | if len(previousConditions) == 0 {
205 | return false
206 | }
207 | for _, previousCondition := range previousConditions {
208 | for _, nodeCondition := range n.Status.Conditions {
209 | if previousCondition.Type == nodeCondition.Type &&
210 | previousCondition.Status != nodeCondition.Status &&
211 | time.Since(nodeCondition.LastTransitionTime.Time) >= previousCondition.MinimumDuration {
212 | return true
213 | }
214 | }
215 | }
216 | return false
217 | }
218 | 
219 | func parseConditionsFromAnnotation(n *core.Node) []SuppliedCondition {
220 | if n.Annotations == nil {
221 | return nil
222 | }
223 | if n.Annotations[drainoConditionsAnnotationKey] == "" {
224 | return nil
225 | }
226 | rawConditions := strings.Split(n.Annotations[drainoConditionsAnnotationKey], ";")
227 | return ParseConditions(rawConditions)
228 | }
229 | 
230 | func (h *DrainingResourceEventHandler) uncordon(n *core.Node) {
231 | log := h.logger.With(zap.String("node", n.GetName()))
232 | tags, _ := tag.New(context.Background(), tag.Upsert(TagNodeName, n.GetName())) // nolint:gosec
233 | nr := &core.ObjectReference{Kind: "Node", Name: n.GetName(), UID:
types.UID(n.GetName())} 234 | 235 | log.Debug("Uncordoning") 236 | h.eventRecorder.Event(nr, core.EventTypeWarning, eventReasonUncordonStarting, "Uncordoning node") 237 | if err := h.cordonDrainer.Uncordon(n, removeAnnotationMutator); err != nil { 238 | log.Info("Failed to uncordon", zap.Error(err)) 239 | tags, _ = tag.New(tags, tag.Upsert(TagResult, tagResultFailed)) // nolint:gosec 240 | stats.Record(tags, MeasureNodesUncordoned.M(1)) 241 | h.eventRecorder.Eventf(nr, core.EventTypeWarning, eventReasonUncordonFailed, "Uncordoning failed: %v", err) 242 | return 243 | } 244 | log.Info("Uncordoned") 245 | tags, _ = tag.New(tags, tag.Upsert(TagResult, tagResultSucceeded)) // nolint:gosec 246 | stats.Record(tags, MeasureNodesUncordoned.M(1)) 247 | h.eventRecorder.Event(nr, core.EventTypeWarning, eventReasonUncordonSucceeded, "Uncordoned node") 248 | } 249 | 250 | func removeAnnotationMutator(n *core.Node) { 251 | delete(n.Annotations, drainoConditionsAnnotationKey) 252 | } 253 | 254 | func (h *DrainingResourceEventHandler) cordon(n *core.Node, badConditions []SuppliedCondition) { 255 | log := h.logger.With(zap.String("node", n.GetName())) 256 | tags, _ := tag.New(context.Background(), tag.Upsert(TagNodeName, n.GetName())) // nolint:gosec 257 | // Events must be associated with this object reference, rather than the 258 | // node itself, in order to appear under `kubectl describe node` due to the 259 | // way that command is implemented. 260 | // https://github.com/kubernetes/kubernetes/blob/17740a2/pkg/printers/internalversion/describe.go#L2711 261 | nr := &core.ObjectReference{Kind: "Node", Name: n.GetName(), UID: types.UID(n.GetName())} 262 | 263 | log.Debug("Cordoning") 264 | h.eventRecorder.Event(nr, core.EventTypeWarning, eventReasonCordonStarting, "Cordoning node") 265 | if err := h.cordonDrainer.Cordon(n, conditionAnnotationMutator(badConditions)); err != nil { 266 | log.Info("Failed to cordon", zap.Error(err)) 267 | tags, _ = tag.New(tags, tag.Upsert(TagResult, tagResultFailed)) // nolint:gosec 268 | stats.Record(tags, MeasureNodesCordoned.M(1)) 269 | h.eventRecorder.Eventf(nr, core.EventTypeWarning, eventReasonCordonFailed, "Cordoning failed: %v", err) 270 | return 271 | } 272 | log.Info("Cordoned") 273 | tags, _ = tag.New(tags, tag.Upsert(TagResult, tagResultSucceeded)) // nolint:gosec 274 | stats.Record(tags, MeasureNodesCordoned.M(1)) 275 | h.eventRecorder.Event(nr, core.EventTypeWarning, eventReasonCordonSucceeded, "Cordoned node") 276 | } 277 | 278 | func conditionAnnotationMutator(conditions []SuppliedCondition) func(*core.Node) { 279 | var value []string 280 | for _, c := range conditions { 281 | value = append(value, fmt.Sprintf("%v=%v,%v", c.Type, c.Status, c.MinimumDuration)) 282 | } 283 | return func(n *core.Node) { 284 | if n.Annotations == nil { 285 | n.Annotations = make(map[string]string) 286 | } 287 | n.Annotations[drainoConditionsAnnotationKey] = strings.Join(value, ";") 288 | } 289 | } 290 | 291 | // drain schedule the draining activity 292 | func (h *DrainingResourceEventHandler) scheduleDrain(n *core.Node) { 293 | log := h.logger.With(zap.String("node", n.GetName())) 294 | tags, _ := tag.New(context.Background(), tag.Upsert(TagNodeName, n.GetName())) // nolint:gosec 295 | nr := &core.ObjectReference{Kind: "Node", Name: n.GetName(), UID: types.UID(n.GetName())} 296 | log.Debug("Scheduling drain") 297 | when, err := h.drainScheduler.Schedule(n) 298 | if err != nil { 299 | if IsAlreadyScheduledError(err) { 300 | return 301 | } 302 | log.Info("Failed to schedule the 
drain activity", zap.Error(err)) 303 | tags, _ = tag.New(tags, tag.Upsert(TagResult, tagResultFailed)) // nolint:gosec 304 | stats.Record(tags, MeasureNodesDrainScheduled.M(1)) 305 | h.eventRecorder.Eventf(nr, core.EventTypeWarning, eventReasonDrainSchedulingFailed, "Drain scheduling failed: %v", err) 306 | return 307 | } 308 | log.Info("Drain scheduled ", zap.Time("after", when)) 309 | tags, _ = tag.New(tags, tag.Upsert(TagResult, tagResultSucceeded)) // nolint:gosec 310 | stats.Record(tags, MeasureNodesDrainScheduled.M(1)) 311 | h.eventRecorder.Eventf(nr, core.EventTypeWarning, eventReasonDrainScheduled, "Will drain node after %s", when.Format(time.RFC3339Nano)) 312 | } 313 | 314 | func HasDrainRetryAnnotation(n *core.Node) bool { 315 | return n.GetAnnotations()[drainRetryAnnotationKey] == drainRetryAnnotationValue 316 | } 317 | -------------------------------------------------------------------------------- /internal/kubernetes/eventhandler_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2018 Planet Labs Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 13 | implied. See the License for the specific language governing permissions 14 | and limitations under the License. 15 | */ 16 | 17 | package kubernetes 18 | 19 | import ( 20 | "reflect" 21 | "testing" 22 | "time" 23 | 24 | "k8s.io/client-go/tools/record" 25 | 26 | core "k8s.io/api/core/v1" 27 | meta "k8s.io/apimachinery/pkg/apis/meta/v1" 28 | ) 29 | 30 | type mockCordonDrainer struct { 31 | calls []mockCall 32 | } 33 | 34 | type mockCall struct { 35 | name string 36 | node string 37 | } 38 | 39 | func (d *mockCordonDrainer) Cordon(n *core.Node, mutators ...nodeMutatorFn) error { 40 | d.calls = append(d.calls, mockCall{ 41 | name: "Cordon", 42 | node: n.Name, 43 | }) 44 | return nil 45 | } 46 | 47 | func (d *mockCordonDrainer) Uncordon(n *core.Node, mutators ...nodeMutatorFn) error { 48 | d.calls = append(d.calls, mockCall{ 49 | name: "Uncordon", 50 | node: n.Name, 51 | }) 52 | return nil 53 | } 54 | 55 | func (d *mockCordonDrainer) Drain(n *core.Node) error { 56 | d.calls = append(d.calls, mockCall{ 57 | name: "Drain", 58 | node: n.Name, 59 | }) 60 | return nil 61 | } 62 | 63 | func (d *mockCordonDrainer) MarkDrain(n *core.Node, when, finish time.Time, failed bool) error { 64 | d.calls = append(d.calls, mockCall{ 65 | name: "MarkDrain", 66 | node: n.Name, 67 | }) 68 | return nil 69 | } 70 | 71 | func (d *mockCordonDrainer) HasSchedule(name string) (has, failed bool) { 72 | d.calls = append(d.calls, mockCall{ 73 | name: "HasSchedule", 74 | node: name, 75 | }) 76 | return false, false 77 | } 78 | 79 | func (d *mockCordonDrainer) Schedule(node *core.Node) (time.Time, error) { 80 | d.calls = append(d.calls, mockCall{ 81 | name: "Schedule", 82 | node: node.Name, 83 | }) 84 | return time.Now(), nil 85 | } 86 | 87 | func (d *mockCordonDrainer) DeleteSchedule(name string) { 88 | d.calls = append(d.calls, mockCall{ 89 | name: "DeleteSchedule", 90 | node: name, 91 | }) 92 | } 93 | 94 | func TestDrainingResourceEventHandler(t *testing.T) { 95 | cases := []struct { 96 | name string 97 
| obj interface{} 98 | conditions []string 99 | expected []mockCall 100 | }{ 101 | { 102 | name: "NoConditions", 103 | obj: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 104 | }, 105 | { 106 | name: "NotANode", 107 | obj: &core.Pod{ObjectMeta: meta.ObjectMeta{Name: podName}}, 108 | }, 109 | { 110 | name: "NoBadConditions", 111 | conditions: []string{"KernelPanic"}, 112 | obj: &core.Node{ 113 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 114 | Status: core.NodeStatus{ 115 | Conditions: []core.NodeCondition{{ 116 | Type: "Other", 117 | Status: core.ConditionTrue, 118 | }}, 119 | }, 120 | }, 121 | }, 122 | { 123 | name: "WithBadConditions", 124 | conditions: []string{"KernelPanic"}, 125 | obj: &core.Node{ 126 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 127 | Status: core.NodeStatus{ 128 | Conditions: []core.NodeCondition{{ 129 | Type: "KernelPanic", 130 | Status: core.ConditionTrue, 131 | }}, 132 | }, 133 | }, 134 | expected: []mockCall{ 135 | {name: "Cordon", node: nodeName}, 136 | {name: "HasSchedule", node: nodeName}, 137 | {name: "Schedule", node: nodeName}, 138 | }, 139 | }, 140 | { 141 | name: "WithBadConditionsAlreadyCordoned", 142 | conditions: []string{"KernelPanic"}, 143 | obj: &core.Node{ 144 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 145 | Spec: core.NodeSpec{Unschedulable: true}, 146 | Status: core.NodeStatus{ 147 | Conditions: []core.NodeCondition{{ 148 | Type: "KernelPanic", 149 | Status: core.ConditionTrue, 150 | }}, 151 | }, 152 | }, 153 | expected: []mockCall{ 154 | {name: "HasSchedule", node: nodeName}, 155 | {name: "Schedule", node: nodeName}, 156 | }, 157 | }, 158 | { 159 | name: "NoBadConditionsAlreadyCordoned", 160 | conditions: []string{"KernelPanic"}, 161 | obj: &core.Node{ 162 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 163 | Spec: core.NodeSpec{Unschedulable: true}, 164 | Status: core.NodeStatus{ 165 | Conditions: []core.NodeCondition{{ 166 | Type: "KernelPanic", 167 | Status: core.ConditionFalse, 168 | }}, 169 | }, 170 | }, 171 | }, 172 | { 173 | name: "NoBadConditionsAlreadyCordonedByDraino", 174 | conditions: []string{"KernelPanic"}, 175 | obj: &core.Node{ 176 | ObjectMeta: meta.ObjectMeta{ 177 | Name: nodeName, 178 | Annotations: map[string]string{drainoConditionsAnnotationKey: "KernelPanic=True,0s"}, 179 | }, 180 | Spec: core.NodeSpec{Unschedulable: true}, 181 | Status: core.NodeStatus{ 182 | Conditions: []core.NodeCondition{{ 183 | Type: "KernelPanic", 184 | Status: core.ConditionFalse, 185 | }}, 186 | }, 187 | }, 188 | expected: []mockCall{ 189 | {name: "DeleteSchedule", node: nodeName}, 190 | {name: "Uncordon", node: nodeName}, 191 | }, 192 | }, 193 | { 194 | name: "WithBadConditionsAlreadyCordonedByDraino", 195 | conditions: []string{"KernelPanic"}, 196 | obj: &core.Node{ 197 | ObjectMeta: meta.ObjectMeta{ 198 | Name: nodeName, 199 | Annotations: map[string]string{drainoConditionsAnnotationKey: "KernelPanic=True,0s"}, 200 | }, 201 | Spec: core.NodeSpec{Unschedulable: true}, 202 | Status: core.NodeStatus{ 203 | Conditions: []core.NodeCondition{{ 204 | Type: "KernelPanic", 205 | Status: core.ConditionTrue, 206 | }}, 207 | }, 208 | }, 209 | expected: []mockCall{ 210 | {name: "HasSchedule", node: nodeName}, 211 | {name: "Schedule", node: nodeName}, 212 | }, 213 | }, 214 | } 215 | 216 | for _, tc := range cases { 217 | t.Run(tc.name, func(t *testing.T) { 218 | cordonDrainer := &mockCordonDrainer{} 219 | h := NewDrainingResourceEventHandler(cordonDrainer, &record.FakeRecorder{}, WithDrainBuffer(0*time.Second), 
WithConditionsFilter(tc.conditions)) 220 | h.drainScheduler = cordonDrainer 221 | h.OnUpdate(nil, tc.obj) 222 | 223 | if !reflect.DeepEqual(tc.expected, cordonDrainer.calls) { 224 | t.Errorf("cordonDrainer.calls: want %#v\ngot %#v", tc.expected, cordonDrainer.calls) 225 | } 226 | }) 227 | } 228 | } 229 | 230 | func TestOffendingConditions(t *testing.T) { 231 | cases := []struct { 232 | name string 233 | obj *core.Node 234 | conditions []string 235 | expected []SuppliedCondition 236 | }{ 237 | { 238 | name: "SingleMatchingCondition", 239 | obj: &core.Node{ 240 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 241 | Status: core.NodeStatus{Conditions: []core.NodeCondition{ 242 | {Type: "Cool", Status: core.ConditionTrue}, 243 | }}, 244 | }, 245 | conditions: []string{"Cool"}, 246 | expected: []SuppliedCondition{{Type: "Cool", Status: core.ConditionTrue}}, 247 | }, 248 | { 249 | name: "ManyMatchingConditions", 250 | obj: &core.Node{ 251 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 252 | Status: core.NodeStatus{Conditions: []core.NodeCondition{ 253 | {Type: "Cool", Status: core.ConditionTrue}, 254 | {Type: "Rad", Status: core.ConditionTrue}, 255 | }}, 256 | }, 257 | conditions: []string{"Cool", "Rad"}, 258 | expected: []SuppliedCondition{ 259 | {Type: "Cool", Status: core.ConditionTrue}, 260 | {Type: "Rad", Status: core.ConditionTrue}, 261 | }, 262 | }, 263 | { 264 | name: "PartiallyMatchingConditions", 265 | obj: &core.Node{ 266 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 267 | Status: core.NodeStatus{Conditions: []core.NodeCondition{ 268 | {Type: "Cool", Status: core.ConditionTrue}, 269 | {Type: "Rad", Status: core.ConditionFalse}, 270 | }}, 271 | }, 272 | conditions: []string{"Cool", "Rad"}, 273 | expected: []SuppliedCondition{ 274 | {Type: "Cool", Status: core.ConditionTrue}, 275 | }, 276 | }, 277 | { 278 | name: "PartiallyAbsentConditions", 279 | obj: &core.Node{ 280 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 281 | Status: core.NodeStatus{Conditions: []core.NodeCondition{ 282 | {Type: "Rad", Status: core.ConditionTrue}, 283 | }}, 284 | }, 285 | conditions: []string{"Cool", "Rad"}, 286 | expected: []SuppliedCondition{ 287 | {Type: "Rad", Status: core.ConditionTrue}, 288 | }, 289 | }, 290 | { 291 | name: "SingleFalseCondition", 292 | obj: &core.Node{ 293 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 294 | Status: core.NodeStatus{Conditions: []core.NodeCondition{ 295 | {Type: "Cool", Status: core.ConditionFalse}, 296 | }}, 297 | }, 298 | conditions: []string{"Cool"}, 299 | expected: nil, 300 | }, 301 | { 302 | name: "NoNodeConditions", 303 | obj: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName}}, 304 | conditions: []string{"Cool"}, 305 | expected: nil, 306 | }, 307 | { 308 | name: "NoFilterConditions", 309 | obj: &core.Node{ 310 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 311 | Status: core.NodeStatus{Conditions: []core.NodeCondition{ 312 | {Type: "Cool", Status: core.ConditionFalse}, 313 | }}, 314 | }, 315 | expected: nil, 316 | }, 317 | { 318 | name: "NewConditionFormat", 319 | obj: &core.Node{ 320 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 321 | Status: core.NodeStatus{Conditions: []core.NodeCondition{ 322 | {Type: "Cool", Status: core.ConditionUnknown}, 323 | }}, 324 | }, 325 | conditions: []string{"Cool=Unknown,10m"}, 326 | expected: []SuppliedCondition{ 327 | {Type: "Cool", Status: core.ConditionUnknown, MinimumDuration: 10 * time.Minute}, 328 | }, 329 | }, 330 | { 331 | name: "NewConditionFormatDurationNotEnough", 332 | obj: &core.Node{ 333 | ObjectMeta: 
meta.ObjectMeta{Name: nodeName}, 334 | Status: core.NodeStatus{Conditions: []core.NodeCondition{ 335 | {Type: "Cool", Status: core.ConditionUnknown, LastTransitionTime: meta.NewTime(time.Now().Add(time.Duration(-9) * time.Minute))}, 336 | }}, 337 | }, 338 | conditions: []string{"Cool=Unknown,10m"}, 339 | expected: nil, 340 | }, 341 | { 342 | name: "NewConditionFormatDurationIsEnough", 343 | obj: &core.Node{ 344 | ObjectMeta: meta.ObjectMeta{Name: nodeName}, 345 | Status: core.NodeStatus{Conditions: []core.NodeCondition{ 346 | {Type: "Cool", Status: core.ConditionUnknown, LastTransitionTime: meta.NewTime(time.Now().Add(time.Duration(-15) * time.Minute))}, 347 | }}, 348 | }, 349 | conditions: []string{"Cool=Unknown,14m"}, 350 | expected: []SuppliedCondition{ 351 | {Type: "Cool", Status: core.ConditionUnknown, MinimumDuration: 14 * time.Minute}, 352 | }, 353 | }, 354 | } 355 | 356 | for _, tc := range cases { 357 | t.Run(tc.name, func(t *testing.T) { 358 | h := NewDrainingResourceEventHandler(&NoopCordonDrainer{}, &record.FakeRecorder{}, WithConditionsFilter(tc.conditions)) 359 | badConditions := h.offendingConditions(tc.obj) 360 | if !reflect.DeepEqual(badConditions, tc.expected) { 361 | t.Errorf("offendingConditions(tc.obj): want %#v, got %#v", tc.expected, badConditions) 362 | } 363 | }) 364 | } 365 | } 366 | -------------------------------------------------------------------------------- /internal/kubernetes/nodefilters.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2018 Planet Labs Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 13 | implied. See the License for the specific language governing permissions 14 | and limitations under the License. 15 | */ 16 | 17 | package kubernetes 18 | 19 | import ( 20 | "fmt" 21 | "sort" 22 | "strings" 23 | "time" 24 | 25 | "go.uber.org/zap" 26 | core "k8s.io/api/core/v1" 27 | "k8s.io/apimachinery/pkg/types" 28 | 29 | "github.com/antonmedv/expr" 30 | ) 31 | 32 | // NewNodeLabelFilter returns a filter that returns true if the supplied node satisfies the boolean expression 33 | func NewNodeLabelFilter(expressionStr *string, log *zap.Logger) (func(o interface{}) bool, error) { 34 | //This feels wrong but this is how the previous behavior worked so I'm only keeping it to maintain compatibility. 35 | 36 | expression, err := expr.Compile(*expressionStr) 37 | if err != nil && *expressionStr != "" { 38 | return nil, err 39 | } 40 | 41 | return func(o interface{}) bool { 42 | //This feels wrong but this is how the previous behavior worked so I'm only keeping it to maintain compatibility. 
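// An empty expression matches every object: the filter returns true before the node type check.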
43 | if *expressionStr == "" {
44 | return true
45 | }
46 | 
47 | n, ok := o.(*core.Node)
48 | if !ok {
49 | return false
50 | }
51 | 
52 | nodeLabels := n.GetLabels()
53 | 
54 | parameters := map[string]interface{}{
55 | "metadata": map[string]map[string]string{
56 | "labels": nodeLabels,
57 | },
58 | }
59 | 
60 | result, err := expr.Run(expression, parameters)
61 | if err != nil {
62 | log.Error(fmt.Sprintf("Could not evaluate expression: %v", err))
63 | }
64 | return result.(bool)
65 | }, nil
66 | }
67 | 
68 | // ParseConditions parses a slice of condition strings into a list of
69 | // SuppliedCondition values, supporting an optional status value and duration.
70 | func ParseConditions(conditions []string) []SuppliedCondition {
71 | parsed := make([]SuppliedCondition, len(conditions))
72 | for i, c := range conditions {
73 | ts := strings.SplitN(c, "=", 2)
74 | if len(ts) != 2 {
75 | // Keep backward compatibility
76 | ts = []string{c, "True,0s"}
77 | }
78 | sm := strings.SplitN(ts[1], ",", 2)
79 | duration, err := time.ParseDuration(sm[1])
80 | if err == nil {
81 | parsed[i] = SuppliedCondition{core.NodeConditionType(ts[0]), core.ConditionStatus(sm[0]), duration}
82 | }
83 | }
84 | return parsed
85 | }
86 | 
87 | // NodeProcessed tracks whether nodes have been processed before using a map.
88 | type NodeProcessed map[types.UID]bool
89 | 
90 | // NewNodeProcessed returns a new node processed filter.
91 | func NewNodeProcessed() NodeProcessed {
92 | return make(NodeProcessed)
93 | }
94 | 
95 | // Filter returns true if the supplied object is a node that this filter has
96 | // not seen before. It is not threadsafe and should always be the last filter
97 | // applied.
98 | func (processed NodeProcessed) Filter(o interface{}) bool {
99 | n, ok := o.(*core.Node)
100 | if !ok {
101 | return false
102 | }
103 | if processed[n.GetUID()] {
104 | return false
105 | }
106 | processed[n.GetUID()] = true
107 | return true
108 | }
109 | 
110 | // ConvertLabelsToFilterExpr converts an old-style list of node labels into the new expression syntax.
111 | func ConvertLabelsToFilterExpr(labelsSlice []string) (*string, error) {
112 | labels := map[string]string{}
113 | for _, label := range labelsSlice {
114 | tokens := strings.SplitN(label, "=", 2)
115 | key := tokens[0]
116 | value := tokens[1]
117 | if v, found := labels[key]; found && v != value {
118 | return nil, fmt.Errorf("node-label parameter is used twice with the same key and different values: '%s', '%s'", v, value)
119 | }
120 | labels[key] = value
121 | }
122 | res := []string{}
123 | // sort the keys so that the generated expression is deterministic (and the unit tests pass)
124 | keys := make([]string, 0, len(labels))
125 | for k := range labels {
126 | keys = append(keys, k)
127 | }
128 | sort.Strings(keys)
129 | 
130 | for _, k := range keys {
131 | if k != "" && labels[k] == "" {
132 | res = append(res, fmt.Sprintf(`'%s' in metadata.labels`, k))
133 | } else {
134 | res = append(res, fmt.Sprintf(`metadata.labels['%s'] == '%s'`, k, labels[k]))
135 | }
136 | }
137 | temp := strings.Join(res, " && ")
138 | return &temp, nil
139 | }
140 | 
--------------------------------------------------------------------------------
/internal/kubernetes/nodefilters_test.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2018 Planet Labs Inc.
3 | 
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 13 | implied. See the License for the specific language governing permissions 14 | and limitations under the License. 15 | */ 16 | 17 | package kubernetes 18 | 19 | import ( 20 | "reflect" 21 | "testing" 22 | "time" 23 | 24 | "github.com/stretchr/testify/assert" 25 | "go.uber.org/zap" 26 | 27 | core "k8s.io/api/core/v1" 28 | meta "k8s.io/apimachinery/pkg/apis/meta/v1" 29 | ) 30 | 31 | func TestNodeLabelFilter(t *testing.T) { 32 | cases := []struct { 33 | name string 34 | logicType string 35 | obj interface{} 36 | expression string 37 | passesFilter bool 38 | }{ 39 | { 40 | name: "SingleMatchingLabel", 41 | obj: &core.Node{ 42 | ObjectMeta: meta.ObjectMeta{ 43 | Name: nodeName, 44 | Labels: map[string]string{"cool": "very"}, 45 | }, 46 | }, 47 | expression: "metadata.labels.cool == 'very'", 48 | passesFilter: true, 49 | }, 50 | { 51 | name: "MatchesAllLabels", 52 | obj: &core.Node{ 53 | ObjectMeta: meta.ObjectMeta{ 54 | Name: nodeName, 55 | Labels: map[string]string{"cool": "very", "lame": "nope"}, 56 | }, 57 | }, 58 | expression: "metadata.labels.cool == 'very' && metadata.labels.lame == 'nope'", 59 | passesFilter: true, 60 | }, 61 | { 62 | name: "DoesntMatchWrongLabel", 63 | obj: &core.Node{ 64 | ObjectMeta: meta.ObjectMeta{ 65 | Name: nodeName, 66 | Labels: map[string]string{"cool": "nope"}, 67 | }, 68 | }, 69 | expression: "metadata.labels.cool == 'very'", 70 | passesFilter: false, 71 | }, 72 | { 73 | name: "PR75Example1", 74 | obj: &core.Node{ 75 | ObjectMeta: meta.ObjectMeta{ 76 | Name: nodeName, 77 | Labels: map[string]string{ 78 | "region": "us-west-2", 79 | "app": "nginx", 80 | "type": "sup", 81 | }, 82 | }, 83 | }, 84 | expression: "(metadata.labels.region == 'us-west-2' && metadata.labels.app == 'nginx') || (metadata.labels.region == 'us-west-2' && metadata.labels.foo == 'bar') || (metadata.labels.type == 'toolbox')", 85 | passesFilter: true, 86 | }, 87 | { 88 | name: "PR75Example2", 89 | obj: &core.Node{ 90 | ObjectMeta: meta.ObjectMeta{ 91 | Name: nodeName, 92 | Labels: map[string]string{ 93 | "region": "us-west-2", 94 | "app": "nginx", 95 | "type": "sup", 96 | }, 97 | }, 98 | }, 99 | expression: "(metadata.labels.region == 'us-west-1' && metadata.labels.app == 'nginx') || (metadata.labels.region == 'us-west-2' && metadata.labels.foo == 'bar') || (metadata.labels.type == 'toolbox')", 100 | passesFilter: false, 101 | }, 102 | { 103 | name: "MatchesSomeLabels", 104 | obj: &core.Node{ 105 | ObjectMeta: meta.ObjectMeta{ 106 | Name: nodeName, 107 | Labels: map[string]string{"cool": "very"}, 108 | }, 109 | }, 110 | expression: "(metadata.labels.cool == 'lame') || (metadata.labels.cool == 'very')", 111 | passesFilter: true, 112 | }, 113 | { 114 | name: "MatchesNoLabels", 115 | obj: &core.Node{ 116 | ObjectMeta: meta.ObjectMeta{ 117 | Name: nodeName, 118 | Labels: map[string]string{"cool": "nope"}, 119 | }, 120 | }, 121 | expression: "(metadata.labels.cool == 'lame') || (metadata.labels.cool == 'very')", 122 | passesFilter: false, 123 | }, 124 | { 125 | name: "InOperatorMatches", 126 | obj: &core.Node{ 127 | ObjectMeta: meta.ObjectMeta{ 128 | Name: nodeName, 129 | Labels: map[string]string{"cool": "very"}, 130 | }, 131 | }, 132 | expression: "metadata.labels.cool in ['very', 
'lame']", 133 | passesFilter: true, 134 | }, 135 | { 136 | name: "InOperatorFails", 137 | obj: &core.Node{ 138 | ObjectMeta: meta.ObjectMeta{ 139 | Name: nodeName, 140 | Labels: map[string]string{"cool": "very"}, 141 | }, 142 | }, 143 | expression: "metadata.labels.cool in ['lame', 'nope']", 144 | passesFilter: false, 145 | }, 146 | { 147 | name: "NoNodeLabels", 148 | obj: &core.Node{ 149 | ObjectMeta: meta.ObjectMeta{ 150 | Name: nodeName, 151 | Labels: make(map[string]string), 152 | }, 153 | }, 154 | expression: "metadata.labels.cool == 'very'", 155 | passesFilter: false, 156 | }, 157 | { 158 | name: "NoFilterLabels", 159 | obj: &core.Node{ 160 | ObjectMeta: meta.ObjectMeta{ 161 | Name: nodeName, 162 | Labels: map[string]string{"cool": "very"}, 163 | }, 164 | }, 165 | expression: "", 166 | passesFilter: true, 167 | }, 168 | { 169 | name: "FilterNodeThatHasLabel_InSyntax", 170 | obj: &core.Node{ 171 | ObjectMeta: meta.ObjectMeta{ 172 | Name: nodeName, 173 | Labels: map[string]string{"cool": "very"}, 174 | }, 175 | }, 176 | expression: "'cool' in metadata.labels", 177 | passesFilter: true, 178 | }, 179 | { 180 | name: "FilterNodeThatHasLabel_AccessorSyntax", 181 | obj: &core.Node{ 182 | ObjectMeta: meta.ObjectMeta{ 183 | Name: nodeName, 184 | Labels: map[string]string{"cool": "very"}, 185 | }, 186 | }, 187 | expression: "metadata.labels.cool != ''", 188 | passesFilter: true, 189 | }, 190 | { 191 | name: "FilterNodeThatIsMissingLabel_AccessorSyntax", 192 | obj: &core.Node{ 193 | ObjectMeta: meta.ObjectMeta{ 194 | Name: nodeName, 195 | Labels: map[string]string{"sup": "very"}, 196 | }, 197 | }, 198 | expression: "metadata.labels.cool != ''", 199 | passesFilter: false, 200 | }, 201 | } 202 | log, _ := zap.NewDevelopment() 203 | 204 | for _, tc := range cases { 205 | 206 | t.Run(tc.name, func(t *testing.T) { 207 | filter, err := NewNodeLabelFilter(&tc.expression, log) 208 | if err != nil { 209 | t.Errorf("Filter expression: %v, did not compile", err) 210 | t.FailNow() 211 | } 212 | 213 | passesFilter := filter(tc.obj) 214 | assert.Equal(t, tc.passesFilter, passesFilter) 215 | }) 216 | } 217 | } 218 | 219 | func TestOldNodeLabelFilter(t *testing.T) { 220 | cases := []struct { 221 | name string 222 | obj interface{} 223 | labels []string 224 | passesFilter bool 225 | }{ 226 | { 227 | name: "SingleMatchingLabel", 228 | obj: &core.Node{ 229 | ObjectMeta: meta.ObjectMeta{ 230 | Name: nodeName, 231 | Labels: map[string]string{"cool": "very"}, 232 | }, 233 | }, 234 | labels: []string{"cool=very"}, 235 | passesFilter: true, 236 | }, 237 | { 238 | name: "SingleMatchingLabel.WithDomain", 239 | obj: &core.Node{ 240 | ObjectMeta: meta.ObjectMeta{ 241 | Name: nodeName, 242 | Labels: map[string]string{"planetlabs.com/cool": "very"}, 243 | }, 244 | }, 245 | labels: []string{"planetlabs.com/cool=very"}, 246 | passesFilter: true, 247 | }, 248 | { 249 | name: "ManyMatchingLabels", 250 | obj: &core.Node{ 251 | ObjectMeta: meta.ObjectMeta{ 252 | Name: nodeName, 253 | Labels: map[string]string{"cool": "very", "lame": "nope"}, 254 | }, 255 | }, 256 | labels: []string{"cool=very", "lame=nope"}, 257 | passesFilter: true, 258 | }, 259 | { 260 | name: "SingleUnmatchingLabel", 261 | obj: &core.Node{ 262 | ObjectMeta: meta.ObjectMeta{ 263 | Name: nodeName, 264 | Labels: map[string]string{"cool": "notsocool"}, 265 | }, 266 | }, 267 | labels: []string{"cool=very"}, 268 | passesFilter: false, 269 | }, 270 | { 271 | name: "PartiallyMatchingLabels", 272 | obj: &core.Node{ 273 | ObjectMeta: meta.ObjectMeta{ 274 | Name: 
nodeName, 275 | Labels: map[string]string{"cool": "very", "lame": "somehowyes"}, 276 | }, 277 | }, 278 | labels: []string{"cool=very", "lame=nope"}, 279 | passesFilter: false, 280 | }, { 281 | name: "PartiallyAbsentLabels", 282 | obj: &core.Node{ 283 | ObjectMeta: meta.ObjectMeta{ 284 | Name: nodeName, 285 | Labels: map[string]string{"cool": "very"}, 286 | }, 287 | }, 288 | labels: []string{"cool=very", "lame=nope"}, 289 | passesFilter: false, 290 | }, 291 | { 292 | name: "NoNodeLabels", 293 | obj: &core.Node{ 294 | ObjectMeta: meta.ObjectMeta{ 295 | Name: nodeName, 296 | Labels: map[string]string{}, 297 | }, 298 | }, 299 | labels: []string{"cool=very"}, 300 | passesFilter: false, 301 | }, 302 | { 303 | name: "NoFilterLabels", 304 | obj: &core.Node{ 305 | ObjectMeta: meta.ObjectMeta{ 306 | Name: nodeName, 307 | Labels: map[string]string{"cool": "very"}, 308 | }, 309 | }, 310 | passesFilter: true, 311 | }, 312 | { 313 | name: "FilterWithEmptyValue", 314 | obj: &core.Node{ 315 | ObjectMeta: meta.ObjectMeta{ 316 | Name: nodeName, 317 | Labels: map[string]string{"cool": "very"}, 318 | }, 319 | }, 320 | labels: []string{"keyWithNoValue="}, 321 | passesFilter: false, 322 | }, 323 | { 324 | name: "FilterWithEmptyValueAndNodeWithEmptyValue", 325 | obj: &core.Node{ 326 | ObjectMeta: meta.ObjectMeta{ 327 | Name: nodeName, 328 | Labels: map[string]string{"cool": "very", "keyWithNoValue": ""}, 329 | }, 330 | }, 331 | labels: []string{"keyWithNoValue="}, 332 | passesFilter: true, 333 | }, 334 | { 335 | name: "NotANode", 336 | obj: &core.Pod{ 337 | ObjectMeta: meta.ObjectMeta{ 338 | Name: podName, 339 | Labels: map[string]string{"cool": "very"}, 340 | }, 341 | }, 342 | labels: []string{"cool=very"}, 343 | passesFilter: false, 344 | }, 345 | } 346 | 347 | log, _ := zap.NewDevelopment() 348 | 349 | for _, tc := range cases { 350 | t.Run(tc.name, func(t *testing.T) { 351 | labelExpr, err := ConvertLabelsToFilterExpr(tc.labels) 352 | 353 | filter, err := NewNodeLabelFilter(labelExpr, log) 354 | if err != nil { 355 | t.Errorf("Filter expression: %v, did not compile", err) 356 | t.FailNow() 357 | } 358 | 359 | passesFilter := filter(tc.obj) 360 | assert.Equal(t, tc.passesFilter, passesFilter) 361 | }) 362 | } 363 | } 364 | 365 | func TestNodeProcessedFilter(t *testing.T) { 366 | cases := []struct { 367 | name string 368 | existing interface{} 369 | obj interface{} 370 | passesFilter bool 371 | }{ 372 | { 373 | name: "NoNodesProcessed", 374 | obj: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName, UID: "a"}}, 375 | passesFilter: true, 376 | }, 377 | { 378 | name: "DifferentNodeProcessed", 379 | existing: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName + "-b", UID: "b"}}, 380 | obj: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName, UID: "a"}}, 381 | passesFilter: true, 382 | }, 383 | { 384 | name: "NodeAlreadyProcessed", 385 | existing: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName, UID: "a"}}, 386 | obj: &core.Node{ObjectMeta: meta.ObjectMeta{Name: nodeName, UID: "a"}}, 387 | passesFilter: false, 388 | }, 389 | } 390 | 391 | for _, tc := range cases { 392 | t.Run(tc.name, func(t *testing.T) { 393 | np := NewNodeProcessed() 394 | np.Filter(tc.existing) 395 | passesFilter := np.Filter(tc.obj) 396 | if passesFilter != tc.passesFilter { 397 | t.Errorf("np.Filter(tc.obj): want %v, got %v", tc.passesFilter, passesFilter) 398 | } 399 | }) 400 | } 401 | } 402 | 403 | func TestParseConditions(t *testing.T) { 404 | cases := []struct { 405 | name string 406 | conditions []string 407 | expect 
[]SuppliedCondition 408 | }{ 409 | { 410 | name: "OldFormat", 411 | conditions: []string{"Ready"}, 412 | expect: []SuppliedCondition{SuppliedCondition{core.NodeConditionType("Ready"), core.ConditionStatus("True"), time.Duration(0) * time.Second}}, 413 | }, 414 | { 415 | name: "Mixed", 416 | conditions: []string{"Ready", "OutOfDisk=True,10m"}, 417 | expect: []SuppliedCondition{ 418 | SuppliedCondition{core.NodeConditionType("Ready"), core.ConditionStatus("True"), time.Duration(0) * time.Second}, 419 | SuppliedCondition{core.NodeConditionType("OutOfDisk"), core.ConditionStatus("True"), time.Duration(10) * time.Minute}, 420 | }, 421 | }, 422 | { 423 | name: "NewFormat", 424 | conditions: []string{"Ready=Unknown,30m"}, 425 | expect: []SuppliedCondition{SuppliedCondition{core.NodeConditionType("Ready"), core.ConditionStatus("Unknown"), time.Duration(30) * time.Minute}}, 426 | }, 427 | } 428 | 429 | for _, tc := range cases { 430 | t.Run(tc.name, func(t *testing.T) { 431 | parsed := ParseConditions(tc.conditions) 432 | if !reflect.DeepEqual(tc.expect, parsed) { 433 | t.Errorf("expect %v, got: %v", tc.expect, parsed) 434 | } 435 | }) 436 | } 437 | } 438 | 439 | func TestConvertLabelsToFilterExpr(t *testing.T) { 440 | cases := []struct { 441 | name string 442 | input []string 443 | expected string 444 | wantErr bool 445 | }{ 446 | { 447 | name: "2 labels", 448 | input: []string{"foo=bar", "sup=cool"}, 449 | expected: "metadata.labels['foo'] == 'bar' && metadata.labels['sup'] == 'cool'", 450 | }, 451 | { 452 | name: "2 labels same key", 453 | input: []string{"foo=bar", "foo=cool"}, 454 | expected: "", 455 | wantErr: true, 456 | }, 457 | { 458 | name: "no filter", 459 | input: nil, 460 | expected: "", 461 | wantErr: false, 462 | }, 463 | } 464 | 465 | for _, tc := range cases { 466 | t.Run(tc.name, func(t *testing.T) { 467 | actual, err := ConvertLabelsToFilterExpr(tc.input) 468 | if tc.wantErr && err == nil { 469 | t.Errorf("error was expected for that case") 470 | return 471 | } 472 | if !tc.wantErr && err != nil { 473 | t.Errorf("no error was expected for that case") 474 | return 475 | } 476 | if tc.wantErr && err != nil { 477 | return 478 | } 479 | if actual == nil { 480 | t.Errorf("string value was expected") 481 | return 482 | } 483 | got := *actual 484 | if !reflect.DeepEqual(tc.expected, got) { 485 | t.Errorf("expect %v, got: %v", tc.expected, got) 486 | } 487 | }) 488 | } 489 | } 490 | -------------------------------------------------------------------------------- /internal/kubernetes/podfilters.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2018 Planet Labs Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 13 | implied. See the License for the specific language governing permissions 14 | and limitations under the License. 
15 | */ 16 | 17 | package kubernetes 18 | 19 | import ( 20 | "strings" 21 | 22 | "github.com/pkg/errors" 23 | core "k8s.io/api/core/v1" 24 | apierrors "k8s.io/apimachinery/pkg/api/errors" 25 | meta "k8s.io/apimachinery/pkg/apis/meta/v1" 26 | "k8s.io/client-go/kubernetes" 27 | ) 28 | 29 | // A PodFilterFunc returns true if the supplied pod passes the filter. 30 | type PodFilterFunc func(p core.Pod) (bool, error) 31 | 32 | // MirrorPodFilter returns true if the supplied pod is not a mirror pod, i.e. a 33 | // pod created by a manifest on the node rather than the API server. 34 | func MirrorPodFilter(p core.Pod) (bool, error) { 35 | _, mirrorPod := p.GetAnnotations()[core.MirrorPodAnnotationKey] 36 | return !mirrorPod, nil 37 | } 38 | 39 | // LocalStoragePodFilter returns true if the supplied pod does not have local 40 | // storage, i.e. does not use any 'empty dir' volumes. 41 | func LocalStoragePodFilter(p core.Pod) (bool, error) { 42 | for _, v := range p.Spec.Volumes { 43 | if v.EmptyDir != nil { 44 | return false, nil 45 | } 46 | } 47 | return true, nil 48 | } 49 | 50 | // UnreplicatedPodFilter returns true if the pod is replicated, i.e. is managed 51 | // by a controller (deployment, daemonset, statefulset, etc) of some sort. 52 | func UnreplicatedPodFilter(p core.Pod) (bool, error) { 53 | // We're fine with 'evicting' unreplicated pods that aren't actually running. 54 | if p.Status.Phase == core.PodSucceeded || p.Status.Phase == core.PodFailed { 55 | return true, nil 56 | } 57 | if meta.GetControllerOf(&p) == nil { 58 | return false, nil 59 | } 60 | return true, nil 61 | } 62 | 63 | // NewDaemonSetPodFilter returns a FilterFunc that returns true if the supplied 64 | // pod is not managed by an extant DaemonSet. 65 | func NewDaemonSetPodFilter(client kubernetes.Interface) PodFilterFunc { 66 | return func(p core.Pod) (bool, error) { 67 | c := meta.GetControllerOf(&p) 68 | if c == nil || c.Kind != kindDaemonSet { 69 | return true, nil 70 | } 71 | 72 | // Pods pass the filter if they were created by a DaemonSet that no 73 | // longer exists. 74 | if _, err := client.AppsV1().DaemonSets(p.GetNamespace()).Get(c.Name, meta.GetOptions{}); err != nil { 75 | if apierrors.IsNotFound(err) { 76 | return true, nil 77 | } 78 | return false, errors.Wrapf(err, "cannot get DaemonSet %s/%s", p.GetNamespace(), c.Name) 79 | } 80 | return false, nil 81 | } 82 | } 83 | 84 | // NewStatefulSetPodFilter returns a FilterFunc that returns true if the supplied 85 | // pod is not managed by an extant StatefulSet. 86 | func NewStatefulSetPodFilter(client kubernetes.Interface) PodFilterFunc { 87 | return func(p core.Pod) (bool, error) { 88 | c := meta.GetControllerOf(&p) 89 | if c == nil || c.Kind != kindStatefulSet { 90 | return true, nil 91 | } 92 | 93 | // Pods pass the filter if they were created by a StatefulSet that no 94 | // longer exists. 
95 | if _, err := client.AppsV1().StatefulSets(p.GetNamespace()).Get(c.Name, meta.GetOptions{}); err != nil { 96 | if apierrors.IsNotFound(err) { 97 | return true, nil 98 | } 99 | return false, errors.Wrapf(err, "cannot get StatefulSet %s/%s", p.GetNamespace(), c.Name) 100 | } 101 | return false, nil 102 | } 103 | } 104 | 105 | // UnprotectedPodFilter returns a FilterFunc that returns true if the 106 | // supplied pod does not have any of the user-specified annotations for 107 | // protection from eviction 108 | func UnprotectedPodFilter(annotations ...string) PodFilterFunc { 109 | return func(p core.Pod) (bool, error) { 110 | var filter bool 111 | for _, annot := range annotations { 112 | // Try to split the annotation into key-value pairs 113 | kv := strings.SplitN(annot, "=", 2) 114 | if len(kv) < 2 { 115 | // If the annotation is a single string, then simply check for 116 | // the existence of the annotation key 117 | _, filter = p.GetAnnotations()[kv[0]] 118 | } else { 119 | // If the annotation is a key-value pair, then check if the 120 | // value for the pod annotation matches that of the 121 | // user-specified value 122 | v, ok := p.GetAnnotations()[kv[0]] 123 | filter = ok && v == kv[1] 124 | } 125 | if filter { 126 | return false, nil 127 | } 128 | } 129 | return true, nil 130 | } 131 | } 132 | 133 | // NewPodFilters returns a FilterFunc that returns true if all of the supplied 134 | // FilterFuncs return true. 135 | func NewPodFilters(filters ...PodFilterFunc) PodFilterFunc { 136 | return func(p core.Pod) (bool, error) { 137 | for _, fn := range filters { 138 | passes, err := fn(p) 139 | if err != nil { 140 | return false, errors.Wrap(err, "cannot apply filters") 141 | } 142 | if !passes { 143 | return false, nil 144 | } 145 | } 146 | return true, nil 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /internal/kubernetes/podfilters_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2018 Planet Labs Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 13 | implied. See the License for the specific language governing permissions 14 | and limitations under the License. 
15 | */ 16 | 17 | package kubernetes 18 | 19 | import ( 20 | "testing" 21 | 22 | "github.com/pkg/errors" 23 | core "k8s.io/api/core/v1" 24 | apierrors "k8s.io/apimachinery/pkg/api/errors" 25 | meta "k8s.io/apimachinery/pkg/apis/meta/v1" 26 | "k8s.io/apimachinery/pkg/runtime/schema" 27 | ) 28 | 29 | func TestPodFilters(t *testing.T) { 30 | cases := []struct { 31 | name string 32 | filter PodFilterFunc 33 | pod core.Pod 34 | passesFilter bool 35 | errFn func(err error) bool 36 | }{ 37 | { 38 | name: "IsMirror", 39 | pod: core.Pod{ 40 | ObjectMeta: meta.ObjectMeta{ 41 | Name: podName, 42 | Annotations: map[string]string{core.MirrorPodAnnotationKey: "definitelyahash"}, 43 | }, 44 | }, 45 | filter: MirrorPodFilter, 46 | passesFilter: false, 47 | }, 48 | { 49 | name: "IsNotMirror", 50 | pod: core.Pod{ObjectMeta: meta.ObjectMeta{Name: podName}}, 51 | filter: MirrorPodFilter, 52 | passesFilter: true, 53 | }, 54 | { 55 | name: "HasLocalStorage", 56 | pod: core.Pod{ 57 | ObjectMeta: meta.ObjectMeta{Name: podName}, 58 | Spec: core.PodSpec{ 59 | Volumes: []core.Volume{ 60 | core.Volume{VolumeSource: core.VolumeSource{HostPath: &core.HostPathVolumeSource{}}}, 61 | core.Volume{VolumeSource: core.VolumeSource{EmptyDir: &core.EmptyDirVolumeSource{}}}, 62 | }, 63 | }, 64 | }, 65 | filter: LocalStoragePodFilter, 66 | passesFilter: false, 67 | }, 68 | { 69 | name: "DoesNotHaveLocalStorage", 70 | pod: core.Pod{ 71 | ObjectMeta: meta.ObjectMeta{Name: podName}, 72 | Spec: core.PodSpec{ 73 | Volumes: []core.Volume{core.Volume{VolumeSource: core.VolumeSource{HostPath: &core.HostPathVolumeSource{}}}}, 74 | }, 75 | }, 76 | filter: LocalStoragePodFilter, 77 | passesFilter: true, 78 | }, 79 | { 80 | name: "Unreplicated", 81 | pod: core.Pod{ObjectMeta: meta.ObjectMeta{Name: podName}}, 82 | filter: UnreplicatedPodFilter, 83 | passesFilter: false, 84 | }, 85 | { 86 | name: "Replicated", 87 | pod: core.Pod{ 88 | ObjectMeta: meta.ObjectMeta{ 89 | Name: podName, 90 | OwnerReferences: []meta.OwnerReference{meta.OwnerReference{ 91 | Controller: &isController, 92 | Kind: kindDeployment, 93 | Name: deploymentName, 94 | }}, 95 | }, 96 | }, 97 | filter: UnreplicatedPodFilter, 98 | passesFilter: true, 99 | }, 100 | { 101 | name: "UnreplicatedButSucceeded", 102 | pod: core.Pod{ 103 | ObjectMeta: meta.ObjectMeta{Name: podName}, 104 | Status: core.PodStatus{Phase: core.PodSucceeded}, 105 | }, 106 | filter: UnreplicatedPodFilter, 107 | passesFilter: true, 108 | }, 109 | { 110 | name: "UnreplicatedButFailed", 111 | pod: core.Pod{ 112 | ObjectMeta: meta.ObjectMeta{Name: podName}, 113 | Status: core.PodStatus{Phase: core.PodFailed}, 114 | }, 115 | filter: UnreplicatedPodFilter, 116 | passesFilter: true, 117 | }, 118 | { 119 | name: "PartOfDaemonSet", 120 | pod: core.Pod{ 121 | ObjectMeta: meta.ObjectMeta{ 122 | Name: podName, 123 | OwnerReferences: []meta.OwnerReference{meta.OwnerReference{ 124 | Controller: &isController, 125 | Kind: kindDaemonSet, 126 | Name: daemonsetName, 127 | }}, 128 | }, 129 | }, 130 | filter: NewDaemonSetPodFilter(newFakeClientSet(reactor{verb: "get", resource: "daemonsets"})), 131 | passesFilter: false, 132 | }, 133 | { 134 | name: "ErrorGettingDaemonSet", 135 | pod: core.Pod{ 136 | ObjectMeta: meta.ObjectMeta{ 137 | Name: podName, 138 | OwnerReferences: []meta.OwnerReference{meta.OwnerReference{ 139 | Controller: &isController, 140 | Kind: kindDaemonSet, 141 | Name: daemonsetName, 142 | }}, 143 | }, 144 | }, 145 | filter: NewDaemonSetPodFilter(newFakeClientSet(reactor{ 146 | verb: "get", 147 | resource: 
"daemonsets", 148 | err: errExploded, 149 | })), 150 | errFn: func(err error) bool { return errors.Cause(err) == errExploded }, 151 | }, 152 | { 153 | name: "OrphanedFromDaemonSet", 154 | pod: core.Pod{ 155 | ObjectMeta: meta.ObjectMeta{ 156 | Name: podName, 157 | OwnerReferences: []meta.OwnerReference{meta.OwnerReference{ 158 | Controller: &isController, 159 | Kind: kindDaemonSet, 160 | Name: daemonsetName, 161 | }}, 162 | }, 163 | }, 164 | filter: NewDaemonSetPodFilter(newFakeClientSet(reactor{ 165 | verb: "get", 166 | resource: "daemonsets", 167 | err: apierrors.NewNotFound(schema.GroupResource{Resource: "daemonsets"}, daemonsetName), 168 | })), 169 | passesFilter: true, 170 | }, 171 | { 172 | name: "NotPartOfDaemonSet", 173 | pod: core.Pod{ 174 | ObjectMeta: meta.ObjectMeta{ 175 | Name: podName, 176 | OwnerReferences: []meta.OwnerReference{meta.OwnerReference{ 177 | Controller: &isController, 178 | Kind: kindDeployment, 179 | Name: deploymentName, 180 | }}, 181 | }, 182 | }, 183 | filter: NewDaemonSetPodFilter(newFakeClientSet()), 184 | passesFilter: true, 185 | }, 186 | { 187 | name: "PartOfStatefulSet", 188 | pod: core.Pod{ 189 | ObjectMeta: meta.ObjectMeta{ 190 | Name: podName, 191 | OwnerReferences: []meta.OwnerReference{meta.OwnerReference{ 192 | Controller: &isController, 193 | Kind: kindStatefulSet, 194 | Name: statefulsetName, 195 | }}, 196 | }, 197 | }, 198 | filter: NewStatefulSetPodFilter(newFakeClientSet(reactor{verb: "get", resource: "statefulsets"})), 199 | passesFilter: false, 200 | }, 201 | { 202 | name: "ErrorGettingStatefulSet", 203 | pod: core.Pod{ 204 | ObjectMeta: meta.ObjectMeta{ 205 | Name: podName, 206 | OwnerReferences: []meta.OwnerReference{meta.OwnerReference{ 207 | Controller: &isController, 208 | Kind: kindStatefulSet, 209 | Name: statefulsetName, 210 | }}, 211 | }, 212 | }, 213 | filter: NewStatefulSetPodFilter(newFakeClientSet(reactor{ 214 | verb: "get", 215 | resource: "statefulsets", 216 | err: errExploded, 217 | })), 218 | errFn: func(err error) bool { return errors.Cause(err) == errExploded }, 219 | }, 220 | { 221 | name: "OrphanedFromStatefulSet", 222 | pod: core.Pod{ 223 | ObjectMeta: meta.ObjectMeta{ 224 | Name: podName, 225 | OwnerReferences: []meta.OwnerReference{meta.OwnerReference{ 226 | Controller: &isController, 227 | Kind: kindStatefulSet, 228 | Name: statefulsetName, 229 | }}, 230 | }, 231 | }, 232 | filter: NewStatefulSetPodFilter(newFakeClientSet(reactor{ 233 | verb: "get", 234 | resource: "statefulsets", 235 | err: apierrors.NewNotFound(schema.GroupResource{Resource: "statefulsets"}, statefulsetName), 236 | })), 237 | passesFilter: true, 238 | }, 239 | { 240 | name: "NotPartOfStatefulSet", 241 | pod: core.Pod{ 242 | ObjectMeta: meta.ObjectMeta{ 243 | Name: podName, 244 | OwnerReferences: []meta.OwnerReference{meta.OwnerReference{ 245 | Controller: &isController, 246 | Kind: kindDeployment, 247 | Name: deploymentName, 248 | }}, 249 | }, 250 | }, 251 | filter: NewStatefulSetPodFilter(newFakeClientSet()), 252 | passesFilter: true, 253 | }, 254 | { 255 | name: "NoProtectionFromPodEviction", 256 | pod: core.Pod{ 257 | ObjectMeta: meta.ObjectMeta{ 258 | Name: podName, 259 | Annotations: map[string]string{"Random": "true"}, 260 | }, 261 | }, 262 | filter: UnprotectedPodFilter(), 263 | passesFilter: true, 264 | }, 265 | { 266 | name: "NoPodAnnotations", 267 | pod: core.Pod{ 268 | ObjectMeta: meta.ObjectMeta{ 269 | Name: podName, 270 | }, 271 | }, 272 | filter: UnprotectedPodFilter("Protect"), 273 | passesFilter: true, 274 | }, 275 | { 276 | 
name: "NoPodAnnotationsWithEmptyUserValue", 277 | pod: core.Pod{ 278 | ObjectMeta: meta.ObjectMeta{ 279 | Name: podName, 280 | }, 281 | }, 282 | filter: UnprotectedPodFilter("Protect="), 283 | passesFilter: true, 284 | }, 285 | { 286 | name: "NoMatchingProtectionAnnotations", 287 | pod: core.Pod{ 288 | ObjectMeta: meta.ObjectMeta{ 289 | Name: podName, 290 | Annotations: map[string]string{"Useless": "true"}, 291 | }, 292 | }, 293 | filter: UnprotectedPodFilter("Protect", "ProtectTwo=true"), 294 | passesFilter: true, 295 | }, 296 | { 297 | name: "AltNoMatchingProtectionAnnotations", 298 | pod: core.Pod{ 299 | ObjectMeta: meta.ObjectMeta{ 300 | Name: podName, 301 | Annotations: map[string]string{"NeedsAValue": ""}, 302 | }, 303 | }, 304 | filter: UnprotectedPodFilter("Protect", "ProtectTwo=true", "NeedsAValue=true"), 305 | passesFilter: true, 306 | }, 307 | { 308 | name: "KeyOnlyProtectionAnnotation", 309 | pod: core.Pod{ 310 | ObjectMeta: meta.ObjectMeta{ 311 | Name: podName, 312 | Annotations: map[string]string{"Protect": ""}, 313 | }, 314 | }, 315 | filter: UnprotectedPodFilter("Protect"), 316 | passesFilter: false, 317 | }, 318 | { 319 | name: "MultipleKeyOnlyProtectionAnnotations", 320 | pod: core.Pod{ 321 | ObjectMeta: meta.ObjectMeta{ 322 | Name: podName, 323 | Annotations: map[string]string{"ProtectTwo": ""}, 324 | }, 325 | }, 326 | filter: UnprotectedPodFilter("ProtectOne", "ProtectTwo"), 327 | passesFilter: false, 328 | }, 329 | { 330 | name: "SingleProtectionAnnotation", 331 | pod: core.Pod{ 332 | ObjectMeta: meta.ObjectMeta{ 333 | Name: podName, 334 | Annotations: map[string]string{"Protect": "true"}, 335 | }, 336 | }, 337 | filter: UnprotectedPodFilter("Protect=true"), 338 | passesFilter: false, 339 | }, 340 | { 341 | name: "MultipleProtectionAnnotations", 342 | pod: core.Pod{ 343 | ObjectMeta: meta.ObjectMeta{ 344 | Name: podName, 345 | Annotations: map[string]string{"ProtectTwo": "true"}, 346 | }, 347 | }, 348 | filter: UnprotectedPodFilter("ProtectOne=true", "ProtectTwo=true"), 349 | passesFilter: false, 350 | }, 351 | { 352 | name: "MultipleMixedProtectionAnnotations", 353 | pod: core.Pod{ 354 | ObjectMeta: meta.ObjectMeta{ 355 | Name: podName, 356 | Annotations: map[string]string{"ProtectTwo": ""}, 357 | }, 358 | }, 359 | filter: UnprotectedPodFilter("ProtectOne=true", "ProtectTwo"), 360 | passesFilter: false, 361 | }, 362 | { 363 | name: "AltMultipleMixedProtectionAnnotations", 364 | pod: core.Pod{ 365 | ObjectMeta: meta.ObjectMeta{ 366 | Name: podName, 367 | Annotations: map[string]string{"ProtectOne": "true"}, 368 | }, 369 | }, 370 | filter: UnprotectedPodFilter("ProtectOne", "ProtectTwo=true"), 371 | passesFilter: false, 372 | }, 373 | { 374 | name: "NoFiltersProvided", 375 | pod: core.Pod{ObjectMeta: meta.ObjectMeta{Name: podName}}, 376 | filter: NewPodFilters(), 377 | passesFilter: true, 378 | }, 379 | { 380 | name: "AllFiltersPass", 381 | pod: core.Pod{ObjectMeta: meta.ObjectMeta{Name: podName}}, 382 | filter: NewPodFilters( 383 | func(_ core.Pod) (bool, error) { return true, nil }, 384 | func(_ core.Pod) (bool, error) { return true, nil }, 385 | ), 386 | passesFilter: true, 387 | }, 388 | { 389 | name: "OneFilterFails", 390 | pod: core.Pod{ObjectMeta: meta.ObjectMeta{Name: podName}}, 391 | filter: NewPodFilters( 392 | func(_ core.Pod) (bool, error) { return true, nil }, 393 | func(_ core.Pod) (bool, error) { return false, nil }, 394 | ), 395 | passesFilter: false, 396 | }, 397 | { 398 | name: "OneFilterErrors", 399 | pod: core.Pod{ObjectMeta: meta.ObjectMeta{Name: 
podName}}, 400 | filter: NewPodFilters( 401 | func(_ core.Pod) (bool, error) { return true, nil }, 402 | func(_ core.Pod) (bool, error) { return false, errExploded }, 403 | ), 404 | errFn: func(err error) bool { return errors.Cause(err) == errExploded }, 405 | }, 406 | } 407 | 408 | for _, tc := range cases { 409 | t.Run(tc.name, func(t *testing.T) { 410 | passesFilter, err := tc.filter(tc.pod) 411 | if err != nil && tc.errFn != nil && !tc.errFn(err) { 412 | t.Errorf("tc.filter(%v): %v", tc.pod.GetName(), err) 413 | } 414 | if passesFilter != tc.passesFilter { 415 | t.Errorf("tc.filter(%v): want %v, got %v", tc.pod.GetName(), tc.passesFilter, passesFilter) 416 | } 417 | }) 418 | } 419 | } 420 | -------------------------------------------------------------------------------- /internal/kubernetes/util.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2018 Planet Labs Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 13 | implied. See the License for the specific language governing permissions 14 | and limitations under the License. 15 | */ 16 | 17 | package kubernetes 18 | 19 | import ( 20 | "time" 21 | 22 | core "k8s.io/api/core/v1" 23 | "k8s.io/apimachinery/pkg/util/wait" 24 | "k8s.io/client-go/kubernetes" 25 | "k8s.io/client-go/kubernetes/scheme" 26 | typedcore "k8s.io/client-go/kubernetes/typed/core/v1" 27 | "k8s.io/client-go/rest" 28 | "k8s.io/client-go/tools/clientcmd" 29 | "k8s.io/client-go/tools/clientcmd/api" 30 | "k8s.io/client-go/tools/record" 31 | ) 32 | 33 | // Component is the name of this application. 34 | const Component = "draino" 35 | 36 | // BuildConfigFromFlags is clientcmd.BuildConfigFromFlags with no annoying 37 | // dependencies on glog. 38 | // https://godoc.org/k8s.io/client-go/tools/clientcmd#BuildConfigFromFlags 39 | func BuildConfigFromFlags(apiserver, kubecfg string) (*rest.Config, error) { 40 | if kubecfg != "" || apiserver != "" { 41 | return clientcmd.NewNonInteractiveDeferredLoadingClientConfig( 42 | &clientcmd.ClientConfigLoadingRules{ExplicitPath: kubecfg}, 43 | &clientcmd.ConfigOverrides{ClusterInfo: api.Cluster{Server: apiserver}}).ClientConfig() 44 | } 45 | return rest.InClusterConfig() 46 | } 47 | 48 | // NewEventRecorder returns a new record.EventRecorder for the given client. 49 | func NewEventRecorder(c kubernetes.Interface) record.EventRecorder { 50 | b := record.NewBroadcaster() 51 | b.StartRecordingToSink(&typedcore.EventSinkImpl{Interface: typedcore.New(c.CoreV1().RESTClient()).Events("")}) 52 | return b.NewRecorder(scheme.Scheme, core.EventSource{Component: Component}) 53 | } 54 | 55 | func RetryWithTimeout(f func() error, retryPeriod, timeout time.Duration) error { 56 | return wait.PollImmediate(retryPeriod, timeout, 57 | func() (bool, error) { 58 | if err := f(); err != nil { 59 | return false, nil 60 | } 61 | return true, nil 62 | }) 63 | } 64 | -------------------------------------------------------------------------------- /internal/kubernetes/watch.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2018 Planet Labs Inc. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 |     http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 13 | implied. See the License for the specific language governing permissions 14 | and limitations under the License. 15 | */ 16 | 17 | package kubernetes 18 | 19 | import ( 20 | 	"time" 21 | 22 | 	"github.com/pkg/errors" 23 | 	core "k8s.io/api/core/v1" 24 | 	meta "k8s.io/apimachinery/pkg/apis/meta/v1" 25 | 	"k8s.io/apimachinery/pkg/runtime" 26 | 	"k8s.io/apimachinery/pkg/watch" 27 | 	"k8s.io/client-go/kubernetes" 28 | 	"k8s.io/client-go/tools/cache" 29 | ) 30 | 31 | // A NodeStore is a cache of node resources. 32 | type NodeStore interface { 33 | 	// Get a node by name. Returns an error if the node does not exist. 34 | 	Get(name string) (*core.Node, error) 35 | } 36 | 37 | // A NodeWatch is a cache of node resources that notifies registered 38 | // handlers when its contents change. 39 | type NodeWatch struct { 40 | 	cache.SharedInformer 41 | } 42 | 43 | // NewNodeWatch creates a watch on node resources. Nodes are cached and the 44 | // provided ResourceEventHandlers are called when the cache changes. 45 | func NewNodeWatch(c kubernetes.Interface, rs ...cache.ResourceEventHandler) *NodeWatch { 46 | 	lw := &cache.ListWatch{ 47 | 		ListFunc:  func(o meta.ListOptions) (runtime.Object, error) { return c.CoreV1().Nodes().List(o) }, 48 | 		WatchFunc: func(o meta.ListOptions) (watch.Interface, error) { return c.CoreV1().Nodes().Watch(o) }, 49 | 	} 50 | 	i := cache.NewSharedInformer(lw, &core.Node{}, 30*time.Minute) 51 | 	for _, r := range rs { 52 | 		i.AddEventHandler(r) 53 | 	} 54 | 	return &NodeWatch{i} 55 | } 56 | 57 | // Get a node by name. Returns an error if the node does not exist. 58 | func (w *NodeWatch) Get(name string) (*core.Node, error) { 59 | 	o, exists, err := w.GetStore().GetByKey(name) 60 | 	if err != nil { 61 | 		return nil, errors.Wrapf(err, "cannot get node %s", name) 62 | 	} 63 | 	if !exists { 64 | 		return nil, errors.Errorf("node %s does not exist", name) 65 | 	} 66 | 	return o.(*core.Node), nil 67 | } 68 | -------------------------------------------------------------------------------- /internal/kubernetes/watch_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2018 Planet Labs Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 |     http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 13 | implied. See the License for the specific language governing permissions 14 | and limitations under the License.
15 | */ 16 | 17 | package kubernetes 18 | 19 | import ( 20 | "testing" 21 | 22 | "github.com/go-test/deep" 23 | "github.com/pkg/errors" 24 | core "k8s.io/api/core/v1" 25 | "k8s.io/client-go/tools/cache" 26 | ) 27 | 28 | const ( 29 | name = "name" 30 | ) 31 | 32 | type getByKeyFunc func(key string) (interface{}, bool, error) 33 | 34 | type predictableInformer struct { 35 | cache.SharedInformer 36 | fn getByKeyFunc 37 | } 38 | 39 | func (i *predictableInformer) GetStore() cache.Store { 40 | return &cache.FakeCustomStore{GetByKeyFunc: i.fn} 41 | } 42 | 43 | func TestNodeWatcher(t *testing.T) { 44 | cases := []struct { 45 | name string 46 | fn getByKeyFunc 47 | want *core.Node 48 | wantErr bool 49 | }{ 50 | { 51 | name: "NodeExists", 52 | fn: func(k string) (interface{}, bool, error) { 53 | return &core.Node{}, true, nil 54 | }, 55 | want: &core.Node{}, 56 | }, 57 | { 58 | name: "NodeDoesNotExist", 59 | fn: func(k string) (interface{}, bool, error) { 60 | return nil, false, nil 61 | }, 62 | wantErr: true, 63 | }, 64 | { 65 | name: "ErrorGettingNode", 66 | fn: func(k string) (interface{}, bool, error) { 67 | return nil, false, errors.New("boom") 68 | }, 69 | wantErr: true, 70 | }, 71 | } 72 | 73 | for _, tc := range cases { 74 | t.Run(tc.name, func(t *testing.T) { 75 | i := &predictableInformer{fn: tc.fn} 76 | w := &NodeWatch{i} 77 | got, err := w.Get(name) 78 | if err != nil { 79 | if tc.wantErr { 80 | return 81 | } 82 | t.Errorf("w.Get(%v): %v", name, err) 83 | } 84 | 85 | if diff := deep.Equal(tc.want, got); diff != nil { 86 | t.Errorf("w.Get(%v): want != got %v", name, diff) 87 | } 88 | }) 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /manifest.yml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | labels: {component: draino} 6 | name: draino 7 | namespace: kube-system 8 | --- 9 | apiVersion: rbac.authorization.k8s.io/v1 10 | kind: ClusterRole 11 | metadata: 12 | labels: {component: draino} 13 | name: draino 14 | rules: 15 | - apiGroups: [''] 16 | resources: [events] 17 | verbs: [create, patch, update] 18 | - apiGroups: [''] 19 | resources: [nodes] 20 | verbs: [get, watch, list, update] 21 | - apiGroups: [''] 22 | resources: [nodes/status] 23 | verbs: [patch] 24 | - apiGroups: [''] 25 | resources: [pods] 26 | verbs: [get, watch, list] 27 | - apiGroups: [''] 28 | resources: [pods/eviction] 29 | verbs: [create] 30 | - apiGroups: [extensions] 31 | resources: [daemonsets] 32 | verbs: [get, watch, list] 33 | --- 34 | apiVersion: rbac.authorization.k8s.io/v1 35 | kind: ClusterRoleBinding 36 | metadata: 37 | labels: {component: draino} 38 | name: draino 39 | roleRef: {apiGroup: rbac.authorization.k8s.io, kind: ClusterRole, name: draino} 40 | subjects: 41 | - {kind: ServiceAccount, name: draino, namespace: kube-system} 42 | --- 43 | apiVersion: apps/v1 44 | kind: Deployment 45 | metadata: 46 | labels: {component: draino} 47 | name: draino 48 | namespace: kube-system 49 | spec: 50 | # Draino does not currently support locking/master election, so you should 51 | # only run one draino at a time. Draino won't start draining nodes immediately 52 | # so it's usually safe for multiple drainos to exist for a brief period of 53 | # time. 
54 | replicas: 1 55 | selector: 56 | matchLabels: {component: draino} 57 | template: 58 | metadata: 59 | labels: {component: draino} 60 | name: draino 61 | namespace: kube-system 62 | spec: 63 | containers: 64 | # You'll want to change these labels and conditions to suit your deployment. 65 | - command: [/draino, --dry-run, --node-label=draino-enabled=true, BadCondition, ReallyBadCondition] 66 | image: planetlabs/draino:5e07e93 67 | livenessProbe: 68 | httpGet: {path: /healthz, port: 10002} 69 | initialDelaySeconds: 30 70 | name: draino 71 | serviceAccountName: draino 72 | -------------------------------------------------------------------------------- /scripts/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | VERSION=$(git rev-parse --short HEAD) 6 | docker build --tag "planetlabs/draino:${VERSION}" . 7 | -------------------------------------------------------------------------------- /scripts/push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | VERSION=$(git rev-parse --short HEAD) 6 | docker push "planetlabs/draino:${VERSION}" 7 | -------------------------------------------------------------------------------- /scripts/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./draino --kubeconfig ~/.kube/config --node-label-expr="metadata['labels']['node-role'] in ['default', 'default', 'default-compute', 'default-memory']" --evict-unreplicated-pods --evict-emptydir-pods --evict-daemonset-pods AMIProblem KernelDeadlock ReadonlyFilesystem OutOfDisk -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | echo "" >coverage.txt 5 | 6 | for d in $(go list ./... | grep -v "vendor/"); do 7 | go test -race -coverprofile=c $d 8 | if [ -f c ]; then 9 | cat c >>coverage.txt 10 | rm c 11 | fi 12 | done 13 | --------------------------------------------------------------------------------
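
For reference, a minimal sketch of how the pod-filter helpers exercised in podfilters_test.go compose. This snippet is not part of the repository; it assumes it would sit in the same internal/kubernetes package (the package is internal and cannot be imported from outside), and the function name, pod name, and annotation keys are illustrative only.

package kubernetes

import (
	core "k8s.io/api/core/v1"
	meta "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// examplePodFilterComposition illustrates the behaviour covered by the tests
// above: a bare "Protect" key protects a pod regardless of the annotation's
// value, "Key=value" entries require an exact value match, and NewPodFilters
// only passes a pod when every constituent filter passes.
func examplePodFilterComposition() (bool, error) {
	pod := core.Pod{ObjectMeta: meta.ObjectMeta{
		Name:        "example",
		Annotations: map[string]string{"Protect": ""},
	}}

	filter := NewPodFilters(
		// Protected if annotated with a bare "Protect" key or "ProtectTwo=true".
		UnprotectedPodFilter("Protect", "ProtectTwo=true"),
		// An always-true filter composes without changing the result.
		func(_ core.Pod) (bool, error) { return true, nil },
	)

	// Given the cases in podfilters_test.go, this should return false:
	// the bare "Protect" key matches, so the pod is protected and does not pass.
	return filter(pod)
}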
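
Similarly, a hedged sketch of how the helpers in util.go and watch.go could be wired together. This is not part of the repository either: it assumes the same internal/kubernetes package, and the helper name buildNodeStore, the nodeName argument, and the chosen retry/timeout values are purely illustrative.

package kubernetes

import (
	"time"

	"k8s.io/client-go/kubernetes"
)

// buildNodeStore builds a client (out of cluster when an apiserver address or
// kubeconfig path is supplied, in cluster otherwise), starts a node informer,
// and waits until the named node appears in its cache.
func buildNodeStore(apiserver, kubecfg, nodeName string, stop <-chan struct{}) (NodeStore, error) {
	cfg, err := BuildConfigFromFlags(apiserver, kubecfg)
	if err != nil {
		return nil, err
	}
	cs, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		return nil, err
	}

	// NewNodeWatch wraps a shared informer; its cache only fills once Run is called.
	nodes := NewNodeWatch(cs)
	go nodes.Run(stop)

	// Poll every second, for up to a minute, until the node shows up in the cache.
	err = RetryWithTimeout(func() error {
		_, err := nodes.Get(nodeName)
		return err
	}, time.Second, time.Minute)
	return nodes, err
}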