├── .dockerignore
├── .gitignore
├── CONTRIBUTING.md
├── Dockerfile
├── Gopkg.lock
├── Gopkg.toml
├── LICENSE
├── Makefile
├── README.md
├── configure
├── deploy
│   ├── clusterrole.yaml
│   ├── clusterrolebinding.yaml
│   ├── deployment.yaml
│   └── serviceaccount.yaml
├── metrics
│   └── metrics.go
├── nodes
│   ├── nodes.go
│   └── nodes_test.go
├── rescheduler.go
├── rescheduler_test.go
├── scaler
│   └── scaler.go
└── version.go
/.dockerignore: -------------------------------------------------------------------------------- 1 | vendor 2 | --------------------------------------------------------------------------------
/.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.dll 4 | *.so 5 | *.dylib 6 | 7 | # Test binary, build with `go test -c` 8 | *.test 9 | 10 | # Output of the go coverage tool, specifically when used with LiteIDE 11 | *.out 12 | 13 | # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 14 | .glide/ 15 | 16 | # Project-local wercker build cache 17 | .wercker/ 18 | 19 | # Project-local vendor folder 20 | vendor/ 21 | 22 | # Kubernetes config 23 | kubeconfig 24 | credentials/ 25 | 26 | # Local environmental config 27 | .env 28 | k8s-spot-rescheduler 29 | --------------------------------------------------------------------------------
/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | To develop on this project, please fork the repo and clone it into your `$GOPATH`. 3 | 4 | Dependencies are **not** checked in, so please download those separately. 5 | Download the dependencies using [`dep`](https://github.com/golang/dep). 6 | 7 | ```bash 8 | cd $GOPATH/src/github.com # Create this directory if it doesn't exist 9 | git clone git@github.com:<your-username>/k8s-spot-rescheduler pusher/k8s-spot-rescheduler 10 | cd $GOPATH/src/github.com/pusher/k8s-spot-rescheduler 11 | ./configure # Configure local tooling - install anything reported as missing 12 | make vendor # Clone required project dependencies 13 | ``` 14 | 15 | The main package is within `rescheduler.go` and an overview of its operating logic is described in the [Readme](README.md#operating-logic). 16 | 17 | If you want to run the rescheduler locally, you must have a valid `kubeconfig` file somewhere on your machine and then run the program with the flag `--running-in-cluster=false`. 18 | 19 | ## Pull Requests and Issues 20 | We track bugs and issues using GitHub. 21 | 22 | If you find a bug, please open an Issue. 23 | 24 | If you want to fix a bug, please fork, fix the bug and open a PR back to this repo. 25 | Please mention the open bug issue number within your PR if applicable. 26 | 27 | ### Tests 28 | Unit tests cover the decision-making parts of this code and can be run using the built-in Go test suite. 
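For example, assuming the dependencies have already been vendored with `make vendor`, you can invoke the Go tooling directly to run the whole suite or a single package (a minimal sketch; the package path is just an illustration):

```bash
# Run every test in the repository
go test ./...

# Run only the tests in the nodes package, with verbose output
go test ./nodes/... -v
```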
29 | 30 | To run the tests: `make test` 31 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG VERSION=undefined 2 | 3 | FROM golang:1.12 AS builder 4 | ARG VERSION 5 | 6 | RUN curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh 7 | 8 | WORKDIR /go/src/github.com/pusher/k8s-spot-rescheduler 9 | 10 | COPY Gopkg.lock Gopkg.lock 11 | COPY Gopkg.toml Gopkg.toml 12 | 13 | RUN dep ensure --vendor-only 14 | 15 | COPY *.go ./ 16 | COPY deploy deploy/ 17 | COPY metrics metrics/ 18 | COPY nodes nodes/ 19 | COPY scaler scaler/ 20 | 21 | RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags="-X main.VERSION=${VERSION}" -a -o k8s-spot-rescheduler github.com/pusher/k8s-spot-rescheduler 22 | 23 | FROM alpine:3.9 24 | RUN apk --no-cache add ca-certificates 25 | WORKDIR /bin 26 | COPY --from=builder /go/src/github.com/pusher/k8s-spot-rescheduler/k8s-spot-rescheduler . 27 | 28 | ENTRYPOINT ["/bin/k8s-spot-rescheduler"] 29 | -------------------------------------------------------------------------------- /Gopkg.lock: -------------------------------------------------------------------------------- 1 | # This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'. 2 | 3 | 4 | [[projects]] 5 | branch = "master" 6 | name = "github.com/Azure/go-ansiterm" 7 | packages = [ 8 | ".", 9 | "winterm" 10 | ] 11 | revision = "d6e3b3328b783f23731bc4d058875b0371ff8109" 12 | 13 | [[projects]] 14 | branch = "master" 15 | name = "github.com/MakeNowJust/heredoc" 16 | packages = ["."] 17 | revision = "e9091a26100e9cfb2b6a8f470085bfa541931a91" 18 | 19 | [[projects]] 20 | name = "github.com/PuerkitoBio/purell" 21 | packages = ["."] 22 | revision = "0bcb03f4b4d0a9428594752bd2a3b9aa0a9d4bd4" 23 | version = "v1.1.0" 24 | 25 | [[projects]] 26 | branch = "master" 27 | name = "github.com/PuerkitoBio/urlesc" 28 | packages = ["."] 29 | revision = "de5bf2ad457846296e2031421a34e2568e304e35" 30 | 31 | [[projects]] 32 | name = "github.com/Sirupsen/logrus" 33 | packages = ["."] 34 | revision = "c155da19408a8799da419ed3eeb0cb5db0ad5dbc" 35 | version = "v1.0.5" 36 | 37 | [[projects]] 38 | branch = "master" 39 | name = "github.com/beorn7/perks" 40 | packages = ["quantile"] 41 | revision = "3a771d992973f24aa725d07868b467d1ddfceafb" 42 | 43 | [[projects]] 44 | name = "github.com/davecgh/go-spew" 45 | packages = ["spew"] 46 | revision = "346938d642f2ec3594ed81d874461961cd0faa76" 47 | version = "v1.1.0" 48 | 49 | [[projects]] 50 | name = "github.com/docker/distribution" 51 | packages = [ 52 | "digestset", 53 | "reference" 54 | ] 55 | revision = "f0cc927784781fa395c06317c58dea2841ece3a9" 56 | 57 | [[projects]] 58 | name = "github.com/docker/docker" 59 | packages = [ 60 | "api/types", 61 | "api/types/blkiodev", 62 | "api/types/container", 63 | "api/types/filters", 64 | "api/types/mount", 65 | "api/types/network", 66 | "api/types/registry", 67 | "api/types/strslice", 68 | "api/types/swarm", 69 | "api/types/versions", 70 | "pkg/term", 71 | "pkg/term/windows" 72 | ] 73 | revision = "092cba3727bb9b4a2f0e922cd6c0f93ea270e363" 74 | version = "v1.13.1" 75 | 76 | [[projects]] 77 | name = "github.com/docker/go-connections" 78 | packages = ["nat"] 79 | revision = "3ede32e2033de7505e6500d6c868c2b9ed9f169d" 80 | version = "v0.3.0" 81 | 82 | [[projects]] 83 | name = "github.com/docker/go-units" 84 | packages = ["."] 85 | revision = "47565b4f722fb6ceae66b95f853feed578a4a51c" 86 | 
version = "v0.3.3" 87 | 88 | [[projects]] 89 | branch = "master" 90 | name = "github.com/docker/spdystream" 91 | packages = [ 92 | ".", 93 | "spdy" 94 | ] 95 | revision = "bc6354cbbc295e925e4c611ffe90c1f287ee54db" 96 | 97 | [[projects]] 98 | name = "github.com/evanphx/json-patch" 99 | packages = ["."] 100 | revision = "afac545df32f2287a079e2dfb7ba2745a643747e" 101 | version = "v3.0.0" 102 | 103 | [[projects]] 104 | branch = "master" 105 | name = "github.com/exponent-io/jsonpath" 106 | packages = ["."] 107 | revision = "d6023ce2651d8eafb5c75bb0c7167536102ec9f5" 108 | 109 | [[projects]] 110 | branch = "master" 111 | name = "github.com/fatih/camelcase" 112 | packages = ["."] 113 | revision = "44e46d280b43ec1531bb25252440e34f1b800b65" 114 | 115 | [[projects]] 116 | name = "github.com/ghodss/yaml" 117 | packages = ["."] 118 | revision = "0ca9ea5df5451ffdf184b4428c902747c2c11cd7" 119 | version = "v1.0.0" 120 | 121 | [[projects]] 122 | branch = "master" 123 | name = "github.com/go-openapi/jsonpointer" 124 | packages = ["."] 125 | revision = "3a0015ad55fa9873f41605d3e8f28cd279c32ab2" 126 | 127 | [[projects]] 128 | branch = "master" 129 | name = "github.com/go-openapi/jsonreference" 130 | packages = ["."] 131 | revision = "3fb327e6747da3043567ee86abd02bb6376b6be2" 132 | 133 | [[projects]] 134 | branch = "master" 135 | name = "github.com/go-openapi/spec" 136 | packages = ["."] 137 | revision = "bcff419492eeeb01f76e77d2ebc714dc97b607f5" 138 | 139 | [[projects]] 140 | branch = "master" 141 | name = "github.com/go-openapi/swag" 142 | packages = ["."] 143 | revision = "811b1089cde9dad18d4d0c2d09fbdbf28dbd27a5" 144 | 145 | [[projects]] 146 | name = "github.com/gogo/protobuf" 147 | packages = [ 148 | "proto", 149 | "sortkeys" 150 | ] 151 | revision = "1adfc126b41513cc696b209667c8656ea7aac67c" 152 | version = "v1.0.0" 153 | 154 | [[projects]] 155 | branch = "master" 156 | name = "github.com/golang/glog" 157 | packages = ["."] 158 | revision = "23def4e6c14b4da8ac2ed8007337bc5eb5007998" 159 | 160 | [[projects]] 161 | branch = "master" 162 | name = "github.com/golang/groupcache" 163 | packages = ["lru"] 164 | revision = "24b0969c4cb722950103eed87108c8d291a8df00" 165 | 166 | [[projects]] 167 | name = "github.com/golang/protobuf" 168 | packages = [ 169 | "proto", 170 | "ptypes", 171 | "ptypes/any", 172 | "ptypes/duration", 173 | "ptypes/timestamp" 174 | ] 175 | revision = "b4deda0973fb4c70b50d226b1af49f3da59f5265" 176 | version = "v1.1.0" 177 | 178 | [[projects]] 179 | branch = "master" 180 | name = "github.com/google/btree" 181 | packages = ["."] 182 | revision = "e89373fe6b4a7413d7acd6da1725b83ef713e6e4" 183 | 184 | [[projects]] 185 | branch = "master" 186 | name = "github.com/google/gofuzz" 187 | packages = ["."] 188 | revision = "24818f796faf91cd76ec7bddd72458fbced7a6c1" 189 | 190 | [[projects]] 191 | name = "github.com/googleapis/gnostic" 192 | packages = [ 193 | "OpenAPIv2", 194 | "compiler", 195 | "extensions" 196 | ] 197 | revision = "7c663266750e7d82587642f65e60bc4083f1f84e" 198 | version = "v0.2.0" 199 | 200 | [[projects]] 201 | branch = "master" 202 | name = "github.com/gregjones/httpcache" 203 | packages = [ 204 | ".", 205 | "diskcache" 206 | ] 207 | revision = "9cad4c3443a7200dd6400aef47183728de563a38" 208 | 209 | [[projects]] 210 | branch = "master" 211 | name = "github.com/hashicorp/golang-lru" 212 | packages = [ 213 | ".", 214 | "simplelru" 215 | ] 216 | revision = "0fb14efe8c47ae851c0034ed7a448854d3d34cf3" 217 | 218 | [[projects]] 219 | branch = "master" 220 | name = "github.com/howeyc/gopass" 
221 | packages = ["."] 222 | revision = "bf9dde6d0d2c004a008c27aaee91170c786f6db8" 223 | 224 | [[projects]] 225 | name = "github.com/imdario/mergo" 226 | packages = ["."] 227 | revision = "9316a62528ac99aaecb4e47eadd6dc8aa6533d58" 228 | version = "v0.3.5" 229 | 230 | [[projects]] 231 | name = "github.com/inconshreveable/mousetrap" 232 | packages = ["."] 233 | revision = "76626ae9c91c4f2a10f34cad8ce83ea42c93bb75" 234 | version = "v1.0" 235 | 236 | [[projects]] 237 | name = "github.com/json-iterator/go" 238 | packages = ["."] 239 | revision = "f2b4162afba35581b6d4a50d3b8f34e33c144682" 240 | 241 | [[projects]] 242 | branch = "master" 243 | name = "github.com/mailru/easyjson" 244 | packages = [ 245 | "buffer", 246 | "jlexer", 247 | "jwriter" 248 | ] 249 | revision = "3fdea8d05856a0c8df22ed4bc71b3219245e4485" 250 | 251 | [[projects]] 252 | name = "github.com/matttproud/golang_protobuf_extensions" 253 | packages = ["pbutil"] 254 | revision = "c12348ce28de40eed0136aa2b644d0ee0650e56c" 255 | version = "v1.0.1" 256 | 257 | [[projects]] 258 | branch = "master" 259 | name = "github.com/mitchellh/go-wordwrap" 260 | packages = ["."] 261 | revision = "ad45545899c7b13c020ea92b2072220eefad42b8" 262 | 263 | [[projects]] 264 | name = "github.com/modern-go/concurrent" 265 | packages = ["."] 266 | revision = "bacd9c7ef1dd9b15be4a9909b8ac7a4e313eec94" 267 | version = "1.0.3" 268 | 269 | [[projects]] 270 | name = "github.com/modern-go/reflect2" 271 | packages = ["."] 272 | revision = "1df9eeb2bb81f327b96228865c5687bc2194af3f" 273 | version = "1.0.0" 274 | 275 | [[projects]] 276 | name = "github.com/opencontainers/go-digest" 277 | packages = ["."] 278 | revision = "279bed98673dd5bef374d3b6e4b09e2af76183bf" 279 | version = "v1.0.0-rc1" 280 | 281 | [[projects]] 282 | name = "github.com/pborman/uuid" 283 | packages = ["."] 284 | revision = "e790cca94e6cc75c7064b1332e63811d4aae1a53" 285 | version = "v1.1" 286 | 287 | [[projects]] 288 | branch = "master" 289 | name = "github.com/petar/GoLLRB" 290 | packages = ["llrb"] 291 | revision = "53be0d36a84c2a886ca057d34b6aa4468df9ccb4" 292 | 293 | [[projects]] 294 | name = "github.com/peterbourgon/diskv" 295 | packages = ["."] 296 | revision = "5f041e8faa004a95c88a202771f4cc3e991971e6" 297 | version = "v2.0.1" 298 | 299 | [[projects]] 300 | name = "github.com/pmezard/go-difflib" 301 | packages = ["difflib"] 302 | revision = "792786c7400a136282c1664665ae0a8db921c6c2" 303 | version = "v1.0.0" 304 | 305 | [[projects]] 306 | name = "github.com/prometheus/client_golang" 307 | packages = ["prometheus"] 308 | revision = "c5b7fccd204277076155f10851dad72b76a49317" 309 | version = "v0.8.0" 310 | 311 | [[projects]] 312 | branch = "master" 313 | name = "github.com/prometheus/client_model" 314 | packages = ["go"] 315 | revision = "99fa1f4be8e564e8a6b613da7fa6f46c9edafc6c" 316 | 317 | [[projects]] 318 | branch = "master" 319 | name = "github.com/prometheus/common" 320 | packages = [ 321 | "expfmt", 322 | "internal/bitbucket.org/ww/goautoneg", 323 | "model" 324 | ] 325 | revision = "7600349dcfe1abd18d72d3a1770870d9800a7801" 326 | 327 | [[projects]] 328 | branch = "master" 329 | name = "github.com/prometheus/procfs" 330 | packages = [ 331 | ".", 332 | "internal/util", 333 | "nfs", 334 | "xfs" 335 | ] 336 | revision = "7d6f385de8bea29190f15ba9931442a0eaef9af7" 337 | 338 | [[projects]] 339 | name = "github.com/russross/blackfriday" 340 | packages = ["."] 341 | revision = "300106c228d52c8941d4b3de6054a6062a86dda3" 342 | 343 | [[projects]] 344 | branch = "master" 345 | name = 
"github.com/shurcooL/sanitized_anchor_name" 346 | packages = ["."] 347 | revision = "86672fcb3f950f35f2e675df2240550f2a50762f" 348 | 349 | [[projects]] 350 | name = "github.com/spf13/cobra" 351 | packages = ["."] 352 | revision = "ef82de70bb3f60c65fb8eebacbb2d122ef517385" 353 | version = "v0.0.3" 354 | 355 | [[projects]] 356 | name = "github.com/spf13/pflag" 357 | packages = ["."] 358 | revision = "583c0c0531f06d5278b7d917446061adc344b5cd" 359 | version = "v1.0.1" 360 | 361 | [[projects]] 362 | name = "github.com/stretchr/testify" 363 | packages = ["assert"] 364 | revision = "f35b8ab0b5a2cef36673838d662e249dd9c94686" 365 | version = "v1.2.2" 366 | 367 | [[projects]] 368 | branch = "master" 369 | name = "golang.org/x/crypto" 370 | packages = [ 371 | "ed25519", 372 | "ed25519/internal/edwards25519", 373 | "ssh/terminal" 374 | ] 375 | revision = "a49355c7e3f8fe157a85be2f77e6e269a0f89602" 376 | 377 | [[projects]] 378 | branch = "master" 379 | name = "golang.org/x/net" 380 | packages = [ 381 | "context", 382 | "http/httpguts", 383 | "http2", 384 | "http2/hpack", 385 | "idna" 386 | ] 387 | revision = "4cb1c02c05b0e749b0365f61ae859a8e0cfceed9" 388 | 389 | [[projects]] 390 | branch = "master" 391 | name = "golang.org/x/sys" 392 | packages = [ 393 | "unix", 394 | "windows" 395 | ] 396 | revision = "7138fd3d9dc8335c567ca206f4333fb75eb05d56" 397 | 398 | [[projects]] 399 | name = "golang.org/x/text" 400 | packages = [ 401 | "collate", 402 | "collate/build", 403 | "encoding", 404 | "encoding/internal", 405 | "encoding/internal/identifier", 406 | "encoding/unicode", 407 | "internal/colltab", 408 | "internal/gen", 409 | "internal/tag", 410 | "internal/triegen", 411 | "internal/ucd", 412 | "internal/utf8internal", 413 | "language", 414 | "runes", 415 | "secure/bidirule", 416 | "transform", 417 | "unicode/bidi", 418 | "unicode/cldr", 419 | "unicode/norm", 420 | "unicode/rangetable", 421 | "width" 422 | ] 423 | revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0" 424 | version = "v0.3.0" 425 | 426 | [[projects]] 427 | branch = "master" 428 | name = "golang.org/x/time" 429 | packages = ["rate"] 430 | revision = "fbb02b2291d28baffd63558aa44b4b56f178d650" 431 | 432 | [[projects]] 433 | name = "gopkg.in/inf.v0" 434 | packages = ["."] 435 | revision = "d2d2541c53f18d2a059457998ce2876cc8e67cbf" 436 | version = "v0.9.1" 437 | 438 | [[projects]] 439 | name = "gopkg.in/square/go-jose.v2" 440 | packages = [ 441 | ".", 442 | "cipher", 443 | "json", 444 | "jwt" 445 | ] 446 | revision = "76dd09796242edb5b897103a75df2645c028c960" 447 | version = "v2.1.6" 448 | 449 | [[projects]] 450 | name = "gopkg.in/yaml.v2" 451 | packages = ["."] 452 | revision = "5420a8b6744d3b0345ab293f6fcba19c978f1183" 453 | version = "v2.2.1" 454 | 455 | [[projects]] 456 | name = "k8s.io/api" 457 | packages = [ 458 | "admission/v1beta1", 459 | "admissionregistration/v1alpha1", 460 | "admissionregistration/v1beta1", 461 | "apps/v1", 462 | "apps/v1beta1", 463 | "apps/v1beta2", 464 | "authentication/v1", 465 | "authentication/v1beta1", 466 | "authorization/v1", 467 | "authorization/v1beta1", 468 | "autoscaling/v1", 469 | "autoscaling/v2beta1", 470 | "batch/v1", 471 | "batch/v1beta1", 472 | "batch/v2alpha1", 473 | "certificates/v1beta1", 474 | "core/v1", 475 | "events/v1beta1", 476 | "extensions/v1beta1", 477 | "imagepolicy/v1alpha1", 478 | "networking/v1", 479 | "policy/v1beta1", 480 | "rbac/v1", 481 | "rbac/v1alpha1", 482 | "rbac/v1beta1", 483 | "scheduling/v1alpha1", 484 | "settings/v1alpha1", 485 | "storage/v1", 486 | "storage/v1alpha1", 487 | 
"storage/v1beta1" 488 | ] 489 | revision = "6c0bbc3e58fab96285be9b6ed41b12b58c737a96" 490 | version = "kubernetes-1.10.5" 491 | 492 | [[projects]] 493 | branch = "master" 494 | name = "k8s.io/apiextensions-apiserver" 495 | packages = ["pkg/features"] 496 | revision = "4340dd9c24063611b7a269f847e16f9ad758bd52" 497 | 498 | [[projects]] 499 | branch = "release-1.10" 500 | name = "k8s.io/apimachinery" 501 | packages = [ 502 | "pkg/api/equality", 503 | "pkg/api/errors", 504 | "pkg/api/meta", 505 | "pkg/api/resource", 506 | "pkg/api/validation", 507 | "pkg/apimachinery", 508 | "pkg/apimachinery/announced", 509 | "pkg/apimachinery/registered", 510 | "pkg/apis/meta/internalversion", 511 | "pkg/apis/meta/v1", 512 | "pkg/apis/meta/v1/unstructured", 513 | "pkg/apis/meta/v1/validation", 514 | "pkg/apis/meta/v1beta1", 515 | "pkg/conversion", 516 | "pkg/conversion/queryparams", 517 | "pkg/fields", 518 | "pkg/labels", 519 | "pkg/runtime", 520 | "pkg/runtime/schema", 521 | "pkg/runtime/serializer", 522 | "pkg/runtime/serializer/json", 523 | "pkg/runtime/serializer/protobuf", 524 | "pkg/runtime/serializer/recognizer", 525 | "pkg/runtime/serializer/streaming", 526 | "pkg/runtime/serializer/versioning", 527 | "pkg/selection", 528 | "pkg/types", 529 | "pkg/util/cache", 530 | "pkg/util/clock", 531 | "pkg/util/diff", 532 | "pkg/util/duration", 533 | "pkg/util/errors", 534 | "pkg/util/framer", 535 | "pkg/util/httpstream", 536 | "pkg/util/httpstream/spdy", 537 | "pkg/util/intstr", 538 | "pkg/util/json", 539 | "pkg/util/mergepatch", 540 | "pkg/util/net", 541 | "pkg/util/rand", 542 | "pkg/util/remotecommand", 543 | "pkg/util/runtime", 544 | "pkg/util/sets", 545 | "pkg/util/strategicpatch", 546 | "pkg/util/uuid", 547 | "pkg/util/validation", 548 | "pkg/util/validation/field", 549 | "pkg/util/wait", 550 | "pkg/util/yaml", 551 | "pkg/version", 552 | "pkg/watch", 553 | "third_party/forked/golang/json", 554 | "third_party/forked/golang/netutil", 555 | "third_party/forked/golang/reflect" 556 | ] 557 | revision = "e386b2658ed20923da8cc9250e552f082899a1ee" 558 | 559 | [[projects]] 560 | name = "k8s.io/apiserver" 561 | packages = [ 562 | "pkg/apis/audit", 563 | "pkg/authentication/authenticator", 564 | "pkg/authentication/serviceaccount", 565 | "pkg/authentication/user", 566 | "pkg/endpoints/request", 567 | "pkg/features", 568 | "pkg/util/feature", 569 | "pkg/util/flag", 570 | "pkg/util/trace" 571 | ] 572 | revision = "1bfbd2d59262936b587dc0611082edaee5218df2" 573 | version = "kubernetes-1.10.5" 574 | 575 | [[projects]] 576 | name = "k8s.io/autoscaler" 577 | packages = [ 578 | "cluster-autoscaler/simulator", 579 | "cluster-autoscaler/utils/deletetaint", 580 | "cluster-autoscaler/utils/drain", 581 | "cluster-autoscaler/utils/errors", 582 | "cluster-autoscaler/utils/glogx", 583 | "cluster-autoscaler/utils/kubernetes", 584 | "cluster-autoscaler/utils/scheduler" 585 | ] 586 | revision = "a909d0cc3695148f097b56e62f052063d354ee99" 587 | version = "cluster-autoscaler-1.2.2" 588 | 589 | [[projects]] 590 | name = "k8s.io/client-go" 591 | packages = [ 592 | "discovery", 593 | "discovery/fake", 594 | "dynamic", 595 | "informers", 596 | "informers/admissionregistration", 597 | "informers/admissionregistration/v1alpha1", 598 | "informers/admissionregistration/v1beta1", 599 | "informers/apps", 600 | "informers/apps/v1", 601 | "informers/apps/v1beta1", 602 | "informers/apps/v1beta2", 603 | "informers/autoscaling", 604 | "informers/autoscaling/v1", 605 | "informers/autoscaling/v2beta1", 606 | "informers/batch", 607 | "informers/batch/v1", 
608 | "informers/batch/v1beta1", 609 | "informers/batch/v2alpha1", 610 | "informers/certificates", 611 | "informers/certificates/v1beta1", 612 | "informers/core", 613 | "informers/core/v1", 614 | "informers/events", 615 | "informers/events/v1beta1", 616 | "informers/extensions", 617 | "informers/extensions/v1beta1", 618 | "informers/internalinterfaces", 619 | "informers/networking", 620 | "informers/networking/v1", 621 | "informers/policy", 622 | "informers/policy/v1beta1", 623 | "informers/rbac", 624 | "informers/rbac/v1", 625 | "informers/rbac/v1alpha1", 626 | "informers/rbac/v1beta1", 627 | "informers/scheduling", 628 | "informers/scheduling/v1alpha1", 629 | "informers/settings", 630 | "informers/settings/v1alpha1", 631 | "informers/storage", 632 | "informers/storage/v1", 633 | "informers/storage/v1alpha1", 634 | "informers/storage/v1beta1", 635 | "kubernetes", 636 | "kubernetes/fake", 637 | "kubernetes/scheme", 638 | "kubernetes/typed/admissionregistration/v1alpha1", 639 | "kubernetes/typed/admissionregistration/v1alpha1/fake", 640 | "kubernetes/typed/admissionregistration/v1beta1", 641 | "kubernetes/typed/admissionregistration/v1beta1/fake", 642 | "kubernetes/typed/apps/v1", 643 | "kubernetes/typed/apps/v1/fake", 644 | "kubernetes/typed/apps/v1beta1", 645 | "kubernetes/typed/apps/v1beta1/fake", 646 | "kubernetes/typed/apps/v1beta2", 647 | "kubernetes/typed/apps/v1beta2/fake", 648 | "kubernetes/typed/authentication/v1", 649 | "kubernetes/typed/authentication/v1/fake", 650 | "kubernetes/typed/authentication/v1beta1", 651 | "kubernetes/typed/authentication/v1beta1/fake", 652 | "kubernetes/typed/authorization/v1", 653 | "kubernetes/typed/authorization/v1/fake", 654 | "kubernetes/typed/authorization/v1beta1", 655 | "kubernetes/typed/authorization/v1beta1/fake", 656 | "kubernetes/typed/autoscaling/v1", 657 | "kubernetes/typed/autoscaling/v1/fake", 658 | "kubernetes/typed/autoscaling/v2beta1", 659 | "kubernetes/typed/autoscaling/v2beta1/fake", 660 | "kubernetes/typed/batch/v1", 661 | "kubernetes/typed/batch/v1/fake", 662 | "kubernetes/typed/batch/v1beta1", 663 | "kubernetes/typed/batch/v1beta1/fake", 664 | "kubernetes/typed/batch/v2alpha1", 665 | "kubernetes/typed/batch/v2alpha1/fake", 666 | "kubernetes/typed/certificates/v1beta1", 667 | "kubernetes/typed/certificates/v1beta1/fake", 668 | "kubernetes/typed/core/v1", 669 | "kubernetes/typed/core/v1/fake", 670 | "kubernetes/typed/events/v1beta1", 671 | "kubernetes/typed/events/v1beta1/fake", 672 | "kubernetes/typed/extensions/v1beta1", 673 | "kubernetes/typed/extensions/v1beta1/fake", 674 | "kubernetes/typed/networking/v1", 675 | "kubernetes/typed/networking/v1/fake", 676 | "kubernetes/typed/policy/v1beta1", 677 | "kubernetes/typed/policy/v1beta1/fake", 678 | "kubernetes/typed/rbac/v1", 679 | "kubernetes/typed/rbac/v1/fake", 680 | "kubernetes/typed/rbac/v1alpha1", 681 | "kubernetes/typed/rbac/v1alpha1/fake", 682 | "kubernetes/typed/rbac/v1beta1", 683 | "kubernetes/typed/rbac/v1beta1/fake", 684 | "kubernetes/typed/scheduling/v1alpha1", 685 | "kubernetes/typed/scheduling/v1alpha1/fake", 686 | "kubernetes/typed/settings/v1alpha1", 687 | "kubernetes/typed/settings/v1alpha1/fake", 688 | "kubernetes/typed/storage/v1", 689 | "kubernetes/typed/storage/v1/fake", 690 | "kubernetes/typed/storage/v1alpha1", 691 | "kubernetes/typed/storage/v1alpha1/fake", 692 | "kubernetes/typed/storage/v1beta1", 693 | "kubernetes/typed/storage/v1beta1/fake", 694 | "listers/admissionregistration/v1alpha1", 695 | "listers/admissionregistration/v1beta1", 696 | 
"listers/apps/v1", 697 | "listers/apps/v1beta1", 698 | "listers/apps/v1beta2", 699 | "listers/autoscaling/v1", 700 | "listers/autoscaling/v2beta1", 701 | "listers/batch/v1", 702 | "listers/batch/v1beta1", 703 | "listers/batch/v2alpha1", 704 | "listers/certificates/v1beta1", 705 | "listers/core/v1", 706 | "listers/events/v1beta1", 707 | "listers/extensions/v1beta1", 708 | "listers/networking/v1", 709 | "listers/policy/v1beta1", 710 | "listers/rbac/v1", 711 | "listers/rbac/v1alpha1", 712 | "listers/rbac/v1beta1", 713 | "listers/scheduling/v1alpha1", 714 | "listers/settings/v1alpha1", 715 | "listers/storage/v1", 716 | "listers/storage/v1alpha1", 717 | "listers/storage/v1beta1", 718 | "pkg/apis/clientauthentication", 719 | "pkg/apis/clientauthentication/v1alpha1", 720 | "pkg/version", 721 | "plugin/pkg/client/auth/exec", 722 | "rest", 723 | "rest/watch", 724 | "scale", 725 | "scale/scheme", 726 | "scale/scheme/appsint", 727 | "scale/scheme/appsv1beta1", 728 | "scale/scheme/appsv1beta2", 729 | "scale/scheme/autoscalingv1", 730 | "scale/scheme/extensionsint", 731 | "scale/scheme/extensionsv1beta1", 732 | "testing", 733 | "third_party/forked/golang/template", 734 | "tools/auth", 735 | "tools/cache", 736 | "tools/clientcmd", 737 | "tools/clientcmd/api", 738 | "tools/clientcmd/api/latest", 739 | "tools/clientcmd/api/v1", 740 | "tools/leaderelection", 741 | "tools/leaderelection/resourcelock", 742 | "tools/metrics", 743 | "tools/pager", 744 | "tools/record", 745 | "tools/reference", 746 | "tools/remotecommand", 747 | "transport", 748 | "transport/spdy", 749 | "util/buffer", 750 | "util/cert", 751 | "util/exec", 752 | "util/flowcontrol", 753 | "util/homedir", 754 | "util/integer", 755 | "util/jsonpath", 756 | "util/retry", 757 | "util/workqueue" 758 | ] 759 | revision = "23781f4d6632d88e869066eaebb743857aa1ef9b" 760 | version = "v7.0.0" 761 | 762 | [[projects]] 763 | branch = "master" 764 | name = "k8s.io/kube-openapi" 765 | packages = [ 766 | "pkg/util/proto", 767 | "pkg/util/proto/validation" 768 | ] 769 | revision = "d83b052f768a50a309c692a9c271da3f3276ff88" 770 | 771 | [[projects]] 772 | name = "k8s.io/kubernetes" 773 | packages = [ 774 | "pkg/api/events", 775 | "pkg/api/legacyscheme", 776 | "pkg/api/pod", 777 | "pkg/api/ref", 778 | "pkg/api/resource", 779 | "pkg/api/service", 780 | "pkg/api/v1/pod", 781 | "pkg/apis/admissionregistration", 782 | "pkg/apis/admissionregistration/install", 783 | "pkg/apis/admissionregistration/v1alpha1", 784 | "pkg/apis/admissionregistration/v1beta1", 785 | "pkg/apis/apps", 786 | "pkg/apis/apps/install", 787 | "pkg/apis/apps/v1", 788 | "pkg/apis/apps/v1beta1", 789 | "pkg/apis/apps/v1beta2", 790 | "pkg/apis/authentication", 791 | "pkg/apis/authentication/install", 792 | "pkg/apis/authentication/v1", 793 | "pkg/apis/authentication/v1beta1", 794 | "pkg/apis/authorization", 795 | "pkg/apis/authorization/install", 796 | "pkg/apis/authorization/v1", 797 | "pkg/apis/authorization/v1beta1", 798 | "pkg/apis/autoscaling", 799 | "pkg/apis/autoscaling/install", 800 | "pkg/apis/autoscaling/v1", 801 | "pkg/apis/autoscaling/v2beta1", 802 | "pkg/apis/batch", 803 | "pkg/apis/batch/install", 804 | "pkg/apis/batch/v1", 805 | "pkg/apis/batch/v1beta1", 806 | "pkg/apis/batch/v2alpha1", 807 | "pkg/apis/certificates", 808 | "pkg/apis/certificates/install", 809 | "pkg/apis/certificates/v1beta1", 810 | "pkg/apis/componentconfig", 811 | "pkg/apis/componentconfig/install", 812 | "pkg/apis/componentconfig/v1alpha1", 813 | "pkg/apis/core", 814 | "pkg/apis/core/helper", 815 | 
"pkg/apis/core/helper/qos", 816 | "pkg/apis/core/install", 817 | "pkg/apis/core/pods", 818 | "pkg/apis/core/v1", 819 | "pkg/apis/core/v1/helper", 820 | "pkg/apis/core/v1/helper/qos", 821 | "pkg/apis/core/validation", 822 | "pkg/apis/events", 823 | "pkg/apis/events/install", 824 | "pkg/apis/events/v1beta1", 825 | "pkg/apis/extensions", 826 | "pkg/apis/extensions/install", 827 | "pkg/apis/extensions/v1beta1", 828 | "pkg/apis/networking", 829 | "pkg/apis/networking/install", 830 | "pkg/apis/networking/v1", 831 | "pkg/apis/policy", 832 | "pkg/apis/policy/install", 833 | "pkg/apis/policy/v1beta1", 834 | "pkg/apis/rbac", 835 | "pkg/apis/rbac/install", 836 | "pkg/apis/rbac/v1", 837 | "pkg/apis/rbac/v1alpha1", 838 | "pkg/apis/rbac/v1beta1", 839 | "pkg/apis/scheduling", 840 | "pkg/apis/scheduling/install", 841 | "pkg/apis/scheduling/v1alpha1", 842 | "pkg/apis/settings", 843 | "pkg/apis/settings/install", 844 | "pkg/apis/settings/v1alpha1", 845 | "pkg/apis/storage", 846 | "pkg/apis/storage/install", 847 | "pkg/apis/storage/util", 848 | "pkg/apis/storage/v1", 849 | "pkg/apis/storage/v1alpha1", 850 | "pkg/apis/storage/v1beta1", 851 | "pkg/capabilities", 852 | "pkg/client/clientset_generated/internalclientset", 853 | "pkg/client/clientset_generated/internalclientset/scheme", 854 | "pkg/client/clientset_generated/internalclientset/typed/admissionregistration/internalversion", 855 | "pkg/client/clientset_generated/internalclientset/typed/apps/internalversion", 856 | "pkg/client/clientset_generated/internalclientset/typed/authentication/internalversion", 857 | "pkg/client/clientset_generated/internalclientset/typed/authorization/internalversion", 858 | "pkg/client/clientset_generated/internalclientset/typed/autoscaling/internalversion", 859 | "pkg/client/clientset_generated/internalclientset/typed/batch/internalversion", 860 | "pkg/client/clientset_generated/internalclientset/typed/certificates/internalversion", 861 | "pkg/client/clientset_generated/internalclientset/typed/core/internalversion", 862 | "pkg/client/clientset_generated/internalclientset/typed/events/internalversion", 863 | "pkg/client/clientset_generated/internalclientset/typed/extensions/internalversion", 864 | "pkg/client/clientset_generated/internalclientset/typed/networking/internalversion", 865 | "pkg/client/clientset_generated/internalclientset/typed/policy/internalversion", 866 | "pkg/client/clientset_generated/internalclientset/typed/rbac/internalversion", 867 | "pkg/client/clientset_generated/internalclientset/typed/scheduling/internalversion", 868 | "pkg/client/clientset_generated/internalclientset/typed/settings/internalversion", 869 | "pkg/client/clientset_generated/internalclientset/typed/storage/internalversion", 870 | "pkg/client/leaderelectionconfig", 871 | "pkg/cloudprovider", 872 | "pkg/controller", 873 | "pkg/controller/daemon", 874 | "pkg/controller/daemon/util", 875 | "pkg/controller/deployment/util", 876 | "pkg/controller/history", 877 | "pkg/controller/statefulset", 878 | "pkg/controller/volume/events", 879 | "pkg/controller/volume/persistentvolume", 880 | "pkg/controller/volume/persistentvolume/metrics", 881 | "pkg/credentialprovider", 882 | "pkg/features", 883 | "pkg/fieldpath", 884 | "pkg/kubectl", 885 | "pkg/kubectl/apps", 886 | "pkg/kubectl/categories", 887 | "pkg/kubectl/cmd/templates", 888 | "pkg/kubectl/cmd/util", 889 | "pkg/kubectl/cmd/util/openapi", 890 | "pkg/kubectl/cmd/util/openapi/validation", 891 | "pkg/kubectl/plugins", 892 | "pkg/kubectl/resource", 893 | "pkg/kubectl/scheme", 894 | "pkg/kubectl/util", 
895 | "pkg/kubectl/util/hash", 896 | "pkg/kubectl/util/slice", 897 | "pkg/kubectl/util/term", 898 | "pkg/kubectl/util/transport", 899 | "pkg/kubectl/validation", 900 | "pkg/kubelet/apis", 901 | "pkg/kubelet/types", 902 | "pkg/master/ports", 903 | "pkg/printers", 904 | "pkg/printers/internalversion", 905 | "pkg/registry/rbac/validation", 906 | "pkg/scheduler", 907 | "pkg/scheduler/algorithm", 908 | "pkg/scheduler/algorithm/predicates", 909 | "pkg/scheduler/algorithm/priorities", 910 | "pkg/scheduler/algorithm/priorities/util", 911 | "pkg/scheduler/algorithmprovider", 912 | "pkg/scheduler/algorithmprovider/defaults", 913 | "pkg/scheduler/api", 914 | "pkg/scheduler/api/validation", 915 | "pkg/scheduler/core", 916 | "pkg/scheduler/factory", 917 | "pkg/scheduler/metrics", 918 | "pkg/scheduler/schedulercache", 919 | "pkg/scheduler/util", 920 | "pkg/scheduler/volumebinder", 921 | "pkg/security/apparmor", 922 | "pkg/serviceaccount", 923 | "pkg/util/file", 924 | "pkg/util/goroutinemap", 925 | "pkg/util/goroutinemap/exponentialbackoff", 926 | "pkg/util/hash", 927 | "pkg/util/interrupt", 928 | "pkg/util/io", 929 | "pkg/util/labels", 930 | "pkg/util/metrics", 931 | "pkg/util/mount", 932 | "pkg/util/net/sets", 933 | "pkg/util/node", 934 | "pkg/util/nsenter", 935 | "pkg/util/parsers", 936 | "pkg/util/pointer", 937 | "pkg/util/slice", 938 | "pkg/util/taints", 939 | "pkg/version", 940 | "pkg/volume", 941 | "pkg/volume/util", 942 | "pkg/volume/util/fs", 943 | "pkg/volume/util/recyclerclient", 944 | "pkg/volume/util/types" 945 | ] 946 | revision = "32ac1c9073b132b8ba18aa830f46b77dcceb0723" 947 | version = "v1.10.5" 948 | 949 | [[projects]] 950 | branch = "master" 951 | name = "k8s.io/utils" 952 | packages = ["exec"] 953 | revision = "733eca437aa39379e4bcc25e726439dfca40fcff" 954 | 955 | [[projects]] 956 | branch = "master" 957 | name = "vbom.ml/util" 958 | packages = ["sortorder"] 959 | revision = "256737ac55c46798123f754ab7d2c784e2c71783" 960 | 961 | [solve-meta] 962 | analyzer-name = "dep" 963 | analyzer-version = 1 964 | inputs-digest = "b919b5572a20e63d1e4012b4f068cc3aaf040e12987f856d28224508f22417ad" 965 | solver-name = "gps-cdcl" 966 | solver-version = 1 967 | -------------------------------------------------------------------------------- /Gopkg.toml: -------------------------------------------------------------------------------- 1 | # Gopkg.toml example 2 | # 3 | # Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md 4 | # for detailed Gopkg.toml documentation. 
5 | # 6 | # required = ["github.com/user/thing/cmd/thing"] 7 | # ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"] 8 | # 9 | # [[constraint]] 10 | # name = "github.com/user/project" 11 | # version = "1.0.0" 12 | # 13 | # [[constraint]] 14 | # name = "github.com/user/project2" 15 | # branch = "dev" 16 | # source = "github.com/myfork/project2" 17 | # 18 | # [[override]] 19 | # name = "github.com/x/y" 20 | # version = "2.4.0" 21 | 22 | [[constraint]] 23 | name = "k8s.io/kubernetes" 24 | version = "1.10.0" 25 | 26 | [[constraint]] 27 | name = "k8s.io/client-go" 28 | version = "7.0.0" 29 | 30 | [[constraint]] 31 | name = "k8s.io/api" 32 | version = "kubernetes-1.10.5" 33 | 34 | [[constraint]] 35 | name = "k8s.io/apimachinery" 36 | version = "kubernetes-1.10.5" 37 | 38 | [[override]] 39 | name = "k8s.io/apiserver" 40 | version = "kubernetes-1.10.5" 41 | 42 | [[constraint]] 43 | name = "k8s.io/autoscaler" 44 | version = "cluster-autoscaler-1.2.2" 45 | 46 | [[override]] 47 | name = "github.com/docker/distribution" 48 | revision = "f0cc927784781fa395c06317c58dea2841ece3a9" 49 | 50 | [[override]] 51 | name = "github.com/json-iterator/go" 52 | revision = "f2b4162afba35581b6d4a50d3b8f34e33c144682" 53 | 54 | [[override]] 55 | name = "github.com/russross/blackfriday" 56 | revision = "300106c228d52c8941d4b3de6054a6062a86dda3" 57 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | include .env 2 | BINARY := k8s-spot-rescheduler 3 | VERSION := $(shell git describe --always --dirty --tags 2>/dev/null || echo "undefined") 4 | 5 | RED := \033[31m 6 | GREEN := \033[32m 7 | NC := \033[0m 8 | 9 | IMG ?= quay.io/pusher/k8s-spot-rescheduler 10 | 11 | .NOTPARALLEL: 12 | 13 | .PHONY: all 14 | all: distclean test build 15 | 16 | .PHONY: build 17 | build: clean $(BINARY) 18 | 19 | .PHONY: clean 20 | clean: 21 | rm -f $(BINARY) 22 | 23 | .PHONY: distclean 24 | distclean: clean 25 | rm -rf vendor 26 | rm -rf release 27 | 28 | .PHONY: fmt 29 | fmt: 30 | $(GO) fmt ./... 31 | 32 | .PHONY: vet 33 | vet: vendor 34 | $(GO) vet ./... 35 | 36 | .PHONY: lint 37 | lint: vendor 38 | @ echo "$(GREEN)Linting code$(NC)" 39 | $(LINTER) run --disable-all \ 40 | --exclude-use-default=false \ 41 | --enable=govet \ 42 | --enable=ineffassign \ 43 | --enable=deadcode \ 44 | --enable=golint \ 45 | --enable=goconst \ 46 | --enable=gofmt \ 47 | --enable=goimports \ 48 | --skip-dirs=pkg/client/ \ 49 | --deadline=120s \ 50 | --tests ./... 51 | @ echo 52 | 53 | vendor: 54 | @ echo "$(GREEN)Pulling dependencies$(NC)" 55 | $(DEP) ensure --vendor-only 56 | @ echo 57 | 58 | .PHONY: test 59 | test: vendor 60 | @ echo "$(GREEN)Running test suite$(NC)" 61 | $(GO) test ./... 62 | @ echo 63 | 64 | .PHONY: check 65 | check: fmt lint vet test 66 | 67 | .PHONY: build 68 | build: clean $(BINARY) 69 | 70 | $(BINARY): fmt vet 71 | CGO_ENABLED=0 $(GO) build -o $(BINARY) -ldflags="-X main.VERSION=${VERSION}" github.com/pusher/k8s-spot-rescheduler 72 | 73 | .PHONY: docker-build 74 | docker-build: check 75 | docker build --build-arg VERSION=${VERSION} . -t ${IMG}:${VERSION} 76 | @echo "$(GREEN)Built $(IMG):$(VERSION)$(NC)" 77 | 78 | TAGS ?= latest 79 | .PHONY: docker-tag 80 | docker-tag: docker-build 81 | @IFS=","; tags=${TAGS}; for tag in $${tags}; do docker tag ${IMG}:${VERSION} ${IMG}:$${tag}; echo "$(GREEN)Tagged $(IMG):$(VERSION) as $${tag}$(NC)"; done 82 | 83 | PUSH_TAGS ?= ${VERSION}, latest 84 | .PHONY: docker-push 85 | docker-push: docker-build docker-tag 86 | @IFS=","; tags=${PUSH_TAGS}; for tag in $${tags}; do docker push ${IMG}:$${tag}; echo "$(GREEN)Pushed $(IMG):$${tag}$(NC)"; done 87 | 88 | TAGS ?= latest 89 | .PHONY: docker-clean 90 | docker-clean: 91 | @IFS=","; tags=${TAGS}; for tag in $${tags}; do docker rmi -f ${IMG}:${VERSION} ${IMG}:$${tag}; done 92 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # K8s Spot Rescheduler 2 | 3 | > NOTE: this repository is currently **UNMAINTAINED** and is looking for new owner(s). 4 | > See [#74](/../../issues/74) for more information. 5 | 6 | ## Table of contents 7 | * [Introduction](#introduction) 8 | * [Motivation](#motivation) 9 | * [Usage](#usage) 10 | * [Scope of the project](#scope-of-the-project) 11 | * [Operating logic](#operating-logic) 12 | * [Related](#related) 13 | * [Communication](#communication) 14 | * [Contributing](#contributing) 15 | * [License](#license) 16 | 17 | ## Introduction 18 | 19 | K8s Spot rescheduler is a tool that tries to reduce load on a set of Kubernetes nodes. 
It was designed with the purpose of moving Pods scheduled on AWS on-demand instances to AWS spot instances to allow the on-demand instances to be safely scaled down (by the [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler)). 20 | 21 | In reality, the rescheduler can be used to remove load from any group of nodes onto a different group of nodes. They just need to be labelled appropriately. 22 | 23 | For example, it could also be used to allow controller nodes to take up slack while new nodes are being scaled up, and then to reschedule those pods when the new capacity becomes available, thus reducing the load on the controllers once again. 24 | 25 | ## Attribution 26 | This project was inspired by the [Critical Pod Rescheduler](https://github.com/kubernetes/contrib/tree/master/rescheduler) and takes portions of code from both the [Critical Pod Rescheduler](https://github.com/kubernetes/contrib/tree/master/rescheduler) and the [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler). 27 | 28 | ## Motivation 29 | 30 | AWS spot instances are a great way to reduce your infrastructure running costs. They do, however, come with a significant drawback: at any point, the spot price for the instances you are using could rise above your bid and your instances will be terminated. To solve this problem, you can use an AutoScaling group backed by on-demand instances and managed by the [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) to take up the slack when spot instances are removed from your cluster. 31 | 32 | The problem, however, comes when the spot price drops and you are given new spot instances back into your cluster. At this point you are left with empty spot instances and full, expensive on-demand instances. 33 | 34 | By tainting the on-demand instances with the Kubernetes `PreferNoSchedule` taint, we can ensure that, if at any point the scheduler needs to choose between spot and on-demand instances, it will choose the preferred spot instances to schedule the new Pods onto. 35 | 36 | However, the scheduler won't reschedule Pods that are already running on on-demand instances, blocking them from being scaled down. At this point, the K8s Spot Rescheduler is required to start the process of moving Pods from the on-demand instances back onto the spot instances. 37 | 38 | ## Usage 39 | 40 | ### Deploy to Kubernetes 41 | A Docker image is available at `quay.io/pusher/k8s-spot-rescheduler`. 42 | These images are currently built on pushes to master. Releases will be tagged as and when they are made. 43 | 44 | Sample Kubernetes manifests are available in the [deploy](deploy/) folder. 45 | 46 | To deploy in clusters using RBAC, please apply all of the manifests (Deployment, ClusterRole, ClusterRoleBinding and ServiceAccount) in the [deploy](deploy/) folder and uncomment the `serviceAccountName` in the [deployment](deploy/deployment.yaml). 47 | 48 | #### Requirements 49 | 50 | For the K8s Spot Rescheduler to process nodes as expected, you will need identifying labels which can be passed to the program to allow it to distinguish which nodes it should consider as on-demand and which it should consider as spot instances. 51 | 52 | For instance, you could add labels `node-role.kubernetes.io/worker` and `node-role.kubernetes.io/spot-worker` to your on-demand and spot instances respectively. 
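One way to apply these labels to existing nodes is with `kubectl`; a minimal sketch, where the node names are placeholders:

```bash
# Label an existing on-demand node and an existing spot node
kubectl label node <on-demand-node-name> node-role.kubernetes.io/worker=true
kubectl label node <spot-node-name> node-role.kubernetes.io/spot-worker=true
```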
53 | 54 | You should also add the `PreferNoSchedule` taint to your on-demand instances to ensure that the scheduler prefers spot instances when making its scheduling decisions. 55 | 56 | For example, you could add the following flags to your Kubelet: 57 | ``` 58 | --register-with-taints="node-role.kubernetes.io/worker=true:PreferNoSchedule" 59 | --node-labels="node-role.kubernetes.io/worker=true" 60 | ``` 61 | 62 | ### Building 63 | If you wish to build the binary yourself, first make sure you have Go installed and set up. Then clone this repo into your `$GOPATH` and download the dependencies using [`dep`](https://github.com/golang/dep). 64 | 65 | ```bash 66 | cd $GOPATH/src/github.com # Create this directory if it doesn't exist 67 | git clone git@github.com:pusher/k8s-spot-rescheduler pusher/k8s-spot-rescheduler 68 | dep ensure -v # Installs dependencies to vendor folder. 69 | ``` 70 | 71 | Then build the code using `go build`, which will produce the built binary in a file `k8s-spot-rescheduler`. 72 | 73 | ### Flags 74 | `-v` (default: 0): The log verbosity level the program should run in, currently numeric with values between 2 & 4; `-v=2` is recommended. 75 | 76 | `--running-in-cluster` (default: `true`): Optional. If this controller is running in a Kubernetes cluster, use the pod secrets for creating a Kubernetes client. 77 | 78 | `--namespace` (default: `kube-system`): Namespace in which k8s-spot-rescheduler is run. 79 | 80 | `--kube-api-content-type` (default: `application/vnd.kubernetes.protobuf`): Content type of requests sent to apiserver. 81 | 82 | `--housekeeping-interval` (default: 10s): How often the rescheduler takes actions. 83 | 84 | `--node-drain-delay` (default: 10m): How long the rescheduler should wait between draining nodes. 85 | 86 | `--pod-eviction-timeout` (default: 2m): How long the rescheduler should attempt to retrieve successful pod evictions for. 87 | 88 | `--max-graceful-termination` (default: 2m): How long the rescheduler should wait for pods to shut down gracefully before failing the node drain attempt. 89 | 90 | `--listen-address` (default: `localhost:9235`): Address to listen on for serving Prometheus metrics. 91 | 92 | `--on-demand-node-label` (default: `node-role.kubernetes.io/worker`): Name of the label on nodes to be considered for draining. 93 | 94 | `--spot-node-label` (default: `node-role.kubernetes.io/spot-worker`): Name of the label on nodes to be considered as targets for pods. 95 | 96 | `--delete-non-replicated-pods` (default: `false`): Delete non-replicated pods running on on-demand instances. Note that some non-replicated pods will not be rescheduled. 
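As an illustration, a minimal sketch of running the binary locally with some of these flags (the label values shown are simply the defaults listed above):

```bash
# Watch the cluster pointed to by your local kubeconfig, from outside the cluster
./k8s-spot-rescheduler \
  --running-in-cluster=false \
  -v=2 \
  --housekeeping-interval=10s \
  --on-demand-node-label=node-role.kubernetes.io/worker \
  --spot-node-label=node-role.kubernetes.io/spot-worker
```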
97 | 98 | ## Scope of the project 99 | ### Does 100 | * Looks for Pods on on-demand instances 101 | * Looks for space for Pods on spot instances 102 | * Checks the following [predicates](https://github.com/kubernetes/kubernetes/blob/v1.8.0-alpha.3/plugin/pkg/scheduler/algorithm/predicates/predicates.go) when determining whether a pod can be moved: 103 | * CheckNodeMemoryPressure 104 | * CheckNodeDiskPressure 105 | * GeneralPredicates 106 | * MaxAzureDiskVolumeCount 107 | * MaxGCEPDVolumeCount 108 | * NoDiskConflict 109 | * MatchInterPodAffinity 110 | * PodToleratesNodeTaints 111 | * MaxEBSVolumeCount 112 | * NoVolumeZoneConflict 113 | * ready 114 | * Checks whether there is enough capacity to move all pods on the on-demand node to spot nodes 115 | * Evicts all pods on the node if the previous check passes 116 | * Leaves the node in a schedulable state - in case its capacity is required again 117 | 118 | 119 | ### Does not 120 | * Schedule pods (the default scheduler handles this) 121 | * Scale down empty nodes on your cloud provider (try the [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler)) 122 | 123 | ## Operating logic 124 | 125 | The rescheduler logic roughly follows the steps below: 126 | 127 | 1. Gets a list of on-demand and spot nodes and their respective Pods 128 | * Builds a map of nodeInfo structs 129 | * Add node to struct 130 | * Add pods for that node to struct 131 | * Add requested and free CPU fields to struct 132 | * Map these structs based on whether they are on-demand or spot instances. 133 | * Sort on-demand instances by least requested CPU 134 | * Sort spot instances by most free CPU 135 | 2. Iterate through each on-demand node and try to drain it 136 | * Iterate through each pod 137 | * Determine if a spot node has space for the pod 138 | * Add the pod to the prospective spot node 139 | * Move on to the next node if no spot node space is available 140 | * Drain the node 141 | * Iterate through pods and evict them in turn 142 | * Evict pod 143 | * Wait for deletion and reschedule 144 | * Cancel all further processing 145 | 146 | This process is repeated every `housekeeping-interval` seconds. 147 | 148 | The effect of this algorithm should be that we take the emptiest nodes first and empty those before we empty a node which is busier, thus resulting in the highest number of 'empty' nodes that can be removed from the cluster. 149 | 150 | ## Related 151 | - [K8s Spot Termination Handler](https://github.com/pusher/k8s-spot-termination-handler): Gracefully drain spot instances when they are issued with a termination notice. 152 | 153 | ## Communication 154 | 155 | * Found a bug? Please open an issue. 156 | * Have a feature request? Please open an issue. 157 | * If you want to contribute, please submit a pull request. 158 | 159 | ## Contributing 160 | Please see our [Contributing](CONTRIBUTING.md) guidelines. 161 | 162 | ## License 163 | This project is licensed under Apache 2.0 and a copy of the license is available [here](https://github.com/pusher/k8s-spot-rescheduler/blob/master/LICENSE). 
164 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if (( ${BASH_VERSION:0:1} < 4 )); then 4 | echo "This configure script requires bash 4" 5 | exit 1 6 | fi 7 | 8 | RED='\033[0;31m' 9 | GREEN='\033[0;32m' 10 | BLUE='\033[0;34m' 11 | NC='\033[0m' 12 | 13 | declare -A tools=() 14 | 15 | vercomp () { 16 | if [[ $1 == $2 ]] 17 | then 18 | return 0 19 | fi 20 | local IFS=. 21 | local i ver1=($1) ver2=($2) 22 | # fill empty fields in ver1 with zeros 23 | for ((i=${#ver1[@]}; i<${#ver2[@]}; i++)) 24 | do 25 | ver1[i]=0 26 | done 27 | for ((i=0; i<${#ver1[@]}; i++)) 28 | do 29 | if [[ -z ${ver2[i]} ]] 30 | then 31 | # fill empty fields in ver2 with zeros 32 | ver2[i]=0 33 | fi 34 | if ((10#${ver1[i]} > 10#${ver2[i]})) 35 | then 36 | return 1 37 | fi 38 | if ((10#${ver1[i]} < 10#${ver2[i]})) 39 | then 40 | return 2 41 | fi 42 | done 43 | return 0 44 | } 45 | 46 | check_for() { 47 | echo -n "Checking for $1... " 48 | TOOL_PATH=$(command -v $1) 49 | if ! [ -x "$TOOL_PATH" -a -f "$TOOL_PATH" ]; then 50 | printf "${RED}not found${NC}\n" 51 | cd - > /dev/null 52 | exit 1 53 | else 54 | printf "${GREEN}found${NC}\n" 55 | tools[$1]=$TOOL_PATH 56 | fi 57 | } 58 | 59 | check_go_env() { 60 | echo -n "Checking \$GOPATH... " 61 | if [ -z "$GOPATH" ]; then 62 | printf "${RED}invalid${NC} - GOPATH not set\n" 63 | exit 1 64 | fi 65 | printf "${GREEN}valid${NC} - $GOPATH\n" 66 | } 67 | 68 | check_go_version() { 69 | echo -n "Checking go version... " 70 | GO_VERSION=$(${tools[go]} version | ${tools[awk]} '{where = match($0, /[0-9]\.[0-9]+[\.0-9]*/); if (where != 0) print substr($0, RSTART, RLENGTH)}') 71 | vercomp $GO_VERSION 1.10 72 | case $? 
in 73 | 0) ;& 74 | 1) 75 | printf "${GREEN}" 76 | echo $GO_VERSION 77 | printf "${NC}" 78 | ;; 79 | 2) 80 | printf "${RED}" 81 | echo "$GO_VERSION < 1.10" 82 | exit 1 83 | ;; 84 | esac 85 | } 86 | 87 | cd ${0%/*} 88 | 89 | check_for make 90 | check_for awk 91 | check_for go 92 | check_for dep 93 | check_for golangci-lint 94 | check_for shasum 95 | check_go_env 96 | check_go_version 97 | 98 | cat <<- EOF > .env 99 | MAKE := ${tools[make]} 100 | SHASUM := ${tools[shasum]} 101 | GO := ${tools[go]} 102 | GOVERSION := $GO_VERSION 103 | DEP := ${tools[dep]} 104 | LINTER := ${tools[golangci-lint]} 105 | EOF 106 | 107 | echo "Environment configuration written to .env" 108 | -------------------------------------------------------------------------------- /deploy/clusterrole.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: ClusterRole 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | metadata: 5 | name: k8s-spot-rescheduler 6 | rules: 7 | # For leader election 8 | - apiGroups: 9 | - "" 10 | resources: 11 | - endpoints 12 | verbs: 13 | - get 14 | - update 15 | - create 16 | - apiGroups: 17 | - "" 18 | resources: 19 | - events 20 | verbs: 21 | - create 22 | - patch 23 | 24 | # For listing and watching items 25 | - apiGroups: 26 | - "" 27 | resources: 28 | - nodes 29 | - pods 30 | - replicasets 31 | - replicationcontrollers 32 | - services 33 | - statefulsets 34 | - poddisruptionbudgets 35 | - persistentvolumes 36 | - persistentvolumeclaims 37 | verbs: 38 | - list 39 | - get 40 | - watch 41 | - apiGroups: 42 | - apps 43 | resources: 44 | - statefulsets 45 | verbs: 46 | - list 47 | - get 48 | - watch 49 | - apiGroups: 50 | - extensions 51 | resources: 52 | - replicasets 53 | verbs: 54 | - list 55 | - get 56 | - watch 57 | - apiGroups: 58 | - policy 59 | resources: 60 | - poddisruptionbudgets 61 | verbs: 62 | - list 63 | - get 64 | - watch 65 | 66 | # For rescheduling pods 67 | - apiGroups: 68 | - "" 69 | resources: 70 | - nodes 71 | verbs: 72 | - update 73 | - apiGroups: 74 | - "" 75 | resources: 76 | - pods/eviction 77 | verbs: 78 | - create 79 | 80 | - apiGroups: 81 | - storage.k8s.io 82 | resources: 83 | - storageclasses 84 | verbs: 85 | - list 86 | - get 87 | - watch 88 | -------------------------------------------------------------------------------- /deploy/clusterrolebinding.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRoleBinding 4 | metadata: 5 | name: k8s-spot-rescheduler 6 | roleRef: 7 | apiGroup: rbac.authorization.k8s.io 8 | kind: ClusterRole 9 | name: k8s-spot-rescheduler 10 | subjects: 11 | - kind: ServiceAccount 12 | name: k8s-spot-rescheduler 13 | namespace: kube-system 14 | -------------------------------------------------------------------------------- /deploy/deployment.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: extensions/v1beta1 3 | kind: Deployment 4 | metadata: 5 | name: k8s-spot-rescheduler 6 | namespace: kube-system 7 | labels: 8 | app: k8s-spot-rescheduler 9 | spec: 10 | replicas: 2 11 | selector: 12 | matchLabels: 13 | app: k8s-spot-rescheduler 14 | template: 15 | metadata: 16 | labels: 17 | app: k8s-spot-rescheduler 18 | spec: 19 | # Uncomment the following line if using RBAC 20 | #serviceAccountName: k8s-spot-rescheduler 21 | containers: 22 | - image: quay.io/pusher/k8s-spot-rescheduler:v0.3.0 23 | name: k8s-spot-rescheduler 24 | resources: 25 
| limits: 26 | cpu: 20m 27 | memory: 100Mi 28 | requests: 29 | cpu: 10m 30 | memory: 50Mi 31 | command: 32 | - k8s-spot-rescheduler 33 | - -v=2 34 | - --running-in-cluster=true 35 | - --namespace=kube-system 36 | - --housekeeping-interval=10s 37 | - --node-drain-delay=10m 38 | - --pod-eviction-timeout=2m 39 | - --max-graceful-termination=2m 40 | - --listen-address=0.0.0.0:9235 41 | - --on-demand-node-label=node-role.kubernetes.io/worker 42 | - --spot-node-label=node-role.kubernetes.io/spot-worker 43 | ports: 44 | - name: http 45 | containerPort: 9235 46 | -------------------------------------------------------------------------------- /deploy/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | labels: 6 | k8s-app: k8s-spot-rescheduler 7 | name: k8s-spot-rescheduler 8 | namespace: kube-system 9 | -------------------------------------------------------------------------------- /metrics/metrics.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 Pusher Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package metrics 18 | 19 | import ( 20 | "github.com/prometheus/client_golang/prometheus" 21 | "github.com/pusher/k8s-spot-rescheduler/nodes" 22 | ) 23 | 24 | const ( 25 | reschedulerNamespace = "spot_rescheduler" 26 | ) 27 | 28 | var ( 29 | // nodePodsCount tracks how many pods are nodes by type and by node name. 30 | nodePodsCount = prometheus.NewCounterVec( 31 | prometheus.CounterOpts{ 32 | Namespace: reschedulerNamespace, 33 | Name: "node_pods_count", 34 | Help: "Number of pods on each node.", 35 | }, 36 | []string{"node_type", "node"}) 37 | 38 | // nodesCount tracks the number of nodes in the cluster. 39 | nodesCount = prometheus.NewGaugeVec( 40 | prometheus.GaugeOpts{ 41 | Namespace: reschedulerNamespace, 42 | Name: "nodes_count", 43 | Help: "Number of nodes in cluster.", 44 | }, []string{"node_type"}, 45 | ) 46 | 47 | // nodeDrainCount counts the number of nodes drained by the rescheduler. 
48 | nodeDrainCount = prometheus.NewCounterVec( 49 | prometheus.CounterOpts{ 50 | Namespace: reschedulerNamespace, 51 | Name: "node_drain_total", 52 | Help: "Number of nodes drained by rescheduler.", 53 | }, []string{"drain_state", "node"}, 54 | ) 55 | 56 | // evictionsCount counts the number of pods evicted by the rescheduler 57 | evictionsCount = prometheus.NewCounter( 58 | prometheus.CounterOpts{ 59 | Namespace: reschedulerNamespace, 60 | Name: "evicted_pods_total", 61 | Help: "Number of pods evicted by the rescheduler.", 62 | }, 63 | ) 64 | ) 65 | 66 | func init() { 67 | prometheus.MustRegister(nodePodsCount) 68 | prometheus.MustRegister(nodesCount) 69 | prometheus.MustRegister(nodeDrainCount) 70 | prometheus.MustRegister(evictionsCount) 71 | } 72 | 73 | // UpdateNodesMap updates the metrics calculated by the nodes map 74 | func UpdateNodesMap(nm nodes.Map) { 75 | if nm == nil { 76 | return 77 | } 78 | nodesCount.WithLabelValues(nodes.OnDemandNodeLabel).Set(float64(len(nm[nodes.OnDemand]))) 79 | nodesCount.WithLabelValues(nodes.SpotNodeLabel).Set(float64(len(nm[nodes.Spot]))) 80 | 81 | } 82 | 83 | // UpdateNodePodsCount updates nodePodsCount for a given node 84 | func UpdateNodePodsCount(nodeType string, nodeName string, numPods int) { 85 | nodePodsCount.WithLabelValues(nodeType, nodeName).Set(float64(numPods)) 86 | } 87 | 88 | // UpdateEvictionsCount adds 1 to the evictions counter 89 | func UpdateEvictionsCount() { 90 | evictionsCount.Add(1) 91 | } 92 | 93 | // UpdateNodeDrainCount updates the number drains and drain state for a node 94 | func UpdateNodeDrainCount(state string, nodeName string) { 95 | nodeDrainCount.WithLabelValues(state, nodeName).Add(1) 96 | } 97 | -------------------------------------------------------------------------------- /nodes/nodes.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 Pusher Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package nodes 18 | 19 | import ( 20 | "sort" 21 | "strings" 22 | 23 | apiv1 "k8s.io/api/core/v1" 24 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 25 | "k8s.io/apimachinery/pkg/fields" 26 | kube_client "k8s.io/client-go/kubernetes" 27 | ) 28 | 29 | var ( 30 | // OnDemandNodeLabel label for on-demand instances. 31 | OnDemandNodeLabel = "kubernetes.io/role=worker" 32 | // SpotNodeLabel label for spot instances. 33 | SpotNodeLabel = "kubernetes.io/role=spot-worker" 34 | // OnDemand key for on-demand instances of NodesMap. 35 | OnDemand NodeType 36 | // Spot key for spot instances of NodesMap. 37 | Spot NodeType = 1 38 | ) 39 | 40 | // NodeInfo struct containing node and it's pods as well information 41 | // resources on the node. 42 | type NodeInfo struct { 43 | Node *apiv1.Node 44 | Pods []*apiv1.Pod 45 | RequestedCPU int64 46 | FreeCPU int64 47 | } 48 | 49 | // NodeType integer key for keying NodesMap. 50 | type NodeType int 51 | 52 | // NodeInfoArray array of NodeInfo pointers. 
53 | type NodeInfoArray []*NodeInfo 54 | 55 | // Map map of NodeInfoArray. 56 | type Map map[NodeType]NodeInfoArray 57 | 58 | // NewNodeMap creates a new NodesMap from a list of Nodes. 59 | func NewNodeMap(client kube_client.Interface, nodes []*apiv1.Node) (Map, error) { 60 | nodeMap := Map{ 61 | OnDemand: make([]*NodeInfo, 0), 62 | Spot: make([]*NodeInfo, 0), 63 | } 64 | 65 | for _, node := range nodes { 66 | nodeInfo, err := newNodeInfo(client, node) 67 | if err != nil { 68 | return nil, err 69 | } 70 | 71 | // Sort pods with biggest CPU request first 72 | sort.Slice(nodeInfo.Pods, func(i, j int) bool { 73 | iCPU := getPodCPURequests(nodeInfo.Pods[i]) 74 | jCPU := getPodCPURequests(nodeInfo.Pods[j]) 75 | return iCPU > jCPU 76 | }) 77 | 78 | switch true { 79 | case isSpotNode(node): 80 | nodeMap[Spot] = append(nodeMap[Spot], nodeInfo) 81 | continue 82 | case isOnDemandNode(node): 83 | nodeMap[OnDemand] = append(nodeMap[OnDemand], nodeInfo) 84 | continue 85 | default: 86 | continue 87 | } 88 | } 89 | 90 | // Sort spot nodes by most requested CPU first 91 | sort.Slice(nodeMap[Spot], func(i, j int) bool { 92 | return nodeMap[Spot][i].RequestedCPU > nodeMap[Spot][j].RequestedCPU 93 | }) 94 | // Sort on-demand nodes by least requested CPU first 95 | sort.Slice(nodeMap[OnDemand], func(i, j int) bool { 96 | return nodeMap[OnDemand][i].RequestedCPU < nodeMap[OnDemand][j].RequestedCPU 97 | }) 98 | 99 | return nodeMap, nil 100 | } 101 | 102 | func newNodeInfo(client kube_client.Interface, node *apiv1.Node) (*NodeInfo, error) { 103 | pods, err := getPodsOnNode(client, node) 104 | if err != nil { 105 | return nil, err 106 | } 107 | requestedCPU := calculateRequestedCPU(pods) 108 | 109 | return &NodeInfo{ 110 | Node: node, 111 | Pods: pods, 112 | RequestedCPU: requestedCPU, 113 | FreeCPU: node.Status.Allocatable.Cpu().MilliValue() - requestedCPU, 114 | }, nil 115 | } 116 | 117 | // AddPod adds a pod to a NodeInfo and updates the relevant resource values. 118 | func (n *NodeInfo) AddPod(pod *apiv1.Pod) { 119 | n.Pods = append(n.Pods, pod) 120 | n.RequestedCPU = calculateRequestedCPU(n.Pods) 121 | n.FreeCPU = n.Node.Status.Allocatable.Cpu().MilliValue() - n.RequestedCPU 122 | } 123 | 124 | // Gets a list of pods that are running on the given node 125 | func getPodsOnNode(client kube_client.Interface, node *apiv1.Node) ([]*apiv1.Pod, error) { 126 | podsOnNode, err := client.CoreV1().Pods(apiv1.NamespaceAll).List( 127 | metav1.ListOptions{FieldSelector: fields.SelectorFromSet(fields.Set{"spec.nodeName": node.Name}).String()}) 128 | if err != nil { 129 | return []*apiv1.Pod{}, err 130 | } 131 | 132 | pods := make([]*apiv1.Pod, 0) 133 | for i := range podsOnNode.Items { 134 | pods = append(pods, &podsOnNode.Items[i]) 135 | } 136 | return pods, nil 137 | } 138 | 139 | // Works out requested CPU for a collection of pods and returns it in MilliValue 140 | // (Pod requests are stored as MilliValues hence the return type here) 141 | func calculateRequestedCPU(pods []*apiv1.Pod) int64 { 142 | var CPURequests int64 143 | for _, pod := range pods { 144 | CPURequests += getPodCPURequests(pod) 145 | } 146 | return CPURequests 147 | } 148 | 149 | // Returns the total requested CPU for all of the containers in a given Pod. 
150 | // (Returned as MilliValues) 151 | func getPodCPURequests(pod *apiv1.Pod) int64 { 152 | var CPUTotal int64 153 | for _, container := range pod.Spec.Containers { 154 | CPUTotal += container.Resources.Requests.Cpu().MilliValue() 155 | } 156 | return CPUTotal 157 | } 158 | 159 | // Determines if a node has the spotNodeLabel assigned 160 | func isSpotNode(node *apiv1.Node) bool { 161 | splitLabel := strings.SplitN(SpotNodeLabel, "=", 2) 162 | 163 | // If "=" found, check for new label schema. If no "=" is found, check for 164 | // old label schema 165 | switch len(splitLabel) { 166 | case 1: 167 | _, found := node.ObjectMeta.Labels[SpotNodeLabel] 168 | return found 169 | case 2: 170 | spotLabelKey := splitLabel[0] 171 | spotLabelVal := splitLabel[1] 172 | 173 | val, _ := node.ObjectMeta.Labels[spotLabelKey] 174 | if val == spotLabelVal { 175 | return true 176 | } 177 | } 178 | return false 179 | } 180 | 181 | // Determines if a node has the OnDemandNodeLabel assigned 182 | func isOnDemandNode(node *apiv1.Node) bool { 183 | splitLabel := strings.SplitN(OnDemandNodeLabel, "=", 2) 184 | 185 | // If "=" found, check for new label schema. If no "=" is found, check for 186 | // old label schema 187 | switch len(splitLabel) { 188 | case 1: 189 | _, found := node.ObjectMeta.Labels[OnDemandNodeLabel] 190 | return found 191 | case 2: 192 | onDemandLabelKey := splitLabel[0] 193 | onDemandLabelVal := splitLabel[1] 194 | 195 | val, _ := node.ObjectMeta.Labels[onDemandLabelKey] 196 | if val == onDemandLabelVal { 197 | return true 198 | } 199 | } 200 | return false 201 | } 202 | 203 | // CopyNodeInfos returns an array of copies of the NodeInfos in this array. 204 | func (n NodeInfoArray) CopyNodeInfos() NodeInfoArray { 205 | var arr NodeInfoArray 206 | for _, node := range n { 207 | nodeInfo := &NodeInfo{ 208 | Node: node.Node, 209 | Pods: node.Pods, 210 | RequestedCPU: node.RequestedCPU, 211 | FreeCPU: node.FreeCPU, 212 | } 213 | arr = append(arr, nodeInfo) 214 | } 215 | return arr 216 | } 217 | -------------------------------------------------------------------------------- /nodes/nodes_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 Pusher Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package nodes 18 | 19 | import ( 20 | "fmt" 21 | "testing" 22 | 23 | "github.com/stretchr/testify/assert" 24 | apiv1 "k8s.io/api/core/v1" 25 | "k8s.io/apimachinery/pkg/api/resource" 26 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 | "k8s.io/apimachinery/pkg/runtime" 28 | "k8s.io/client-go/kubernetes/fake" 29 | core "k8s.io/client-go/testing" 30 | ) 31 | 32 | func TestIsSpotNode(t *testing.T) { 33 | spotNode := createTestNodeWithLabel("fooSpotNode", 2000, map[string]string{"foo": "bar"}) 34 | 35 | SpotNodeLabel = "foo" 36 | assert.True(t, isSpotNode(spotNode), "expected node with label 'foo' to be spot node") 37 | 38 | SpotNodeLabel = "foo=bar" 39 | assert.True(t, isSpotNode(spotNode), "expected node with label 'foo' and value 'bar' to be spot node") 40 | 41 | SpotNodeLabel = "foo=baz" 42 | assert.False(t, isSpotNode(spotNode), "expected node with label 'foo' and value 'bar' to not be spot node") 43 | } 44 | 45 | func TestIsOnDemandNode(t *testing.T) { 46 | onDemandNode := createTestNodeWithLabel("fooDemandNode", 2000, map[string]string{"foo": "bar"}) 47 | 48 | OnDemandNodeLabel = "foo" 49 | assert.True(t, isOnDemandNode(onDemandNode), "expected node with label 'foo' to be on demand node") 50 | 51 | OnDemandNodeLabel = "foo=bar" 52 | assert.True(t, isOnDemandNode(onDemandNode), "expected node with label 'foo' and value 'bar' to be on demand node") 53 | 54 | OnDemandNodeLabel = "foo=baz" 55 | assert.False(t, isOnDemandNode(onDemandNode), "expected node with label 'foo' and value 'bar' to not be on demand node") 56 | } 57 | 58 | func TestNewNodeMap(t *testing.T) { 59 | OnDemandNodeLabel = "kubernetes.io/role=worker" 60 | SpotNodeLabel = "kubernetes.io/role=spot-worker" 61 | 62 | spotLabels := map[string]string{ 63 | "kubernetes.io/role": "spot-worker", 64 | } 65 | onDemandLabels := map[string]string{ 66 | "kubernetes.io/role": "worker", 67 | } 68 | 69 | nodes := []*apiv1.Node{ 70 | createTestNodeWithLabel("node1", 2000, onDemandLabels), 71 | createTestNodeWithLabel("node2", 2000, onDemandLabels), 72 | createTestNodeWithLabel("node3", 2000, spotLabels), 73 | createTestNodeWithLabel("node4", 2000, spotLabels), 74 | } 75 | 76 | fakeClient := createFakeClient(t) 77 | 78 | nodeMap, err := NewNodeMap(fakeClient, nodes) 79 | if err != nil { 80 | assert.Error(t, err, "Failed to build nodeMap") 81 | } 82 | onDemandNodeInfos := nodeMap[OnDemand] 83 | spotNodeInfos := nodeMap[Spot] 84 | 85 | assert.Equal(t, 2, len(onDemandNodeInfos)) 86 | assert.Equal(t, 2, len(spotNodeInfos)) 87 | 88 | // The first spot node should be the one with least requestedCPU 89 | nodeInfo1 := onDemandNodeInfos[0] 90 | nodeInfo2 := onDemandNodeInfos[1] 91 | if nodeInfo1.RequestedCPU > nodeInfo2.RequestedCPU { 92 | assert.Fail(t, "Spot nodes not sorted by Free CPU") 93 | } 94 | 95 | assert.Equal(t, "node1", nodeInfo1.Node.Name) 96 | assert.Equal(t, 2, len(nodeInfo1.Pods)) 97 | assert.Equal(t, "node2", nodeInfo2.Node.Name) 98 | assert.Equal(t, 3, len(nodeInfo2.Pods)) 99 | 100 | // The first spot node should be the one with least freeCPU 101 | nodeInfo3 := spotNodeInfos[0] 102 | nodeInfo4 := spotNodeInfos[1] 103 | if nodeInfo3.FreeCPU > nodeInfo4.FreeCPU { 104 | assert.Fail(t, "Spot nodes not sorted by Free CPU") 105 | } 106 | 107 | // This means we should get node3 and node2 in this order 108 | assert.Equal(t, "node4", nodeInfo3.Node.Name) 109 | assert.Equal(t, 5, len(nodeInfo3.Pods)) 110 | assert.Equal(t, "node3", nodeInfo4.Node.Name) 111 | assert.Equal(t, 2, len(nodeInfo4.Pods)) 112 | 113 | // Check pods 
are sorted by Most RequestedCPU 114 | for _, nodeInfo := range append(onDemandNodeInfos, spotNodeInfos...) { 115 | for i := 1; i < len(nodeInfo.Pods); i++ { 116 | firstPodRequest := getPodCPURequests(nodeInfo.Pods[i-1]) 117 | secondPodRequest := getPodCPURequests(nodeInfo.Pods[i]) 118 | if firstPodRequest < secondPodRequest { 119 | assert.Fail(t, "Pods not sorted by most requested CPU on node %s", nodeInfo.Node.Name) 120 | } 121 | } 122 | } 123 | 124 | } 125 | 126 | func TestAddPod(t *testing.T) { 127 | 128 | nodeInfo1 := createTestNodeInfo(createTestNode("node1", 2000), []*apiv1.Pod{}, 0) 129 | pod1 := createTestPod("pod1", 300) 130 | nodeInfo1.AddPod(pod1) 131 | 132 | assert.Equal(t, 1, len(nodeInfo1.Pods)) 133 | assert.Equal(t, int64(300), nodeInfo1.RequestedCPU) 134 | assert.Equal(t, int64(1700), nodeInfo1.FreeCPU) 135 | 136 | pod2 := createTestPod("pod2", 721) 137 | nodeInfo1.AddPod(pod2) 138 | 139 | assert.Equal(t, 2, len(nodeInfo1.Pods)) 140 | assert.Equal(t, int64(1021), nodeInfo1.RequestedCPU) 141 | assert.Equal(t, int64(979), nodeInfo1.FreeCPU) 142 | } 143 | 144 | func TestGetPodsOnNode(t *testing.T) { 145 | node1 := createTestNode("node1", 2000) 146 | node2 := createTestNode("node2", 2000) 147 | node3 := createTestNode("node3", 2000) 148 | node4 := createTestNode("node4", 2000) 149 | 150 | fakeClient := createFakeClient(t) 151 | 152 | podsOnNode1, err := getPodsOnNode(fakeClient, node1) 153 | if err != nil { 154 | assert.Error(t, err, "Found error in getting pods on node") 155 | } 156 | assert.Equal(t, 2, len(podsOnNode1)) 157 | assert.Equal(t, "p1n1", podsOnNode1[0].Name) 158 | assert.Equal(t, "p2n1", podsOnNode1[1].Name) 159 | 160 | podsOnNode2, err := getPodsOnNode(fakeClient, node2) 161 | if err != nil { 162 | assert.Error(t, err, "Found error in getting pods on node") 163 | } 164 | assert.Equal(t, 3, len(podsOnNode2)) 165 | assert.Equal(t, "p1n2", podsOnNode2[0].Name) 166 | assert.Equal(t, "p2n2", podsOnNode2[1].Name) 167 | assert.Equal(t, "p3n2", podsOnNode2[2].Name) 168 | 169 | podsOnNode3, err := getPodsOnNode(fakeClient, node3) 170 | if err != nil { 171 | assert.Error(t, err, "Found error in getting pods on node") 172 | } 173 | assert.Equal(t, 2, len(podsOnNode3)) 174 | assert.Equal(t, "p1n3", podsOnNode3[0].Name) 175 | assert.Equal(t, "p2n3", podsOnNode3[1].Name) 176 | 177 | podsOnNode4, err := getPodsOnNode(fakeClient, node4) 178 | if err != nil { 179 | assert.Error(t, err, "Found error in getting pods on node") 180 | } 181 | assert.Equal(t, 5, len(podsOnNode4)) 182 | assert.Equal(t, "p1n4", podsOnNode4[0].Name) 183 | assert.Equal(t, "p2n4", podsOnNode4[1].Name) 184 | assert.Equal(t, "p3n4", podsOnNode4[2].Name) 185 | assert.Equal(t, "p4n4", podsOnNode4[3].Name) 186 | assert.Equal(t, "p5n4", podsOnNode4[4].Name) 187 | 188 | } 189 | 190 | func TestCalculateRequestedCPU(t *testing.T) { 191 | pods1 := []*apiv1.Pod{ 192 | createTestPod("p1n1", 100), 193 | createTestPod("p2n1", 300), 194 | } 195 | pods2 := []*apiv1.Pod{ 196 | createTestPod("p1n2", 500), 197 | createTestPod("p2n2", 300), 198 | } 199 | pods3 := []*apiv1.Pod{ 200 | createTestPod("p1n3", 500), 201 | createTestPod("p2n3", 500), 202 | createTestPod("p3n3", 300), 203 | } 204 | 205 | pods1Request := calculateRequestedCPU(pods1) 206 | assert.Equal(t, int64(400), pods1Request) 207 | 208 | pods2Request := calculateRequestedCPU(pods2) 209 | assert.Equal(t, int64(800), pods2Request) 210 | 211 | pods3Request := calculateRequestedCPU(pods3) 212 | assert.Equal(t, int64(1300), pods3Request) 213 | } 214 | 215 | func 
TestGetPodCPURequests(t *testing.T) { 216 | pod1 := createTestPod("pod1", 100) 217 | pod2 := createTestPod("pod2", 200) 218 | 219 | pod1Request := getPodCPURequests(pod1) 220 | assert.Equal(t, int64(100), pod1Request) 221 | 222 | pod2Request := getPodCPURequests(pod2) 223 | assert.Equal(t, int64(200), pod2Request) 224 | } 225 | 226 | func TestCopyNodeInfos(t *testing.T) { 227 | pods1 := []*apiv1.Pod{ 228 | createTestPod("p1n1", 100), 229 | createTestPod("p2n1", 300), 230 | } 231 | pods2 := []*apiv1.Pod{ 232 | createTestPod("p1n2", 500), 233 | createTestPod("p2n2", 300), 234 | } 235 | pods3 := []*apiv1.Pod{ 236 | createTestPod("p1n3", 500), 237 | createTestPod("p2n3", 500), 238 | createTestPod("p3n3", 300), 239 | } 240 | 241 | pod1 := createTestPod("pod1", 200) 242 | pod2 := createTestPod("pod2", 200) 243 | pod3 := createTestPod("pod3", 200) 244 | 245 | nodeInfos := NodeInfoArray{ 246 | createTestNodeInfo(createTestNode("node1", 2000), pods1, 400), 247 | createTestNodeInfo(createTestNode("node2", 2000), pods2, 800), 248 | createTestNodeInfo(createTestNode("node3", 2000), pods3, 1300), 249 | } 250 | 251 | // Create a copy of the array 252 | nodeInfosCopy := nodeInfos.CopyNodeInfos() 253 | 254 | // Modify the array 255 | nodeInfosCopy[0].AddPod(pod1) 256 | nodeInfosCopy[1].AddPod(pod2) 257 | nodeInfosCopy[2].AddPod(pod3) 258 | 259 | // Check the changes applied 260 | assert.Equal(t, len(pods1)+1, len(nodeInfosCopy[0].Pods)) 261 | assert.Equal(t, len(pods2)+1, len(nodeInfosCopy[1].Pods)) 262 | assert.Equal(t, len(pods3)+1, len(nodeInfosCopy[2].Pods)) 263 | 264 | // Check the original has not changed 265 | assert.Equal(t, len(pods1), len(nodeInfos[0].Pods)) 266 | assert.Equal(t, len(pods2), len(nodeInfos[1].Pods)) 267 | assert.Equal(t, len(pods3), len(nodeInfos[2].Pods)) 268 | } 269 | 270 | func createTestPod(name string, cpu int64) *apiv1.Pod { 271 | pod := &apiv1.Pod{ 272 | ObjectMeta: metav1.ObjectMeta{ 273 | Namespace: "kube-system", 274 | Name: name, 275 | SelfLink: fmt.Sprintf("/api/v1/namespaces/default/pods/%s", name), 276 | }, 277 | Spec: apiv1.PodSpec{ 278 | Containers: []apiv1.Container{ 279 | { 280 | Resources: apiv1.ResourceRequirements{ 281 | Requests: apiv1.ResourceList{ 282 | apiv1.ResourceCPU: *resource.NewMilliQuantity(cpu, resource.DecimalSI), 283 | }, 284 | }, 285 | }, 286 | }, 287 | }, 288 | } 289 | return pod 290 | } 291 | 292 | func createTestNode(name string, cpu int64) *apiv1.Node { 293 | node := &apiv1.Node{ 294 | ObjectMeta: metav1.ObjectMeta{ 295 | Name: name, 296 | }, 297 | Status: apiv1.NodeStatus{ 298 | Capacity: apiv1.ResourceList{ 299 | apiv1.ResourceCPU: *resource.NewMilliQuantity(cpu, resource.DecimalSI), 300 | apiv1.ResourceMemory: *resource.NewQuantity(2*1024*1024*1024, resource.DecimalSI), 301 | apiv1.ResourcePods: *resource.NewQuantity(100, resource.DecimalSI), 302 | }, 303 | Conditions: []apiv1.NodeCondition{ 304 | { 305 | Type: apiv1.NodeReady, 306 | Status: apiv1.ConditionTrue, 307 | }, 308 | }, 309 | }, 310 | } 311 | node.Status.Allocatable = node.Status.Capacity 312 | return node 313 | } 314 | 315 | func createTestNodeWithLabel(name string, cpu int64, labels map[string]string) *apiv1.Node { 316 | node := createTestNode(name, cpu) 317 | node.ObjectMeta.Labels = labels 318 | return node 319 | } 320 | 321 | func createTestNodeInfo(node *apiv1.Node, pods []*apiv1.Pod, requests int64) *NodeInfo { 322 | nodeInfo := &NodeInfo{ 323 | Node: node, 324 | Pods: pods, 325 | RequestedCPU: requests, 326 | FreeCPU: node.Status.Capacity.Cpu().MilliValue() - requests, 
327 | } 328 | return nodeInfo 329 | } 330 | 331 | func createFakeClient(t *testing.T) *fake.Clientset { 332 | pods1 := []apiv1.Pod{ 333 | *createTestPod("p1n1", 100), 334 | *createTestPod("p2n1", 300), 335 | } 336 | pods2 := []apiv1.Pod{ 337 | *createTestPod("p1n2", 500), 338 | *createTestPod("p2n2", 300), 339 | *createTestPod("p3n2", 400), 340 | } 341 | pods3 := []apiv1.Pod{ 342 | *createTestPod("p1n3", 500), 343 | *createTestPod("p2n3", 300), 344 | } 345 | pods4 := []apiv1.Pod{ 346 | *createTestPod("p1n4", 500), 347 | *createTestPod("p2n4", 200), 348 | *createTestPod("p3n4", 400), 349 | *createTestPod("p4n4", 100), 350 | *createTestPod("p5n4", 300), 351 | } 352 | 353 | fakeClient := &fake.Clientset{} 354 | fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) { 355 | listAction, ok := action.(core.ListAction) 356 | assert.True(t, ok) 357 | restrictions := listAction.GetListRestrictions().Fields.String() 358 | 359 | podList := &apiv1.PodList{} 360 | switch restrictions { 361 | case "spec.nodeName=node1": 362 | podList.Items = pods1 363 | case "spec.nodeName=node2": 364 | podList.Items = pods2 365 | case "spec.nodeName=node3": 366 | podList.Items = pods3 367 | case "spec.nodeName=node4": 368 | podList.Items = pods4 369 | default: 370 | t.Fatalf("unexpected list restrictions: %v", restrictions) 371 | } 372 | return true, podList, nil 373 | }) 374 | return fakeClient 375 | } 376 | -------------------------------------------------------------------------------- /rescheduler.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 Pusher Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | goflag "flag" 21 | "fmt" 22 | "net/http" 23 | "os" 24 | "strings" 25 | "time" 26 | 27 | "github.com/pusher/k8s-spot-rescheduler/metrics" 28 | "github.com/pusher/k8s-spot-rescheduler/nodes" 29 | "github.com/pusher/k8s-spot-rescheduler/scaler" 30 | apiv1 "k8s.io/api/core/v1" 31 | policyv1 "k8s.io/api/policy/v1beta1" 32 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 33 | simulator "k8s.io/autoscaler/cluster-autoscaler/simulator" 34 | autoscaler_drain "k8s.io/autoscaler/cluster-autoscaler/utils/drain" 35 | kube_utils "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" 36 | kube_client "k8s.io/client-go/kubernetes" 37 | v1core "k8s.io/client-go/kubernetes/typed/core/v1" 38 | kube_restclient "k8s.io/client-go/rest" 39 | kube_leaderelection "k8s.io/client-go/tools/leaderelection" 40 | "k8s.io/client-go/tools/leaderelection/resourcelock" 41 | kube_record "k8s.io/client-go/tools/record" 42 | api "k8s.io/kubernetes/pkg/api/legacyscheme" 43 | "k8s.io/kubernetes/pkg/client/leaderelectionconfig" 44 | kubectl_util "k8s.io/kubernetes/pkg/kubectl/cmd/util" 45 | "k8s.io/kubernetes/pkg/scheduler/schedulercache" 46 | 47 | "github.com/golang/glog" 48 | "github.com/prometheus/client_golang/prometheus" 49 | flag "github.com/spf13/pflag" 50 | ) 51 | 52 | var ( 53 | flags = flag.NewFlagSet( 54 | `rescheduler: rescheduler --running-in-cluster=true`, 55 | flag.ExitOnError) 56 | 57 | inCluster = flags.Bool("running-in-cluster", true, 58 | `Optional, if this controller is running in a kubernetes cluster, use the 59 | pod secrets for creating a Kubernetes client.`) 60 | 61 | namespace = flags.String("namespace", "kube-system", 62 | `Namespace in which k8s-spot-rescheduler is run`) 63 | 64 | contentType = flags.String("kube-api-content-type", "application/vnd.kubernetes.protobuf", 65 | `Content type of requests sent to apiserver.`) 66 | 67 | housekeepingInterval = flags.Duration("housekeeping-interval", 10*time.Second, 68 | `How often rescheduler takes actions.`) 69 | 70 | nodeDrainDelay = flags.Duration("node-drain-delay", 10*time.Minute, 71 | `How long the scheduler should wait between draining nodes.`) 72 | 73 | podEvictionTimeout = flags.Duration("pod-eviction-timeout", 2*time.Minute, 74 | `How long should the rescheduler attempt to retrieve successful pod 75 | evictions for.`) 76 | 77 | maxGracefulTermination = flags.Duration("max-graceful-termination", 2*time.Minute, 78 | `How long should the rescheduler wait for pods to shutdown gracefully before 79 | failing the node drain attempt.`) 80 | 81 | listenAddress = flags.String("listen-address", "localhost:9235", 82 | `Address to listen on for serving prometheus metrics`) 83 | 84 | deleteNonReplicatedPods = flags.Bool("delete-non-replicated-pods", false, `Delete non-replicated pods running on on-demand instance. 
Note that some non-replicated pods will not be rescheduled.`) 85 | 86 | showVersion = flags.Bool("version", false, "Show version information and exit.") 87 | ) 88 | 89 | func main() { 90 | flags.AddGoFlagSet(goflag.CommandLine) 91 | 92 | // Log to stderr by default and fix usage message accordingly 93 | logToStdErr := flags.Lookup("logtostderr") 94 | logToStdErr.DefValue = "true" 95 | flags.Set("logtostderr", "true") 96 | 97 | // Add nodes labels as flags 98 | flags.StringVar(&nodes.OnDemandNodeLabel, 99 | "on-demand-node-label", 100 | "kubernetes.io/role=worker", 101 | `Name of label on nodes to be considered for draining.`) 102 | flags.StringVar(&nodes.SpotNodeLabel, 103 | "spot-node-label", 104 | "kubernetes.io/role=spot-worker", 105 | `Name of label on nodes to be considered as targets for pods.`) 106 | 107 | flags.Parse(os.Args) 108 | 109 | if *showVersion { 110 | fmt.Printf("k8s-spot-rescheduler %s\n", VERSION) 111 | os.Exit(0) 112 | } 113 | 114 | err := validateArgs(nodes.OnDemandNodeLabel, nodes.SpotNodeLabel) 115 | if err != nil { 116 | fmt.Printf("Error: %s", err) 117 | os.Exit(1) 118 | } 119 | 120 | glog.Infof("Running Rescheduler") 121 | 122 | // Register metrics from metrics.go 123 | go func() { 124 | http.Handle("/metrics", prometheus.Handler()) 125 | err := http.ListenAndServe(*listenAddress, nil) 126 | glog.Fatalf("Failed to start metrics: %v", err) 127 | }() 128 | 129 | kubeClient, err := createKubeClient(flags, *inCluster) 130 | if err != nil { 131 | glog.Fatalf("Failed to create kube client: %v", err) 132 | } 133 | 134 | recorder := createEventRecorder(kubeClient) 135 | 136 | // Allows active/standy HA. 137 | // Prevent multiple pods running the algorithm simultaneously. 138 | leaderElection := leaderelectionconfig.DefaultLeaderElectionConfiguration() 139 | if *inCluster { 140 | leaderElection.LeaderElect = true 141 | } 142 | 143 | if !leaderElection.LeaderElect { 144 | // Leader election not enabled. 145 | // Execute main logic. 146 | run(kubeClient, recorder) 147 | } else { 148 | id, err := os.Hostname() 149 | if err != nil { 150 | glog.Fatalf("Unable to get hostname: %v", err) 151 | } 152 | // Leader election process 153 | kube_leaderelection.RunOrDie(kube_leaderelection.LeaderElectionConfig{ 154 | Lock: &resourcelock.EndpointsLock{ 155 | EndpointsMeta: metav1.ObjectMeta{ 156 | Namespace: *namespace, 157 | Name: "k8s-spot-rescheduler", 158 | }, 159 | Client: kubeClient.CoreV1(), 160 | LockConfig: resourcelock.ResourceLockConfig{ 161 | Identity: id, 162 | EventRecorder: recorder, 163 | }, 164 | }, 165 | LeaseDuration: leaderElection.LeaseDuration.Duration, 166 | RenewDeadline: leaderElection.RenewDeadline.Duration, 167 | RetryPeriod: leaderElection.RetryPeriod.Duration, 168 | Callbacks: kube_leaderelection.LeaderCallbacks{ 169 | OnStartedLeading: func(_ <-chan struct{}) { 170 | // Since we are committing a suicide after losing 171 | // mastership, we can safely ignore the argument. 
172 | run(kubeClient, recorder) 173 | }, 174 | OnStoppedLeading: func() { 175 | glog.Fatalf("Lost leader status, terminating.") 176 | }, 177 | }, 178 | }) 179 | 180 | } 181 | 182 | } 183 | 184 | func run(kubeClient kube_client.Interface, recorder kube_record.EventRecorder) { 185 | 186 | stopChannel := make(chan struct{}) 187 | 188 | // Predicate checker from K8s scheduler works out if a Pod could schedule onto a node 189 | predicateChecker, err := simulator.NewPredicateChecker(kubeClient, stopChannel) 190 | if err != nil { 191 | glog.Fatalf("Failed to create predicate checker: %v", err) 192 | } 193 | 194 | nodeLister := kube_utils.NewReadyNodeLister(kubeClient, stopChannel) 195 | podDisruptionBudgetLister := kube_utils.NewPodDisruptionBudgetLister(kubeClient, stopChannel) 196 | unschedulablePodLister := kube_utils.NewUnschedulablePodLister(kubeClient, stopChannel) 197 | 198 | // Set nextDrainTime to now to ensure we start processing straight away. 199 | nextDrainTime := time.Now() 200 | 201 | for { 202 | select { 203 | // Run forever, every housekeepingInterval seconds 204 | case <-time.After(*housekeepingInterval): 205 | { 206 | // Don't do anything if we are waiting for the drain delay timer 207 | if time.Until(nextDrainTime) > 0 { 208 | glog.V(2).Infof("Waiting %s for drain delay timer.", time.Until(nextDrainTime).Round(time.Second)) 209 | continue 210 | } 211 | 212 | // Don't run if pods are unschedulable. 213 | // Attempt to not make things worse. 214 | unschedulablePods, err := unschedulablePodLister.List() 215 | if err != nil { 216 | glog.Errorf("Failed to get unschedulable pods: %v", err) 217 | } 218 | if len(unschedulablePods) > 0 { 219 | glog.V(2).Info("Waiting for unschedulable pods to be scheduled.") 220 | continue 221 | } 222 | 223 | glog.V(3).Info("Starting node processing.") 224 | 225 | // Get all nodes in the cluster 226 | allNodes, err := nodeLister.List() 227 | if err != nil { 228 | glog.Errorf("Failed to list nodes: %v", err) 229 | continue 230 | } 231 | 232 | // Build a map of nodeInfo structs. 233 | // NodeInfo is used to map pods onto nodes and see their available 234 | // resources. 235 | nodeMap, err := nodes.NewNodeMap(kubeClient, allNodes) 236 | if err != nil { 237 | glog.Errorf("Failed to build node map; %v", err) 238 | continue 239 | } 240 | 241 | // Update metrics. 242 | metrics.UpdateNodesMap(nodeMap) 243 | 244 | // Get PodDisruptionBudgets 245 | allPDBs, err := podDisruptionBudgetLister.List() 246 | if err != nil { 247 | glog.Errorf("Failed to list PDBs: %v", err) 248 | continue 249 | } 250 | 251 | // Get onDemand and spot nodeInfoArrays 252 | // These are sorted when the nodeMap is created. 253 | onDemandNodeInfos := nodeMap[nodes.OnDemand] 254 | spotNodeInfos := nodeMap[nodes.Spot] 255 | 256 | // Update spot node metrics 257 | updateSpotNodeMetrics(spotNodeInfos, allPDBs) 258 | 259 | // No on demand nodes so nothing to do. 
260 | if len(onDemandNodeInfos) < 1 { 261 | glog.V(2).Info("No nodes to process.") 262 | } 263 | 264 | // Go through each onDemand node in turn 265 | // Build a plan to move pods onto other nodes 266 | // In the case that all can be moved, drain the node 267 | for _, nodeInfo := range onDemandNodeInfos { 268 | 269 | // Get a list of pods that we would need to move onto other nodes 270 | allPods, err := autoscaler_drain.GetPodsForDeletionOnNodeDrain(nodeInfo.Pods, allPDBs, *deleteNonReplicatedPods, false, false, false, nil, 0, time.Now()) 271 | if err != nil { 272 | glog.Errorf("Failed to get pods for consideration: %v", err) 273 | continue 274 | } 275 | 276 | podsForDeletion := make([]*apiv1.Pod, 0) 277 | for _, pod := range allPods { 278 | controlledByDaemonSet := false 279 | for _, owner := range pod.GetOwnerReferences() { 280 | if *owner.Controller && owner.Kind == "DaemonSet" { 281 | controlledByDaemonSet = true 282 | break 283 | } 284 | } 285 | 286 | if controlledByDaemonSet { 287 | glog.V(4).Infof("Ignoring pod %s which is controlled by DaemonSet", podID(pod)) 288 | continue 289 | } 290 | 291 | podsForDeletion = append(podsForDeletion, pod) 292 | } 293 | 294 | // Update the number of pods on this node's metrics 295 | metrics.UpdateNodePodsCount(nodes.OnDemandNodeLabel, nodeInfo.Node.Name, len(podsForDeletion)) 296 | if len(podsForDeletion) < 1 { 297 | // No pods so should just wait for node to be autoscaled away. 298 | glog.V(2).Infof("No pods on %s, skipping.", nodeInfo.Node.Name) 299 | continue 300 | } 301 | 302 | glog.V(2).Infof("Considering %s for removal", nodeInfo.Node.Name) 303 | 304 | // Checks whether or not a node can be drained 305 | err = canDrainNode(predicateChecker, spotNodeInfos, podsForDeletion) 306 | if err != nil { 307 | glog.V(2).Infof("Cannot drain node: %v", err) 308 | continue 309 | } 310 | 311 | // If building plan was successful, can drain node. 312 | glog.V(2).Infof("All pods on %v can be moved. Will drain node.", nodeInfo.Node.Name) 313 | // Drain the node - places eviction on each pod moving them in turn. 314 | err = drainNode(kubeClient, recorder, nodeInfo.Node, podsForDeletion, int(maxGracefulTermination.Seconds()), *podEvictionTimeout) 315 | if err != nil { 316 | glog.Errorf("Failed to drain node: %v", err) 317 | } 318 | // Add the drain delay to allow system to stabilise 319 | nextDrainTime = time.Now().Add(*nodeDrainDelay) 320 | break 321 | } 322 | 323 | glog.V(3).Info("Finished processing nodes.") 324 | } 325 | } 326 | } 327 | } 328 | 329 | // Configure the kube client used to access the api, either from kubeconfig or 330 | //from pod environment if running in the cluster 331 | func createKubeClient(flags *flag.FlagSet, inCluster bool) (kube_client.Interface, error) { 332 | var config *kube_restclient.Config 333 | var err error 334 | if inCluster { 335 | // Load config from Kubernetes well known location. 336 | config, err = kube_restclient.InClusterConfig() 337 | } else { 338 | // Search environment for kubeconfig. 
339 | clientConfig := kubectl_util.DefaultClientConfig(flags) 340 | config, err = clientConfig.ClientConfig() 341 | } 342 | if err != nil { 343 | return nil, fmt.Errorf("error connecting to the client: %v", err) 344 | } 345 | config.ContentType = *contentType 346 | return kube_client.NewForConfigOrDie(config), nil 347 | } 348 | 349 | // Create an event broadcaster so that we can call events when we modify the system 350 | func createEventRecorder(client kube_client.Interface) kube_record.EventRecorder { 351 | eventBroadcaster := kube_record.NewBroadcaster() 352 | eventBroadcaster.StartLogging(glog.Infof) 353 | eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(client.CoreV1().RESTClient()).Events("")}) 354 | return eventBroadcaster.NewRecorder(api.Scheme, apiv1.EventSource{Component: "rescheduler"}) 355 | } 356 | 357 | // Determines if any of the nodes meet the predicates that allow the Pod to be 358 | // scheduled on the node, and returns the node if it finds a suitable one. 359 | // Currently sorts nodes by most requested CPU in an attempt to fill fuller 360 | // nodes first (Attempting to bin pack) 361 | func findSpotNodeForPod(predicateChecker *simulator.PredicateChecker, nodeInfos []*nodes.NodeInfo, pod *apiv1.Pod) *nodes.NodeInfo { 362 | for _, nodeInfo := range nodeInfos { 363 | kubeNodeInfo := schedulercache.NewNodeInfo(nodeInfo.Pods...) 364 | kubeNodeInfo.SetNode(nodeInfo.Node) 365 | 366 | // Pretend pod isn't scheduled 367 | pod.Spec.NodeName = "" 368 | 369 | // Check with the schedulers predicates to find a node to schedule on 370 | if err := predicateChecker.CheckPredicates(pod, nil, kubeNodeInfo, true); err == nil { 371 | return nodeInfo 372 | } 373 | } 374 | return nil 375 | } 376 | 377 | // Goes through a list of pods and works out new nodes to place them on. 378 | // Returns an error if any of the pods won't fit onto existing spot nodes. 379 | func canDrainNode(predicateChecker *simulator.PredicateChecker, nodeInfos nodes.NodeInfoArray, pods []*apiv1.Pod) error { 380 | // Create a copy of the nodeInfos so that we can modify the list 381 | nodePlan := nodeInfos.CopyNodeInfos() 382 | 383 | for _, pod := range pods { 384 | // Works out if a spot node is available for rescheduling 385 | spotNodeInfo := findSpotNodeForPod(predicateChecker, nodePlan, pod) 386 | if spotNodeInfo == nil { 387 | return fmt.Errorf("pod %s can't be rescheduled on any existing spot node", podID(pod)) 388 | } 389 | glog.V(4).Infof("Pod %s can be rescheduled on %v, adding to plan.", podID(pod), spotNodeInfo.Node.ObjectMeta.Name) 390 | spotNodeInfo.AddPod(pod) 391 | } 392 | 393 | return nil 394 | } 395 | 396 | // Performs a drain on given node and updates the nextDrainTime variable. 397 | // Returns an error if the drain fails. 
398 | func drainNode(kubeClient kube_client.Interface, recorder kube_record.EventRecorder, node *apiv1.Node, pods []*apiv1.Pod, maxGracefulTermination int, podEvictionTimeout time.Duration) error { 399 | err := scaler.DrainNode(node, pods, kubeClient, recorder, maxGracefulTermination, podEvictionTimeout, scaler.EvictionRetryTime) 400 | if err != nil { 401 | metrics.UpdateNodeDrainCount("Failure", node.Name) 402 | return err 403 | } 404 | 405 | metrics.UpdateNodeDrainCount("Success", node.Name) 406 | return nil 407 | } 408 | 409 | // Goes through a list of NodeInfos and updates the metrics system with the 410 | // number of pods that the rescheduler understands (So not daemonsets for 411 | // instance) that are on each of the nodes, labelling them as spot nodes. 412 | func updateSpotNodeMetrics(spotNodeInfos nodes.NodeInfoArray, pdbs []*policyv1.PodDisruptionBudget) { 413 | for _, nodeInfo := range spotNodeInfos { 414 | // Get a list of pods that are on the node (Only the types considered by the rescheduler) 415 | podsOnNode, err := autoscaler_drain.GetPodsForDeletionOnNodeDrain(nodeInfo.Pods, pdbs, *deleteNonReplicatedPods, false, false, false, nil, 0, time.Now()) 416 | if err != nil { 417 | glog.Errorf("Failed to update metrics on spot node %s: %v", nodeInfo.Node.Name, err) 418 | continue 419 | } 420 | metrics.UpdateNodePodsCount(nodes.SpotNodeLabel, nodeInfo.Node.Name, len(podsOnNode)) 421 | 422 | } 423 | } 424 | 425 | // Returns the pods Namespace/Name as a string 426 | func podID(pod *apiv1.Pod) string { 427 | return fmt.Sprintf("%s/%s", pod.Namespace, pod.Name) 428 | } 429 | 430 | // Checks that the node lablels provided as arguments are in fact, sane. 431 | func validateArgs(OnDemandNodeLabel string, SpotNodeLabel string) error { 432 | if len(strings.Split(OnDemandNodeLabel, "=")) > 2 { 433 | return fmt.Errorf("the on demand node label is not correctly formatted: expected '' or '=', but got %s", OnDemandNodeLabel) 434 | } 435 | 436 | if len(strings.Split(SpotNodeLabel, "=")) > 2 { 437 | return fmt.Errorf("the spot node label is not correctly formatted: expected '' or '=', but got %s", SpotNodeLabel) 438 | } 439 | 440 | return nil 441 | } 442 | -------------------------------------------------------------------------------- /rescheduler_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 Pusher Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "fmt" 21 | "testing" 22 | 23 | "github.com/pusher/k8s-spot-rescheduler/nodes" 24 | "github.com/stretchr/testify/assert" 25 | apiv1 "k8s.io/api/core/v1" 26 | "k8s.io/apimachinery/pkg/api/resource" 27 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 | simulator "k8s.io/autoscaler/cluster-autoscaler/simulator" 29 | ) 30 | 31 | func TestFindSpotNodeForPod(t *testing.T) { 32 | predicateChecker := simulator.NewTestPredicateChecker() 33 | 34 | pods1 := []*apiv1.Pod{ 35 | createTestPod("p1n1", 100), 36 | createTestPod("p2n1", 300), 37 | } 38 | pods2 := []*apiv1.Pod{ 39 | createTestPod("p1n2", 500), 40 | createTestPod("p2n2", 300), 41 | } 42 | pods3 := []*apiv1.Pod{ 43 | createTestPod("p1n3", 500), 44 | createTestPod("p2n3", 500), 45 | createTestPod("p3n3", 300), 46 | } 47 | 48 | nodeInfos := []*nodes.NodeInfo{ 49 | createTestNodeInfo(createTestNode("node1", 500), pods1, 400), 50 | createTestNodeInfo(createTestNode("node2", 1000), pods2, 800), 51 | createTestNodeInfo(createTestNode("node3", 2000), pods3, 1300), 52 | } 53 | 54 | pod1 := createTestPod("pod1", 100) 55 | pod2 := createTestPod("pod2", 200) 56 | pod3 := createTestPod("pod3", 700) 57 | pod4 := createTestPod("pod4", 2200) 58 | 59 | node := findSpotNodeForPod(predicateChecker, nodeInfos, pod1) 60 | assert.Equal(t, "node1", node.Node.Name) 61 | 62 | node = findSpotNodeForPod(predicateChecker, nodeInfos, pod2) 63 | assert.Equal(t, "node2", node.Node.Name) 64 | 65 | node = findSpotNodeForPod(predicateChecker, nodeInfos, pod3) 66 | assert.Equal(t, "node3", node.Node.Name) 67 | 68 | node = findSpotNodeForPod(predicateChecker, nodeInfos, pod4) 69 | assert.Nil(t, node) 70 | 71 | } 72 | 73 | func TestNodeLabelValidation(t *testing.T) { 74 | onDemandLabel := "foo.bar/role=worker" 75 | spotLabel := "foo.bar/node-role" 76 | 77 | err := validateArgs(onDemandLabel, spotLabel) 78 | assert.NoError(t, err) 79 | 80 | onDemandLabel = "foo.bar/broken=worker=true" 81 | err = validateArgs(onDemandLabel, spotLabel) 82 | assert.EqualError(t, err, "the on demand node label is not correctly formatted: expected '' or '=', but got foo.bar/broken=worker=true") 83 | 84 | onDemandLabel = "foo.bar/role=worker" 85 | spotLabel = "foo.bar/node-role=spot=fail" 86 | err = validateArgs(onDemandLabel, spotLabel) 87 | assert.EqualError(t, err, "the spot node label is not correctly formatted: expected '' or '=', but got foo.bar/node-role=spot=fail") 88 | 89 | } 90 | 91 | func TestCanDrainNode(t *testing.T) { 92 | predicateChecker := simulator.NewTestPredicateChecker() 93 | 94 | pods1 := []*apiv1.Pod{ 95 | createTestPod("p1n1", 100), 96 | createTestPod("p2n1", 300), 97 | } 98 | pods2 := []*apiv1.Pod{ 99 | createTestPod("p1n2", 500), 100 | createTestPod("p2n2", 300), 101 | } 102 | pods3 := []*apiv1.Pod{ 103 | createTestPod("p1n3", 500), 104 | createTestPod("p2n3", 500), 105 | createTestPod("p3n3", 300), 106 | } 107 | 108 | spotNodeInfos := []*nodes.NodeInfo{ 109 | createTestNodeInfo(createTestNode("node3", 2000), pods3, 1300), 110 | createTestNodeInfo(createTestNode("node2", 1100), pods2, 800), 111 | createTestNodeInfo(createTestNode("node1", 500), pods1, 400), 112 | } 113 | 114 | podsForDeletion1 := []*apiv1.Pod{ 115 | createTestPod("pod1", 500), 116 | createTestPod("pod2", 300), 117 | createTestPod("pod1", 100), 118 | createTestPod("pod2", 100), 119 | createTestPod("pod1", 100), 120 | } 121 | podsForDeletion2 := []*apiv1.Pod{ 122 | createTestPod("pod1", 500), 123 | createTestPod("pod2", 400), 124 | createTestPod("pod1", 
100), 125 | createTestPod("pod2", 100), 126 | createTestPod("pod1", 100), 127 | } 128 | 129 | err1 := canDrainNode(predicateChecker, spotNodeInfos, podsForDeletion1) 130 | if err1 != nil { 131 | assert.Fail(t, "canDrainNode should be successful with podsForDeletion1", "%v", err1) 132 | } 133 | 134 | err2 := canDrainNode(predicateChecker, spotNodeInfos, podsForDeletion2) 135 | if err2 == nil { 136 | assert.Fail(t, "canDrainNode should fail with podsForDeletion2, too much requested CPU.") 137 | } 138 | } 139 | 140 | func createTestPod(name string, cpu int64) *apiv1.Pod { 141 | pod := &apiv1.Pod{ 142 | ObjectMeta: metav1.ObjectMeta{ 143 | Namespace: "kube-system", 144 | Name: name, 145 | SelfLink: fmt.Sprintf("/api/v1/namespaces/default/pods/%s", name), 146 | }, 147 | Spec: apiv1.PodSpec{ 148 | Containers: []apiv1.Container{ 149 | { 150 | Resources: apiv1.ResourceRequirements{ 151 | Requests: apiv1.ResourceList{ 152 | apiv1.ResourceCPU: *resource.NewMilliQuantity(cpu, resource.DecimalSI), 153 | }, 154 | }, 155 | }, 156 | }, 157 | }, 158 | } 159 | return pod 160 | } 161 | 162 | func createTestNode(name string, cpu int64) *apiv1.Node { 163 | node := &apiv1.Node{ 164 | ObjectMeta: metav1.ObjectMeta{ 165 | Name: name, 166 | }, 167 | Status: apiv1.NodeStatus{ 168 | Capacity: apiv1.ResourceList{ 169 | apiv1.ResourceCPU: *resource.NewMilliQuantity(cpu, resource.DecimalSI), 170 | apiv1.ResourceMemory: *resource.NewQuantity(2*1024*1024*1024, resource.DecimalSI), 171 | apiv1.ResourcePods: *resource.NewQuantity(100, resource.DecimalSI), 172 | }, 173 | Conditions: []apiv1.NodeCondition{ 174 | { 175 | Type: apiv1.NodeReady, 176 | Status: apiv1.ConditionTrue, 177 | }, 178 | }, 179 | }, 180 | } 181 | node.Status.Allocatable = node.Status.Capacity 182 | return node 183 | } 184 | 185 | func createTestNodeInfo(node *apiv1.Node, pods []*apiv1.Pod, requests int64) *nodes.NodeInfo { 186 | nodeInfo := &nodes.NodeInfo{ 187 | Node: node, 188 | Pods: pods, 189 | RequestedCPU: requests, 190 | FreeCPU: node.Status.Capacity.Cpu().MilliValue() - requests, 191 | } 192 | return nodeInfo 193 | } 194 | -------------------------------------------------------------------------------- /scaler/scaler.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 The Kubernetes Authors. 3 | Modifications copyright 2017 Pusher Ltd. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | */ 17 | 18 | package scaler 19 | 20 | import ( 21 | "fmt" 22 | "time" 23 | 24 | "github.com/golang/glog" 25 | "github.com/pusher/k8s-spot-rescheduler/metrics" 26 | apiv1 "k8s.io/api/core/v1" 27 | policyv1 "k8s.io/api/policy/v1beta1" 28 | "k8s.io/apimachinery/pkg/api/errors" 29 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 30 | "k8s.io/autoscaler/cluster-autoscaler/utils/deletetaint" 31 | kube_client "k8s.io/client-go/kubernetes" 32 | kube_record "k8s.io/client-go/tools/record" 33 | ) 34 | 35 | const ( 36 | // EvictionRetryTime is the time after CA retries failed pod eviction. 
37 | EvictionRetryTime = 10 * time.Second 38 | ) 39 | 40 | // Originally from https://github.com/kubernetes/autoscaler/blob/bf59e3daa5922c0e44027fa211948b50cb6b7a12/cluster-autoscaler/core/scale_down.go#L690-L723 41 | func evictPod(podToEvict *apiv1.Pod, client kube_client.Interface, recorder kube_record.EventRecorder, 42 | maxGracefulTerminationSec int, retryUntil time.Time, waitBetweenRetries time.Duration) error { 43 | recorder.Eventf(podToEvict, apiv1.EventTypeNormal, "Rescheduler", "deleting pod from on-demand node") 44 | maxGraceful64 := int64(maxGracefulTerminationSec) 45 | var lastError error 46 | for first := true; first || time.Now().Before(retryUntil); time.Sleep(waitBetweenRetries) { 47 | first = false 48 | eviction := &policyv1.Eviction{ 49 | ObjectMeta: metav1.ObjectMeta{ 50 | Namespace: podToEvict.Namespace, 51 | Name: podToEvict.Name, 52 | }, 53 | DeleteOptions: &metav1.DeleteOptions{ 54 | GracePeriodSeconds: &maxGraceful64, 55 | }, 56 | } 57 | lastError = client.Core().Pods(podToEvict.Namespace).Evict(eviction) 58 | if lastError == nil { 59 | return nil 60 | } 61 | } 62 | glog.Errorf("Failed to evict pod %s, error: %v", podToEvict.Name, lastError) 63 | recorder.Eventf(podToEvict, apiv1.EventTypeWarning, "ReschedulerFailed", "failed to delete pod from on-demand node") 64 | return fmt.Errorf("Failed to evict pod %s/%s within allowed timeout (last error: %v)", podToEvict.Namespace, podToEvict.Name, lastError) 65 | } 66 | 67 | // DrainNode performs drain logic on the node. Marks the node as unschedulable and later removes all pods, giving 68 | // them up to MaxGracefulTerminationTime to finish. 69 | // 70 | // Originally from https://github.com/kubernetes/autoscaler/blob/bf59e3daa5922c0e44027fa211948b50cb6b7a12/cluster-autoscaler/core/scale_down.go#L725-L783 71 | func DrainNode(node *apiv1.Node, pods []*apiv1.Pod, client kube_client.Interface, recorder kube_record.EventRecorder, 72 | maxGracefulTerminationSec int, maxPodEvictionTime time.Duration, waitBetweenRetries time.Duration) error { 73 | 74 | drainSuccessful := false 75 | toEvict := len(pods) 76 | if err := deletetaint.MarkToBeDeleted(node, client); err != nil { 77 | recorder.Eventf(node, apiv1.EventTypeWarning, "ReschedulerFailed", "failed to mark the node as draining/unschedulable: %v", err) 78 | return err 79 | } 80 | 81 | // If we fail to evict all the pods from the node we want to remove delete taint 82 | defer func() { 83 | if !drainSuccessful { 84 | deletetaint.CleanToBeDeleted(node, client) 85 | recorder.Eventf(node, apiv1.EventTypeWarning, "ReschedulerFailed", "failed to drain the node, aborting drain.") 86 | } 87 | }() 88 | 89 | recorder.Eventf(node, apiv1.EventTypeNormal, "Rescheduler", "marked the node as draining/unschedulable") 90 | 91 | retryUntil := time.Now().Add(maxPodEvictionTime) 92 | confirmations := make(chan error, toEvict) 93 | for _, pod := range pods { 94 | go func(podToEvict *apiv1.Pod) { 95 | confirmations <- evictPod(podToEvict, client, recorder, maxGracefulTerminationSec, retryUntil, waitBetweenRetries) 96 | }(pod) 97 | } 98 | 99 | evictionErrs := make([]error, 0) 100 | 101 | for range pods { 102 | select { 103 | case err := <-confirmations: 104 | if err != nil { 105 | evictionErrs = append(evictionErrs, err) 106 | } else { 107 | metrics.UpdateEvictionsCount() 108 | } 109 | case <-time.After(retryUntil.Sub(time.Now()) + 5*time.Second): 110 | return fmt.Errorf("Failed to drain node %s/%s: timeout when waiting for creating evictions", node.Namespace, node.Name) 111 | } 112 | } 113 | if 
len(evictionErrs) != 0 { 114 | return fmt.Errorf("Failed to drain node %s/%s, due to following errors: %v", node.Namespace, node.Name, evictionErrs) 115 | } 116 | 117 | // Evictions created successfully, wait for the remainder of maxPodEvictionTime to see if pods have been evicted 118 | var allGone bool 119 | for time.Now().Before(retryUntil.Add(5 * time.Second)) { 120 | allGone = true 121 | for _, pod := range pods { 122 | podreturned, err := client.Core().Pods(pod.Namespace).Get(pod.Name, metav1.GetOptions{}) 123 | if err == nil && (podreturned != nil && podreturned.Spec.NodeName == node.Name) { 124 | glog.Errorf("Not deleted yet %v", podreturned.Name) 125 | allGone = false 126 | break 127 | } 128 | if err != nil && !errors.IsNotFound(err) { 129 | glog.Errorf("Failed to check pod %s/%s: %v", pod.Namespace, pod.Name, err) 130 | allGone = false 131 | break 132 | } 133 | } 134 | if allGone { 135 | glog.V(4).Infof("All pods removed from %s", node.Name) 136 | // Let the defered function know there is no need for cleanup 137 | drainSuccessful = true 138 | recorder.Eventf(node, apiv1.EventTypeNormal, "Rescheduler", "marked the node as drained/schedulable") 139 | deletetaint.CleanToBeDeleted(node, client) 140 | return nil 141 | } 142 | time.Sleep(5 * time.Second) 143 | } 144 | return fmt.Errorf("Failed to drain node %s/%s: pods remaining after timeout", node.Namespace, node.Name) 145 | } 146 | -------------------------------------------------------------------------------- /version.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // VERSION contains injected version information 4 | var VERSION = "undefined" 5 | --------------------------------------------------------------------------------