├── .dockerignore
├── .gitignore
├── CONTRIBUTING.md
├── Dockerfile
├── Gopkg.lock
├── Gopkg.toml
├── LICENSE
├── Makefile
├── README.md
├── configure
├── deploy
│   ├── clusterrole.yaml
│   ├── clusterrolebinding.yaml
│   ├── deployment.yaml
│   └── serviceaccount.yaml
├── metrics
│   └── metrics.go
├── nodes
│   ├── nodes.go
│   └── nodes_test.go
├── rescheduler.go
├── rescheduler_test.go
├── scaler
│   └── scaler.go
└── version.go
/.dockerignore: -------------------------------------------------------------------------------- 1 | vendor 2 | --------------------------------------------------------------------------------
/.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.dll 4 | *.so 5 | *.dylib 6 | 7 | # Test binary, build with `go test -c` 8 | *.test 9 | 10 | # Output of the go coverage tool, specifically when used with LiteIDE 11 | *.out 12 | 13 | # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 14 | .glide/ 15 | 16 | # Project-local wercker build cache 17 | .wercker/ 18 | 19 | # Project-local vendor folder 20 | vendor/ 21 | 22 | # Kubernetes config 23 | kubeconfig 24 | credentials/ 25 | 26 | # Local environmental config 27 | .env 28 | k8s-spot-rescheduler 29 | --------------------------------------------------------------------------------
/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | To develop on this project, please fork the repo and clone it into your `$GOPATH`. 3 | 4 | Dependencies are **not** checked in, so please download those separately. 5 | Download the dependencies using [`dep`](https://github.com/golang/dep). 6 | 7 | ```bash 8 | cd $GOPATH/src/github.com # Create this directory if it doesn't exist 9 | git clone git@github.com:<your-username>/k8s-spot-rescheduler pusher/k8s-spot-rescheduler 10 | cd $GOPATH/src/github.com/pusher/k8s-spot-rescheduler 11 | ./configure # Configure local tooling - install anything reported as missing 12 | make vendor # Clone required project dependencies 13 | ``` 14 | 15 | The main package is within `rescheduler.go` and an overview of its operating logic is described in the [Readme](README.md#operating-logic). 16 | 17 | If you want to run the rescheduler locally, you must have a valid `kubeconfig` file somewhere on your machine and then run the program with the flag `--running-in-cluster=false`. 18 | 19 | ## Pull Requests and Issues 20 | We track bugs and issues using GitHub. 21 | 22 | If you find a bug, please open an Issue. 23 | 24 | If you want to fix a bug, please fork, fix the bug and open a PR back to this repo. 25 | Please mention the open bug issue number within your PR if applicable. 26 | 27 | ### Tests 28 | Unit tests cover the decision-making parts of this code and can be run using the built-in Go test suite. 
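For example, assuming the dependencies have already been vendored with `make vendor`, you can invoke the Go tooling directly to run the whole suite or a single package (a minimal sketch; the package path is just an illustration):

```bash
# Run every test in the repository
go test ./...

# Run only the tests in the nodes package, with verbose output
go test ./nodes/... -v
```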
29 | 30 | To run the tests: `make test` 31 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG VERSION=undefined 2 | 3 | FROM golang:1.12 AS builder 4 | ARG VERSION 5 | 6 | RUN curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh 7 | 8 | WORKDIR /go/src/github.com/pusher/k8s-spot-rescheduler 9 | 10 | COPY Gopkg.lock Gopkg.lock 11 | COPY Gopkg.toml Gopkg.toml 12 | 13 | RUN dep ensure --vendor-only 14 | 15 | COPY *.go ./ 16 | COPY deploy deploy/ 17 | COPY metrics metrics/ 18 | COPY nodes nodes/ 19 | COPY scaler scaler/ 20 | 21 | RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags="-X main.VERSION=${VERSION}" -a -o k8s-spot-rescheduler github.com/pusher/k8s-spot-rescheduler 22 | 23 | FROM alpine:3.9 24 | RUN apk --no-cache add ca-certificates 25 | WORKDIR /bin 26 | COPY --from=builder /go/src/github.com/pusher/k8s-spot-rescheduler/k8s-spot-rescheduler . 27 | 28 | ENTRYPOINT ["/bin/k8s-spot-rescheduler"] 29 | -------------------------------------------------------------------------------- /Gopkg.lock: -------------------------------------------------------------------------------- 1 | # This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'. 2 | 3 | 4 | [[projects]] 5 | branch = "master" 6 | name = "github.com/Azure/go-ansiterm" 7 | packages = [ 8 | ".", 9 | "winterm" 10 | ] 11 | revision = "d6e3b3328b783f23731bc4d058875b0371ff8109" 12 | 13 | [[projects]] 14 | branch = "master" 15 | name = "github.com/MakeNowJust/heredoc" 16 | packages = ["."] 17 | revision = "e9091a26100e9cfb2b6a8f470085bfa541931a91" 18 | 19 | [[projects]] 20 | name = "github.com/PuerkitoBio/purell" 21 | packages = ["."] 22 | revision = "0bcb03f4b4d0a9428594752bd2a3b9aa0a9d4bd4" 23 | version = "v1.1.0" 24 | 25 | [[projects]] 26 | branch = "master" 27 | name = "github.com/PuerkitoBio/urlesc" 28 | packages = ["."] 29 | revision = "de5bf2ad457846296e2031421a34e2568e304e35" 30 | 31 | [[projects]] 32 | name = "github.com/Sirupsen/logrus" 33 | packages = ["."] 34 | revision = "c155da19408a8799da419ed3eeb0cb5db0ad5dbc" 35 | version = "v1.0.5" 36 | 37 | [[projects]] 38 | branch = "master" 39 | name = "github.com/beorn7/perks" 40 | packages = ["quantile"] 41 | revision = "3a771d992973f24aa725d07868b467d1ddfceafb" 42 | 43 | [[projects]] 44 | name = "github.com/davecgh/go-spew" 45 | packages = ["spew"] 46 | revision = "346938d642f2ec3594ed81d874461961cd0faa76" 47 | version = "v1.1.0" 48 | 49 | [[projects]] 50 | name = "github.com/docker/distribution" 51 | packages = [ 52 | "digestset", 53 | "reference" 54 | ] 55 | revision = "f0cc927784781fa395c06317c58dea2841ece3a9" 56 | 57 | [[projects]] 58 | name = "github.com/docker/docker" 59 | packages = [ 60 | "api/types", 61 | "api/types/blkiodev", 62 | "api/types/container", 63 | "api/types/filters", 64 | "api/types/mount", 65 | "api/types/network", 66 | "api/types/registry", 67 | "api/types/strslice", 68 | "api/types/swarm", 69 | "api/types/versions", 70 | "pkg/term", 71 | "pkg/term/windows" 72 | ] 73 | revision = "092cba3727bb9b4a2f0e922cd6c0f93ea270e363" 74 | version = "v1.13.1" 75 | 76 | [[projects]] 77 | name = "github.com/docker/go-connections" 78 | packages = ["nat"] 79 | revision = "3ede32e2033de7505e6500d6c868c2b9ed9f169d" 80 | version = "v0.3.0" 81 | 82 | [[projects]] 83 | name = "github.com/docker/go-units" 84 | packages = ["."] 85 | revision = "47565b4f722fb6ceae66b95f853feed578a4a51c" 86 | 
version = "v0.3.3" 87 | 88 | [[projects]] 89 | branch = "master" 90 | name = "github.com/docker/spdystream" 91 | packages = [ 92 | ".", 93 | "spdy" 94 | ] 95 | revision = "bc6354cbbc295e925e4c611ffe90c1f287ee54db" 96 | 97 | [[projects]] 98 | name = "github.com/evanphx/json-patch" 99 | packages = ["."] 100 | revision = "afac545df32f2287a079e2dfb7ba2745a643747e" 101 | version = "v3.0.0" 102 | 103 | [[projects]] 104 | branch = "master" 105 | name = "github.com/exponent-io/jsonpath" 106 | packages = ["."] 107 | revision = "d6023ce2651d8eafb5c75bb0c7167536102ec9f5" 108 | 109 | [[projects]] 110 | branch = "master" 111 | name = "github.com/fatih/camelcase" 112 | packages = ["."] 113 | revision = "44e46d280b43ec1531bb25252440e34f1b800b65" 114 | 115 | [[projects]] 116 | name = "github.com/ghodss/yaml" 117 | packages = ["."] 118 | revision = "0ca9ea5df5451ffdf184b4428c902747c2c11cd7" 119 | version = "v1.0.0" 120 | 121 | [[projects]] 122 | branch = "master" 123 | name = "github.com/go-openapi/jsonpointer" 124 | packages = ["."] 125 | revision = "3a0015ad55fa9873f41605d3e8f28cd279c32ab2" 126 | 127 | [[projects]] 128 | branch = "master" 129 | name = "github.com/go-openapi/jsonreference" 130 | packages = ["."] 131 | revision = "3fb327e6747da3043567ee86abd02bb6376b6be2" 132 | 133 | [[projects]] 134 | branch = "master" 135 | name = "github.com/go-openapi/spec" 136 | packages = ["."] 137 | revision = "bcff419492eeeb01f76e77d2ebc714dc97b607f5" 138 | 139 | [[projects]] 140 | branch = "master" 141 | name = "github.com/go-openapi/swag" 142 | packages = ["."] 143 | revision = "811b1089cde9dad18d4d0c2d09fbdbf28dbd27a5" 144 | 145 | [[projects]] 146 | name = "github.com/gogo/protobuf" 147 | packages = [ 148 | "proto", 149 | "sortkeys" 150 | ] 151 | revision = "1adfc126b41513cc696b209667c8656ea7aac67c" 152 | version = "v1.0.0" 153 | 154 | [[projects]] 155 | branch = "master" 156 | name = "github.com/golang/glog" 157 | packages = ["."] 158 | revision = "23def4e6c14b4da8ac2ed8007337bc5eb5007998" 159 | 160 | [[projects]] 161 | branch = "master" 162 | name = "github.com/golang/groupcache" 163 | packages = ["lru"] 164 | revision = "24b0969c4cb722950103eed87108c8d291a8df00" 165 | 166 | [[projects]] 167 | name = "github.com/golang/protobuf" 168 | packages = [ 169 | "proto", 170 | "ptypes", 171 | "ptypes/any", 172 | "ptypes/duration", 173 | "ptypes/timestamp" 174 | ] 175 | revision = "b4deda0973fb4c70b50d226b1af49f3da59f5265" 176 | version = "v1.1.0" 177 | 178 | [[projects]] 179 | branch = "master" 180 | name = "github.com/google/btree" 181 | packages = ["."] 182 | revision = "e89373fe6b4a7413d7acd6da1725b83ef713e6e4" 183 | 184 | [[projects]] 185 | branch = "master" 186 | name = "github.com/google/gofuzz" 187 | packages = ["."] 188 | revision = "24818f796faf91cd76ec7bddd72458fbced7a6c1" 189 | 190 | [[projects]] 191 | name = "github.com/googleapis/gnostic" 192 | packages = [ 193 | "OpenAPIv2", 194 | "compiler", 195 | "extensions" 196 | ] 197 | revision = "7c663266750e7d82587642f65e60bc4083f1f84e" 198 | version = "v0.2.0" 199 | 200 | [[projects]] 201 | branch = "master" 202 | name = "github.com/gregjones/httpcache" 203 | packages = [ 204 | ".", 205 | "diskcache" 206 | ] 207 | revision = "9cad4c3443a7200dd6400aef47183728de563a38" 208 | 209 | [[projects]] 210 | branch = "master" 211 | name = "github.com/hashicorp/golang-lru" 212 | packages = [ 213 | ".", 214 | "simplelru" 215 | ] 216 | revision = "0fb14efe8c47ae851c0034ed7a448854d3d34cf3" 217 | 218 | [[projects]] 219 | branch = "master" 220 | name = "github.com/howeyc/gopass" 
221 | packages = ["."] 222 | revision = "bf9dde6d0d2c004a008c27aaee91170c786f6db8" 223 | 224 | [[projects]] 225 | name = "github.com/imdario/mergo" 226 | packages = ["."] 227 | revision = "9316a62528ac99aaecb4e47eadd6dc8aa6533d58" 228 | version = "v0.3.5" 229 | 230 | [[projects]] 231 | name = "github.com/inconshreveable/mousetrap" 232 | packages = ["."] 233 | revision = "76626ae9c91c4f2a10f34cad8ce83ea42c93bb75" 234 | version = "v1.0" 235 | 236 | [[projects]] 237 | name = "github.com/json-iterator/go" 238 | packages = ["."] 239 | revision = "f2b4162afba35581b6d4a50d3b8f34e33c144682" 240 | 241 | [[projects]] 242 | branch = "master" 243 | name = "github.com/mailru/easyjson" 244 | packages = [ 245 | "buffer", 246 | "jlexer", 247 | "jwriter" 248 | ] 249 | revision = "3fdea8d05856a0c8df22ed4bc71b3219245e4485" 250 | 251 | [[projects]] 252 | name = "github.com/matttproud/golang_protobuf_extensions" 253 | packages = ["pbutil"] 254 | revision = "c12348ce28de40eed0136aa2b644d0ee0650e56c" 255 | version = "v1.0.1" 256 | 257 | [[projects]] 258 | branch = "master" 259 | name = "github.com/mitchellh/go-wordwrap" 260 | packages = ["."] 261 | revision = "ad45545899c7b13c020ea92b2072220eefad42b8" 262 | 263 | [[projects]] 264 | name = "github.com/modern-go/concurrent" 265 | packages = ["."] 266 | revision = "bacd9c7ef1dd9b15be4a9909b8ac7a4e313eec94" 267 | version = "1.0.3" 268 | 269 | [[projects]] 270 | name = "github.com/modern-go/reflect2" 271 | packages = ["."] 272 | revision = "1df9eeb2bb81f327b96228865c5687bc2194af3f" 273 | version = "1.0.0" 274 | 275 | [[projects]] 276 | name = "github.com/opencontainers/go-digest" 277 | packages = ["."] 278 | revision = "279bed98673dd5bef374d3b6e4b09e2af76183bf" 279 | version = "v1.0.0-rc1" 280 | 281 | [[projects]] 282 | name = "github.com/pborman/uuid" 283 | packages = ["."] 284 | revision = "e790cca94e6cc75c7064b1332e63811d4aae1a53" 285 | version = "v1.1" 286 | 287 | [[projects]] 288 | branch = "master" 289 | name = "github.com/petar/GoLLRB" 290 | packages = ["llrb"] 291 | revision = "53be0d36a84c2a886ca057d34b6aa4468df9ccb4" 292 | 293 | [[projects]] 294 | name = "github.com/peterbourgon/diskv" 295 | packages = ["."] 296 | revision = "5f041e8faa004a95c88a202771f4cc3e991971e6" 297 | version = "v2.0.1" 298 | 299 | [[projects]] 300 | name = "github.com/pmezard/go-difflib" 301 | packages = ["difflib"] 302 | revision = "792786c7400a136282c1664665ae0a8db921c6c2" 303 | version = "v1.0.0" 304 | 305 | [[projects]] 306 | name = "github.com/prometheus/client_golang" 307 | packages = ["prometheus"] 308 | revision = "c5b7fccd204277076155f10851dad72b76a49317" 309 | version = "v0.8.0" 310 | 311 | [[projects]] 312 | branch = "master" 313 | name = "github.com/prometheus/client_model" 314 | packages = ["go"] 315 | revision = "99fa1f4be8e564e8a6b613da7fa6f46c9edafc6c" 316 | 317 | [[projects]] 318 | branch = "master" 319 | name = "github.com/prometheus/common" 320 | packages = [ 321 | "expfmt", 322 | "internal/bitbucket.org/ww/goautoneg", 323 | "model" 324 | ] 325 | revision = "7600349dcfe1abd18d72d3a1770870d9800a7801" 326 | 327 | [[projects]] 328 | branch = "master" 329 | name = "github.com/prometheus/procfs" 330 | packages = [ 331 | ".", 332 | "internal/util", 333 | "nfs", 334 | "xfs" 335 | ] 336 | revision = "7d6f385de8bea29190f15ba9931442a0eaef9af7" 337 | 338 | [[projects]] 339 | name = "github.com/russross/blackfriday" 340 | packages = ["."] 341 | revision = "300106c228d52c8941d4b3de6054a6062a86dda3" 342 | 343 | [[projects]] 344 | branch = "master" 345 | name = 
"github.com/shurcooL/sanitized_anchor_name" 346 | packages = ["."] 347 | revision = "86672fcb3f950f35f2e675df2240550f2a50762f" 348 | 349 | [[projects]] 350 | name = "github.com/spf13/cobra" 351 | packages = ["."] 352 | revision = "ef82de70bb3f60c65fb8eebacbb2d122ef517385" 353 | version = "v0.0.3" 354 | 355 | [[projects]] 356 | name = "github.com/spf13/pflag" 357 | packages = ["."] 358 | revision = "583c0c0531f06d5278b7d917446061adc344b5cd" 359 | version = "v1.0.1" 360 | 361 | [[projects]] 362 | name = "github.com/stretchr/testify" 363 | packages = ["assert"] 364 | revision = "f35b8ab0b5a2cef36673838d662e249dd9c94686" 365 | version = "v1.2.2" 366 | 367 | [[projects]] 368 | branch = "master" 369 | name = "golang.org/x/crypto" 370 | packages = [ 371 | "ed25519", 372 | "ed25519/internal/edwards25519", 373 | "ssh/terminal" 374 | ] 375 | revision = "a49355c7e3f8fe157a85be2f77e6e269a0f89602" 376 | 377 | [[projects]] 378 | branch = "master" 379 | name = "golang.org/x/net" 380 | packages = [ 381 | "context", 382 | "http/httpguts", 383 | "http2", 384 | "http2/hpack", 385 | "idna" 386 | ] 387 | revision = "4cb1c02c05b0e749b0365f61ae859a8e0cfceed9" 388 | 389 | [[projects]] 390 | branch = "master" 391 | name = "golang.org/x/sys" 392 | packages = [ 393 | "unix", 394 | "windows" 395 | ] 396 | revision = "7138fd3d9dc8335c567ca206f4333fb75eb05d56" 397 | 398 | [[projects]] 399 | name = "golang.org/x/text" 400 | packages = [ 401 | "collate", 402 | "collate/build", 403 | "encoding", 404 | "encoding/internal", 405 | "encoding/internal/identifier", 406 | "encoding/unicode", 407 | "internal/colltab", 408 | "internal/gen", 409 | "internal/tag", 410 | "internal/triegen", 411 | "internal/ucd", 412 | "internal/utf8internal", 413 | "language", 414 | "runes", 415 | "secure/bidirule", 416 | "transform", 417 | "unicode/bidi", 418 | "unicode/cldr", 419 | "unicode/norm", 420 | "unicode/rangetable", 421 | "width" 422 | ] 423 | revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0" 424 | version = "v0.3.0" 425 | 426 | [[projects]] 427 | branch = "master" 428 | name = "golang.org/x/time" 429 | packages = ["rate"] 430 | revision = "fbb02b2291d28baffd63558aa44b4b56f178d650" 431 | 432 | [[projects]] 433 | name = "gopkg.in/inf.v0" 434 | packages = ["."] 435 | revision = "d2d2541c53f18d2a059457998ce2876cc8e67cbf" 436 | version = "v0.9.1" 437 | 438 | [[projects]] 439 | name = "gopkg.in/square/go-jose.v2" 440 | packages = [ 441 | ".", 442 | "cipher", 443 | "json", 444 | "jwt" 445 | ] 446 | revision = "76dd09796242edb5b897103a75df2645c028c960" 447 | version = "v2.1.6" 448 | 449 | [[projects]] 450 | name = "gopkg.in/yaml.v2" 451 | packages = ["."] 452 | revision = "5420a8b6744d3b0345ab293f6fcba19c978f1183" 453 | version = "v2.2.1" 454 | 455 | [[projects]] 456 | name = "k8s.io/api" 457 | packages = [ 458 | "admission/v1beta1", 459 | "admissionregistration/v1alpha1", 460 | "admissionregistration/v1beta1", 461 | "apps/v1", 462 | "apps/v1beta1", 463 | "apps/v1beta2", 464 | "authentication/v1", 465 | "authentication/v1beta1", 466 | "authorization/v1", 467 | "authorization/v1beta1", 468 | "autoscaling/v1", 469 | "autoscaling/v2beta1", 470 | "batch/v1", 471 | "batch/v1beta1", 472 | "batch/v2alpha1", 473 | "certificates/v1beta1", 474 | "core/v1", 475 | "events/v1beta1", 476 | "extensions/v1beta1", 477 | "imagepolicy/v1alpha1", 478 | "networking/v1", 479 | "policy/v1beta1", 480 | "rbac/v1", 481 | "rbac/v1alpha1", 482 | "rbac/v1beta1", 483 | "scheduling/v1alpha1", 484 | "settings/v1alpha1", 485 | "storage/v1", 486 | "storage/v1alpha1", 487 | 
"storage/v1beta1" 488 | ] 489 | revision = "6c0bbc3e58fab96285be9b6ed41b12b58c737a96" 490 | version = "kubernetes-1.10.5" 491 | 492 | [[projects]] 493 | branch = "master" 494 | name = "k8s.io/apiextensions-apiserver" 495 | packages = ["pkg/features"] 496 | revision = "4340dd9c24063611b7a269f847e16f9ad758bd52" 497 | 498 | [[projects]] 499 | branch = "release-1.10" 500 | name = "k8s.io/apimachinery" 501 | packages = [ 502 | "pkg/api/equality", 503 | "pkg/api/errors", 504 | "pkg/api/meta", 505 | "pkg/api/resource", 506 | "pkg/api/validation", 507 | "pkg/apimachinery", 508 | "pkg/apimachinery/announced", 509 | "pkg/apimachinery/registered", 510 | "pkg/apis/meta/internalversion", 511 | "pkg/apis/meta/v1", 512 | "pkg/apis/meta/v1/unstructured", 513 | "pkg/apis/meta/v1/validation", 514 | "pkg/apis/meta/v1beta1", 515 | "pkg/conversion", 516 | "pkg/conversion/queryparams", 517 | "pkg/fields", 518 | "pkg/labels", 519 | "pkg/runtime", 520 | "pkg/runtime/schema", 521 | "pkg/runtime/serializer", 522 | "pkg/runtime/serializer/json", 523 | "pkg/runtime/serializer/protobuf", 524 | "pkg/runtime/serializer/recognizer", 525 | "pkg/runtime/serializer/streaming", 526 | "pkg/runtime/serializer/versioning", 527 | "pkg/selection", 528 | "pkg/types", 529 | "pkg/util/cache", 530 | "pkg/util/clock", 531 | "pkg/util/diff", 532 | "pkg/util/duration", 533 | "pkg/util/errors", 534 | "pkg/util/framer", 535 | "pkg/util/httpstream", 536 | "pkg/util/httpstream/spdy", 537 | "pkg/util/intstr", 538 | "pkg/util/json", 539 | "pkg/util/mergepatch", 540 | "pkg/util/net", 541 | "pkg/util/rand", 542 | "pkg/util/remotecommand", 543 | "pkg/util/runtime", 544 | "pkg/util/sets", 545 | "pkg/util/strategicpatch", 546 | "pkg/util/uuid", 547 | "pkg/util/validation", 548 | "pkg/util/validation/field", 549 | "pkg/util/wait", 550 | "pkg/util/yaml", 551 | "pkg/version", 552 | "pkg/watch", 553 | "third_party/forked/golang/json", 554 | "third_party/forked/golang/netutil", 555 | "third_party/forked/golang/reflect" 556 | ] 557 | revision = "e386b2658ed20923da8cc9250e552f082899a1ee" 558 | 559 | [[projects]] 560 | name = "k8s.io/apiserver" 561 | packages = [ 562 | "pkg/apis/audit", 563 | "pkg/authentication/authenticator", 564 | "pkg/authentication/serviceaccount", 565 | "pkg/authentication/user", 566 | "pkg/endpoints/request", 567 | "pkg/features", 568 | "pkg/util/feature", 569 | "pkg/util/flag", 570 | "pkg/util/trace" 571 | ] 572 | revision = "1bfbd2d59262936b587dc0611082edaee5218df2" 573 | version = "kubernetes-1.10.5" 574 | 575 | [[projects]] 576 | name = "k8s.io/autoscaler" 577 | packages = [ 578 | "cluster-autoscaler/simulator", 579 | "cluster-autoscaler/utils/deletetaint", 580 | "cluster-autoscaler/utils/drain", 581 | "cluster-autoscaler/utils/errors", 582 | "cluster-autoscaler/utils/glogx", 583 | "cluster-autoscaler/utils/kubernetes", 584 | "cluster-autoscaler/utils/scheduler" 585 | ] 586 | revision = "a909d0cc3695148f097b56e62f052063d354ee99" 587 | version = "cluster-autoscaler-1.2.2" 588 | 589 | [[projects]] 590 | name = "k8s.io/client-go" 591 | packages = [ 592 | "discovery", 593 | "discovery/fake", 594 | "dynamic", 595 | "informers", 596 | "informers/admissionregistration", 597 | "informers/admissionregistration/v1alpha1", 598 | "informers/admissionregistration/v1beta1", 599 | "informers/apps", 600 | "informers/apps/v1", 601 | "informers/apps/v1beta1", 602 | "informers/apps/v1beta2", 603 | "informers/autoscaling", 604 | "informers/autoscaling/v1", 605 | "informers/autoscaling/v2beta1", 606 | "informers/batch", 607 | "informers/batch/v1", 
608 | "informers/batch/v1beta1", 609 | "informers/batch/v2alpha1", 610 | "informers/certificates", 611 | "informers/certificates/v1beta1", 612 | "informers/core", 613 | "informers/core/v1", 614 | "informers/events", 615 | "informers/events/v1beta1", 616 | "informers/extensions", 617 | "informers/extensions/v1beta1", 618 | "informers/internalinterfaces", 619 | "informers/networking", 620 | "informers/networking/v1", 621 | "informers/policy", 622 | "informers/policy/v1beta1", 623 | "informers/rbac", 624 | "informers/rbac/v1", 625 | "informers/rbac/v1alpha1", 626 | "informers/rbac/v1beta1", 627 | "informers/scheduling", 628 | "informers/scheduling/v1alpha1", 629 | "informers/settings", 630 | "informers/settings/v1alpha1", 631 | "informers/storage", 632 | "informers/storage/v1", 633 | "informers/storage/v1alpha1", 634 | "informers/storage/v1beta1", 635 | "kubernetes", 636 | "kubernetes/fake", 637 | "kubernetes/scheme", 638 | "kubernetes/typed/admissionregistration/v1alpha1", 639 | "kubernetes/typed/admissionregistration/v1alpha1/fake", 640 | "kubernetes/typed/admissionregistration/v1beta1", 641 | "kubernetes/typed/admissionregistration/v1beta1/fake", 642 | "kubernetes/typed/apps/v1", 643 | "kubernetes/typed/apps/v1/fake", 644 | "kubernetes/typed/apps/v1beta1", 645 | "kubernetes/typed/apps/v1beta1/fake", 646 | "kubernetes/typed/apps/v1beta2", 647 | "kubernetes/typed/apps/v1beta2/fake", 648 | "kubernetes/typed/authentication/v1", 649 | "kubernetes/typed/authentication/v1/fake", 650 | "kubernetes/typed/authentication/v1beta1", 651 | "kubernetes/typed/authentication/v1beta1/fake", 652 | "kubernetes/typed/authorization/v1", 653 | "kubernetes/typed/authorization/v1/fake", 654 | "kubernetes/typed/authorization/v1beta1", 655 | "kubernetes/typed/authorization/v1beta1/fake", 656 | "kubernetes/typed/autoscaling/v1", 657 | "kubernetes/typed/autoscaling/v1/fake", 658 | "kubernetes/typed/autoscaling/v2beta1", 659 | "kubernetes/typed/autoscaling/v2beta1/fake", 660 | "kubernetes/typed/batch/v1", 661 | "kubernetes/typed/batch/v1/fake", 662 | "kubernetes/typed/batch/v1beta1", 663 | "kubernetes/typed/batch/v1beta1/fake", 664 | "kubernetes/typed/batch/v2alpha1", 665 | "kubernetes/typed/batch/v2alpha1/fake", 666 | "kubernetes/typed/certificates/v1beta1", 667 | "kubernetes/typed/certificates/v1beta1/fake", 668 | "kubernetes/typed/core/v1", 669 | "kubernetes/typed/core/v1/fake", 670 | "kubernetes/typed/events/v1beta1", 671 | "kubernetes/typed/events/v1beta1/fake", 672 | "kubernetes/typed/extensions/v1beta1", 673 | "kubernetes/typed/extensions/v1beta1/fake", 674 | "kubernetes/typed/networking/v1", 675 | "kubernetes/typed/networking/v1/fake", 676 | "kubernetes/typed/policy/v1beta1", 677 | "kubernetes/typed/policy/v1beta1/fake", 678 | "kubernetes/typed/rbac/v1", 679 | "kubernetes/typed/rbac/v1/fake", 680 | "kubernetes/typed/rbac/v1alpha1", 681 | "kubernetes/typed/rbac/v1alpha1/fake", 682 | "kubernetes/typed/rbac/v1beta1", 683 | "kubernetes/typed/rbac/v1beta1/fake", 684 | "kubernetes/typed/scheduling/v1alpha1", 685 | "kubernetes/typed/scheduling/v1alpha1/fake", 686 | "kubernetes/typed/settings/v1alpha1", 687 | "kubernetes/typed/settings/v1alpha1/fake", 688 | "kubernetes/typed/storage/v1", 689 | "kubernetes/typed/storage/v1/fake", 690 | "kubernetes/typed/storage/v1alpha1", 691 | "kubernetes/typed/storage/v1alpha1/fake", 692 | "kubernetes/typed/storage/v1beta1", 693 | "kubernetes/typed/storage/v1beta1/fake", 694 | "listers/admissionregistration/v1alpha1", 695 | "listers/admissionregistration/v1beta1", 696 | 
"listers/apps/v1", 697 | "listers/apps/v1beta1", 698 | "listers/apps/v1beta2", 699 | "listers/autoscaling/v1", 700 | "listers/autoscaling/v2beta1", 701 | "listers/batch/v1", 702 | "listers/batch/v1beta1", 703 | "listers/batch/v2alpha1", 704 | "listers/certificates/v1beta1", 705 | "listers/core/v1", 706 | "listers/events/v1beta1", 707 | "listers/extensions/v1beta1", 708 | "listers/networking/v1", 709 | "listers/policy/v1beta1", 710 | "listers/rbac/v1", 711 | "listers/rbac/v1alpha1", 712 | "listers/rbac/v1beta1", 713 | "listers/scheduling/v1alpha1", 714 | "listers/settings/v1alpha1", 715 | "listers/storage/v1", 716 | "listers/storage/v1alpha1", 717 | "listers/storage/v1beta1", 718 | "pkg/apis/clientauthentication", 719 | "pkg/apis/clientauthentication/v1alpha1", 720 | "pkg/version", 721 | "plugin/pkg/client/auth/exec", 722 | "rest", 723 | "rest/watch", 724 | "scale", 725 | "scale/scheme", 726 | "scale/scheme/appsint", 727 | "scale/scheme/appsv1beta1", 728 | "scale/scheme/appsv1beta2", 729 | "scale/scheme/autoscalingv1", 730 | "scale/scheme/extensionsint", 731 | "scale/scheme/extensionsv1beta1", 732 | "testing", 733 | "third_party/forked/golang/template", 734 | "tools/auth", 735 | "tools/cache", 736 | "tools/clientcmd", 737 | "tools/clientcmd/api", 738 | "tools/clientcmd/api/latest", 739 | "tools/clientcmd/api/v1", 740 | "tools/leaderelection", 741 | "tools/leaderelection/resourcelock", 742 | "tools/metrics", 743 | "tools/pager", 744 | "tools/record", 745 | "tools/reference", 746 | "tools/remotecommand", 747 | "transport", 748 | "transport/spdy", 749 | "util/buffer", 750 | "util/cert", 751 | "util/exec", 752 | "util/flowcontrol", 753 | "util/homedir", 754 | "util/integer", 755 | "util/jsonpath", 756 | "util/retry", 757 | "util/workqueue" 758 | ] 759 | revision = "23781f4d6632d88e869066eaebb743857aa1ef9b" 760 | version = "v7.0.0" 761 | 762 | [[projects]] 763 | branch = "master" 764 | name = "k8s.io/kube-openapi" 765 | packages = [ 766 | "pkg/util/proto", 767 | "pkg/util/proto/validation" 768 | ] 769 | revision = "d83b052f768a50a309c692a9c271da3f3276ff88" 770 | 771 | [[projects]] 772 | name = "k8s.io/kubernetes" 773 | packages = [ 774 | "pkg/api/events", 775 | "pkg/api/legacyscheme", 776 | "pkg/api/pod", 777 | "pkg/api/ref", 778 | "pkg/api/resource", 779 | "pkg/api/service", 780 | "pkg/api/v1/pod", 781 | "pkg/apis/admissionregistration", 782 | "pkg/apis/admissionregistration/install", 783 | "pkg/apis/admissionregistration/v1alpha1", 784 | "pkg/apis/admissionregistration/v1beta1", 785 | "pkg/apis/apps", 786 | "pkg/apis/apps/install", 787 | "pkg/apis/apps/v1", 788 | "pkg/apis/apps/v1beta1", 789 | "pkg/apis/apps/v1beta2", 790 | "pkg/apis/authentication", 791 | "pkg/apis/authentication/install", 792 | "pkg/apis/authentication/v1", 793 | "pkg/apis/authentication/v1beta1", 794 | "pkg/apis/authorization", 795 | "pkg/apis/authorization/install", 796 | "pkg/apis/authorization/v1", 797 | "pkg/apis/authorization/v1beta1", 798 | "pkg/apis/autoscaling", 799 | "pkg/apis/autoscaling/install", 800 | "pkg/apis/autoscaling/v1", 801 | "pkg/apis/autoscaling/v2beta1", 802 | "pkg/apis/batch", 803 | "pkg/apis/batch/install", 804 | "pkg/apis/batch/v1", 805 | "pkg/apis/batch/v1beta1", 806 | "pkg/apis/batch/v2alpha1", 807 | "pkg/apis/certificates", 808 | "pkg/apis/certificates/install", 809 | "pkg/apis/certificates/v1beta1", 810 | "pkg/apis/componentconfig", 811 | "pkg/apis/componentconfig/install", 812 | "pkg/apis/componentconfig/v1alpha1", 813 | "pkg/apis/core", 814 | "pkg/apis/core/helper", 815 | 
"pkg/apis/core/helper/qos", 816 | "pkg/apis/core/install", 817 | "pkg/apis/core/pods", 818 | "pkg/apis/core/v1", 819 | "pkg/apis/core/v1/helper", 820 | "pkg/apis/core/v1/helper/qos", 821 | "pkg/apis/core/validation", 822 | "pkg/apis/events", 823 | "pkg/apis/events/install", 824 | "pkg/apis/events/v1beta1", 825 | "pkg/apis/extensions", 826 | "pkg/apis/extensions/install", 827 | "pkg/apis/extensions/v1beta1", 828 | "pkg/apis/networking", 829 | "pkg/apis/networking/install", 830 | "pkg/apis/networking/v1", 831 | "pkg/apis/policy", 832 | "pkg/apis/policy/install", 833 | "pkg/apis/policy/v1beta1", 834 | "pkg/apis/rbac", 835 | "pkg/apis/rbac/install", 836 | "pkg/apis/rbac/v1", 837 | "pkg/apis/rbac/v1alpha1", 838 | "pkg/apis/rbac/v1beta1", 839 | "pkg/apis/scheduling", 840 | "pkg/apis/scheduling/install", 841 | "pkg/apis/scheduling/v1alpha1", 842 | "pkg/apis/settings", 843 | "pkg/apis/settings/install", 844 | "pkg/apis/settings/v1alpha1", 845 | "pkg/apis/storage", 846 | "pkg/apis/storage/install", 847 | "pkg/apis/storage/util", 848 | "pkg/apis/storage/v1", 849 | "pkg/apis/storage/v1alpha1", 850 | "pkg/apis/storage/v1beta1", 851 | "pkg/capabilities", 852 | "pkg/client/clientset_generated/internalclientset", 853 | "pkg/client/clientset_generated/internalclientset/scheme", 854 | "pkg/client/clientset_generated/internalclientset/typed/admissionregistration/internalversion", 855 | "pkg/client/clientset_generated/internalclientset/typed/apps/internalversion", 856 | "pkg/client/clientset_generated/internalclientset/typed/authentication/internalversion", 857 | "pkg/client/clientset_generated/internalclientset/typed/authorization/internalversion", 858 | "pkg/client/clientset_generated/internalclientset/typed/autoscaling/internalversion", 859 | "pkg/client/clientset_generated/internalclientset/typed/batch/internalversion", 860 | "pkg/client/clientset_generated/internalclientset/typed/certificates/internalversion", 861 | "pkg/client/clientset_generated/internalclientset/typed/core/internalversion", 862 | "pkg/client/clientset_generated/internalclientset/typed/events/internalversion", 863 | "pkg/client/clientset_generated/internalclientset/typed/extensions/internalversion", 864 | "pkg/client/clientset_generated/internalclientset/typed/networking/internalversion", 865 | "pkg/client/clientset_generated/internalclientset/typed/policy/internalversion", 866 | "pkg/client/clientset_generated/internalclientset/typed/rbac/internalversion", 867 | "pkg/client/clientset_generated/internalclientset/typed/scheduling/internalversion", 868 | "pkg/client/clientset_generated/internalclientset/typed/settings/internalversion", 869 | "pkg/client/clientset_generated/internalclientset/typed/storage/internalversion", 870 | "pkg/client/leaderelectionconfig", 871 | "pkg/cloudprovider", 872 | "pkg/controller", 873 | "pkg/controller/daemon", 874 | "pkg/controller/daemon/util", 875 | "pkg/controller/deployment/util", 876 | "pkg/controller/history", 877 | "pkg/controller/statefulset", 878 | "pkg/controller/volume/events", 879 | "pkg/controller/volume/persistentvolume", 880 | "pkg/controller/volume/persistentvolume/metrics", 881 | "pkg/credentialprovider", 882 | "pkg/features", 883 | "pkg/fieldpath", 884 | "pkg/kubectl", 885 | "pkg/kubectl/apps", 886 | "pkg/kubectl/categories", 887 | "pkg/kubectl/cmd/templates", 888 | "pkg/kubectl/cmd/util", 889 | "pkg/kubectl/cmd/util/openapi", 890 | "pkg/kubectl/cmd/util/openapi/validation", 891 | "pkg/kubectl/plugins", 892 | "pkg/kubectl/resource", 893 | "pkg/kubectl/scheme", 894 | "pkg/kubectl/util", 
895 | "pkg/kubectl/util/hash", 896 | "pkg/kubectl/util/slice", 897 | "pkg/kubectl/util/term", 898 | "pkg/kubectl/util/transport", 899 | "pkg/kubectl/validation", 900 | "pkg/kubelet/apis", 901 | "pkg/kubelet/types", 902 | "pkg/master/ports", 903 | "pkg/printers", 904 | "pkg/printers/internalversion", 905 | "pkg/registry/rbac/validation", 906 | "pkg/scheduler", 907 | "pkg/scheduler/algorithm", 908 | "pkg/scheduler/algorithm/predicates", 909 | "pkg/scheduler/algorithm/priorities", 910 | "pkg/scheduler/algorithm/priorities/util", 911 | "pkg/scheduler/algorithmprovider", 912 | "pkg/scheduler/algorithmprovider/defaults", 913 | "pkg/scheduler/api", 914 | "pkg/scheduler/api/validation", 915 | "pkg/scheduler/core", 916 | "pkg/scheduler/factory", 917 | "pkg/scheduler/metrics", 918 | "pkg/scheduler/schedulercache", 919 | "pkg/scheduler/util", 920 | "pkg/scheduler/volumebinder", 921 | "pkg/security/apparmor", 922 | "pkg/serviceaccount", 923 | "pkg/util/file", 924 | "pkg/util/goroutinemap", 925 | "pkg/util/goroutinemap/exponentialbackoff", 926 | "pkg/util/hash", 927 | "pkg/util/interrupt", 928 | "pkg/util/io", 929 | "pkg/util/labels", 930 | "pkg/util/metrics", 931 | "pkg/util/mount", 932 | "pkg/util/net/sets", 933 | "pkg/util/node", 934 | "pkg/util/nsenter", 935 | "pkg/util/parsers", 936 | "pkg/util/pointer", 937 | "pkg/util/slice", 938 | "pkg/util/taints", 939 | "pkg/version", 940 | "pkg/volume", 941 | "pkg/volume/util", 942 | "pkg/volume/util/fs", 943 | "pkg/volume/util/recyclerclient", 944 | "pkg/volume/util/types" 945 | ] 946 | revision = "32ac1c9073b132b8ba18aa830f46b77dcceb0723" 947 | version = "v1.10.5" 948 | 949 | [[projects]] 950 | branch = "master" 951 | name = "k8s.io/utils" 952 | packages = ["exec"] 953 | revision = "733eca437aa39379e4bcc25e726439dfca40fcff" 954 | 955 | [[projects]] 956 | branch = "master" 957 | name = "vbom.ml/util" 958 | packages = ["sortorder"] 959 | revision = "256737ac55c46798123f754ab7d2c784e2c71783" 960 | 961 | [solve-meta] 962 | analyzer-name = "dep" 963 | analyzer-version = 1 964 | inputs-digest = "b919b5572a20e63d1e4012b4f068cc3aaf040e12987f856d28224508f22417ad" 965 | solver-name = "gps-cdcl" 966 | solver-version = 1 967 | -------------------------------------------------------------------------------- /Gopkg.toml: -------------------------------------------------------------------------------- 1 | # Gopkg.toml example 2 | # 3 | # Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md 4 | # for detailed Gopkg.toml documentation. 
5 | # 6 | # required = ["github.com/user/thing/cmd/thing"] 7 | # ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"] 8 | # 9 | # [[constraint]] 10 | # name = "github.com/user/project" 11 | # version = "1.0.0" 12 | # 13 | # [[constraint]] 14 | # name = "github.com/user/project2" 15 | # branch = "dev" 16 | # source = "github.com/myfork/project2" 17 | # 18 | # [[override]] 19 | # name = "github.com/x/y" 20 | # version = "2.4.0" 21 | 22 | [[constraint]] 23 | name = "k8s.io/kubernetes" 24 | version = "1.10.0" 25 | 26 | [[constraint]] 27 | name = "k8s.io/client-go" 28 | version = "7.0.0" 29 | 30 | [[constraint]] 31 | name = "k8s.io/api" 32 | version = "kubernetes-1.10.5" 33 | 34 | [[constraint]] 35 | name = "k8s.io/apimachinery" 36 | version = "kubernetes-1.10.5" 37 | 38 | [[override]] 39 | name = "k8s.io/apiserver" 40 | version = "kubernetes-1.10.5" 41 | 42 | [[constraint]] 43 | name = "k8s.io/autoscaler" 44 | version = "cluster-autoscaler-1.2.2" 45 | 46 | [[override]] 47 | name = "github.com/docker/distribution" 48 | revision = "f0cc927784781fa395c06317c58dea2841ece3a9" 49 | 50 | [[override]] 51 | name = "github.com/json-iterator/go" 52 | revision = "f2b4162afba35581b6d4a50d3b8f34e33c144682" 53 | 54 | [[override]] 55 | name = "github.com/russross/blackfriday" 56 | revision = "300106c228d52c8941d4b3de6054a6062a86dda3" 57 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | include .env 2 | BINARY := k8s-spot-rescheduler 3 | VERSION := $(shell git describe --always --dirty --tags 2>/dev/null || echo "undefined") 4 | 5 | RED := \033[31m 6 | GREEN := \033[32m 7 | NC := \033[0m 8 | 9 | IMG ?= quay.io/pusher/k8s-spot-rescheduler 10 | 11 | .NOTPARALLEL: 12 | 13 | .PHONY: all 14 | all: distclean test build 15 | 16 | .PHONY: build 17 | build: clean $(BINARY) 18 | 19 | .PHONY: clean 20 | clean: 21 | rm -f $(BINARY) 22 | 23 | .PHONY: distclean 24 | distclean: clean 25 | rm -rf vendor 26 | rm -rf release 27 | 28 | .PHONY: fmt 29 | fmt: 30 | $(GO) fmt ./... 31 | 32 | .PHONY: vet 33 | vet: vendor 34 | $(GO) vet ./... 35 | 36 | .PHONY: lint 37 | lint: vendor 38 | @ echo "$(GREEN)Linting code$(NC)" 39 | $(LINTER) run --disable-all \ 40 | --exclude-use-default=false \ 41 | --enable=govet \ 42 | --enable=ineffassign \ 43 | --enable=deadcode \ 44 | --enable=golint \ 45 | --enable=goconst \ 46 | --enable=gofmt \ 47 | --enable=goimports \ 48 | --skip-dirs=pkg/client/ \ 49 | --deadline=120s \ 50 | --tests ./... 51 | @ echo 52 | 53 | vendor: 54 | @ echo "$(GREEN)Pulling dependencies$(NC)" 55 | $(DEP) ensure --vendor-only 56 | @ echo 57 | 58 | .PHONY: test 59 | test: vendor 60 | @ echo "$(GREEN)Running test suite$(NC)" 61 | $(GO) test ./... 62 | @ echo 63 | 64 | .PHONY: check 65 | check: fmt lint vet test 66 | 67 | .PHONY: build 68 | build: clean $(BINARY) 69 | 70 | $(BINARY): fmt vet 71 | CGO_ENABLED=0 $(GO) build -o $(BINARY) -ldflags="-X main.VERSION=${VERSION}" github.com/pusher/k8s-spot-rescheduler 72 | 73 | .PHONY: docker-build 74 | docker-build: check 75 | docker build --build-arg VERSION=${VERSION} . -t ${IMG}:${VERSION} 76 | @echo "$(GREEN)Built $(IMG):$(VERSION)$(NC)" 77 | 78 | TAGS ?= latest 79 | .PHONY: docker-tag 80 | docker-tag: docker-build 81 | @IFS=","; tags=${TAGS}; for tag in $${tags}; do docker tag ${IMG}:${VERSION} ${IMG}:$${tag}; echo "$(GREEN)Tagged $(IMG):$(VERSION) as $${tag}$(NC)"; done 82 | 83 | PUSH_TAGS ?= ${VERSION}, latest 84 | .PHONY: docker-push 85 | docker-push: docker-build docker-tag 86 | @IFS=","; tags=${PUSH_TAGS}; for tag in $${tags}; do docker push ${IMG}:$${tag}; echo "$(GREEN)Pushed $(IMG):$${tag}$(NC)"; done 87 | 88 | TAGS ?= latest 89 | .PHONY: docker-clean 90 | docker-clean: 91 | @IFS=","; tags=${TAGS}; for tag in $${tags}; do docker rmi -f ${IMG}:${VERSION} ${IMG}:$${tag}; done 92 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # K8s Spot Rescheduler 2 | 3 | > NOTE: this repository is currently **UNMAINTAINED** and is looking for new owner(s). 4 | > See [#74](/../../issues/74) for more information. 5 | 6 | ## Table of contents 7 | * [Introduction](#introduction) 8 | * [Motivation](#motivation) 9 | * [Usage](#usage) 10 | * [Scope of the project](#scope-of-the-project) 11 | * [Operating logic](#operating-logic) 12 | * [Related](#related) 13 | * [Communication](#communication) 14 | * [Contributing](#contributing) 15 | * [License](#license) 16 | 17 | ## Introduction 18 | 19 | K8s Spot rescheduler is a tool that tries to reduce load on a set of Kubernetes nodes. 
It was designed with the purpose of moving Pods scheduled on AWS on-demand instances to AWS spot instances to allow the on-demand instances to be safely scaled down (by the [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler)). 20 | 21 | In reality, the rescheduler can be used to remove load from any group of nodes onto a different group of nodes. They just need to be labelled appropriately. 22 | 23 | For example, it could also be used to allow controller nodes to take up slack while new nodes are being scaled up, and then to reschedule those pods when the new capacity becomes available, thus reducing the load on the controllers once again. 24 | 25 | ## Attribution 26 | This project was inspired by the [Critical Pod Rescheduler](https://github.com/kubernetes/contrib/tree/master/rescheduler) and takes portions of code from both the [Critical Pod Rescheduler](https://github.com/kubernetes/contrib/tree/master/rescheduler) and the [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler). 27 | 28 | ## Motivation 29 | 30 | AWS spot instances are a great way to reduce your infrastructure running costs. They do, however, come with a significant drawback: at any point, the spot price for the instances you are using could rise above your bid and your instances will be terminated. To solve this problem, you can use an AutoScaling group backed by on-demand instances and managed by the [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) to take up the slack when spot instances are removed from your cluster. 31 | 32 | The problem, however, comes when the spot price drops and you are given new spot instances back into your cluster. At this point you are left with empty spot instances and full, expensive on-demand instances. 33 | 34 | By tainting the on-demand instances with the Kubernetes `PreferNoSchedule` taint, we can ensure that, if at any point the scheduler needs to choose between spot and on-demand instances, it will choose the preferred spot instances to schedule the new Pods onto. 35 | 36 | However, the scheduler won't reschedule Pods that are already running on on-demand instances, blocking them from being scaled down. At this point, the K8s Spot Rescheduler is required to start the process of moving Pods from the on-demand instances back onto the spot instances. 37 | 38 | ## Usage 39 | 40 | ### Deploy to Kubernetes 41 | A Docker image is available at `quay.io/pusher/k8s-spot-rescheduler`. 42 | These images are currently built on pushes to master. Releases will be tagged as and when they are made. 43 | 44 | Sample Kubernetes manifests are available in the [deploy](deploy/) folder. 45 | 46 | To deploy in clusters using RBAC, please apply all of the manifests (Deployment, ClusterRole, ClusterRoleBinding and ServiceAccount) in the [deploy](deploy/) folder and uncomment the `serviceAccountName` in the [deployment](deploy/deployment.yaml). 47 | 48 | #### Requirements 49 | 50 | For the K8s Spot Rescheduler to process nodes as expected, you will need identifying labels which can be passed to the program to allow it to distinguish which nodes it should consider as on-demand and which it should consider as spot instances. 51 | 52 | For instance, you could add labels `node-role.kubernetes.io/worker` and `node-role.kubernetes.io/spot-worker` to your on-demand and spot instances respectively. 
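One way to apply these labels to existing nodes is with `kubectl`; a minimal sketch, where the node names are placeholders:

```bash
# Label an existing on-demand node and an existing spot node
kubectl label node <on-demand-node-name> node-role.kubernetes.io/worker=true
kubectl label node <spot-node-name> node-role.kubernetes.io/spot-worker=true
```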
53 | 54 | You should also add the `PreferNoSchedule` taint to your on-demand instances to ensure that the scheduler prefers spot instances when making its scheduling decisions. 55 | 56 | For example, you could add the following flags to your Kubelet: 57 | ``` 58 | --register-with-taints="node-role.kubernetes.io/worker=true:PreferNoSchedule" 59 | --node-labels="node-role.kubernetes.io/worker=true" 60 | ``` 61 | 62 | ### Building 63 | If you wish to build the binary yourself, first make sure you have Go installed and set up. Then clone this repo into your `$GOPATH` and download the dependencies using [`dep`](https://github.com/golang/dep). 64 | 65 | ```bash 66 | cd $GOPATH/src/github.com # Create this directory if it doesn't exist 67 | git clone git@github.com:pusher/k8s-spot-rescheduler pusher/k8s-spot-rescheduler 68 | dep ensure -v # Installs dependencies to vendor folder. 69 | ``` 70 | 71 | Then build the code using `go build`, which will produce the built binary in a file `k8s-spot-rescheduler`. 72 | 73 | ### Flags 74 | `-v` (default: 0): The log verbosity level the program should run in, currently numeric with values between 2 & 4; `-v=2` is recommended. 75 | 76 | `--running-in-cluster` (default: `true`): Optional. If this controller is running in a Kubernetes cluster, use the pod secrets for creating a Kubernetes client. 77 | 78 | `--namespace` (default: `kube-system`): Namespace in which k8s-spot-rescheduler is run. 79 | 80 | `--kube-api-content-type` (default: `application/vnd.kubernetes.protobuf`): Content type of requests sent to apiserver. 81 | 82 | `--housekeeping-interval` (default: 10s): How often the rescheduler takes actions. 83 | 84 | `--node-drain-delay` (default: 10m): How long the rescheduler should wait between draining nodes. 85 | 86 | `--pod-eviction-timeout` (default: 2m): How long the rescheduler should attempt to retrieve successful pod evictions for. 87 | 88 | `--max-graceful-termination` (default: 2m): How long the rescheduler should wait for pods to shut down gracefully before failing the node drain attempt. 89 | 90 | `--listen-address` (default: `localhost:9235`): Address to listen on for serving Prometheus metrics. 91 | 92 | `--on-demand-node-label` (default: `node-role.kubernetes.io/worker`): Name of the label on nodes to be considered for draining. 93 | 94 | `--spot-node-label` (default: `node-role.kubernetes.io/spot-worker`): Name of the label on nodes to be considered as targets for pods. 95 | 96 | `--delete-non-replicated-pods` (default: `false`): Delete non-replicated pods running on on-demand instances. Note that some non-replicated pods will not be rescheduled. 
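As an illustration, a minimal sketch of running the binary locally with some of these flags (the label values shown are simply the defaults listed above):

```bash
# Watch the cluster pointed to by your local kubeconfig, from outside the cluster
./k8s-spot-rescheduler \
  --running-in-cluster=false \
  -v=2 \
  --housekeeping-interval=10s \
  --on-demand-node-label=node-role.kubernetes.io/worker \
  --spot-node-label=node-role.kubernetes.io/spot-worker
```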
97 | 98 | ## Scope of the project 99 | ### Does 100 | * Looks for Pods on on-demand instances 101 | * Looks for space for Pods on spot instances 102 | * Checks the following [predicates](https://github.com/kubernetes/kubernetes/blob/v1.8.0-alpha.3/plugin/pkg/scheduler/algorithm/predicates/predicates.go) when determining whether a pod can be moved: 103 | * CheckNodeMemoryPressure 104 | * CheckNodeDiskPressure 105 | * GeneralPredicates 106 | * MaxAzureDiskVolumeCount 107 | * MaxGCEPDVolumeCount 108 | * NoDiskConflict 109 | * MatchInterPodAffinity 110 | * PodToleratesNodeTaints 111 | * MaxEBSVolumeCount 112 | * NoVolumeZoneConflict 113 | * ready 114 | * Checks whether there is enough capacity to move all pods on the on-demand node to spot nodes 115 | * Evicts all pods on the node if the previous check passes 116 | * Leaves the node in a schedulable state - in case its capacity is required again 117 | 118 | 119 | ### Does not 120 | * Schedule pods (the default scheduler handles this) 121 | * Scale down empty nodes on your cloud provider (try the [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler)) 122 | 123 | ## Operating logic 124 | 125 | The rescheduler logic roughly follows the steps below: 126 | 127 | 1. Gets a list of on-demand and spot nodes and their respective Pods 128 | * Builds a map of nodeInfo structs 129 | * Add node to struct 130 | * Add pods for that node to struct 131 | * Add requested and free CPU fields to struct 132 | * Map these structs based on whether they are on-demand or spot instances. 133 | * Sort on-demand instances by least requested CPU 134 | * Sort spot instances by most free CPU 135 | 2. Iterate through each on-demand node and try to drain it 136 | * Iterate through each pod 137 | * Determine if a spot node has space for the pod 138 | * Add the pod to the prospective spot node 139 | * Move on to the next node if no spot node space is available 140 | * Drain the node 141 | * Iterate through pods and evict them in turn 142 | * Evict pod 143 | * Wait for deletion and reschedule 144 | * Cancel all further processing 145 | 146 | This process is repeated every `housekeeping-interval` seconds. 147 | 148 | The effect of this algorithm should be that we take the emptiest nodes first and empty those before we empty a node which is busier, thus resulting in the highest number of 'empty' nodes that can be removed from the cluster. 149 | 150 | ## Related 151 | - [K8s Spot Termination Handler](https://github.com/pusher/k8s-spot-termination-handler): Gracefully drain spot instances when they are issued with a termination notice. 152 | 153 | ## Communication 154 | 155 | * Found a bug? Please open an issue. 156 | * Have a feature request? Please open an issue. 157 | * If you want to contribute, please submit a pull request. 158 | 159 | ## Contributing 160 | Please see our [Contributing](CONTRIBUTING.md) guidelines. 161 | 162 | ## License 163 | This project is licensed under Apache 2.0 and a copy of the license is available [here](https://github.com/pusher/k8s-spot-rescheduler/blob/master/LICENSE). 
164 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if (( ${BASH_VERSION:0:1} < 4 )); then 4 | echo "This configure script requires bash 4" 5 | exit 1 6 | fi 7 | 8 | RED='\033[0;31m' 9 | GREEN='\033[0;32m' 10 | BLUE='\033[0;34m' 11 | NC='\033[0m' 12 | 13 | declare -A tools=() 14 | 15 | vercomp () { 16 | if [[ $1 == $2 ]] 17 | then 18 | return 0 19 | fi 20 | local IFS=. 21 | local i ver1=($1) ver2=($2) 22 | # fill empty fields in ver1 with zeros 23 | for ((i=${#ver1[@]}; i<${#ver2[@]}; i++)) 24 | do 25 | ver1[i]=0 26 | done 27 | for ((i=0; i<${#ver1[@]}; i++)) 28 | do 29 | if [[ -z ${ver2[i]} ]] 30 | then 31 | # fill empty fields in ver2 with zeros 32 | ver2[i]=0 33 | fi 34 | if ((10#${ver1[i]} > 10#${ver2[i]})) 35 | then 36 | return 1 37 | fi 38 | if ((10#${ver1[i]} < 10#${ver2[i]})) 39 | then 40 | return 2 41 | fi 42 | done 43 | return 0 44 | } 45 | 46 | check_for() { 47 | echo -n "Checking for $1... " 48 | TOOL_PATH=$(command -v $1) 49 | if ! [ -x "$TOOL_PATH" -a -f "$TOOL_PATH" ]; then 50 | printf "${RED}not found${NC}\n" 51 | cd - > /dev/null 52 | exit 1 53 | else 54 | printf "${GREEN}found${NC}\n" 55 | tools[$1]=$TOOL_PATH 56 | fi 57 | } 58 | 59 | check_go_env() { 60 | echo -n "Checking \$GOPATH... " 61 | if [ -z "$GOPATH" ]; then 62 | printf "${RED}invalid${NC} - GOPATH not set\n" 63 | exit 1 64 | fi 65 | printf "${GREEN}valid${NC} - $GOPATH\n" 66 | } 67 | 68 | check_go_version() { 69 | echo -n "Checking go version... " 70 | GO_VERSION=$(${tools[go]} version | ${tools[awk]} '{where = match($0, /[0-9]\.[0-9]+[\.0-9]*/); if (where != 0) print substr($0, RSTART, RLENGTH)}') 71 | vercomp $GO_VERSION 1.10 72 | case $? 
in 73 | 0) ;& 74 | 1) 75 | printf "${GREEN}" 76 | echo $GO_VERSION 77 | printf "${NC}" 78 | ;; 79 | 2) 80 | printf "${RED}" 81 | echo "$GO_VERSION < 1.10" 82 | exit 1 83 | ;; 84 | esac 85 | } 86 | 87 | cd ${0%/*} 88 | 89 | check_for make 90 | check_for awk 91 | check_for go 92 | check_for dep 93 | check_for golangci-lint 94 | check_for shasum 95 | check_go_env 96 | check_go_version 97 | 98 | cat <<- EOF > .env 99 | MAKE := ${tools[make]} 100 | SHASUM := ${tools[shasum]} 101 | GO := ${tools[go]} 102 | GOVERSION := $GO_VERSION 103 | DEP := ${tools[dep]} 104 | LINTER := ${tools[golangci-lint]} 105 | EOF 106 | 107 | echo "Environment configuration written to .env" 108 | -------------------------------------------------------------------------------- /deploy/clusterrole.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: ClusterRole 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | metadata: 5 | name: k8s-spot-rescheduler 6 | rules: 7 | # For leader election 8 | - apiGroups: 9 | - "" 10 | resources: 11 | - endpoints 12 | verbs: 13 | - get 14 | - update 15 | - create 16 | - apiGroups: 17 | - "" 18 | resources: 19 | - events 20 | verbs: 21 | - create 22 | - patch 23 | 24 | # For listing and watching items 25 | - apiGroups: 26 | - "" 27 | resources: 28 | - nodes 29 | - pods 30 | - replicasets 31 | - replicationcontrollers 32 | - services 33 | - statefulsets 34 | - poddisruptionbudgets 35 | - persistentvolumes 36 | - persistentvolumeclaims 37 | verbs: 38 | - list 39 | - get 40 | - watch 41 | - apiGroups: 42 | - apps 43 | resources: 44 | - statefulsets 45 | verbs: 46 | - list 47 | - get 48 | - watch 49 | - apiGroups: 50 | - extensions 51 | resources: 52 | - replicasets 53 | verbs: 54 | - list 55 | - get 56 | - watch 57 | - apiGroups: 58 | - policy 59 | resources: 60 | - poddisruptionbudgets 61 | verbs: 62 | - list 63 | - get 64 | - watch 65 | 66 | # For rescheduling pods 67 | - apiGroups: 68 | - "" 69 | resources: 70 | - nodes 71 | verbs: 72 | - update 73 | - apiGroups: 74 | - "" 75 | resources: 76 | - pods/eviction 77 | verbs: 78 | - create 79 | 80 | - apiGroups: 81 | - storage.k8s.io 82 | resources: 83 | - storageclasses 84 | verbs: 85 | - list 86 | - get 87 | - watch 88 | -------------------------------------------------------------------------------- /deploy/clusterrolebinding.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRoleBinding 4 | metadata: 5 | name: k8s-spot-rescheduler 6 | roleRef: 7 | apiGroup: rbac.authorization.k8s.io 8 | kind: ClusterRole 9 | name: k8s-spot-rescheduler 10 | subjects: 11 | - kind: ServiceAccount 12 | name: k8s-spot-rescheduler 13 | namespace: kube-system 14 | -------------------------------------------------------------------------------- /deploy/deployment.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: extensions/v1beta1 3 | kind: Deployment 4 | metadata: 5 | name: k8s-spot-rescheduler 6 | namespace: kube-system 7 | labels: 8 | app: k8s-spot-rescheduler 9 | spec: 10 | replicas: 2 11 | selector: 12 | matchLabels: 13 | app: k8s-spot-rescheduler 14 | template: 15 | metadata: 16 | labels: 17 | app: k8s-spot-rescheduler 18 | spec: 19 | # Uncomment the following line if using RBAC 20 | #serviceAccountName: k8s-spot-rescheduler 21 | containers: 22 | - image: quay.io/pusher/k8s-spot-rescheduler:v0.3.0 23 | name: k8s-spot-rescheduler 24 | resources: 25 
| limits: 26 | cpu: 20m 27 | memory: 100Mi 28 | requests: 29 | cpu: 10m 30 | memory: 50Mi 31 | command: 32 | - k8s-spot-rescheduler 33 | - -v=2 34 | - --running-in-cluster=true 35 | - --namespace=kube-system 36 | - --housekeeping-interval=10s 37 | - --node-drain-delay=10m 38 | - --pod-eviction-timeout=2m 39 | - --max-graceful-termination=2m 40 | - --listen-address=0.0.0.0:9235 41 | - --on-demand-node-label=node-role.kubernetes.io/worker 42 | - --spot-node-label=node-role.kubernetes.io/spot-worker 43 | ports: 44 | - name: http 45 | containerPort: 9235 46 | -------------------------------------------------------------------------------- /deploy/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | labels: 6 | k8s-app: k8s-spot-rescheduler 7 | name: k8s-spot-rescheduler 8 | namespace: kube-system 9 | -------------------------------------------------------------------------------- /metrics/metrics.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 Pusher Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package metrics 18 | 19 | import ( 20 | "github.com/prometheus/client_golang/prometheus" 21 | "github.com/pusher/k8s-spot-rescheduler/nodes" 22 | ) 23 | 24 | const ( 25 | reschedulerNamespace = "spot_rescheduler" 26 | ) 27 | 28 | var ( 29 | // nodePodsCount tracks how many pods are nodes by type and by node name. 30 | nodePodsCount = prometheus.NewCounterVec( 31 | prometheus.CounterOpts{ 32 | Namespace: reschedulerNamespace, 33 | Name: "node_pods_count", 34 | Help: "Number of pods on each node.", 35 | }, 36 | []string{"node_type", "node"}) 37 | 38 | // nodesCount tracks the number of nodes in the cluster. 39 | nodesCount = prometheus.NewGaugeVec( 40 | prometheus.GaugeOpts{ 41 | Namespace: reschedulerNamespace, 42 | Name: "nodes_count", 43 | Help: "Number of nodes in cluster.", 44 | }, []string{"node_type"}, 45 | ) 46 | 47 | // nodeDrainCount counts the number of nodes drained by the rescheduler. 
48 | nodeDrainCount = prometheus.NewCounterVec( 49 | prometheus.CounterOpts{ 50 | Namespace: reschedulerNamespace, 51 | Name: "node_drain_total", 52 | Help: "Number of nodes drained by rescheduler.", 53 | }, []string{"drain_state", "node"}, 54 | ) 55 | 56 | // evictionsCount counts the number of pods evicted by the rescheduler 57 | evictionsCount = prometheus.NewCounter( 58 | prometheus.CounterOpts{ 59 | Namespace: reschedulerNamespace, 60 | Name: "evicted_pods_total", 61 | Help: "Number of pods evicted by the rescheduler.", 62 | }, 63 | ) 64 | ) 65 | 66 | func init() { 67 | prometheus.MustRegister(nodePodsCount) 68 | prometheus.MustRegister(nodesCount) 69 | prometheus.MustRegister(nodeDrainCount) 70 | prometheus.MustRegister(evictionsCount) 71 | } 72 | 73 | // UpdateNodesMap updates the metrics calculated by the nodes map 74 | func UpdateNodesMap(nm nodes.Map) { 75 | if nm == nil { 76 | return 77 | } 78 | nodesCount.WithLabelValues(nodes.OnDemandNodeLabel).Set(float64(len(nm[nodes.OnDemand]))) 79 | nodesCount.WithLabelValues(nodes.SpotNodeLabel).Set(float64(len(nm[nodes.Spot]))) 80 | 81 | } 82 | 83 | // UpdateNodePodsCount updates nodePodsCount for a given node 84 | func UpdateNodePodsCount(nodeType string, nodeName string, numPods int) { 85 | nodePodsCount.WithLabelValues(nodeType, nodeName).Set(float64(numPods)) 86 | } 87 | 88 | // UpdateEvictionsCount adds 1 to the evictions counter 89 | func UpdateEvictionsCount() { 90 | evictionsCount.Add(1) 91 | } 92 | 93 | // UpdateNodeDrainCount updates the number drains and drain state for a node 94 | func UpdateNodeDrainCount(state string, nodeName string) { 95 | nodeDrainCount.WithLabelValues(state, nodeName).Add(1) 96 | } 97 | -------------------------------------------------------------------------------- /nodes/nodes.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 Pusher Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package nodes 18 | 19 | import ( 20 | "sort" 21 | "strings" 22 | 23 | apiv1 "k8s.io/api/core/v1" 24 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 25 | "k8s.io/apimachinery/pkg/fields" 26 | kube_client "k8s.io/client-go/kubernetes" 27 | ) 28 | 29 | var ( 30 | // OnDemandNodeLabel label for on-demand instances. 31 | OnDemandNodeLabel = "kubernetes.io/role=worker" 32 | // SpotNodeLabel label for spot instances. 33 | SpotNodeLabel = "kubernetes.io/role=spot-worker" 34 | // OnDemand key for on-demand instances of NodesMap. 35 | OnDemand NodeType 36 | // Spot key for spot instances of NodesMap. 37 | Spot NodeType = 1 38 | ) 39 | 40 | // NodeInfo struct containing node and it's pods as well information 41 | // resources on the node. 42 | type NodeInfo struct { 43 | Node *apiv1.Node 44 | Pods []*apiv1.Pod 45 | RequestedCPU int64 46 | FreeCPU int64 47 | } 48 | 49 | // NodeType integer key for keying NodesMap. 50 | type NodeType int 51 | 52 | // NodeInfoArray array of NodeInfo pointers. 
53 | type NodeInfoArray []*NodeInfo 54 | 55 | // Map map of NodeInfoArray. 56 | type Map map[NodeType]NodeInfoArray 57 | 58 | // NewNodeMap creates a new NodesMap from a list of Nodes. 59 | func NewNodeMap(client kube_client.Interface, nodes []*apiv1.Node) (Map, error) { 60 | nodeMap := Map{ 61 | OnDemand: make([]*NodeInfo, 0), 62 | Spot: make([]*NodeInfo, 0), 63 | } 64 | 65 | for _, node := range nodes { 66 | nodeInfo, err := newNodeInfo(client, node) 67 | if err != nil { 68 | return nil, err 69 | } 70 | 71 | // Sort pods with biggest CPU request first 72 | sort.Slice(nodeInfo.Pods, func(i, j int) bool { 73 | iCPU := getPodCPURequests(nodeInfo.Pods[i]) 74 | jCPU := getPodCPURequests(nodeInfo.Pods[j]) 75 | return iCPU > jCPU 76 | }) 77 | 78 | switch true { 79 | case isSpotNode(node): 80 | nodeMap[Spot] = append(nodeMap[Spot], nodeInfo) 81 | continue 82 | case isOnDemandNode(node): 83 | nodeMap[OnDemand] = append(nodeMap[OnDemand], nodeInfo) 84 | continue 85 | default: 86 | continue 87 | } 88 | } 89 | 90 | // Sort spot nodes by most requested CPU first 91 | sort.Slice(nodeMap[Spot], func(i, j int) bool { 92 | return nodeMap[Spot][i].RequestedCPU > nodeMap[Spot][j].RequestedCPU 93 | }) 94 | // Sort on-demand nodes by least requested CPU first 95 | sort.Slice(nodeMap[OnDemand], func(i, j int) bool { 96 | return nodeMap[OnDemand][i].RequestedCPU < nodeMap[OnDemand][j].RequestedCPU 97 | }) 98 | 99 | return nodeMap, nil 100 | } 101 | 102 | func newNodeInfo(client kube_client.Interface, node *apiv1.Node) (*NodeInfo, error) { 103 | pods, err := getPodsOnNode(client, node) 104 | if err != nil { 105 | return nil, err 106 | } 107 | requestedCPU := calculateRequestedCPU(pods) 108 | 109 | return &NodeInfo{ 110 | Node: node, 111 | Pods: pods, 112 | RequestedCPU: requestedCPU, 113 | FreeCPU: node.Status.Allocatable.Cpu().MilliValue() - requestedCPU, 114 | }, nil 115 | } 116 | 117 | // AddPod adds a pod to a NodeInfo and updates the relevant resource values. 118 | func (n *NodeInfo) AddPod(pod *apiv1.Pod) { 119 | n.Pods = append(n.Pods, pod) 120 | n.RequestedCPU = calculateRequestedCPU(n.Pods) 121 | n.FreeCPU = n.Node.Status.Allocatable.Cpu().MilliValue() - n.RequestedCPU 122 | } 123 | 124 | // Gets a list of pods that are running on the given node 125 | func getPodsOnNode(client kube_client.Interface, node *apiv1.Node) ([]*apiv1.Pod, error) { 126 | podsOnNode, err := client.CoreV1().Pods(apiv1.NamespaceAll).List( 127 | metav1.ListOptions{FieldSelector: fields.SelectorFromSet(fields.Set{"spec.nodeName": node.Name}).String()}) 128 | if err != nil { 129 | return []*apiv1.Pod{}, err 130 | } 131 | 132 | pods := make([]*apiv1.Pod, 0) 133 | for i := range podsOnNode.Items { 134 | pods = append(pods, &podsOnNode.Items[i]) 135 | } 136 | return pods, nil 137 | } 138 | 139 | // Works out requested CPU for a collection of pods and returns it in MilliValue 140 | // (Pod requests are stored as MilliValues hence the return type here) 141 | func calculateRequestedCPU(pods []*apiv1.Pod) int64 { 142 | var CPURequests int64 143 | for _, pod := range pods { 144 | CPURequests += getPodCPURequests(pod) 145 | } 146 | return CPURequests 147 | } 148 | 149 | // Returns the total requested CPU for all of the containers in a given Pod. 
150 | // (Returned as MilliValues) 151 | func getPodCPURequests(pod *apiv1.Pod) int64 { 152 | var CPUTotal int64 153 | for _, container := range pod.Spec.Containers { 154 | CPUTotal += container.Resources.Requests.Cpu().MilliValue() 155 | } 156 | return CPUTotal 157 | } 158 | 159 | // Determines if a node has the spotNodeLabel assigned 160 | func isSpotNode(node *apiv1.Node) bool { 161 | splitLabel := strings.SplitN(SpotNodeLabel, "=", 2) 162 | 163 | // If "=" found, check for new label schema. If no "=" is found, check for 164 | // old label schema 165 | switch len(splitLabel) { 166 | case 1: 167 | _, found := node.ObjectMeta.Labels[SpotNodeLabel] 168 | return found 169 | case 2: 170 | spotLabelKey := splitLabel[0] 171 | spotLabelVal := splitLabel[1] 172 | 173 | val, _ := node.ObjectMeta.Labels[spotLabelKey] 174 | if val == spotLabelVal { 175 | return true 176 | } 177 | } 178 | return false 179 | } 180 | 181 | // Determines if a node has the OnDemandNodeLabel assigned 182 | func isOnDemandNode(node *apiv1.Node) bool { 183 | splitLabel := strings.SplitN(OnDemandNodeLabel, "=", 2) 184 | 185 | // If "=" found, check for new label schema. If no "=" is found, check for 186 | // old label schema 187 | switch len(splitLabel) { 188 | case 1: 189 | _, found := node.ObjectMeta.Labels[OnDemandNodeLabel] 190 | return found 191 | case 2: 192 | onDemandLabelKey := splitLabel[0] 193 | onDemandLabelVal := splitLabel[1] 194 | 195 | val, _ := node.ObjectMeta.Labels[onDemandLabelKey] 196 | if val == onDemandLabelVal { 197 | return true 198 | } 199 | } 200 | return false 201 | } 202 | 203 | // CopyNodeInfos returns an array of copies of the NodeInfos in this array. 204 | func (n NodeInfoArray) CopyNodeInfos() NodeInfoArray { 205 | var arr NodeInfoArray 206 | for _, node := range n { 207 | nodeInfo := &NodeInfo{ 208 | Node: node.Node, 209 | Pods: node.Pods, 210 | RequestedCPU: node.RequestedCPU, 211 | FreeCPU: node.FreeCPU, 212 | } 213 | arr = append(arr, nodeInfo) 214 | } 215 | return arr 216 | } 217 | -------------------------------------------------------------------------------- /nodes/nodes_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 Pusher Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package nodes 18 | 19 | import ( 20 | "fmt" 21 | "testing" 22 | 23 | "github.com/stretchr/testify/assert" 24 | apiv1 "k8s.io/api/core/v1" 25 | "k8s.io/apimachinery/pkg/api/resource" 26 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 | "k8s.io/apimachinery/pkg/runtime" 28 | "k8s.io/client-go/kubernetes/fake" 29 | core "k8s.io/client-go/testing" 30 | ) 31 | 32 | func TestIsSpotNode(t *testing.T) { 33 | spotNode := createTestNodeWithLabel("fooSpotNode", 2000, map[string]string{"foo": "bar"}) 34 | 35 | SpotNodeLabel = "foo" 36 | assert.True(t, isSpotNode(spotNode), "expected node with label 'foo' to be spot node") 37 | 38 | SpotNodeLabel = "foo=bar" 39 | assert.True(t, isSpotNode(spotNode), "expected node with label 'foo' and value 'bar' to be spot node") 40 | 41 | SpotNodeLabel = "foo=baz" 42 | assert.False(t, isSpotNode(spotNode), "expected node with label 'foo' and value 'bar' to not be spot node") 43 | } 44 | 45 | func TestIsOnDemandNode(t *testing.T) { 46 | onDemandNode := createTestNodeWithLabel("fooDemandNode", 2000, map[string]string{"foo": "bar"}) 47 | 48 | OnDemandNodeLabel = "foo" 49 | assert.True(t, isOnDemandNode(onDemandNode), "expected node with label 'foo' to be on demand node") 50 | 51 | OnDemandNodeLabel = "foo=bar" 52 | assert.True(t, isOnDemandNode(onDemandNode), "expected node with label 'foo' and value 'bar' to be on demand node") 53 | 54 | OnDemandNodeLabel = "foo=baz" 55 | assert.False(t, isOnDemandNode(onDemandNode), "expected node with label 'foo' and value 'bar' to not be on demand node") 56 | } 57 | 58 | func TestNewNodeMap(t *testing.T) { 59 | OnDemandNodeLabel = "kubernetes.io/role=worker" 60 | SpotNodeLabel = "kubernetes.io/role=spot-worker" 61 | 62 | spotLabels := map[string]string{ 63 | "kubernetes.io/role": "spot-worker", 64 | } 65 | onDemandLabels := map[string]string{ 66 | "kubernetes.io/role": "worker", 67 | } 68 | 69 | nodes := []*apiv1.Node{ 70 | createTestNodeWithLabel("node1", 2000, onDemandLabels), 71 | createTestNodeWithLabel("node2", 2000, onDemandLabels), 72 | createTestNodeWithLabel("node3", 2000, spotLabels), 73 | createTestNodeWithLabel("node4", 2000, spotLabels), 74 | } 75 | 76 | fakeClient := createFakeClient(t) 77 | 78 | nodeMap, err := NewNodeMap(fakeClient, nodes) 79 | if err != nil { 80 | assert.Error(t, err, "Failed to build nodeMap") 81 | } 82 | onDemandNodeInfos := nodeMap[OnDemand] 83 | spotNodeInfos := nodeMap[Spot] 84 | 85 | assert.Equal(t, 2, len(onDemandNodeInfos)) 86 | assert.Equal(t, 2, len(spotNodeInfos)) 87 | 88 | // The first spot node should be the one with least requestedCPU 89 | nodeInfo1 := onDemandNodeInfos[0] 90 | nodeInfo2 := onDemandNodeInfos[1] 91 | if nodeInfo1.RequestedCPU > nodeInfo2.RequestedCPU { 92 | assert.Fail(t, "Spot nodes not sorted by Free CPU") 93 | } 94 | 95 | assert.Equal(t, "node1", nodeInfo1.Node.Name) 96 | assert.Equal(t, 2, len(nodeInfo1.Pods)) 97 | assert.Equal(t, "node2", nodeInfo2.Node.Name) 98 | assert.Equal(t, 3, len(nodeInfo2.Pods)) 99 | 100 | // The first spot node should be the one with least freeCPU 101 | nodeInfo3 := spotNodeInfos[0] 102 | nodeInfo4 := spotNodeInfos[1] 103 | if nodeInfo3.FreeCPU > nodeInfo4.FreeCPU { 104 | assert.Fail(t, "Spot nodes not sorted by Free CPU") 105 | } 106 | 107 | // This means we should get node3 and node2 in this order 108 | assert.Equal(t, "node4", nodeInfo3.Node.Name) 109 | assert.Equal(t, 5, len(nodeInfo3.Pods)) 110 | assert.Equal(t, "node3", nodeInfo4.Node.Name) 111 | assert.Equal(t, 2, len(nodeInfo4.Pods)) 112 | 113 | // Check pods 
are sorted by Most RequestedCPU 114 | for _, nodeInfo := range append(onDemandNodeInfos, spotNodeInfos...) { 115 | for i := 1; i < len(nodeInfo.Pods); i++ { 116 | firstPodRequest := getPodCPURequests(nodeInfo.Pods[i-1]) 117 | secondPodRequest := getPodCPURequests(nodeInfo.Pods[i]) 118 | if firstPodRequest < secondPodRequest { 119 | assert.Fail(t, "Pods not sorted by most requested CPU on node %s", nodeInfo.Node.Name) 120 | } 121 | } 122 | } 123 | 124 | } 125 | 126 | func TestAddPod(t *testing.T) { 127 | 128 | nodeInfo1 := createTestNodeInfo(createTestNode("node1", 2000), []*apiv1.Pod{}, 0) 129 | pod1 := createTestPod("pod1", 300) 130 | nodeInfo1.AddPod(pod1) 131 | 132 | assert.Equal(t, 1, len(nodeInfo1.Pods)) 133 | assert.Equal(t, int64(300), nodeInfo1.RequestedCPU) 134 | assert.Equal(t, int64(1700), nodeInfo1.FreeCPU) 135 | 136 | pod2 := createTestPod("pod2", 721) 137 | nodeInfo1.AddPod(pod2) 138 | 139 | assert.Equal(t, 2, len(nodeInfo1.Pods)) 140 | assert.Equal(t, int64(1021), nodeInfo1.RequestedCPU) 141 | assert.Equal(t, int64(979), nodeInfo1.FreeCPU) 142 | } 143 | 144 | func TestGetPodsOnNode(t *testing.T) { 145 | node1 := createTestNode("node1", 2000) 146 | node2 := createTestNode("node2", 2000) 147 | node3 := createTestNode("node3", 2000) 148 | node4 := createTestNode("node4", 2000) 149 | 150 | fakeClient := createFakeClient(t) 151 | 152 | podsOnNode1, err := getPodsOnNode(fakeClient, node1) 153 | if err != nil { 154 | assert.Error(t, err, "Found error in getting pods on node") 155 | } 156 | assert.Equal(t, 2, len(podsOnNode1)) 157 | assert.Equal(t, "p1n1", podsOnNode1[0].Name) 158 | assert.Equal(t, "p2n1", podsOnNode1[1].Name) 159 | 160 | podsOnNode2, err := getPodsOnNode(fakeClient, node2) 161 | if err != nil { 162 | assert.Error(t, err, "Found error in getting pods on node") 163 | } 164 | assert.Equal(t, 3, len(podsOnNode2)) 165 | assert.Equal(t, "p1n2", podsOnNode2[0].Name) 166 | assert.Equal(t, "p2n2", podsOnNode2[1].Name) 167 | assert.Equal(t, "p3n2", podsOnNode2[2].Name) 168 | 169 | podsOnNode3, err := getPodsOnNode(fakeClient, node3) 170 | if err != nil { 171 | assert.Error(t, err, "Found error in getting pods on node") 172 | } 173 | assert.Equal(t, 2, len(podsOnNode3)) 174 | assert.Equal(t, "p1n3", podsOnNode3[0].Name) 175 | assert.Equal(t, "p2n3", podsOnNode3[1].Name) 176 | 177 | podsOnNode4, err := getPodsOnNode(fakeClient, node4) 178 | if err != nil { 179 | assert.Error(t, err, "Found error in getting pods on node") 180 | } 181 | assert.Equal(t, 5, len(podsOnNode4)) 182 | assert.Equal(t, "p1n4", podsOnNode4[0].Name) 183 | assert.Equal(t, "p2n4", podsOnNode4[1].Name) 184 | assert.Equal(t, "p3n4", podsOnNode4[2].Name) 185 | assert.Equal(t, "p4n4", podsOnNode4[3].Name) 186 | assert.Equal(t, "p5n4", podsOnNode4[4].Name) 187 | 188 | } 189 | 190 | func TestCalculateRequestedCPU(t *testing.T) { 191 | pods1 := []*apiv1.Pod{ 192 | createTestPod("p1n1", 100), 193 | createTestPod("p2n1", 300), 194 | } 195 | pods2 := []*apiv1.Pod{ 196 | createTestPod("p1n2", 500), 197 | createTestPod("p2n2", 300), 198 | } 199 | pods3 := []*apiv1.Pod{ 200 | createTestPod("p1n3", 500), 201 | createTestPod("p2n3", 500), 202 | createTestPod("p3n3", 300), 203 | } 204 | 205 | pods1Request := calculateRequestedCPU(pods1) 206 | assert.Equal(t, int64(400), pods1Request) 207 | 208 | pods2Request := calculateRequestedCPU(pods2) 209 | assert.Equal(t, int64(800), pods2Request) 210 | 211 | pods3Request := calculateRequestedCPU(pods3) 212 | assert.Equal(t, int64(1300), pods3Request) 213 | } 214 | 215 | func 
TestGetPodCPURequests(t *testing.T) { 216 | pod1 := createTestPod("pod1", 100) 217 | pod2 := createTestPod("pod2", 200) 218 | 219 | pod1Request := getPodCPURequests(pod1) 220 | assert.Equal(t, int64(100), pod1Request) 221 | 222 | pod2Request := getPodCPURequests(pod2) 223 | assert.Equal(t, int64(200), pod2Request) 224 | } 225 | 226 | func TestCopyNodeInfos(t *testing.T) { 227 | pods1 := []*apiv1.Pod{ 228 | createTestPod("p1n1", 100), 229 | createTestPod("p2n1", 300), 230 | } 231 | pods2 := []*apiv1.Pod{ 232 | createTestPod("p1n2", 500), 233 | createTestPod("p2n2", 300), 234 | } 235 | pods3 := []*apiv1.Pod{ 236 | createTestPod("p1n3", 500), 237 | createTestPod("p2n3", 500), 238 | createTestPod("p3n3", 300), 239 | } 240 | 241 | pod1 := createTestPod("pod1", 200) 242 | pod2 := createTestPod("pod2", 200) 243 | pod3 := createTestPod("pod3", 200) 244 | 245 | nodeInfos := NodeInfoArray{ 246 | createTestNodeInfo(createTestNode("node1", 2000), pods1, 400), 247 | createTestNodeInfo(createTestNode("node2", 2000), pods2, 800), 248 | createTestNodeInfo(createTestNode("node3", 2000), pods3, 1300), 249 | } 250 | 251 | // Create a copy of the array 252 | nodeInfosCopy := nodeInfos.CopyNodeInfos() 253 | 254 | // Modify the array 255 | nodeInfosCopy[0].AddPod(pod1) 256 | nodeInfosCopy[1].AddPod(pod2) 257 | nodeInfosCopy[2].AddPod(pod3) 258 | 259 | // Check the changes applied 260 | assert.Equal(t, len(pods1)+1, len(nodeInfosCopy[0].Pods)) 261 | assert.Equal(t, len(pods2)+1, len(nodeInfosCopy[1].Pods)) 262 | assert.Equal(t, len(pods3)+1, len(nodeInfosCopy[2].Pods)) 263 | 264 | // Check the original has not changed 265 | assert.Equal(t, len(pods1), len(nodeInfos[0].Pods)) 266 | assert.Equal(t, len(pods2), len(nodeInfos[1].Pods)) 267 | assert.Equal(t, len(pods3), len(nodeInfos[2].Pods)) 268 | } 269 | 270 | func createTestPod(name string, cpu int64) *apiv1.Pod { 271 | pod := &apiv1.Pod{ 272 | ObjectMeta: metav1.ObjectMeta{ 273 | Namespace: "kube-system", 274 | Name: name, 275 | SelfLink: fmt.Sprintf("/api/v1/namespaces/default/pods/%s", name), 276 | }, 277 | Spec: apiv1.PodSpec{ 278 | Containers: []apiv1.Container{ 279 | { 280 | Resources: apiv1.ResourceRequirements{ 281 | Requests: apiv1.ResourceList{ 282 | apiv1.ResourceCPU: *resource.NewMilliQuantity(cpu, resource.DecimalSI), 283 | }, 284 | }, 285 | }, 286 | }, 287 | }, 288 | } 289 | return pod 290 | } 291 | 292 | func createTestNode(name string, cpu int64) *apiv1.Node { 293 | node := &apiv1.Node{ 294 | ObjectMeta: metav1.ObjectMeta{ 295 | Name: name, 296 | }, 297 | Status: apiv1.NodeStatus{ 298 | Capacity: apiv1.ResourceList{ 299 | apiv1.ResourceCPU: *resource.NewMilliQuantity(cpu, resource.DecimalSI), 300 | apiv1.ResourceMemory: *resource.NewQuantity(2*1024*1024*1024, resource.DecimalSI), 301 | apiv1.ResourcePods: *resource.NewQuantity(100, resource.DecimalSI), 302 | }, 303 | Conditions: []apiv1.NodeCondition{ 304 | { 305 | Type: apiv1.NodeReady, 306 | Status: apiv1.ConditionTrue, 307 | }, 308 | }, 309 | }, 310 | } 311 | node.Status.Allocatable = node.Status.Capacity 312 | return node 313 | } 314 | 315 | func createTestNodeWithLabel(name string, cpu int64, labels map[string]string) *apiv1.Node { 316 | node := createTestNode(name, cpu) 317 | node.ObjectMeta.Labels = labels 318 | return node 319 | } 320 | 321 | func createTestNodeInfo(node *apiv1.Node, pods []*apiv1.Pod, requests int64) *NodeInfo { 322 | nodeInfo := &NodeInfo{ 323 | Node: node, 324 | Pods: pods, 325 | RequestedCPU: requests, 326 | FreeCPU: node.Status.Capacity.Cpu().MilliValue() - requests, 
327 | } 328 | return nodeInfo 329 | } 330 | 331 | func createFakeClient(t *testing.T) *fake.Clientset { 332 | pods1 := []apiv1.Pod{ 333 | *createTestPod("p1n1", 100), 334 | *createTestPod("p2n1", 300), 335 | } 336 | pods2 := []apiv1.Pod{ 337 | *createTestPod("p1n2", 500), 338 | *createTestPod("p2n2", 300), 339 | *createTestPod("p3n2", 400), 340 | } 341 | pods3 := []apiv1.Pod{ 342 | *createTestPod("p1n3", 500), 343 | *createTestPod("p2n3", 300), 344 | } 345 | pods4 := []apiv1.Pod{ 346 | *createTestPod("p1n4", 500), 347 | *createTestPod("p2n4", 200), 348 | *createTestPod("p3n4", 400), 349 | *createTestPod("p4n4", 100), 350 | *createTestPod("p5n4", 300), 351 | } 352 | 353 | fakeClient := &fake.Clientset{} 354 | fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) { 355 | listAction, ok := action.(core.ListAction) 356 | assert.True(t, ok) 357 | restrictions := listAction.GetListRestrictions().Fields.String() 358 | 359 | podList := &apiv1.PodList{} 360 | switch restrictions { 361 | case "spec.nodeName=node1": 362 | podList.Items = pods1 363 | case "spec.nodeName=node2": 364 | podList.Items = pods2 365 | case "spec.nodeName=node3": 366 | podList.Items = pods3 367 | case "spec.nodeName=node4": 368 | podList.Items = pods4 369 | default: 370 | t.Fatalf("unexpected list restrictions: %v", restrictions) 371 | } 372 | return true, podList, nil 373 | }) 374 | return fakeClient 375 | } 376 | -------------------------------------------------------------------------------- /rescheduler.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 Pusher Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | goflag "flag" 21 | "fmt" 22 | "net/http" 23 | "os" 24 | "strings" 25 | "time" 26 | 27 | "github.com/pusher/k8s-spot-rescheduler/metrics" 28 | "github.com/pusher/k8s-spot-rescheduler/nodes" 29 | "github.com/pusher/k8s-spot-rescheduler/scaler" 30 | apiv1 "k8s.io/api/core/v1" 31 | policyv1 "k8s.io/api/policy/v1beta1" 32 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 33 | simulator "k8s.io/autoscaler/cluster-autoscaler/simulator" 34 | autoscaler_drain "k8s.io/autoscaler/cluster-autoscaler/utils/drain" 35 | kube_utils "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" 36 | kube_client "k8s.io/client-go/kubernetes" 37 | v1core "k8s.io/client-go/kubernetes/typed/core/v1" 38 | kube_restclient "k8s.io/client-go/rest" 39 | kube_leaderelection "k8s.io/client-go/tools/leaderelection" 40 | "k8s.io/client-go/tools/leaderelection/resourcelock" 41 | kube_record "k8s.io/client-go/tools/record" 42 | api "k8s.io/kubernetes/pkg/api/legacyscheme" 43 | "k8s.io/kubernetes/pkg/client/leaderelectionconfig" 44 | kubectl_util "k8s.io/kubernetes/pkg/kubectl/cmd/util" 45 | "k8s.io/kubernetes/pkg/scheduler/schedulercache" 46 | 47 | "github.com/golang/glog" 48 | "github.com/prometheus/client_golang/prometheus" 49 | flag "github.com/spf13/pflag" 50 | ) 51 | 52 | var ( 53 | flags = flag.NewFlagSet( 54 | `rescheduler: rescheduler --running-in-cluster=true`, 55 | flag.ExitOnError) 56 | 57 | inCluster = flags.Bool("running-in-cluster", true, 58 | `Optional, if this controller is running in a kubernetes cluster, use the 59 | pod secrets for creating a Kubernetes client.`) 60 | 61 | namespace = flags.String("namespace", "kube-system", 62 | `Namespace in which k8s-spot-rescheduler is run`) 63 | 64 | contentType = flags.String("kube-api-content-type", "application/vnd.kubernetes.protobuf", 65 | `Content type of requests sent to apiserver.`) 66 | 67 | housekeepingInterval = flags.Duration("housekeeping-interval", 10*time.Second, 68 | `How often rescheduler takes actions.`) 69 | 70 | nodeDrainDelay = flags.Duration("node-drain-delay", 10*time.Minute, 71 | `How long the scheduler should wait between draining nodes.`) 72 | 73 | podEvictionTimeout = flags.Duration("pod-eviction-timeout", 2*time.Minute, 74 | `How long should the rescheduler attempt to retrieve successful pod 75 | evictions for.`) 76 | 77 | maxGracefulTermination = flags.Duration("max-graceful-termination", 2*time.Minute, 78 | `How long should the rescheduler wait for pods to shutdown gracefully before 79 | failing the node drain attempt.`) 80 | 81 | listenAddress = flags.String("listen-address", "localhost:9235", 82 | `Address to listen on for serving prometheus metrics`) 83 | 84 | deleteNonReplicatedPods = flags.Bool("delete-non-replicated-pods", false, `Delete non-replicated pods running on on-demand instance. 
Note that some non-replicated pods will not be rescheduled.`) 85 | 86 | showVersion = flags.Bool("version", false, "Show version information and exit.") 87 | ) 88 | 89 | func main() { 90 | flags.AddGoFlagSet(goflag.CommandLine) 91 | 92 | // Log to stderr by default and fix usage message accordingly 93 | logToStdErr := flags.Lookup("logtostderr") 94 | logToStdErr.DefValue = "true" 95 | flags.Set("logtostderr", "true") 96 | 97 | // Add nodes labels as flags 98 | flags.StringVar(&nodes.OnDemandNodeLabel, 99 | "on-demand-node-label", 100 | "kubernetes.io/role=worker", 101 | `Name of label on nodes to be considered for draining.`) 102 | flags.StringVar(&nodes.SpotNodeLabel, 103 | "spot-node-label", 104 | "kubernetes.io/role=spot-worker", 105 | `Name of label on nodes to be considered as targets for pods.`) 106 | 107 | flags.Parse(os.Args) 108 | 109 | if *showVersion { 110 | fmt.Printf("k8s-spot-rescheduler %s\n", VERSION) 111 | os.Exit(0) 112 | } 113 | 114 | err := validateArgs(nodes.OnDemandNodeLabel, nodes.SpotNodeLabel) 115 | if err != nil { 116 | fmt.Printf("Error: %s", err) 117 | os.Exit(1) 118 | } 119 | 120 | glog.Infof("Running Rescheduler") 121 | 122 | // Register metrics from metrics.go 123 | go func() { 124 | http.Handle("/metrics", prometheus.Handler()) 125 | err := http.ListenAndServe(*listenAddress, nil) 126 | glog.Fatalf("Failed to start metrics: %v", err) 127 | }() 128 | 129 | kubeClient, err := createKubeClient(flags, *inCluster) 130 | if err != nil { 131 | glog.Fatalf("Failed to create kube client: %v", err) 132 | } 133 | 134 | recorder := createEventRecorder(kubeClient) 135 | 136 | // Allows active/standy HA. 137 | // Prevent multiple pods running the algorithm simultaneously. 138 | leaderElection := leaderelectionconfig.DefaultLeaderElectionConfiguration() 139 | if *inCluster { 140 | leaderElection.LeaderElect = true 141 | } 142 | 143 | if !leaderElection.LeaderElect { 144 | // Leader election not enabled. 145 | // Execute main logic. 146 | run(kubeClient, recorder) 147 | } else { 148 | id, err := os.Hostname() 149 | if err != nil { 150 | glog.Fatalf("Unable to get hostname: %v", err) 151 | } 152 | // Leader election process 153 | kube_leaderelection.RunOrDie(kube_leaderelection.LeaderElectionConfig{ 154 | Lock: &resourcelock.EndpointsLock{ 155 | EndpointsMeta: metav1.ObjectMeta{ 156 | Namespace: *namespace, 157 | Name: "k8s-spot-rescheduler", 158 | }, 159 | Client: kubeClient.CoreV1(), 160 | LockConfig: resourcelock.ResourceLockConfig{ 161 | Identity: id, 162 | EventRecorder: recorder, 163 | }, 164 | }, 165 | LeaseDuration: leaderElection.LeaseDuration.Duration, 166 | RenewDeadline: leaderElection.RenewDeadline.Duration, 167 | RetryPeriod: leaderElection.RetryPeriod.Duration, 168 | Callbacks: kube_leaderelection.LeaderCallbacks{ 169 | OnStartedLeading: func(_ <-chan struct{}) { 170 | // Since we are committing a suicide after losing 171 | // mastership, we can safely ignore the argument. 
172 | run(kubeClient, recorder) 173 | }, 174 | OnStoppedLeading: func() { 175 | glog.Fatalf("Lost leader status, terminating.") 176 | }, 177 | }, 178 | }) 179 | 180 | } 181 | 182 | } 183 | 184 | func run(kubeClient kube_client.Interface, recorder kube_record.EventRecorder) { 185 | 186 | stopChannel := make(chan struct{}) 187 | 188 | // Predicate checker from K8s scheduler works out if a Pod could schedule onto a node 189 | predicateChecker, err := simulator.NewPredicateChecker(kubeClient, stopChannel) 190 | if err != nil { 191 | glog.Fatalf("Failed to create predicate checker: %v", err) 192 | } 193 | 194 | nodeLister := kube_utils.NewReadyNodeLister(kubeClient, stopChannel) 195 | podDisruptionBudgetLister := kube_utils.NewPodDisruptionBudgetLister(kubeClient, stopChannel) 196 | unschedulablePodLister := kube_utils.NewUnschedulablePodLister(kubeClient, stopChannel) 197 | 198 | // Set nextDrainTime to now to ensure we start processing straight away. 199 | nextDrainTime := time.Now() 200 | 201 | for { 202 | select { 203 | // Run forever, every housekeepingInterval seconds 204 | case <-time.After(*housekeepingInterval): 205 | { 206 | // Don't do anything if we are waiting for the drain delay timer 207 | if time.Until(nextDrainTime) > 0 { 208 | glog.V(2).Infof("Waiting %s for drain delay timer.", time.Until(nextDrainTime).Round(time.Second)) 209 | continue 210 | } 211 | 212 | // Don't run if pods are unschedulable. 213 | // Attempt to not make things worse. 214 | unschedulablePods, err := unschedulablePodLister.List() 215 | if err != nil { 216 | glog.Errorf("Failed to get unschedulable pods: %v", err) 217 | } 218 | if len(unschedulablePods) > 0 { 219 | glog.V(2).Info("Waiting for unschedulable pods to be scheduled.") 220 | continue 221 | } 222 | 223 | glog.V(3).Info("Starting node processing.") 224 | 225 | // Get all nodes in the cluster 226 | allNodes, err := nodeLister.List() 227 | if err != nil { 228 | glog.Errorf("Failed to list nodes: %v", err) 229 | continue 230 | } 231 | 232 | // Build a map of nodeInfo structs. 233 | // NodeInfo is used to map pods onto nodes and see their available 234 | // resources. 235 | nodeMap, err := nodes.NewNodeMap(kubeClient, allNodes) 236 | if err != nil { 237 | glog.Errorf("Failed to build node map; %v", err) 238 | continue 239 | } 240 | 241 | // Update metrics. 242 | metrics.UpdateNodesMap(nodeMap) 243 | 244 | // Get PodDisruptionBudgets 245 | allPDBs, err := podDisruptionBudgetLister.List() 246 | if err != nil { 247 | glog.Errorf("Failed to list PDBs: %v", err) 248 | continue 249 | } 250 | 251 | // Get onDemand and spot nodeInfoArrays 252 | // These are sorted when the nodeMap is created. 253 | onDemandNodeInfos := nodeMap[nodes.OnDemand] 254 | spotNodeInfos := nodeMap[nodes.Spot] 255 | 256 | // Update spot node metrics 257 | updateSpotNodeMetrics(spotNodeInfos, allPDBs) 258 | 259 | // No on demand nodes so nothing to do. 
260 | if len(onDemandNodeInfos) < 1 { 261 | glog.V(2).Info("No nodes to process.") 262 | } 263 | 264 | // Go through each onDemand node in turn 265 | // Build a plan to move pods onto other nodes 266 | // In the case that all can be moved, drain the node 267 | for _, nodeInfo := range onDemandNodeInfos { 268 | 269 | // Get a list of pods that we would need to move onto other nodes 270 | allPods, err := autoscaler_drain.GetPodsForDeletionOnNodeDrain(nodeInfo.Pods, allPDBs, *deleteNonReplicatedPods, false, false, false, nil, 0, time.Now()) 271 | if err != nil { 272 | glog.Errorf("Failed to get pods for consideration: %v", err) 273 | continue 274 | } 275 | 276 | podsForDeletion := make([]*apiv1.Pod, 0) 277 | for _, pod := range allPods { 278 | controlledByDaemonSet := false 279 | for _, owner := range pod.GetOwnerReferences() { 280 | if *owner.Controller && owner.Kind == "DaemonSet" { 281 | controlledByDaemonSet = true 282 | break 283 | } 284 | } 285 | 286 | if controlledByDaemonSet { 287 | glog.V(4).Infof("Ignoring pod %s which is controlled by DaemonSet", podID(pod)) 288 | continue 289 | } 290 | 291 | podsForDeletion = append(podsForDeletion, pod) 292 | } 293 | 294 | // Update the number of pods on this node's metrics 295 | metrics.UpdateNodePodsCount(nodes.OnDemandNodeLabel, nodeInfo.Node.Name, len(podsForDeletion)) 296 | if len(podsForDeletion) < 1 { 297 | // No pods so should just wait for node to be autoscaled away. 298 | glog.V(2).Infof("No pods on %s, skipping.", nodeInfo.Node.Name) 299 | continue 300 | } 301 | 302 | glog.V(2).Infof("Considering %s for removal", nodeInfo.Node.Name) 303 | 304 | // Checks whether or not a node can be drained 305 | err = canDrainNode(predicateChecker, spotNodeInfos, podsForDeletion) 306 | if err != nil { 307 | glog.V(2).Infof("Cannot drain node: %v", err) 308 | continue 309 | } 310 | 311 | // If building plan was successful, can drain node. 312 | glog.V(2).Infof("All pods on %v can be moved. Will drain node.", nodeInfo.Node.Name) 313 | // Drain the node - places eviction on each pod moving them in turn. 314 | err = drainNode(kubeClient, recorder, nodeInfo.Node, podsForDeletion, int(maxGracefulTermination.Seconds()), *podEvictionTimeout) 315 | if err != nil { 316 | glog.Errorf("Failed to drain node: %v", err) 317 | } 318 | // Add the drain delay to allow system to stabilise 319 | nextDrainTime = time.Now().Add(*nodeDrainDelay) 320 | break 321 | } 322 | 323 | glog.V(3).Info("Finished processing nodes.") 324 | } 325 | } 326 | } 327 | } 328 | 329 | // Configure the kube client used to access the api, either from kubeconfig or 330 | //from pod environment if running in the cluster 331 | func createKubeClient(flags *flag.FlagSet, inCluster bool) (kube_client.Interface, error) { 332 | var config *kube_restclient.Config 333 | var err error 334 | if inCluster { 335 | // Load config from Kubernetes well known location. 336 | config, err = kube_restclient.InClusterConfig() 337 | } else { 338 | // Search environment for kubeconfig. 
339 | clientConfig := kubectl_util.DefaultClientConfig(flags) 340 | config, err = clientConfig.ClientConfig() 341 | } 342 | if err != nil { 343 | return nil, fmt.Errorf("error connecting to the client: %v", err) 344 | } 345 | config.ContentType = *contentType 346 | return kube_client.NewForConfigOrDie(config), nil 347 | } 348 | 349 | // Create an event broadcaster so that we can call events when we modify the system 350 | func createEventRecorder(client kube_client.Interface) kube_record.EventRecorder { 351 | eventBroadcaster := kube_record.NewBroadcaster() 352 | eventBroadcaster.StartLogging(glog.Infof) 353 | eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(client.CoreV1().RESTClient()).Events("")}) 354 | return eventBroadcaster.NewRecorder(api.Scheme, apiv1.EventSource{Component: "rescheduler"}) 355 | } 356 | 357 | // Determines if any of the nodes meet the predicates that allow the Pod to be 358 | // scheduled on the node, and returns the node if it finds a suitable one. 359 | // Currently sorts nodes by most requested CPU in an attempt to fill fuller 360 | // nodes first (Attempting to bin pack) 361 | func findSpotNodeForPod(predicateChecker *simulator.PredicateChecker, nodeInfos []*nodes.NodeInfo, pod *apiv1.Pod) *nodes.NodeInfo { 362 | for _, nodeInfo := range nodeInfos { 363 | kubeNodeInfo := schedulercache.NewNodeInfo(nodeInfo.Pods...) 364 | kubeNodeInfo.SetNode(nodeInfo.Node) 365 | 366 | // Pretend pod isn't scheduled 367 | pod.Spec.NodeName = "" 368 | 369 | // Check with the schedulers predicates to find a node to schedule on 370 | if err := predicateChecker.CheckPredicates(pod, nil, kubeNodeInfo, true); err == nil { 371 | return nodeInfo 372 | } 373 | } 374 | return nil 375 | } 376 | 377 | // Goes through a list of pods and works out new nodes to place them on. 378 | // Returns an error if any of the pods won't fit onto existing spot nodes. 379 | func canDrainNode(predicateChecker *simulator.PredicateChecker, nodeInfos nodes.NodeInfoArray, pods []*apiv1.Pod) error { 380 | // Create a copy of the nodeInfos so that we can modify the list 381 | nodePlan := nodeInfos.CopyNodeInfos() 382 | 383 | for _, pod := range pods { 384 | // Works out if a spot node is available for rescheduling 385 | spotNodeInfo := findSpotNodeForPod(predicateChecker, nodePlan, pod) 386 | if spotNodeInfo == nil { 387 | return fmt.Errorf("pod %s can't be rescheduled on any existing spot node", podID(pod)) 388 | } 389 | glog.V(4).Infof("Pod %s can be rescheduled on %v, adding to plan.", podID(pod), spotNodeInfo.Node.ObjectMeta.Name) 390 | spotNodeInfo.AddPod(pod) 391 | } 392 | 393 | return nil 394 | } 395 | 396 | // Performs a drain on given node and updates the nextDrainTime variable. 397 | // Returns an error if the drain fails. 
398 | func drainNode(kubeClient kube_client.Interface, recorder kube_record.EventRecorder, node *apiv1.Node, pods []*apiv1.Pod, maxGracefulTermination int, podEvictionTimeout time.Duration) error { 399 | err := scaler.DrainNode(node, pods, kubeClient, recorder, maxGracefulTermination, podEvictionTimeout, scaler.EvictionRetryTime) 400 | if err != nil { 401 | metrics.UpdateNodeDrainCount("Failure", node.Name) 402 | return err 403 | } 404 | 405 | metrics.UpdateNodeDrainCount("Success", node.Name) 406 | return nil 407 | } 408 | 409 | // Goes through a list of NodeInfos and updates the metrics system with the 410 | // number of pods that the rescheduler understands (So not daemonsets for 411 | // instance) that are on each of the nodes, labelling them as spot nodes. 412 | func updateSpotNodeMetrics(spotNodeInfos nodes.NodeInfoArray, pdbs []*policyv1.PodDisruptionBudget) { 413 | for _, nodeInfo := range spotNodeInfos { 414 | // Get a list of pods that are on the node (Only the types considered by the rescheduler) 415 | podsOnNode, err := autoscaler_drain.GetPodsForDeletionOnNodeDrain(nodeInfo.Pods, pdbs, *deleteNonReplicatedPods, false, false, false, nil, 0, time.Now()) 416 | if err != nil { 417 | glog.Errorf("Failed to update metrics on spot node %s: %v", nodeInfo.Node.Name, err) 418 | continue 419 | } 420 | metrics.UpdateNodePodsCount(nodes.SpotNodeLabel, nodeInfo.Node.Name, len(podsOnNode)) 421 | 422 | } 423 | } 424 | 425 | // Returns the pods Namespace/Name as a string 426 | func podID(pod *apiv1.Pod) string { 427 | return fmt.Sprintf("%s/%s", pod.Namespace, pod.Name) 428 | } 429 | 430 | // Checks that the node lablels provided as arguments are in fact, sane. 431 | func validateArgs(OnDemandNodeLabel string, SpotNodeLabel string) error { 432 | if len(strings.Split(OnDemandNodeLabel, "=")) > 2 { 433 | return fmt.Errorf("the on demand node label is not correctly formatted: expected '' or '=', but got %s", OnDemandNodeLabel) 434 | } 435 | 436 | if len(strings.Split(SpotNodeLabel, "=")) > 2 { 437 | return fmt.Errorf("the spot node label is not correctly formatted: expected '' or '=', but got %s", SpotNodeLabel) 438 | } 439 | 440 | return nil 441 | } 442 | -------------------------------------------------------------------------------- /rescheduler_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 Pusher Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "fmt" 21 | "testing" 22 | 23 | "github.com/pusher/k8s-spot-rescheduler/nodes" 24 | "github.com/stretchr/testify/assert" 25 | apiv1 "k8s.io/api/core/v1" 26 | "k8s.io/apimachinery/pkg/api/resource" 27 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 | simulator "k8s.io/autoscaler/cluster-autoscaler/simulator" 29 | ) 30 | 31 | func TestFindSpotNodeForPod(t *testing.T) { 32 | predicateChecker := simulator.NewTestPredicateChecker() 33 | 34 | pods1 := []*apiv1.Pod{ 35 | createTestPod("p1n1", 100), 36 | createTestPod("p2n1", 300), 37 | } 38 | pods2 := []*apiv1.Pod{ 39 | createTestPod("p1n2", 500), 40 | createTestPod("p2n2", 300), 41 | } 42 | pods3 := []*apiv1.Pod{ 43 | createTestPod("p1n3", 500), 44 | createTestPod("p2n3", 500), 45 | createTestPod("p3n3", 300), 46 | } 47 | 48 | nodeInfos := []*nodes.NodeInfo{ 49 | createTestNodeInfo(createTestNode("node1", 500), pods1, 400), 50 | createTestNodeInfo(createTestNode("node2", 1000), pods2, 800), 51 | createTestNodeInfo(createTestNode("node3", 2000), pods3, 1300), 52 | } 53 | 54 | pod1 := createTestPod("pod1", 100) 55 | pod2 := createTestPod("pod2", 200) 56 | pod3 := createTestPod("pod3", 700) 57 | pod4 := createTestPod("pod4", 2200) 58 | 59 | node := findSpotNodeForPod(predicateChecker, nodeInfos, pod1) 60 | assert.Equal(t, "node1", node.Node.Name) 61 | 62 | node = findSpotNodeForPod(predicateChecker, nodeInfos, pod2) 63 | assert.Equal(t, "node2", node.Node.Name) 64 | 65 | node = findSpotNodeForPod(predicateChecker, nodeInfos, pod3) 66 | assert.Equal(t, "node3", node.Node.Name) 67 | 68 | node = findSpotNodeForPod(predicateChecker, nodeInfos, pod4) 69 | assert.Nil(t, node) 70 | 71 | } 72 | 73 | func TestNodeLabelValidation(t *testing.T) { 74 | onDemandLabel := "foo.bar/role=worker" 75 | spotLabel := "foo.bar/node-role" 76 | 77 | err := validateArgs(onDemandLabel, spotLabel) 78 | assert.NoError(t, err) 79 | 80 | onDemandLabel = "foo.bar/broken=worker=true" 81 | err = validateArgs(onDemandLabel, spotLabel) 82 | assert.EqualError(t, err, "the on demand node label is not correctly formatted: expected '' or '=', but got foo.bar/broken=worker=true") 83 | 84 | onDemandLabel = "foo.bar/role=worker" 85 | spotLabel = "foo.bar/node-role=spot=fail" 86 | err = validateArgs(onDemandLabel, spotLabel) 87 | assert.EqualError(t, err, "the spot node label is not correctly formatted: expected '' or '=', but got foo.bar/node-role=spot=fail") 88 | 89 | } 90 | 91 | func TestCanDrainNode(t *testing.T) { 92 | predicateChecker := simulator.NewTestPredicateChecker() 93 | 94 | pods1 := []*apiv1.Pod{ 95 | createTestPod("p1n1", 100), 96 | createTestPod("p2n1", 300), 97 | } 98 | pods2 := []*apiv1.Pod{ 99 | createTestPod("p1n2", 500), 100 | createTestPod("p2n2", 300), 101 | } 102 | pods3 := []*apiv1.Pod{ 103 | createTestPod("p1n3", 500), 104 | createTestPod("p2n3", 500), 105 | createTestPod("p3n3", 300), 106 | } 107 | 108 | spotNodeInfos := []*nodes.NodeInfo{ 109 | createTestNodeInfo(createTestNode("node3", 2000), pods3, 1300), 110 | createTestNodeInfo(createTestNode("node2", 1100), pods2, 800), 111 | createTestNodeInfo(createTestNode("node1", 500), pods1, 400), 112 | } 113 | 114 | podsForDeletion1 := []*apiv1.Pod{ 115 | createTestPod("pod1", 500), 116 | createTestPod("pod2", 300), 117 | createTestPod("pod1", 100), 118 | createTestPod("pod2", 100), 119 | createTestPod("pod1", 100), 120 | } 121 | podsForDeletion2 := []*apiv1.Pod{ 122 | createTestPod("pod1", 500), 123 | createTestPod("pod2", 400), 124 | createTestPod("pod1", 
100), 125 | createTestPod("pod2", 100), 126 | createTestPod("pod1", 100), 127 | } 128 | 129 | err1 := canDrainNode(predicateChecker, spotNodeInfos, podsForDeletion1) 130 | if err1 != nil { 131 | assert.Fail(t, "canDrainNode should be successful with podsForDeletion1", "%v", err1) 132 | } 133 | 134 | err2 := canDrainNode(predicateChecker, spotNodeInfos, podsForDeletion2) 135 | if err2 == nil { 136 | assert.Fail(t, "canDrainNode should fail with podsForDeletion2, too much requested CPU.") 137 | } 138 | } 139 | 140 | func createTestPod(name string, cpu int64) *apiv1.Pod { 141 | pod := &apiv1.Pod{ 142 | ObjectMeta: metav1.ObjectMeta{ 143 | Namespace: "kube-system", 144 | Name: name, 145 | SelfLink: fmt.Sprintf("/api/v1/namespaces/default/pods/%s", name), 146 | }, 147 | Spec: apiv1.PodSpec{ 148 | Containers: []apiv1.Container{ 149 | { 150 | Resources: apiv1.ResourceRequirements{ 151 | Requests: apiv1.ResourceList{ 152 | apiv1.ResourceCPU: *resource.NewMilliQuantity(cpu, resource.DecimalSI), 153 | }, 154 | }, 155 | }, 156 | }, 157 | }, 158 | } 159 | return pod 160 | } 161 | 162 | func createTestNode(name string, cpu int64) *apiv1.Node { 163 | node := &apiv1.Node{ 164 | ObjectMeta: metav1.ObjectMeta{ 165 | Name: name, 166 | }, 167 | Status: apiv1.NodeStatus{ 168 | Capacity: apiv1.ResourceList{ 169 | apiv1.ResourceCPU: *resource.NewMilliQuantity(cpu, resource.DecimalSI), 170 | apiv1.ResourceMemory: *resource.NewQuantity(2*1024*1024*1024, resource.DecimalSI), 171 | apiv1.ResourcePods: *resource.NewQuantity(100, resource.DecimalSI), 172 | }, 173 | Conditions: []apiv1.NodeCondition{ 174 | { 175 | Type: apiv1.NodeReady, 176 | Status: apiv1.ConditionTrue, 177 | }, 178 | }, 179 | }, 180 | } 181 | node.Status.Allocatable = node.Status.Capacity 182 | return node 183 | } 184 | 185 | func createTestNodeInfo(node *apiv1.Node, pods []*apiv1.Pod, requests int64) *nodes.NodeInfo { 186 | nodeInfo := &nodes.NodeInfo{ 187 | Node: node, 188 | Pods: pods, 189 | RequestedCPU: requests, 190 | FreeCPU: node.Status.Capacity.Cpu().MilliValue() - requests, 191 | } 192 | return nodeInfo 193 | } 194 | -------------------------------------------------------------------------------- /scaler/scaler.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 The Kubernetes Authors. 3 | Modifications copyright 2017 Pusher Ltd. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | */ 17 | 18 | package scaler 19 | 20 | import ( 21 | "fmt" 22 | "time" 23 | 24 | "github.com/golang/glog" 25 | "github.com/pusher/k8s-spot-rescheduler/metrics" 26 | apiv1 "k8s.io/api/core/v1" 27 | policyv1 "k8s.io/api/policy/v1beta1" 28 | "k8s.io/apimachinery/pkg/api/errors" 29 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 30 | "k8s.io/autoscaler/cluster-autoscaler/utils/deletetaint" 31 | kube_client "k8s.io/client-go/kubernetes" 32 | kube_record "k8s.io/client-go/tools/record" 33 | ) 34 | 35 | const ( 36 | // EvictionRetryTime is the time after CA retries failed pod eviction. 
37 | EvictionRetryTime = 10 * time.Second 38 | ) 39 | 40 | // Originally from https://github.com/kubernetes/autoscaler/blob/bf59e3daa5922c0e44027fa211948b50cb6b7a12/cluster-autoscaler/core/scale_down.go#L690-L723 41 | func evictPod(podToEvict *apiv1.Pod, client kube_client.Interface, recorder kube_record.EventRecorder, 42 | maxGracefulTerminationSec int, retryUntil time.Time, waitBetweenRetries time.Duration) error { 43 | recorder.Eventf(podToEvict, apiv1.EventTypeNormal, "Rescheduler", "deleting pod from on-demand node") 44 | maxGraceful64 := int64(maxGracefulTerminationSec) 45 | var lastError error 46 | for first := true; first || time.Now().Before(retryUntil); time.Sleep(waitBetweenRetries) { 47 | first = false 48 | eviction := &policyv1.Eviction{ 49 | ObjectMeta: metav1.ObjectMeta{ 50 | Namespace: podToEvict.Namespace, 51 | Name: podToEvict.Name, 52 | }, 53 | DeleteOptions: &metav1.DeleteOptions{ 54 | GracePeriodSeconds: &maxGraceful64, 55 | }, 56 | } 57 | lastError = client.Core().Pods(podToEvict.Namespace).Evict(eviction) 58 | if lastError == nil { 59 | return nil 60 | } 61 | } 62 | glog.Errorf("Failed to evict pod %s, error: %v", podToEvict.Name, lastError) 63 | recorder.Eventf(podToEvict, apiv1.EventTypeWarning, "ReschedulerFailed", "failed to delete pod from on-demand node") 64 | return fmt.Errorf("Failed to evict pod %s/%s within allowed timeout (last error: %v)", podToEvict.Namespace, podToEvict.Name, lastError) 65 | } 66 | 67 | // DrainNode performs drain logic on the node. Marks the node as unschedulable and later removes all pods, giving 68 | // them up to MaxGracefulTerminationTime to finish. 69 | // 70 | // Originally from https://github.com/kubernetes/autoscaler/blob/bf59e3daa5922c0e44027fa211948b50cb6b7a12/cluster-autoscaler/core/scale_down.go#L725-L783 71 | func DrainNode(node *apiv1.Node, pods []*apiv1.Pod, client kube_client.Interface, recorder kube_record.EventRecorder, 72 | maxGracefulTerminationSec int, maxPodEvictionTime time.Duration, waitBetweenRetries time.Duration) error { 73 | 74 | drainSuccessful := false 75 | toEvict := len(pods) 76 | if err := deletetaint.MarkToBeDeleted(node, client); err != nil { 77 | recorder.Eventf(node, apiv1.EventTypeWarning, "ReschedulerFailed", "failed to mark the node as draining/unschedulable: %v", err) 78 | return err 79 | } 80 | 81 | // If we fail to evict all the pods from the node we want to remove delete taint 82 | defer func() { 83 | if !drainSuccessful { 84 | deletetaint.CleanToBeDeleted(node, client) 85 | recorder.Eventf(node, apiv1.EventTypeWarning, "ReschedulerFailed", "failed to drain the node, aborting drain.") 86 | } 87 | }() 88 | 89 | recorder.Eventf(node, apiv1.EventTypeNormal, "Rescheduler", "marked the node as draining/unschedulable") 90 | 91 | retryUntil := time.Now().Add(maxPodEvictionTime) 92 | confirmations := make(chan error, toEvict) 93 | for _, pod := range pods { 94 | go func(podToEvict *apiv1.Pod) { 95 | confirmations <- evictPod(podToEvict, client, recorder, maxGracefulTerminationSec, retryUntil, waitBetweenRetries) 96 | }(pod) 97 | } 98 | 99 | evictionErrs := make([]error, 0) 100 | 101 | for range pods { 102 | select { 103 | case err := <-confirmations: 104 | if err != nil { 105 | evictionErrs = append(evictionErrs, err) 106 | } else { 107 | metrics.UpdateEvictionsCount() 108 | } 109 | case <-time.After(retryUntil.Sub(time.Now()) + 5*time.Second): 110 | return fmt.Errorf("Failed to drain node %s/%s: timeout when waiting for creating evictions", node.Namespace, node.Name) 111 | } 112 | } 113 | if 
len(evictionErrs) != 0 { 114 | return fmt.Errorf("Failed to drain node %s/%s, due to following errors: %v", node.Namespace, node.Name, evictionErrs) 115 | } 116 | 117 | // Evictions created successfully, wait for the remainder of maxPodEvictionTime to see if pods have been evicted 118 | var allGone bool 119 | for time.Now().Before(retryUntil.Add(5 * time.Second)) { 120 | allGone = true 121 | for _, pod := range pods { 122 | podreturned, err := client.Core().Pods(pod.Namespace).Get(pod.Name, metav1.GetOptions{}) 123 | if err == nil && (podreturned != nil && podreturned.Spec.NodeName == node.Name) { 124 | glog.Errorf("Not deleted yet %v", podreturned.Name) 125 | allGone = false 126 | break 127 | } 128 | if err != nil && !errors.IsNotFound(err) { 129 | glog.Errorf("Failed to check pod %s/%s: %v", pod.Namespace, pod.Name, err) 130 | allGone = false 131 | break 132 | } 133 | } 134 | if allGone { 135 | glog.V(4).Infof("All pods removed from %s", node.Name) 136 | // Let the defered function know there is no need for cleanup 137 | drainSuccessful = true 138 | recorder.Eventf(node, apiv1.EventTypeNormal, "Rescheduler", "marked the node as drained/schedulable") 139 | deletetaint.CleanToBeDeleted(node, client) 140 | return nil 141 | } 142 | time.Sleep(5 * time.Second) 143 | } 144 | return fmt.Errorf("Failed to drain node %s/%s: pods remaining after timeout", node.Namespace, node.Name) 145 | } 146 | -------------------------------------------------------------------------------- /version.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // VERSION contains injected version information 4 | var VERSION = "undefined" 5 | --------------------------------------------------------------------------------