├── .gitignore ├── Dockerfile ├── deploy ├── start.sh ├── crd.yaml └── scheduler │ └── config │ └── batch_scheduler_config.json ├── hack ├── boilerplate.go.txt ├── custom-boilerplate.go.txt ├── tools.go ├── update-codegen.sh └── verify-codegen.sh ├── pkg ├── apis │ └── podgroup │ │ ├── register.go │ │ └── v1 │ │ ├── doc.go │ │ ├── register.go │ │ ├── zz_generated.deepcopy.go │ │ └── types.go ├── generated │ ├── clientset │ │ └── versioned │ │ │ ├── doc.go │ │ │ ├── fake │ │ │ ├── doc.go │ │ │ ├── register.go │ │ │ └── clientset_generated.go │ │ │ ├── typed │ │ │ └── podgroup │ │ │ │ └── v1 │ │ │ │ ├── generated_expansion.go │ │ │ │ ├── doc.go │ │ │ │ ├── fake │ │ │ │ ├── doc.go │ │ │ │ ├── fake_podgroup_client.go │ │ │ │ └── fake_podgroup.go │ │ │ │ ├── podgroup_client.go │ │ │ │ └── podgroup.go │ │ │ ├── scheme │ │ │ ├── doc.go │ │ │ └── register.go │ │ │ └── clientset.go │ ├── listers │ │ └── podgroup │ │ │ └── v1 │ │ │ ├── expansion_generated.go │ │ │ └── podgroup.go │ └── informers │ │ └── externalversions │ │ ├── internalinterfaces │ │ └── factory_interfaces.go │ │ ├── podgroup │ │ ├── v1 │ │ │ ├── interface.go │ │ │ └── podgroup.go │ │ └── interface.go │ │ ├── generic.go │ │ └── factory.go ├── util │ ├── types.go │ ├── k8s_test.go │ └── k8s.go └── scheduler │ ├── core │ ├── core_test.go │ └── core.go │ ├── cache │ └── cache.go │ ├── controller │ └── controller.go │ └── batch │ └── batchscheduler.go ├── examples └── example1.yaml ├── cmd └── scheduler │ └── main.go ├── version.sh ├── Makefile ├── README.md ├── go.mod └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | .idea 3 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:centos7 2 | LABEL description="batch scheduler" 3 | 4 | COPY ./bin/batch-scheduler /scheduler 5 | ENTRYPOINT ["/scheduler"] 
-------------------------------------------------------------------------------- /deploy/start.sh: -------------------------------------------------------------------------------- 1 | kubectl apply -f crd.yaml 2 | cp -r ../bin/ ./ 3 | nohup bin/batch-scheduler --v=5 --config scheduler/config/batch_scheduler_config.json 2>&1 & # binary name must match the Makefile build output (bin/batch-scheduler) -------------------------------------------------------------------------------- /deploy/crd.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiextensions.k8s.io/v1beta1 2 | kind: CustomResourceDefinition 3 | metadata: 4 | name: podgroups.batch.scheduler.tencent.com 5 | spec: 6 | group: batch.scheduler.tencent.com 7 | names: 8 | kind: PodGroup 9 | plural: podgroups 10 | singular: podgroup 11 | shortNames: 12 | - pg 13 | - pgs 14 | scope: Namespaced 15 | version: v1 -------------------------------------------------------------------------------- /hack/boilerplate.go.txt: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | -------------------------------------------------------------------------------- /hack/custom-boilerplate.go.txt: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | -------------------------------------------------------------------------------- /pkg/apis/podgroup/register.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package podgroup 18 | 19 | // GroupName is the group name used in this package 20 | const ( 21 | GroupName = "batch.scheduler.tencent.com" 22 | ) 23 | -------------------------------------------------------------------------------- /pkg/generated/clientset/versioned/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Code generated by client-gen. DO NOT EDIT. 18 | 19 | // This package has the automatically generated clientset. 20 | package versioned 21 | -------------------------------------------------------------------------------- /pkg/generated/clientset/versioned/fake/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Code generated by client-gen. DO NOT EDIT. 18 | 19 | // This package has the automatically generated fake clientset. 
20 | package fake 21 | -------------------------------------------------------------------------------- /pkg/generated/clientset/versioned/typed/podgroup/v1/generated_expansion.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Code generated by client-gen. DO NOT EDIT. 18 | 19 | package v1 20 | 21 | type PodGroupExpansion interface{} 22 | -------------------------------------------------------------------------------- /pkg/generated/clientset/versioned/typed/podgroup/v1/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Code generated by client-gen. 
DO NOT EDIT. 18 | 19 | // This package has the automatically generated typed clients. 20 | package v1 21 | -------------------------------------------------------------------------------- /pkg/generated/clientset/versioned/typed/podgroup/v1/fake/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Code generated by client-gen. DO NOT EDIT. 18 | 19 | // Package fake has the automatically generated clients. 20 | package fake 21 | -------------------------------------------------------------------------------- /pkg/generated/clientset/versioned/scheme/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Code generated by client-gen. DO NOT EDIT. 18 | 19 | // This package contains the scheme of the automatically generated clientset. 20 | package scheme 21 | -------------------------------------------------------------------------------- /examples/example1.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch.scheduler.tencent.com/v1 2 | kind: PodGroup 3 | metadata: 4 | name: group1 5 | namespace: default 6 | spec: 7 | minMember: 9 8 | --- 9 | apiVersion: apps/v1 10 | kind: StatefulSet 11 | metadata: 12 | name: web-group-valid1 13 | spec: 14 | selector: 15 | matchLabels: 16 | app: nginx 17 | podManagementPolicy: Parallel 18 | serviceName: "nginx" 19 | replicas: 9 20 | template: 21 | metadata: 22 | labels: 23 | group.batch.scheduler.tencent.com: "group1" 24 | app: nginx 25 | type: node 26 | spec: 27 | containers: 28 | - name: nginx 29 | image: nginx 30 | ports: 31 | - containerPort: 80 32 | name: web 33 | resources: 34 | limits: 35 | cpu: "1" 36 | requests: 37 | cpu: "1" -------------------------------------------------------------------------------- /hack/tools.go: -------------------------------------------------------------------------------- 1 | // +build tools 2 | 3 | /* 4 | * Copyright 2020 THL A29 Limited, a Tencent company. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | // This package imports things required by build scripts, to force `go mod` to see them as dependencies 20 | package tools 21 | 22 | import _ "k8s.io/code-generator" 23 | -------------------------------------------------------------------------------- /pkg/apis/podgroup/v1/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // +k8s:deepcopy-gen=package 18 | // +groupName=batch.scheduler.tencent.com 19 | 20 | // Package v1 is the v1 version of the API. 
21 | package v1 // import "github.com/tenstack/batch-scheduler/pkg/apis/podgroup/v1" 22 | -------------------------------------------------------------------------------- /deploy/scheduler/config/batch_scheduler_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "kind": "KubeSchedulerConfiguration", 3 | "apiVersion": "kubescheduler.config.k8s.io/v1alpha1", 4 | "clientConnection": { 5 | "kubeconfig": "/var/run/kubernetes/admin.kubeconfig" 6 | }, 7 | "plugins": { 8 | "preFilter": { 9 | "enabled": [ 10 | { 11 | "name": "batch-scheduler" 12 | } 13 | ] 14 | }, 15 | "permit": { 16 | "enabled": [ 17 | { 18 | "name": "batch-scheduler" 19 | } 20 | ] 21 | }, 22 | "postBind": { 23 | "enabled": [ 24 | { 25 | "name": "batch-scheduler" 26 | } 27 | ] 28 | }, 29 | "queueSort": { 30 | "enabled": [ 31 | { 32 | "name": "batch-scheduler" 33 | } 34 | ] 35 | } 36 | }, 37 | "pluginConfig": [ 38 | { 39 | "name": "batch-scheduler", 40 | "args": { 41 | "kube_config": "/var/run/kubernetes/admin.kubeconfig" 42 | } 43 | } 44 | ] 45 | } -------------------------------------------------------------------------------- /pkg/generated/listers/podgroup/v1/expansion_generated.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | // Code generated by lister-gen. DO NOT EDIT. 18 | 19 | package v1 20 | 21 | // PodGroupListerExpansion allows custom methods to be added to 22 | // PodGroupLister. 23 | type PodGroupListerExpansion interface{} 24 | 25 | // PodGroupNamespaceListerExpansion allows custom methods to be added to 26 | // PodGroupNamespaceLister. 27 | type PodGroupNamespaceListerExpansion interface{} 28 | -------------------------------------------------------------------------------- /hack/update-codegen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2020 THL A29 Limited, a Tencent company. 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | set -o nounset 18 | 19 | export GO111MODULE=on 20 | SCRIPT_ROOT=${GOPATH}/src/github.com/tenstack/batch-scheduler 21 | CODEGEN_PKG=${GOPATH}/src/k8s.io/code-generator 22 | 23 | trap 'exit 1' SIGINT SIGTERM # abort with a failure status when interrupted or terminated 24 | 25 | ${CODEGEN_PKG}/generate-groups.sh all \ 26 | github.com/tenstack/batch-scheduler/pkg/generated github.com/tenstack/batch-scheduler/pkg/apis \ 27 | podgroup:v1 \ 28 | --go-header-file ${SCRIPT_ROOT}/hack/boilerplate.go.txt -------------------------------------------------------------------------------- /cmd/scheduler/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "fmt" 21 | "os" 22 | 23 | scheduler "k8s.io/kubernetes/cmd/kube-scheduler/app" 24 | 25 | "github.com/tenstack/batch-scheduler/pkg/scheduler/batch" 26 | ) 27 | 28 | func main() { 29 | command := scheduler.NewSchedulerCommand( 30 | scheduler.WithPlugin(batch.Name, batch.New)) 31 | 32 | if err := command.Execute(); err != nil { 33 | fmt.Fprintf(os.Stderr, "%v\n", err) 34 | os.Exit(1) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /version.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | KUBE_GIT_COMMIT=`git rev-parse "HEAD^{commit}"` 4 | KUBE_GIT_VERSION=`git describe --tags --abbrev=14 "${KUBE_GIT_COMMIT}^{commit}" --always` 5 | KUBE_GO_PACKAGE='' 6 | ldflags() { 7 | local -a ldflags 8 | function add_ldflag() { 9 | local key=${1} 10 | local val=${2} 11 | ldflags+=( 12 | "-X '${KUBE_GO_PACKAGE}k8s.io/kubernetes/pkg/version.${key}=${val}'" 13 | ) 14 | } 15 | 16 | add_ldflag "buildDate" "$(date ${SOURCE_DATE_EPOCH:+"--date=@${SOURCE_DATE_EPOCH}"} -u +'%Y-%m-%dT%H:%M:%SZ')" 17 | if [[ -n ${KUBE_GIT_COMMIT-} ]]; then 18 | add_ldflag "gitCommit" "${KUBE_GIT_COMMIT}" 19 | add_ldflag "gitTreeState" "${KUBE_GIT_TREE_STATE-}" 20 | fi 21 | 22 | if [[ -n ${KUBE_GIT_VERSION-} ]]; then 23 | add_ldflag "gitVersion" 
"${KUBE_GIT_VERSION}" 24 | fi 25 | 26 | if [[ -n ${KUBE_GIT_MAJOR-} && -n ${KUBE_GIT_MINOR-} ]]; then 27 | add_ldflag "gitMajor" "${KUBE_GIT_MAJOR}" 28 | add_ldflag "gitMinor" "${KUBE_GIT_MINOR}" 29 | fi 30 | 31 | # The -ldflags parameter takes a single string, so join the output. 32 | echo "${ldflags[*]-}" 33 | } 34 | 35 | ldflags 36 | -------------------------------------------------------------------------------- /pkg/util/types.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package util 18 | 19 | import "fmt" 20 | 21 | const ( 22 | // PodGroupAnn is not used now 23 | PodGroupAnn = "group.batch.scheduler.tencent.com" 24 | // PodGroupLabel is the default label of batch scheduler 25 | PodGroupLabel = "group.batch.scheduler.tencent.com" 26 | ) 27 | 28 | var ( 29 | // ErrorNotMatched means pod does not match batch scheduling 30 | ErrorNotMatched = fmt.Errorf("not match batch scheduling") 31 | // ErrorWaiting means pod number does not match the min pods required 32 | ErrorWaiting = fmt.Errorf("waiting") 33 | // ErrorResourceNotEnough means cluster resource is not enough, mainly used in Pre-Filter 34 | ErrorResourceNotEnough = fmt.Errorf("resource not enough") 35 | ) 36 | -------------------------------------------------------------------------------- /pkg/generated/clientset/versioned/typed/podgroup/v1/fake/fake_podgroup_client.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Code generated by client-gen. DO NOT EDIT. 
18 | 19 | package fake 20 | 21 | import ( 22 | v1 "github.com/tenstack/batch-scheduler/pkg/generated/clientset/versioned/typed/podgroup/v1" 23 | rest "k8s.io/client-go/rest" 24 | testing "k8s.io/client-go/testing" 25 | ) 26 | 27 | type FakeBatchV1 struct { 28 | *testing.Fake 29 | } 30 | 31 | func (c *FakeBatchV1) PodGroups(namespace string) v1.PodGroupInterface { 32 | return &FakePodGroups{c, namespace} 33 | } 34 | 35 | // RESTClient returns a RESTClient that is used to communicate 36 | // with API server by this client implementation. 37 | func (c *FakeBatchV1) RESTClient() rest.Interface { 38 | var ret *rest.RESTClient 39 | return ret 40 | } 41 | -------------------------------------------------------------------------------- /hack/verify-codegen.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2020 THL A29 Limited, a Tencent company. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | set -o errexit 18 | set -o nounset 19 | set -o pipefail 20 | 21 | SCRIPT_ROOT=$(dirname "${BASH_SOURCE[0]}")/.. 
22 | 23 | DIFFROOT="${SCRIPT_ROOT}/pkg" 24 | TMP_DIFFROOT="${SCRIPT_ROOT}/_tmp/pkg" 25 | _tmp="${SCRIPT_ROOT}/_tmp" 26 | 27 | cleanup() { 28 | rm -rf "${_tmp}" 29 | } 30 | trap "cleanup" EXIT SIGINT 31 | 32 | cleanup 33 | 34 | mkdir -p "${TMP_DIFFROOT}" 35 | cp -a "${DIFFROOT}"/* "${TMP_DIFFROOT}" 36 | 37 | "${SCRIPT_ROOT}/hack/update-codegen.sh" 38 | echo "diffing ${DIFFROOT} against freshly generated codegen" 39 | ret=0 40 | diff -Naupr "${DIFFROOT}" "${TMP_DIFFROOT}" || ret=$? 41 | cp -a "${TMP_DIFFROOT}"/* "${DIFFROOT}" 42 | if [[ $ret -eq 0 ]] 43 | then 44 | echo "${DIFFROOT} up to date." 45 | else 46 | echo "${DIFFROOT} is out of date. Please run hack/update-codegen.sh" 47 | exit 1 48 | fi 49 | -------------------------------------------------------------------------------- /pkg/generated/informers/externalversions/internalinterfaces/factory_interfaces.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Code generated by informer-gen. DO NOT EDIT. 
18 | 19 | package internalinterfaces 20 | 21 | import ( 22 | time "time" 23 | 24 | versioned "github.com/tenstack/batch-scheduler/pkg/generated/clientset/versioned" 25 | v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 26 | runtime "k8s.io/apimachinery/pkg/runtime" 27 | cache "k8s.io/client-go/tools/cache" 28 | ) 29 | 30 | // NewInformerFunc takes versioned.Interface and time.Duration to return a SharedIndexInformer. 31 | type NewInformerFunc func(versioned.Interface, time.Duration) cache.SharedIndexInformer 32 | 33 | // SharedInformerFactory a small interface to allow for adding an informer without an import cycle 34 | type SharedInformerFactory interface { 35 | Start(stopCh <-chan struct{}) 36 | InformerFor(obj runtime.Object, newFunc NewInformerFunc) cache.SharedIndexInformer 37 | } 38 | 39 | // TweakListOptionsFunc is a function that transforms a v1.ListOptions. 40 | type TweakListOptionsFunc func(*v1.ListOptions) 41 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright 2020 THL A29 Limited, a Tencent company. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | REGISTRY_NAME=xxx 17 | IMAGE_TAGS=canary 18 | LDFLAGS=$(shell bash version.sh) 19 | 20 | CMDS=scheduler 21 | all: build 22 | 23 | build: build-scheduler 24 | 25 | build-scheduler: 26 | go vet ./pkg/... 
27 | mkdir -p bin 28 | GOOS=linux CGO_ENABLED=0 go build -ldflags "$(LDFLAGS)" -o ./bin/batch-scheduler ./cmd/scheduler 29 | 30 | container-%: build-% 31 | docker build -t $(REGISTRY_NAME)/$*:$(IMAGE_TAGS) -f $(shell if [ -e ./cmd/$*/Dockerfile ]; then echo ./cmd/$*/Dockerfile; else echo Dockerfile; fi) --label revision=$(REV) . 32 | 33 | push-%: container-% 34 | set -ex; \ 35 | push_image () { \ 36 | docker push $(REGISTRY_NAME)/$*:$(IMAGE_TAGS); \ 37 | }; \ 38 | for tag in $(IMAGE_TAGS); do \ 39 | if [ "$$tag" = "canary" ] || echo "$$tag" | grep -q -e '-canary$$'; then \ 40 | : "creating or overwriting canary image"; \ 41 | push_image; \ 42 | elif docker pull $(REGISTRY_NAME)/$*:$$tag 2>&1 | tee /dev/stderr | grep -q "manifest for $(REGISTRY_NAME)/$*:$$tag not found"; then \ 43 | : "creating release image"; \ 44 | push_image; \ 45 | else \ 46 | : "release image $(REGISTRY_NAME)/$*:$$tag already exists, skipping push"; \ 47 | fi; \ 48 | done -------------------------------------------------------------------------------- /pkg/generated/informers/externalversions/podgroup/v1/interface.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Code generated by informer-gen. DO NOT EDIT. 
18 | 19 | package v1 20 | 21 | import ( 22 | internalinterfaces "github.com/tenstack/batch-scheduler/pkg/generated/informers/externalversions/internalinterfaces" 23 | ) 24 | 25 | // Interface provides access to all the informers in this group version. 26 | type Interface interface { 27 | // PodGroups returns a PodGroupInformer. 28 | PodGroups() PodGroupInformer 29 | } 30 | 31 | type version struct { 32 | factory internalinterfaces.SharedInformerFactory 33 | namespace string 34 | tweakListOptions internalinterfaces.TweakListOptionsFunc 35 | } 36 | 37 | // New returns a new Interface. 38 | func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface { 39 | return &version{factory: f, namespace: namespace, tweakListOptions: tweakListOptions} 40 | } 41 | 42 | // PodGroups returns a PodGroupInformer. 43 | func (v *version) PodGroups() PodGroupInformer { 44 | return &podGroupInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} 45 | } 46 | -------------------------------------------------------------------------------- /pkg/generated/informers/externalversions/podgroup/interface.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | // Code generated by informer-gen. DO NOT EDIT. 18 | 19 | package podgroup 20 | 21 | import ( 22 | internalinterfaces "github.com/tenstack/batch-scheduler/pkg/generated/informers/externalversions/internalinterfaces" 23 | v1 "github.com/tenstack/batch-scheduler/pkg/generated/informers/externalversions/podgroup/v1" 24 | ) 25 | 26 | // Interface provides access to each of this group's versions. 27 | type Interface interface { 28 | // V1 provides access to shared informers for resources in V1. 29 | V1() v1.Interface 30 | } 31 | 32 | type group struct { 33 | factory internalinterfaces.SharedInformerFactory 34 | namespace string 35 | tweakListOptions internalinterfaces.TweakListOptionsFunc 36 | } 37 | 38 | // New returns a new Interface. 39 | func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface { 40 | return &group{factory: f, namespace: namespace, tweakListOptions: tweakListOptions} 41 | } 42 | 43 | // V1 returns a new v1.Interface. 44 | func (g *group) V1() v1.Interface { 45 | return v1.New(g.factory, g.namespace, g.tweakListOptions) 46 | } 47 | -------------------------------------------------------------------------------- /pkg/util/k8s_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package util 18 | 19 | import ( 20 | "testing" 21 | 22 | "k8s.io/kubernetes/pkg/apis/core" 23 | ) 24 | 25 | type test struct { 26 | old interface{} 27 | new interface{} 28 | expectedError bool 29 | } 30 | 31 | func TestCreateMergePatch(t *testing.T) { 32 | tests := []struct { 33 | old interface{} 34 | new interface{} 35 | expected string 36 | }{ 37 | { 38 | old: &core.Pod{ 39 | Spec: core.PodSpec{ 40 | Hostname: "test", 41 | }, 42 | }, 43 | new: &core.Pod{ 44 | Status: core.PodStatus{ 45 | Reason: "test", 46 | }, 47 | }, 48 | expected: `{"Spec":{"Hostname":""},"Status":{"Reason":"test"}}`, 49 | }, 50 | 51 | { 52 | old: &core.Pod{ 53 | Spec: core.PodSpec{ 54 | Hostname: "test", 55 | }, 56 | Status: core.PodStatus{ 57 | Reason: "test1", 58 | }, 59 | }, 60 | new: &core.Pod{ 61 | Status: core.PodStatus{ 62 | Reason: "test", 63 | }, 64 | }, 65 | expected: `{"Spec":{"Hostname":""},"Status":{"Reason":"test"}}`, 66 | }, 67 | } 68 | 69 | for _, tcase := range tests { 70 | patch, err := CreateMergePatch(tcase.old, tcase.new) 71 | if err != nil { 72 | t.Error(err) 73 | } 74 | if string(patch) != tcase.expected { 75 | t.Errorf("expected %v get %v", tcase.expected, string(patch)) 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /pkg/apis/podgroup/v1/register.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package v1 18 | 19 | import ( 20 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 21 | "k8s.io/apimachinery/pkg/runtime" 22 | "k8s.io/apimachinery/pkg/runtime/schema" 23 | 24 | "github.com/tenstack/batch-scheduler/pkg/apis/podgroup" 25 | ) 26 | 27 | // SchemeGroupVersion is group version used to register these objects 28 | var SchemeGroupVersion = schema.GroupVersion{Group: podgroup.GroupName, Version: "v1"} 29 | 30 | // Kind takes an unqualified kind and returns back a Group qualified GroupKind 31 | func Kind(kind string) schema.GroupKind { 32 | return SchemeGroupVersion.WithKind(kind).GroupKind() 33 | } 34 | 35 | // Resource takes an unqualified resource and returns a Group qualified GroupResource 36 | func Resource(resource string) schema.GroupResource { 37 | return SchemeGroupVersion.WithResource(resource).GroupResource() 38 | } 39 | 40 | var ( 41 | // SchemeBuilder initializes a scheme builder 42 | SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) 43 | // AddToScheme is a global function that registers this API group & version to a scheme 44 | AddToScheme = SchemeBuilder.AddToScheme 45 | ) 46 | 47 | // Adds the list of known types to Scheme. 
48 | func addKnownTypes(scheme *runtime.Scheme) error { 49 | scheme.AddKnownTypes(SchemeGroupVersion, 50 | &PodGroup{}, 51 | &PodGroupList{}, 52 | ) 53 | metav1.AddToGroupVersion(scheme, SchemeGroupVersion) 54 | return nil 55 | } 56 | -------------------------------------------------------------------------------- /pkg/generated/clientset/versioned/fake/register.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Code generated by client-gen. DO NOT EDIT. 18 | 19 | package fake 20 | 21 | import ( 22 | batchv1 "github.com/tenstack/batch-scheduler/pkg/apis/podgroup/v1" 23 | v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 24 | runtime "k8s.io/apimachinery/pkg/runtime" 25 | schema "k8s.io/apimachinery/pkg/runtime/schema" 26 | serializer "k8s.io/apimachinery/pkg/runtime/serializer" 27 | utilruntime "k8s.io/apimachinery/pkg/util/runtime" 28 | ) 29 | 30 | var scheme = runtime.NewScheme() 31 | var codecs = serializer.NewCodecFactory(scheme) 32 | var parameterCodec = runtime.NewParameterCodec(scheme) 33 | var localSchemeBuilder = runtime.SchemeBuilder{ 34 | batchv1.AddToScheme, 35 | } 36 | 37 | // AddToScheme adds all types of this clientset into the given scheme. 
This allows composition 38 | // of clientsets, like in: 39 | // 40 | // import ( 41 | // "k8s.io/client-go/kubernetes" 42 | // clientsetscheme "k8s.io/client-go/kubernetes/scheme" 43 | // aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme" 44 | // ) 45 | // 46 | // kclientset, _ := kubernetes.NewForConfig(c) 47 | // _ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme) 48 | // 49 | // After this, RawExtensions in Kubernetes types will serialize kube-aggregator types 50 | // correctly. 51 | var AddToScheme = localSchemeBuilder.AddToScheme 52 | 53 | func init() { 54 | v1.AddToGroupVersion(scheme, schema.GroupVersion{Version: "v1"}) 55 | utilruntime.Must(AddToScheme(scheme)) 56 | } 57 | -------------------------------------------------------------------------------- /pkg/generated/clientset/versioned/scheme/register.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Code generated by client-gen. DO NOT EDIT. 
18 | 19 | package scheme 20 | 21 | import ( 22 | batchv1 "github.com/tenstack/batch-scheduler/pkg/apis/podgroup/v1" 23 | v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 24 | runtime "k8s.io/apimachinery/pkg/runtime" 25 | schema "k8s.io/apimachinery/pkg/runtime/schema" 26 | serializer "k8s.io/apimachinery/pkg/runtime/serializer" 27 | utilruntime "k8s.io/apimachinery/pkg/util/runtime" 28 | ) 29 | 30 | var Scheme = runtime.NewScheme() 31 | var Codecs = serializer.NewCodecFactory(Scheme) 32 | var ParameterCodec = runtime.NewParameterCodec(Scheme) 33 | var localSchemeBuilder = runtime.SchemeBuilder{ 34 | batchv1.AddToScheme, 35 | } 36 | 37 | // AddToScheme adds all types of this clientset into the given scheme. This allows composition 38 | // of clientsets, like in: 39 | // 40 | // import ( 41 | // "k8s.io/client-go/kubernetes" 42 | // clientsetscheme "k8s.io/client-go/kubernetes/scheme" 43 | // aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme" 44 | // ) 45 | // 46 | // kclientset, _ := kubernetes.NewForConfig(c) 47 | // _ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme) 48 | // 49 | // After this, RawExtensions in Kubernetes types will serialize kube-aggregator types 50 | // correctly. 51 | var AddToScheme = localSchemeBuilder.AddToScheme 52 | 53 | func init() { 54 | v1.AddToGroupVersion(Scheme, schema.GroupVersion{Version: "v1"}) 55 | utilruntime.Must(AddToScheme(Scheme)) 56 | } 57 | -------------------------------------------------------------------------------- /pkg/generated/informers/externalversions/generic.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
// GenericInformer is type of SharedIndexInformer which will locate and delegate to other
// sharedInformers based on type
type GenericInformer interface {
	// Informer returns the underlying shared index informer.
	Informer() cache.SharedIndexInformer
	// Lister returns a generic lister for the informer's resource.
	Lister() cache.GenericLister
}

// genericInformer pairs a shared index informer with the group resource it
// serves.
type genericInformer struct {
	informer cache.SharedIndexInformer
	resource schema.GroupResource
}

// Informer returns the SharedIndexInformer.
func (f *genericInformer) Informer() cache.SharedIndexInformer {
	return f.informer
}

// Lister returns the GenericLister.
47 | func (f *genericInformer) Lister() cache.GenericLister { 48 | return cache.NewGenericLister(f.Informer().GetIndexer(), f.resource) 49 | } 50 | 51 | // ForResource gives generic access to a shared informer of the matching type 52 | // TODO extend this to unknown resources with a client pool 53 | func (f *sharedInformerFactory) ForResource(resource schema.GroupVersionResource) (GenericInformer, error) { 54 | switch resource { 55 | // Group=batch.scheduler.tencent.com, Version=v1 56 | case v1.SchemeGroupVersion.WithResource("podgroups"): 57 | return &genericInformer{resource: resource.GroupResource(), informer: f.Batch().V1().PodGroups().Informer()}, nil 58 | 59 | } 60 | 61 | return nil, fmt.Errorf("no informer found for %v", resource) 62 | } 63 | -------------------------------------------------------------------------------- /pkg/generated/clientset/versioned/typed/podgroup/v1/podgroup_client.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Code generated by client-gen. DO NOT EDIT. 
18 | 19 | package v1 20 | 21 | import ( 22 | v1 "github.com/tenstack/batch-scheduler/pkg/apis/podgroup/v1" 23 | "github.com/tenstack/batch-scheduler/pkg/generated/clientset/versioned/scheme" 24 | rest "k8s.io/client-go/rest" 25 | ) 26 | 27 | type BatchV1Interface interface { 28 | RESTClient() rest.Interface 29 | PodGroupsGetter 30 | } 31 | 32 | // BatchV1Client is used to interact with features provided by the batch.scheduler.tencent.com group. 33 | type BatchV1Client struct { 34 | restClient rest.Interface 35 | } 36 | 37 | func (c *BatchV1Client) PodGroups(namespace string) PodGroupInterface { 38 | return newPodGroups(c, namespace) 39 | } 40 | 41 | // NewForConfig creates a new BatchV1Client for the given config. 42 | func NewForConfig(c *rest.Config) (*BatchV1Client, error) { 43 | config := *c 44 | if err := setConfigDefaults(&config); err != nil { 45 | return nil, err 46 | } 47 | client, err := rest.RESTClientFor(&config) 48 | if err != nil { 49 | return nil, err 50 | } 51 | return &BatchV1Client{client}, nil 52 | } 53 | 54 | // NewForConfigOrDie creates a new BatchV1Client for the given config and 55 | // panics if there is an error in the config. 56 | func NewForConfigOrDie(c *rest.Config) *BatchV1Client { 57 | client, err := NewForConfig(c) 58 | if err != nil { 59 | panic(err) 60 | } 61 | return client 62 | } 63 | 64 | // New creates a new BatchV1Client for the given RESTClient. 65 | func New(c rest.Interface) *BatchV1Client { 66 | return &BatchV1Client{c} 67 | } 68 | 69 | func setConfigDefaults(config *rest.Config) error { 70 | gv := v1.SchemeGroupVersion 71 | config.GroupVersion = &gv 72 | config.APIPath = "/apis" 73 | config.NegotiatedSerializer = scheme.Codecs.WithoutConversion() 74 | 75 | if config.UserAgent == "" { 76 | config.UserAgent = rest.DefaultKubernetesUserAgent() 77 | } 78 | 79 | return nil 80 | } 81 | 82 | // RESTClient returns a RESTClient that is used to communicate 83 | // with API server by this client implementation. 
84 | func (c *BatchV1Client) RESTClient() rest.Interface { 85 | if c == nil { 86 | return nil 87 | } 88 | return c.restClient 89 | } 90 | -------------------------------------------------------------------------------- /pkg/util/k8s.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package util 18 | 19 | import ( 20 | "encoding/json" 21 | "fmt" 22 | "time" 23 | 24 | jsonpatch "github.com/evanphx/json-patch" 25 | v1 "k8s.io/api/core/v1" 26 | 27 | v12 "github.com/tenstack/batch-scheduler/pkg/apis/podgroup/v1" 28 | ) 29 | 30 | // DefaultWaitTime is 60s if MaxScheduleTime is not specified. 
31 | const DefaultWaitTime = 60 * time.Second 32 | 33 | // CreateMergePatch return patch generated from original and new interfaces 34 | func CreateMergePatch(original, new interface{}) ([]byte, error) { 35 | pvByte, err := json.Marshal(original) 36 | if err != nil { 37 | return nil, err 38 | } 39 | cloneByte, err := json.Marshal(new) 40 | if err != nil { 41 | return nil, err 42 | } 43 | patch, err := jsonpatch.CreateMergePatch(pvByte, cloneByte) 44 | if err != nil { 45 | return nil, err 46 | } 47 | return patch, nil 48 | } 49 | 50 | // VerifyPodAnnSatisfied verifies if pod ann satisfies batch scheduling 51 | func VerifyPodAnnSatisfied(pod *v1.Pod) (string, bool) { 52 | if pod.Annotations == nil { 53 | return "", false 54 | } 55 | if pod.Annotations[PodGroupAnn] == "" { 56 | return "", false 57 | } 58 | return pod.Annotations[PodGroupAnn], true 59 | } 60 | 61 | // VerifyPodLabelSatisfied verifies if pod ann satisfies batch scheduling 62 | func VerifyPodLabelSatisfied(pod *v1.Pod) (string, bool) { 63 | if pod.Labels == nil { 64 | return "", false 65 | } 66 | if pod.Labels[PodGroupLabel] == "" { 67 | return "", false 68 | } 69 | return pod.Labels[PodGroupLabel], true 70 | } 71 | 72 | // GetPodGroupFullName verify if pod ann satisfies batch scheduling 73 | func GetPodGroupFullName(pg *v12.PodGroup) string { 74 | if pg == nil { 75 | return "" 76 | } 77 | 78 | return fmt.Sprintf("%v/%v", pg.Namespace, pg.Name) 79 | } 80 | 81 | // GetWaitTimeDuration verify if pod ann satisfies batch scheduling 82 | func GetWaitTimeDuration(pg *v12.PodGroup, defaultMaxScheTime *time.Duration) time.Duration { 83 | waitTime := DefaultWaitTime 84 | if defaultMaxScheTime != nil || *defaultMaxScheTime != 0 { 85 | waitTime = *defaultMaxScheTime 86 | } 87 | if pg != nil && pg.Spec.MaxScheduleTime != nil { 88 | return pg.Spec.MaxScheduleTime.Duration 89 | } 90 | return waitTime 91 | } 92 | -------------------------------------------------------------------------------- 
/pkg/generated/clientset/versioned/fake/clientset_generated.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Code generated by client-gen. DO NOT EDIT. 18 | 19 | package fake 20 | 21 | import ( 22 | clientset "github.com/tenstack/batch-scheduler/pkg/generated/clientset/versioned" 23 | batchv1 "github.com/tenstack/batch-scheduler/pkg/generated/clientset/versioned/typed/podgroup/v1" 24 | fakebatchv1 "github.com/tenstack/batch-scheduler/pkg/generated/clientset/versioned/typed/podgroup/v1/fake" 25 | "k8s.io/apimachinery/pkg/runtime" 26 | "k8s.io/apimachinery/pkg/watch" 27 | "k8s.io/client-go/discovery" 28 | fakediscovery "k8s.io/client-go/discovery/fake" 29 | "k8s.io/client-go/testing" 30 | ) 31 | 32 | // NewSimpleClientset returns a clientset that will respond with the provided objects. 33 | // It's backed by a very simple object tracker that processes creates, updates and deletions as-is, 34 | // without applying any validations and/or defaults. It shouldn't be considered a replacement 35 | // for a real clientset and is mostly useful in simple unit tests. 
36 | func NewSimpleClientset(objects ...runtime.Object) *Clientset { 37 | o := testing.NewObjectTracker(scheme, codecs.UniversalDecoder()) 38 | for _, obj := range objects { 39 | if err := o.Add(obj); err != nil { 40 | panic(err) 41 | } 42 | } 43 | 44 | cs := &Clientset{tracker: o} 45 | cs.discovery = &fakediscovery.FakeDiscovery{Fake: &cs.Fake} 46 | cs.AddReactor("*", "*", testing.ObjectReaction(o)) 47 | cs.AddWatchReactor("*", func(action testing.Action) (handled bool, ret watch.Interface, err error) { 48 | gvr := action.GetResource() 49 | ns := action.GetNamespace() 50 | watch, err := o.Watch(gvr, ns) 51 | if err != nil { 52 | return false, nil, err 53 | } 54 | return true, watch, nil 55 | }) 56 | 57 | return cs 58 | } 59 | 60 | // Clientset implements clientset.Interface. Meant to be embedded into a 61 | // struct to get a default implementation. This makes faking out just the method 62 | // you want to test easier. 63 | type Clientset struct { 64 | testing.Fake 65 | discovery *fakediscovery.FakeDiscovery 66 | tracker testing.ObjectTracker 67 | } 68 | 69 | func (c *Clientset) Discovery() discovery.DiscoveryInterface { 70 | return c.discovery 71 | } 72 | 73 | func (c *Clientset) Tracker() testing.ObjectTracker { 74 | return c.tracker 75 | } 76 | 77 | var _ clientset.Interface = &Clientset{} 78 | 79 | // BatchV1 retrieves the BatchV1Client 80 | func (c *Clientset) BatchV1() batchv1.BatchV1Interface { 81 | return &fakebatchv1.FakeBatchV1{Fake: &c.Fake} 82 | } 83 | -------------------------------------------------------------------------------- /pkg/generated/clientset/versioned/clientset.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Code generated by client-gen. DO NOT EDIT. 18 | 19 | package versioned 20 | 21 | import ( 22 | "fmt" 23 | 24 | batchv1 "github.com/tenstack/batch-scheduler/pkg/generated/clientset/versioned/typed/podgroup/v1" 25 | discovery "k8s.io/client-go/discovery" 26 | rest "k8s.io/client-go/rest" 27 | flowcontrol "k8s.io/client-go/util/flowcontrol" 28 | ) 29 | 30 | type Interface interface { 31 | Discovery() discovery.DiscoveryInterface 32 | BatchV1() batchv1.BatchV1Interface 33 | } 34 | 35 | // Clientset contains the clients for groups. Each group has exactly one 36 | // version included in a Clientset. 37 | type Clientset struct { 38 | *discovery.DiscoveryClient 39 | batchV1 *batchv1.BatchV1Client 40 | } 41 | 42 | // BatchV1 retrieves the BatchV1Client 43 | func (c *Clientset) BatchV1() batchv1.BatchV1Interface { 44 | return c.batchV1 45 | } 46 | 47 | // Discovery retrieves the DiscoveryClient 48 | func (c *Clientset) Discovery() discovery.DiscoveryInterface { 49 | if c == nil { 50 | return nil 51 | } 52 | return c.DiscoveryClient 53 | } 54 | 55 | // NewForConfig creates a new Clientset for the given config. 56 | // If config's RateLimiter is not set and QPS and Burst are acceptable, 57 | // NewForConfig will generate a rate-limiter in configShallowCopy. 
58 | func NewForConfig(c *rest.Config) (*Clientset, error) { 59 | configShallowCopy := *c 60 | if configShallowCopy.RateLimiter == nil && configShallowCopy.QPS > 0 { 61 | if configShallowCopy.Burst <= 0 { 62 | return nil, fmt.Errorf("Burst is required to be greater than 0 when RateLimiter is not set and QPS is set to greater than 0") 63 | } 64 | configShallowCopy.RateLimiter = flowcontrol.NewTokenBucketRateLimiter(configShallowCopy.QPS, configShallowCopy.Burst) 65 | } 66 | var cs Clientset 67 | var err error 68 | cs.batchV1, err = batchv1.NewForConfig(&configShallowCopy) 69 | if err != nil { 70 | return nil, err 71 | } 72 | 73 | cs.DiscoveryClient, err = discovery.NewDiscoveryClientForConfig(&configShallowCopy) 74 | if err != nil { 75 | return nil, err 76 | } 77 | return &cs, nil 78 | } 79 | 80 | // NewForConfigOrDie creates a new Clientset for the given config and 81 | // panics if there is an error in the config. 82 | func NewForConfigOrDie(c *rest.Config) *Clientset { 83 | var cs Clientset 84 | cs.batchV1 = batchv1.NewForConfigOrDie(c) 85 | 86 | cs.DiscoveryClient = discovery.NewDiscoveryClientForConfigOrDie(c) 87 | return &cs 88 | } 89 | 90 | // New creates a new Clientset for the given RESTClient. 91 | func New(c rest.Interface) *Clientset { 92 | var cs Clientset 93 | cs.batchV1 = batchv1.New(c) 94 | 95 | cs.DiscoveryClient = discovery.NewDiscoveryClient(c) 96 | return &cs 97 | } 98 | -------------------------------------------------------------------------------- /pkg/generated/listers/podgroup/v1/podgroup.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Code generated by lister-gen. DO NOT EDIT. 18 | 19 | package v1 20 | 21 | import ( 22 | v1 "github.com/tenstack/batch-scheduler/pkg/apis/podgroup/v1" 23 | "k8s.io/apimachinery/pkg/api/errors" 24 | "k8s.io/apimachinery/pkg/labels" 25 | "k8s.io/client-go/tools/cache" 26 | ) 27 | 28 | // PodGroupLister helps list PodGroups. 29 | type PodGroupLister interface { 30 | // List lists all PodGroups in the indexer. 31 | List(selector labels.Selector) (ret []*v1.PodGroup, err error) 32 | // PodGroups returns an object that can list and get PodGroups. 33 | PodGroups(namespace string) PodGroupNamespaceLister 34 | PodGroupListerExpansion 35 | } 36 | 37 | // podGroupLister implements the PodGroupLister interface. 38 | type podGroupLister struct { 39 | indexer cache.Indexer 40 | } 41 | 42 | // NewPodGroupLister returns a new PodGroupLister. 43 | func NewPodGroupLister(indexer cache.Indexer) PodGroupLister { 44 | return &podGroupLister{indexer: indexer} 45 | } 46 | 47 | // List lists all PodGroups in the indexer. 48 | func (s *podGroupLister) List(selector labels.Selector) (ret []*v1.PodGroup, err error) { 49 | err = cache.ListAll(s.indexer, selector, func(m interface{}) { 50 | ret = append(ret, m.(*v1.PodGroup)) 51 | }) 52 | return ret, err 53 | } 54 | 55 | // PodGroups returns an object that can list and get PodGroups. 
56 | func (s *podGroupLister) PodGroups(namespace string) PodGroupNamespaceLister { 57 | return podGroupNamespaceLister{indexer: s.indexer, namespace: namespace} 58 | } 59 | 60 | // PodGroupNamespaceLister helps list and get PodGroups. 61 | type PodGroupNamespaceLister interface { 62 | // List lists all PodGroups in the indexer for a given namespace. 63 | List(selector labels.Selector) (ret []*v1.PodGroup, err error) 64 | // Get retrieves the PodGroup from the indexer for a given namespace and name. 65 | Get(name string) (*v1.PodGroup, error) 66 | PodGroupNamespaceListerExpansion 67 | } 68 | 69 | // podGroupNamespaceLister implements the PodGroupNamespaceLister 70 | // interface. 71 | type podGroupNamespaceLister struct { 72 | indexer cache.Indexer 73 | namespace string 74 | } 75 | 76 | // List lists all PodGroups in the indexer for a given namespace. 77 | func (s podGroupNamespaceLister) List(selector labels.Selector) (ret []*v1.PodGroup, err error) { 78 | err = cache.ListAllByNamespace(s.indexer, s.namespace, selector, func(m interface{}) { 79 | ret = append(ret, m.(*v1.PodGroup)) 80 | }) 81 | return ret, err 82 | } 83 | 84 | // Get retrieves the PodGroup from the indexer for a given namespace and name. 85 | func (s podGroupNamespaceLister) Get(name string) (*v1.PodGroup, error) { 86 | obj, exists, err := s.indexer.GetByKey(s.namespace + "/" + name) 87 | if err != nil { 88 | return nil, err 89 | } 90 | if !exists { 91 | return nil, errors.NewNotFound(v1.Resource("podgroup"), name) 92 | } 93 | return obj.(*v1.PodGroup), nil 94 | } 95 | -------------------------------------------------------------------------------- /pkg/generated/informers/externalversions/podgroup/v1/podgroup.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 
// PodGroupInformer provides access to a shared informer and lister for
// PodGroups.
type PodGroupInformer interface {
	// Informer returns the shared index informer for PodGroups.
	Informer() cache.SharedIndexInformer
	// Lister returns a PodGroupLister.
	Lister() v1.PodGroupLister
}

// podGroupInformer holds the factory, list-option tweak and namespace used to
// obtain the PodGroup informer.
type podGroupInformer struct {
	factory          internalinterfaces.SharedInformerFactory
	tweakListOptions internalinterfaces.TweakListOptionsFunc
	namespace        string
}

// NewPodGroupInformer constructs a new informer for PodGroup type.
// Always prefer using an informer factory to get a shared informer instead of getting an independent
// one. This reduces memory footprint and number of connections to the server.
50 | func NewPodGroupInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { 51 | return NewFilteredPodGroupInformer(client, namespace, resyncPeriod, indexers, nil) 52 | } 53 | 54 | // NewFilteredPodGroupInformer constructs a new informer for PodGroup type. 55 | // Always prefer using an informer factory to get a shared informer instead of getting an independent 56 | // one. This reduces memory footprint and number of connections to the server. 57 | func NewFilteredPodGroupInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { 58 | return cache.NewSharedIndexInformer( 59 | &cache.ListWatch{ 60 | ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { 61 | if tweakListOptions != nil { 62 | tweakListOptions(&options) 63 | } 64 | return client.BatchV1().PodGroups(namespace).List(options) 65 | }, 66 | WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { 67 | if tweakListOptions != nil { 68 | tweakListOptions(&options) 69 | } 70 | return client.BatchV1().PodGroups(namespace).Watch(options) 71 | }, 72 | }, 73 | &podgroupv1.PodGroup{}, 74 | resyncPeriod, 75 | indexers, 76 | ) 77 | } 78 | 79 | func (f *podGroupInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { 80 | return NewFilteredPodGroupInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) 81 | } 82 | 83 | func (f *podGroupInformer) Informer() cache.SharedIndexInformer { 84 | return f.factory.InformerFor(&podgroupv1.PodGroup{}, f.defaultInformer) 85 | } 86 | 87 | func (f *podGroupInformer) Lister() v1.PodGroupLister { 88 | return v1.NewPodGroupLister(f.Informer().GetIndexer()) 89 | } 90 | 
-------------------------------------------------------------------------------- /pkg/scheduler/core/core_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package core 18 | 19 | import ( 20 | "testing" 21 | 22 | corev1 "k8s.io/api/core/v1" 23 | "k8s.io/apimachinery/pkg/api/resource" 24 | "k8s.io/kubernetes/pkg/scheduler/nodeinfo" 25 | ) 26 | 27 | func Test_singleNodeResurouce(t *testing.T) { 28 | pod := corev1.Pod{ 29 | Spec: corev1.PodSpec{ 30 | Containers: []corev1.Container{ 31 | { 32 | Resources: corev1.ResourceRequirements{ 33 | Limits: corev1.ResourceList{ 34 | corev1.ResourceCPU: resource.MustParse("1"), 35 | corev1.ResourceName("alpha.kubernetes.io/nvidia-gpu"): resource.MustParse("1"), 36 | corev1.ResourceName("tencent.cr/tencentip"): resource.MustParse("1"), 37 | }, 38 | Requests: corev1.ResourceList{ 39 | corev1.ResourceCPU: resource.MustParse("1"), 40 | corev1.ResourceName("alpha.kubernetes.io/nvidia-gpu"): resource.MustParse("1"), 41 | 42 | corev1.ResourceName("tencent.cr/tencentip"): resource.MustParse("1"), 43 | }, 44 | }, 45 | }, 46 | }, 47 | }, 48 | } 49 | 50 | node := corev1.Node{ 51 | Spec: corev1.NodeSpec{}, 52 | Status: corev1.NodeStatus{ 53 | Capacity: corev1.ResourceList{ 54 | corev1.ResourceCPU: resource.MustParse("10"), 55 | 
corev1.ResourceName("alpha.kubernetes.io/nvidia-gpu"): resource.MustParse("10"), 56 | 57 | corev1.ResourcePods: resource.MustParse("100"), 58 | corev1.ResourceName("tencent.cr/tencentip"): resource.MustParse("20"), 59 | }, 60 | Allocatable: corev1.ResourceList{ 61 | corev1.ResourceCPU: resource.MustParse("10"), 62 | corev1.ResourceName("alpha.kubernetes.io/nvidia-gpu"): resource.MustParse("10"), 63 | 64 | corev1.ResourcePods: resource.MustParse("100"), 65 | corev1.ResourceName("tencent.cr/tencentip"): resource.MustParse("20"), 66 | }, 67 | }, 68 | } 69 | 70 | nodeIf := nodeinfo.NewNodeInfo() 71 | nodeIf.SetNode(&node) 72 | nodeIf.AddPod(&pod) 73 | podCopy := pod.DeepCopy() 74 | podCopy.Spec.Containers[0].Resources.Requests[corev1.ResourceName("alpha.kubernetes.io/nvidia-gpu")] = resource.MustParse("101") 75 | podCopy.Spec.Containers[0].Resources.Limits[corev1.ResourceName("alpha.kubernetes.io/nvidia-gpu")] = resource.MustParse("101") 76 | podCopy1 := pod.DeepCopy() 77 | 78 | podCopy1.Spec.Containers[0].Resources.Requests[corev1.ResourceName("tencent.cr/tencentip")] = resource.MustParse( 79 | "101") 80 | podCopy1.Spec.Containers[0].Resources.Limits[corev1.ResourceName("tencent.cr/tencentip")] = resource.MustParse( 81 | "101") 82 | cases := []struct { 83 | name string 84 | node *nodeinfo.NodeInfo 85 | pod *corev1.Pod 86 | desire bool 87 | }{ 88 | { 89 | node: nodeIf, 90 | pod: &pod, 91 | desire: true, 92 | }, 93 | { 94 | node: nodeIf, 95 | pod: podCopy, 96 | desire: false, 97 | }, 98 | { 99 | node: nodeIf, 100 | pod: podCopy1, 101 | desire: false, 102 | }, 103 | } 104 | 105 | for _, c := range cases { 106 | t.Logf("%+v", c.node.AllocatableResource()) 107 | t.Logf("%+v", c.node.RequestedResource()) 108 | re := singleNodeResource(c.node, c.pod, 1) 109 | req := getPodResourceRequire(c.pod) 110 | if compareResourceAndRequire(re, req) != c.desire { 111 | t.Error("not desire") 112 | } 113 | t.Logf("----\n %+v", req) 114 | } 115 | } 116 | 
-------------------------------------------------------------------------------- /pkg/scheduler/cache/cache.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package cache 18 | 19 | import ( 20 | "sync" 21 | 22 | gochache "github.com/patrickmn/go-cache" 23 | v12 "k8s.io/api/core/v1" 24 | 25 | v1 "github.com/tenstack/batch-scheduler/pkg/apis/podgroup/v1" 26 | "k8s.io/apimachinery/pkg/types" 27 | ) 28 | 29 | // Cache provides func Get, Set, Delete to operation cache 30 | type Cache interface { 31 | Get(string) Object 32 | Set(string, Object) 33 | Delete(string) 34 | } 35 | 36 | // Object is A abstract interface 37 | type Object interface { 38 | } 39 | 40 | var _ Cache = &PGStatusCache{} 41 | 42 | var _ Object = &PodGroupMatchStatus{} 43 | 44 | // PGStatusCache provide safe function Get, Set, Delete. 
45 | type PGStatusCache struct { 46 | sync.RWMutex 47 | // NodeDiskStatus maps podGroupName to its DiskStatus 48 | PGStatusMap map[string]*PodGroupMatchStatus 49 | } 50 | 51 | // PodGroupMatchStatus helps record pod group status 52 | type PodGroupMatchStatus struct { 53 | // used to lock map 54 | CountLock sync.RWMutex 55 | PodGroup *v1.PodGroup 56 | // store permitted pod node map with ttl 57 | MatchedPodNodes *gochache.Cache 58 | // store permitted pod name and uuid map with ttl 59 | PodNameUIDs *gochache.Cache 60 | // failed pods 61 | Failed map[string]string 62 | // succeed pods 63 | Succeed map[string]string 64 | Pod *v12.Pod 65 | // Scheduled marks if has scheduled 66 | Scheduled bool 67 | } 68 | 69 | // PodNodePair is a pair contains pod name and node 70 | type PodNodePair struct { 71 | PodName string 72 | Node string 73 | } 74 | 75 | // NewPGStatusCache initializers pod status cache 76 | func NewPGStatusCache() *PGStatusCache { 77 | return &PGStatusCache{ 78 | PGStatusMap: map[string]*PodGroupMatchStatus{}, 79 | } 80 | } 81 | 82 | // DeepCopy copies the pod node pair 83 | func DeepCopy(pairs map[types.UID]*PodNodePair) map[types.UID]*PodNodePair { 84 | newPair := make(map[types.UID]*PodNodePair) 85 | for id, pair := range pairs { 86 | pairObj := *pair 87 | pairObjCopy := pairObj 88 | newPair[id] = &pairObjCopy 89 | } 90 | return newPair 91 | } 92 | 93 | // Get cache according to podGroupName 94 | func (psc *PGStatusCache) Get(podGroupName string) Object { 95 | psc.RLock() 96 | defer psc.RUnlock() 97 | pg, found := psc.PGStatusMap[podGroupName] 98 | if found == false { 99 | return nil 100 | } 101 | return pg 102 | } 103 | 104 | //Set node status to the cache 105 | func (psc *PGStatusCache) Set(podGroupName string, pg Object) { 106 | psc.Lock() 107 | psc.PGStatusMap[podGroupName] = pg.(*PodGroupMatchStatus) 108 | psc.Unlock() 109 | } 110 | 111 | // Delete Node status from cache 112 | func (psc *PGStatusCache) Delete(podGroupName string) { 113 | psc.Lock() 114 
| delete(psc.PGStatusMap, podGroupName) 115 | psc.Unlock() 116 | } 117 | 118 | /* 119 | var _ Cache = &NodeResourceCache{} 120 | 121 | var _ Object = &nodeinfo.Resource{} 122 | 123 | func NewNodeResourceCache() *NodeResourceCache { 124 | return &NodeResourceCache{ 125 | Resources: map[string]*nodeinfo.Resource{}, 126 | } 127 | } 128 | 129 | type NodeResourceCache struct { 130 | sync.RWMutex 131 | //map[nodeName] Pre-allocationIno 132 | Resources map[string]*nodeinfo.Resource 133 | } 134 | 135 | func (n *NodeResourceCache) Get(node string) Object { 136 | n.RLock() 137 | defer n.RUnlock() 138 | resource, found := n.Resources[node] 139 | if !found { 140 | return nil 141 | } 142 | return resource 143 | } 144 | 145 | func (n *NodeResourceCache) Set(node string, obj Object) { 146 | n.Lock() 147 | n.Resources[node] = obj.(*nodeinfo.Resource) 148 | n.Unlock() 149 | } 150 | 151 | func (n *NodeResourceCache) Delete(node string) { 152 | n.Lock() 153 | delete(n.Resources, node) 154 | defer n.Unlock() 155 | } 156 | */ 157 | -------------------------------------------------------------------------------- /pkg/apis/podgroup/v1/zz_generated.deepcopy.go: -------------------------------------------------------------------------------- 1 | // +build !ignore_autogenerated 2 | 3 | /* 4 | * Copyright 2020 THL A29 Limited, a Tencent company. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | // Code generated by deepcopy-gen. 
DO NOT EDIT. 20 | 21 | package v1 22 | 23 | import ( 24 | corev1 "k8s.io/api/core/v1" 25 | resource "k8s.io/apimachinery/pkg/api/resource" 26 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 | runtime "k8s.io/apimachinery/pkg/runtime" 28 | ) 29 | 30 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 31 | func (in *PodGroup) DeepCopyInto(out *PodGroup) { 32 | *out = *in 33 | out.TypeMeta = in.TypeMeta 34 | in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) 35 | in.Spec.DeepCopyInto(&out.Spec) 36 | in.Status.DeepCopyInto(&out.Status) 37 | return 38 | } 39 | 40 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodGroup. 41 | func (in *PodGroup) DeepCopy() *PodGroup { 42 | if in == nil { 43 | return nil 44 | } 45 | out := new(PodGroup) 46 | in.DeepCopyInto(out) 47 | return out 48 | } 49 | 50 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 51 | func (in *PodGroup) DeepCopyObject() runtime.Object { 52 | if c := in.DeepCopy(); c != nil { 53 | return c 54 | } 55 | return nil 56 | } 57 | 58 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 59 | func (in *PodGroupList) DeepCopyInto(out *PodGroupList) { 60 | *out = *in 61 | out.TypeMeta = in.TypeMeta 62 | in.ListMeta.DeepCopyInto(&out.ListMeta) 63 | if in.Items != nil { 64 | in, out := &in.Items, &out.Items 65 | *out = make([]PodGroup, len(*in)) 66 | for i := range *in { 67 | (*in)[i].DeepCopyInto(&(*out)[i]) 68 | } 69 | } 70 | return 71 | } 72 | 73 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodGroupList. 
74 | func (in *PodGroupList) DeepCopy() *PodGroupList { 75 | if in == nil { 76 | return nil 77 | } 78 | out := new(PodGroupList) 79 | in.DeepCopyInto(out) 80 | return out 81 | } 82 | 83 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 84 | func (in *PodGroupList) DeepCopyObject() runtime.Object { 85 | if c := in.DeepCopy(); c != nil { 86 | return c 87 | } 88 | return nil 89 | } 90 | 91 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 92 | func (in *PodGroupSpec) DeepCopyInto(out *PodGroupSpec) { 93 | *out = *in 94 | if in.MinResources != nil { 95 | in, out := &in.MinResources, &out.MinResources 96 | *out = new(corev1.ResourceList) 97 | if **in != nil { 98 | in, out := *in, *out 99 | *out = make(map[corev1.ResourceName]resource.Quantity, len(*in)) 100 | for key, val := range *in { 101 | (*out)[key] = val.DeepCopy() 102 | } 103 | } 104 | } 105 | if in.MaxScheduleTime != nil { 106 | in, out := &in.MaxScheduleTime, &out.MaxScheduleTime 107 | *out = new(metav1.Duration) 108 | **out = **in 109 | } 110 | return 111 | } 112 | 113 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodGroupSpec. 114 | func (in *PodGroupSpec) DeepCopy() *PodGroupSpec { 115 | if in == nil { 116 | return nil 117 | } 118 | out := new(PodGroupSpec) 119 | in.DeepCopyInto(out) 120 | return out 121 | } 122 | 123 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 124 | func (in *PodGroupStatus) DeepCopyInto(out *PodGroupStatus) { 125 | *out = *in 126 | in.ScheduleStartTime.DeepCopyInto(&out.ScheduleStartTime) 127 | return 128 | } 129 | 130 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodGroupStatus. 
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodGroupStatus.
// It returns nil when the receiver is nil.
func (in *PodGroupStatus) DeepCopy() *PodGroupStatus {
	if in == nil {
		return nil
	}
	out := new(PodGroupStatus)
	in.DeepCopyInto(out)
	return out
}

// PodGroupPhase is the phase of a pod group at the current time.
type PodGroupPhase string

// These are the valid phases of pod groups.
const (
	// PodGroupPending means the pod group has been accepted by the system, but the scheduler
	// cannot allocate enough resources to it.
	PodGroupPending PodGroupPhase = "Pending"

	// PodGroupRunning means `spec.minMember` pods of the PodGroup are in running phase.
	PodGroupRunning PodGroupPhase = "Running"

	// PodGroupPreScheduling means all pods of the group are waiting to be scheduled
	// (enqueued as waiting pods).
	PodGroupPreScheduling PodGroupPhase = "PreScheduling"

	// PodGroupScheduling means some of the pods have been scheduled and are in running phase,
	// but the `spec.minMember` pods of the PodGroup have not been reached yet.
	PodGroupScheduling PodGroupPhase = "Scheduling"

	// PodGroupScheduled means `spec.minMember` pods of the PodGroup have finished scheduling
	// and the pods are in running phase.
	PodGroupScheduled PodGroupPhase = "Scheduled"

	// PodGroupUnknown means part of `spec.minMember` pods are running but the other part can not
	// be scheduled, e.g. not enough resource; scheduler will wait for related controller to recover it.
	PodGroupUnknown PodGroupPhase = "Unknown"

	// PodGroupFinished means all of `spec.minMember` pods have finished successfully.
	PodGroupFinished PodGroupPhase = "Finished"

	// PodGroupFailed means at least one of `spec.minMember` pods has failed.
	PodGroupFailed PodGroupPhase = "Failed"
)

// +genclient
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

// PodGroup is a collection of Pod; used for batch workload.
type PodGroup struct {
	metav1.TypeMeta `json:",inline"`
	// Standard object's metadata.
	// +optional
	metav1.ObjectMeta `json:"metadata,omitempty"`

	// Specification of the desired behavior of the pod group.
	// +optional
	Spec PodGroupSpec `json:"spec"`

	// Status represents the current information about a pod group.
	// This data may not be up to date.
	// +optional
	Status PodGroupStatus `json:"status"`
}

// PodGroupSpec represents the template of a pod group.
type PodGroupSpec struct {
	// MinMember defines the minimal number of members/tasks to run the pod group;
	// if there's not enough resources to start all tasks, the scheduler
	// will not start anyone.
	MinMember uint32 `json:"minMember"`

	// If specified, indicates the PodGroup's priority. "system-node-critical" and
	// "system-cluster-critical" are two special keywords which indicate the
	// highest priorities with the former being the highest priority. Any other
	// name must be defined by creating a PriorityClass object with that name.
	// If not specified, the PodGroup priority will be default or zero if there is no
	// default.
	// +optional
	PriorityClassName string `json:"priorityClassName,omitempty"`

	// MinResources defines the minimal resource of members/tasks to run the pod group;
	// if there's not enough resources to start all tasks, the scheduler
	// will not start anyone.
	MinResources *v1.ResourceList `json:"minResources,omitempty"`

	// MaxScheduleTime defines the maximal time the members/tasks wait before the pod group is run.
	MaxScheduleTime *metav1.Duration `json:"maxScheduleTime,omitempty"`
}

// PodGroupStatus represents the current state of a pod group.
type PodGroupStatus struct {
	// Current phase of PodGroup.
	Phase PodGroupPhase `json:"phase"`

	// OccupiedBy marks which group occupies the pod group.
	// The owner reference is used to fill it; if it is not initialized, it is empty.
	OccupiedBy string `json:"occupiedBy,omitempty"`

	// The number of scheduled pods.
	// NOTE(review): the previous comment duplicated the Running description;
	// presumably this counts pods that have been scheduled — confirm against the controller.
	// +optional
	Scheduled uint32 `json:"scheduled"`

	// The number of actively running pods.
	// +optional
	Running uint32 `json:"running"`

	// The number of pods which reached phase Succeeded.
	// +optional
	Succeeded uint32 `json:"succeeded"`

	// The number of pods which reached phase Failed.
	// +optional
	Failed uint32 `json:"failed"`

	// ScheduleStartTime of the group
	ScheduleStartTime metav1.Time `json:"scheduleStartTime"`
}

// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

// PodGroupList is a collection of pod groups.
type PodGroupList struct {
	metav1.TypeMeta `json:",inline"`
	// Standard list metadata
	// +optional
	metav1.ListMeta `json:"metadata,omitempty"`

	// items is the list of PodGroup
	Items []PodGroup `json:"items"`
}
18 | 19 | package fake 20 | 21 | import ( 22 | podgroupv1 "github.com/tenstack/batch-scheduler/pkg/apis/podgroup/v1" 23 | v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 24 | labels "k8s.io/apimachinery/pkg/labels" 25 | schema "k8s.io/apimachinery/pkg/runtime/schema" 26 | types "k8s.io/apimachinery/pkg/types" 27 | watch "k8s.io/apimachinery/pkg/watch" 28 | testing "k8s.io/client-go/testing" 29 | ) 30 | 31 | // FakePodGroups implements PodGroupInterface 32 | type FakePodGroups struct { 33 | Fake *FakeBatchV1 34 | ns string 35 | } 36 | 37 | var podgroupsResource = schema.GroupVersionResource{Group: "batch.scheduler.tencent.com", Version: "v1", Resource: "podgroups"} 38 | 39 | var podgroupsKind = schema.GroupVersionKind{Group: "batch.scheduler.tencent.com", Version: "v1", Kind: "PodGroup"} 40 | 41 | // Get takes name of the podGroup, and returns the corresponding podGroup object, and an error if there is any. 42 | func (c *FakePodGroups) Get(name string, options v1.GetOptions) (result *podgroupv1.PodGroup, err error) { 43 | obj, err := c.Fake. 44 | Invokes(testing.NewGetAction(podgroupsResource, c.ns, name), &podgroupv1.PodGroup{}) 45 | 46 | if obj == nil { 47 | return nil, err 48 | } 49 | return obj.(*podgroupv1.PodGroup), err 50 | } 51 | 52 | // List takes label and field selectors, and returns the list of PodGroups that match those selectors. 53 | func (c *FakePodGroups) List(opts v1.ListOptions) (result *podgroupv1.PodGroupList, err error) { 54 | obj, err := c.Fake. 
55 | Invokes(testing.NewListAction(podgroupsResource, podgroupsKind, c.ns, opts), &podgroupv1.PodGroupList{}) 56 | 57 | if obj == nil { 58 | return nil, err 59 | } 60 | 61 | label, _, _ := testing.ExtractFromListOptions(opts) 62 | if label == nil { 63 | label = labels.Everything() 64 | } 65 | list := &podgroupv1.PodGroupList{ListMeta: obj.(*podgroupv1.PodGroupList).ListMeta} 66 | for _, item := range obj.(*podgroupv1.PodGroupList).Items { 67 | if label.Matches(labels.Set(item.Labels)) { 68 | list.Items = append(list.Items, item) 69 | } 70 | } 71 | return list, err 72 | } 73 | 74 | // Watch returns a watch.Interface that watches the requested podGroups. 75 | func (c *FakePodGroups) Watch(opts v1.ListOptions) (watch.Interface, error) { 76 | return c.Fake. 77 | InvokesWatch(testing.NewWatchAction(podgroupsResource, c.ns, opts)) 78 | 79 | } 80 | 81 | // Create takes the representation of a podGroup and creates it. Returns the server's representation of the podGroup, and an error, if there is any. 82 | func (c *FakePodGroups) Create(podGroup *podgroupv1.PodGroup) (result *podgroupv1.PodGroup, err error) { 83 | obj, err := c.Fake. 84 | Invokes(testing.NewCreateAction(podgroupsResource, c.ns, podGroup), &podgroupv1.PodGroup{}) 85 | 86 | if obj == nil { 87 | return nil, err 88 | } 89 | return obj.(*podgroupv1.PodGroup), err 90 | } 91 | 92 | // Update takes the representation of a podGroup and updates it. Returns the server's representation of the podGroup, and an error, if there is any. 93 | func (c *FakePodGroups) Update(podGroup *podgroupv1.PodGroup) (result *podgroupv1.PodGroup, err error) { 94 | obj, err := c.Fake. 95 | Invokes(testing.NewUpdateAction(podgroupsResource, c.ns, podGroup), &podgroupv1.PodGroup{}) 96 | 97 | if obj == nil { 98 | return nil, err 99 | } 100 | return obj.(*podgroupv1.PodGroup), err 101 | } 102 | 103 | // UpdateStatus was generated because the type contains a Status member. 
104 | // Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). 105 | func (c *FakePodGroups) UpdateStatus(podGroup *podgroupv1.PodGroup) (*podgroupv1.PodGroup, error) { 106 | obj, err := c.Fake. 107 | Invokes(testing.NewUpdateSubresourceAction(podgroupsResource, "status", c.ns, podGroup), &podgroupv1.PodGroup{}) 108 | 109 | if obj == nil { 110 | return nil, err 111 | } 112 | return obj.(*podgroupv1.PodGroup), err 113 | } 114 | 115 | // Delete takes name of the podGroup and deletes it. Returns an error if one occurs. 116 | func (c *FakePodGroups) Delete(name string, options *v1.DeleteOptions) error { 117 | _, err := c.Fake. 118 | Invokes(testing.NewDeleteAction(podgroupsResource, c.ns, name), &podgroupv1.PodGroup{}) 119 | 120 | return err 121 | } 122 | 123 | // DeleteCollection deletes a collection of objects. 124 | func (c *FakePodGroups) DeleteCollection(options *v1.DeleteOptions, listOptions v1.ListOptions) error { 125 | action := testing.NewDeleteCollectionAction(podgroupsResource, c.ns, listOptions) 126 | 127 | _, err := c.Fake.Invokes(action, &podgroupv1.PodGroupList{}) 128 | return err 129 | } 130 | 131 | // Patch applies the patch and returns the patched podGroup. 132 | func (c *FakePodGroups) Patch(name string, pt types.PatchType, data []byte, subresources ...string) (result *podgroupv1.PodGroup, err error) { 133 | obj, err := c.Fake. 134 | Invokes(testing.NewPatchSubresourceAction(podgroupsResource, c.ns, name, pt, data, subresources...), &podgroupv1.PodGroup{}) 135 | 136 | if obj == nil { 137 | return nil, err 138 | } 139 | return obj.(*podgroupv1.PodGroup), err 140 | } 141 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Batch-scheduler 2 | 3 | ## Background 4 | 5 | Currently, through the default scheduler of Kubernetes, we cannot ensure a group of pods scheduled at the same time 6 | . 
In some scenarios this wastes resources, since some pods need to work together, like `spark`, `tensorflow` and so on. 7 | So, batch-scheduler aims to solve this issue. 8 | 9 | ## Method 10 | 11 | ### Features 12 | 13 | - lightweight 14 | - no resource race 15 | - gang scheduling 16 | 17 | ### Implementation 18 | 19 | We designed the scheduler based on the latest [scheduling framework](https://kubernetes.io/docs/concepts/scheduling-eviction/scheduling-framework). So only one scheduler needs to run in the 20 | cluster, which makes sure a `resource race` cannot happen. 21 | 22 | This scheduler also guarantees gang scheduling, e.g. 23 | - scene1 24 | 25 | A group consists of 5 pods. The batch-scheduler would not schedule any pod until enough resources are found. 26 | 27 | - scene2 28 | Only 6 CPUs exist in the cluster. Two groups that each require 5 CPUs are submitted; then one, and only one, 29 | group would be scheduled. 30 | 31 | How do we keep it lightweight? We define a CRD named [PodGroup](./pkg/apis/podgroup/v1/types.go). To run a group 32 | of pods, just submit a `PodGroup`, e.g. `group1`, into the cluster. The pods that need to run as a group only have to add the label: 33 | `group.batch.scheduler.tencent.com: group1` 34 | 35 | ### Main Progress 36 | 37 | 1. `PreFilter`: Compute resource requirements before we start predicates for a pod. If a pod is not permitted, we add it 38 | to the freeze cache; then the pods belonging to the same group are rejected directly. 39 | 40 | 2. `Less`: this interface decides the sequence of pods. Currently, pods having higher `Priority` would be scheduled 41 | first. If pods have the same priority, the PodGroup creation time is compared. 42 | 43 | 3. `Permit`: it is used for approving a pod or denying one. If a pod can be scheduled, but the number of pods belonging 44 | to the same group has not reached the minimum requested, it returns `Wait`. If a pod cannot be scheduled, it returns 45 | `Unschedulable`.
46 | 47 | 4. It is better to set MaxScheduleTime for a PodGroup. If one of the pods belonging to the same PodGroup times out, 48 | the other pods would also be rejected. 49 | 50 | ## Build 51 | 52 | ``` 53 | # git clone git@github.com:tenstack/batch-scheduler.git 54 | # make build 55 | ``` 56 | 57 | ## Deploy 58 | 59 | - Deploy CRD 60 | 61 | ``` 62 | # cd deploy 63 | # kubectl apply -f crd.yaml 64 | ``` 65 | - Configuration 66 | 67 | A default config is provided, but `kube_config` in it should be changed to your own. 68 | 69 | - Deploy batch-scheduler 70 | 71 | ```$xslt 72 | # cd deploy 73 | # bash start.sh 74 | ``` 75 | 76 | ## Example 77 | 78 | This example shows the resource race scene. Only 8 CPUs exist in the cluster, and 0.9 CPU has been occupied. 79 | 80 | ```$xslt 81 | Allocated resources: 82 | (Total limits may be over 100 percent, i.e., overcommitted.) 83 | Resource Requests Limits 84 | -------- -------- ------ 85 | cpu 900m (11%) 0 (0%) 86 | memory 140Mi (0%) 340Mi (2%) 87 | ephemeral-storage 0 (0%) 0 (0%) 88 | Events: 89 | ``` 90 | 91 | - Yaml file named `sts-group-valid-race.yaml` is as follows: 92 | ```$xslt 93 | apiVersion: batch.scheduler.tencent.com/v1 94 | kind: PodGroup 95 | metadata: 96 | name: group1 97 | namespace: default 98 | spec: 99 | minMember: 5 100 | --- 101 | apiVersion: batch.scheduler.tencent.com/v1 102 | kind: PodGroup 103 | metadata: 104 | name: group2 105 | namespace: default 106 | spec: 107 | minMember: 5 108 | --- 109 | apiVersion: apps/v1 110 | kind: StatefulSet 111 | metadata: 112 | name: web-group-race1 113 | spec: 114 | selector: 115 | matchLabels: 116 | app: nginx 117 | podManagementPolicy: Parallel 118 | serviceName: "nginx" 119 | replicas: 5 120 | template: 121 | metadata: 122 | labels: 123 | group.batch.scheduler.tencent.com: "group1" 124 | app: nginx 125 | type: node 126 | spec: 127 | containers: 128 | - name: nginx 129 | image: nginx 130 | ports: 131 | - containerPort: 80 132 | name: web 133 | resources: 134 |
limits: 135 | cpu: "1" 136 | requests: 137 | cpu: "1" 138 | --- 139 | apiVersion: apps/v1 140 | kind: StatefulSet 141 | metadata: 142 | name: web-group-race2 143 | spec: 144 | selector: 145 | matchLabels: 146 | app: nginx 147 | podManagementPolicy: Parallel 148 | serviceName: "nginx" 149 | replicas: 5 150 | template: 151 | metadata: 152 | labels: 153 | group.batch.scheduler.tencent.com: "group2" 154 | app: nginx 155 | type: node 156 | spec: 157 | containers: 158 | - name: nginx 159 | image: nginx 160 | ports: 161 | - containerPort: 80 162 | name: web 163 | resources: 164 | limits: 165 | cpu: "1" 166 | requests: 167 | cpu: "1" 168 | ``` 169 | 170 | - Submit it 171 | 172 | ```$xslt 173 | # kubectl apply -f sts-group-valid-race.yaml 174 | ``` 175 | - Results 176 | ```$xslt 177 | [root@cwd-dev ~]# kubectl get pod 178 | NAME READY STATUS RESTARTS AGE 179 | web-group-race1-0 0/1 ContainerCreating 0 16s 180 | web-group-race1-1 0/1 ContainerCreating 0 16s 181 | web-group-race1-2 0/1 ContainerCreating 0 16s 182 | web-group-race1-3 1/1 Running 0 16s 183 | web-group-race1-4 0/1 ContainerCreating 0 16s 184 | web-group-race2-0 0/1 Pending 0 16s 185 | web-group-race2-1 0/1 Pending 0 16s 186 | web-group-race2-2 0/1 Pending 0 16s 187 | web-group-race2-3 0/1 Pending 0 16s 188 | web-group-race2-4 0/1 Pending 0 16s 189 | ``` -------------------------------------------------------------------------------- /pkg/generated/clientset/versioned/typed/podgroup/v1/podgroup.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Code generated by client-gen. DO NOT EDIT. 18 | 19 | package v1 20 | 21 | import ( 22 | "time" 23 | 24 | v1 "github.com/tenstack/batch-scheduler/pkg/apis/podgroup/v1" 25 | scheme "github.com/tenstack/batch-scheduler/pkg/generated/clientset/versioned/scheme" 26 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 | types "k8s.io/apimachinery/pkg/types" 28 | watch "k8s.io/apimachinery/pkg/watch" 29 | rest "k8s.io/client-go/rest" 30 | ) 31 | 32 | // PodGroupsGetter has a method to return a PodGroupInterface. 33 | // A group's client should implement this interface. 34 | type PodGroupsGetter interface { 35 | PodGroups(namespace string) PodGroupInterface 36 | } 37 | 38 | // PodGroupInterface has methods to work with PodGroup resources. 
39 | type PodGroupInterface interface { 40 | Create(*v1.PodGroup) (*v1.PodGroup, error) 41 | Update(*v1.PodGroup) (*v1.PodGroup, error) 42 | UpdateStatus(*v1.PodGroup) (*v1.PodGroup, error) 43 | Delete(name string, options *metav1.DeleteOptions) error 44 | DeleteCollection(options *metav1.DeleteOptions, listOptions metav1.ListOptions) error 45 | Get(name string, options metav1.GetOptions) (*v1.PodGroup, error) 46 | List(opts metav1.ListOptions) (*v1.PodGroupList, error) 47 | Watch(opts metav1.ListOptions) (watch.Interface, error) 48 | Patch(name string, pt types.PatchType, data []byte, subresources ...string) (result *v1.PodGroup, err error) 49 | PodGroupExpansion 50 | } 51 | 52 | // podGroups implements PodGroupInterface 53 | type podGroups struct { 54 | client rest.Interface 55 | ns string 56 | } 57 | 58 | // newPodGroups returns a PodGroups 59 | func newPodGroups(c *BatchV1Client, namespace string) *podGroups { 60 | return &podGroups{ 61 | client: c.RESTClient(), 62 | ns: namespace, 63 | } 64 | } 65 | 66 | // Get takes name of the podGroup, and returns the corresponding podGroup object, and an error if there is any. 67 | func (c *podGroups) Get(name string, options metav1.GetOptions) (result *v1.PodGroup, err error) { 68 | result = &v1.PodGroup{} 69 | err = c.client.Get(). 70 | Namespace(c.ns). 71 | Resource("podgroups"). 72 | Name(name). 73 | VersionedParams(&options, scheme.ParameterCodec). 74 | Do(). 75 | Into(result) 76 | return 77 | } 78 | 79 | // List takes label and field selectors, and returns the list of PodGroups that match those selectors. 80 | func (c *podGroups) List(opts metav1.ListOptions) (result *v1.PodGroupList, err error) { 81 | var timeout time.Duration 82 | if opts.TimeoutSeconds != nil { 83 | timeout = time.Duration(*opts.TimeoutSeconds) * time.Second 84 | } 85 | result = &v1.PodGroupList{} 86 | err = c.client.Get(). 87 | Namespace(c.ns). 88 | Resource("podgroups"). 89 | VersionedParams(&opts, scheme.ParameterCodec). 90 | Timeout(timeout). 
91 | Do(). 92 | Into(result) 93 | return 94 | } 95 | 96 | // Watch returns a watch.Interface that watches the requested podGroups. 97 | func (c *podGroups) Watch(opts metav1.ListOptions) (watch.Interface, error) { 98 | var timeout time.Duration 99 | if opts.TimeoutSeconds != nil { 100 | timeout = time.Duration(*opts.TimeoutSeconds) * time.Second 101 | } 102 | opts.Watch = true 103 | return c.client.Get(). 104 | Namespace(c.ns). 105 | Resource("podgroups"). 106 | VersionedParams(&opts, scheme.ParameterCodec). 107 | Timeout(timeout). 108 | Watch() 109 | } 110 | 111 | // Create takes the representation of a podGroup and creates it. Returns the server's representation of the podGroup, and an error, if there is any. 112 | func (c *podGroups) Create(podGroup *v1.PodGroup) (result *v1.PodGroup, err error) { 113 | result = &v1.PodGroup{} 114 | err = c.client.Post(). 115 | Namespace(c.ns). 116 | Resource("podgroups"). 117 | Body(podGroup). 118 | Do(). 119 | Into(result) 120 | return 121 | } 122 | 123 | // Update takes the representation of a podGroup and updates it. Returns the server's representation of the podGroup, and an error, if there is any. 124 | func (c *podGroups) Update(podGroup *v1.PodGroup) (result *v1.PodGroup, err error) { 125 | result = &v1.PodGroup{} 126 | err = c.client.Put(). 127 | Namespace(c.ns). 128 | Resource("podgroups"). 129 | Name(podGroup.Name). 130 | Body(podGroup). 131 | Do(). 132 | Into(result) 133 | return 134 | } 135 | 136 | // UpdateStatus was generated because the type contains a Status member. 137 | // Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). 138 | 139 | func (c *podGroups) UpdateStatus(podGroup *v1.PodGroup) (result *v1.PodGroup, err error) { 140 | result = &v1.PodGroup{} 141 | err = c.client.Put(). 142 | Namespace(c.ns). 143 | Resource("podgroups"). 144 | Name(podGroup.Name). 145 | SubResource("status"). 146 | Body(podGroup). 147 | Do(). 
148 | Into(result) 149 | return 150 | } 151 | 152 | // Delete takes name of the podGroup and deletes it. Returns an error if one occurs. 153 | func (c *podGroups) Delete(name string, options *metav1.DeleteOptions) error { 154 | return c.client.Delete(). 155 | Namespace(c.ns). 156 | Resource("podgroups"). 157 | Name(name). 158 | Body(options). 159 | Do(). 160 | Error() 161 | } 162 | 163 | // DeleteCollection deletes a collection of objects. 164 | func (c *podGroups) DeleteCollection(options *metav1.DeleteOptions, listOptions metav1.ListOptions) error { 165 | var timeout time.Duration 166 | if listOptions.TimeoutSeconds != nil { 167 | timeout = time.Duration(*listOptions.TimeoutSeconds) * time.Second 168 | } 169 | return c.client.Delete(). 170 | Namespace(c.ns). 171 | Resource("podgroups"). 172 | VersionedParams(&listOptions, scheme.ParameterCodec). 173 | Timeout(timeout). 174 | Body(options). 175 | Do(). 176 | Error() 177 | } 178 | 179 | // Patch applies the patch and returns the patched podGroup. 180 | func (c *podGroups) Patch(name string, pt types.PatchType, data []byte, subresources ...string) (result *v1.PodGroup, err error) { 181 | result = &v1.PodGroup{} 182 | err = c.client.Patch(pt). 183 | Namespace(c.ns). 184 | Resource("podgroups"). 185 | SubResource(subresources...). 186 | Name(name). 187 | Body(data). 188 | Do(). 189 | Into(result) 190 | return 191 | } 192 | -------------------------------------------------------------------------------- /pkg/generated/informers/externalversions/factory.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Code generated by informer-gen. DO NOT EDIT. 18 | 19 | package externalversions 20 | 21 | import ( 22 | reflect "reflect" 23 | sync "sync" 24 | time "time" 25 | 26 | versioned "github.com/tenstack/batch-scheduler/pkg/generated/clientset/versioned" 27 | internalinterfaces "github.com/tenstack/batch-scheduler/pkg/generated/informers/externalversions/internalinterfaces" 28 | podgroup "github.com/tenstack/batch-scheduler/pkg/generated/informers/externalversions/podgroup" 29 | v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 30 | runtime "k8s.io/apimachinery/pkg/runtime" 31 | schema "k8s.io/apimachinery/pkg/runtime/schema" 32 | cache "k8s.io/client-go/tools/cache" 33 | ) 34 | 35 | // SharedInformerOption defines the functional option type for SharedInformerFactory. 36 | type SharedInformerOption func(*sharedInformerFactory) *sharedInformerFactory 37 | 38 | type sharedInformerFactory struct { 39 | client versioned.Interface 40 | namespace string 41 | tweakListOptions internalinterfaces.TweakListOptionsFunc 42 | lock sync.Mutex 43 | defaultResync time.Duration 44 | customResync map[reflect.Type]time.Duration 45 | 46 | informers map[reflect.Type]cache.SharedIndexInformer 47 | // startedInformers is used for tracking which informers have been started. 48 | // This allows Start() to be called multiple times safely. 49 | startedInformers map[reflect.Type]bool 50 | } 51 | 52 | // WithCustomResyncConfig sets a custom resync period for the specified informer types. 
53 | func WithCustomResyncConfig(resyncConfig map[v1.Object]time.Duration) SharedInformerOption { 54 | return func(factory *sharedInformerFactory) *sharedInformerFactory { 55 | for k, v := range resyncConfig { 56 | factory.customResync[reflect.TypeOf(k)] = v 57 | } 58 | return factory 59 | } 60 | } 61 | 62 | // WithTweakListOptions sets a custom filter on all listers of the configured SharedInformerFactory. 63 | func WithTweakListOptions(tweakListOptions internalinterfaces.TweakListOptionsFunc) SharedInformerOption { 64 | return func(factory *sharedInformerFactory) *sharedInformerFactory { 65 | factory.tweakListOptions = tweakListOptions 66 | return factory 67 | } 68 | } 69 | 70 | // WithNamespace limits the SharedInformerFactory to the specified namespace. 71 | func WithNamespace(namespace string) SharedInformerOption { 72 | return func(factory *sharedInformerFactory) *sharedInformerFactory { 73 | factory.namespace = namespace 74 | return factory 75 | } 76 | } 77 | 78 | // NewSharedInformerFactory constructs a new instance of sharedInformerFactory for all namespaces. 79 | func NewSharedInformerFactory(client versioned.Interface, defaultResync time.Duration) SharedInformerFactory { 80 | return NewSharedInformerFactoryWithOptions(client, defaultResync) 81 | } 82 | 83 | // NewFilteredSharedInformerFactory constructs a new instance of sharedInformerFactory. 84 | // Listers obtained via this SharedInformerFactory will be subject to the same filters 85 | // as specified here. 
86 | // Deprecated: Please use NewSharedInformerFactoryWithOptions instead 87 | func NewFilteredSharedInformerFactory(client versioned.Interface, defaultResync time.Duration, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) SharedInformerFactory { 88 | return NewSharedInformerFactoryWithOptions(client, defaultResync, WithNamespace(namespace), WithTweakListOptions(tweakListOptions)) 89 | } 90 | 91 | // NewSharedInformerFactoryWithOptions constructs a new instance of a SharedInformerFactory with additional options. 92 | func NewSharedInformerFactoryWithOptions(client versioned.Interface, defaultResync time.Duration, options ...SharedInformerOption) SharedInformerFactory { 93 | factory := &sharedInformerFactory{ 94 | client: client, 95 | namespace: v1.NamespaceAll, 96 | defaultResync: defaultResync, 97 | informers: make(map[reflect.Type]cache.SharedIndexInformer), 98 | startedInformers: make(map[reflect.Type]bool), 99 | customResync: make(map[reflect.Type]time.Duration), 100 | } 101 | 102 | // Apply all options 103 | for _, opt := range options { 104 | factory = opt(factory) 105 | } 106 | 107 | return factory 108 | } 109 | 110 | // Start initializes all requested informers. 111 | func (f *sharedInformerFactory) Start(stopCh <-chan struct{}) { 112 | f.lock.Lock() 113 | defer f.lock.Unlock() 114 | 115 | for informerType, informer := range f.informers { 116 | if !f.startedInformers[informerType] { 117 | go informer.Run(stopCh) 118 | f.startedInformers[informerType] = true 119 | } 120 | } 121 | } 122 | 123 | // WaitForCacheSync waits for all started informers' cache were synced. 
124 | func (f *sharedInformerFactory) WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool { 125 | informers := func() map[reflect.Type]cache.SharedIndexInformer { 126 | f.lock.Lock() 127 | defer f.lock.Unlock() 128 | 129 | informers := map[reflect.Type]cache.SharedIndexInformer{} 130 | for informerType, informer := range f.informers { 131 | if f.startedInformers[informerType] { 132 | informers[informerType] = informer 133 | } 134 | } 135 | return informers 136 | }() 137 | 138 | res := map[reflect.Type]bool{} 139 | for informType, informer := range informers { 140 | res[informType] = cache.WaitForCacheSync(stopCh, informer.HasSynced) 141 | } 142 | return res 143 | } 144 | 145 | // InternalInformerFor returns the SharedIndexInformer for obj using an internal 146 | // client. 147 | func (f *sharedInformerFactory) InformerFor(obj runtime.Object, newFunc internalinterfaces.NewInformerFunc) cache.SharedIndexInformer { 148 | f.lock.Lock() 149 | defer f.lock.Unlock() 150 | 151 | informerType := reflect.TypeOf(obj) 152 | informer, exists := f.informers[informerType] 153 | if exists { 154 | return informer 155 | } 156 | 157 | resyncPeriod, exists := f.customResync[informerType] 158 | if !exists { 159 | resyncPeriod = f.defaultResync 160 | } 161 | 162 | informer = newFunc(f.client, resyncPeriod) 163 | f.informers[informerType] = informer 164 | 165 | return informer 166 | } 167 | 168 | // SharedInformerFactory provides shared informers for resources in all known 169 | // API group versions. 
170 | type SharedInformerFactory interface { 171 | internalinterfaces.SharedInformerFactory 172 | ForResource(resource schema.GroupVersionResource) (GenericInformer, error) 173 | WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool 174 | 175 | Batch() podgroup.Interface 176 | } 177 | 178 | func (f *sharedInformerFactory) Batch() podgroup.Interface { 179 | return podgroup.New(f, f.namespace, f.tweakListOptions) 180 | } 181 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/tenstack/batch-scheduler 2 | 3 | go 1.14 4 | 5 | require ( 6 | bitbucket.org/bertimus9/systemstat v0.0.0-20180207000608-0eeff89b0690 // indirect 7 | github.com/JeffAshton/win_pdh v0.0.0-20161109143554-76bb4ee9f0ab // indirect 8 | github.com/MakeNowJust/heredoc v0.0.0-20170808103936-bb23615498cd // indirect 9 | github.com/Microsoft/go-winio v0.4.11 // indirect 10 | github.com/Microsoft/hcsshim v0.0.0-20190417211021-672e52e9209d // indirect 11 | github.com/Rican7/retry v0.1.0 // indirect 12 | github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e // indirect 13 | github.com/auth0/go-jwt-middleware v0.0.0-20170425171159-5493cabe49f7 // indirect 14 | github.com/bazelbuild/bazel-gazelle v0.19.1-0.20191105222053-70208cbdc798 // indirect 15 | github.com/boltdb/bolt v1.3.1 // indirect 16 | github.com/cespare/prettybench v0.0.0-20150116022406-03b8cfe5406c // indirect 17 | github.com/chai2010/gettext-go v0.0.0-20160711120539-c6fed771bfd5 // indirect 18 | github.com/checkpoint-restore/go-criu v0.0.0-20190109184317-bdb7599cd87b // indirect 19 | github.com/clusterhq/flocker-go v0.0.0-20160920122132-2b8b7259d313 // indirect 20 | github.com/codegangsta/negroni v1.0.0 // indirect 21 | github.com/container-storage-interface/spec v1.2.0 // indirect 22 | github.com/containerd/console v0.0.0-20170925154832-84eeaae905fa // indirect 23 | 
github.com/containerd/containerd v1.0.2 // indirect 24 | github.com/containerd/typeurl v0.0.0-20190228175220-2a93cfde8c20 // indirect 25 | github.com/containernetworking/cni v0.7.1 // indirect 26 | github.com/coredns/corefile-migration v1.0.4 // indirect 27 | github.com/cyphar/filepath-securejoin v0.2.2 // indirect 28 | github.com/daviddengcn/go-colortext v0.0.0-20160507010035-511bcaf42ccd // indirect 29 | github.com/docker/distribution v2.7.1+incompatible // indirect 30 | github.com/docker/go-connections v0.3.0 // indirect 31 | github.com/docker/libnetwork v0.8.0-dev.2.0.20190624125649-f0e46a78ea34 // indirect 32 | github.com/euank/go-kmsg-parser v2.0.0+incompatible // indirect 33 | github.com/evanphx/json-patch v4.5.0+incompatible 34 | github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d // indirect 35 | github.com/fatih/camelcase v1.0.0 // indirect 36 | github.com/go-bindata/go-bindata v3.1.1+incompatible // indirect 37 | github.com/go-openapi/validate v0.19.5 // indirect 38 | github.com/go-ozzo/ozzo-validation v3.5.0+incompatible // indirect 39 | github.com/godbus/dbus v0.0.0-20181101234600-2ff6f7ffd60f // indirect 40 | github.com/gogo/protobuf v1.3.1 41 | github.com/golangplus/bytes v0.0.0-20160111154220-45c989fe5450 // indirect 42 | github.com/golangplus/fmt v0.0.0-20150411045040-2a5d6d7d2995 // indirect 43 | github.com/golangplus/testing v0.0.0-20180327235837-af21d9c3145e // indirect 44 | github.com/google/cadvisor v0.35.0 // indirect 45 | github.com/gorilla/context v1.1.1 // indirect 46 | github.com/gorilla/mux v1.7.0 // indirect 47 | github.com/heketi/heketi v9.0.1-0.20190917153846-c2e2a4ab7ab9+incompatible // indirect 48 | github.com/heketi/tests v0.0.0-20151005000721-f3775cbcefd6 // indirect 49 | github.com/karrick/godirwalk v1.7.5 // indirect 50 | github.com/libopenstorage/openstorage v1.0.0 // indirect 51 | github.com/lpabon/godbc v0.1.1 // indirect 52 | github.com/mattn/go-shellwords v1.0.5 // indirect 53 | github.com/mesos/mesos-go 
v0.0.9 // indirect 54 | github.com/miekg/dns v1.1.4 // indirect 55 | github.com/mindprince/gonvml v0.0.0-20190828220739-9ebdce4bb989 // indirect 56 | github.com/mistifyio/go-zfs v2.1.1+incompatible // indirect 57 | github.com/mitchellh/go-wordwrap v1.0.0 // indirect 58 | github.com/mohae/deepcopy v0.0.0-20170603005431-491d3605edfb // indirect 59 | github.com/morikuni/aec v0.0.0-20170113033406-39771216ff4c // indirect 60 | github.com/mrunalp/fileutils v0.0.0-20171103030105-7d4729fb3618 // indirect 61 | github.com/mvdan/xurls v1.1.0 // indirect 62 | github.com/opencontainers/go-digest v1.0.0-rc1 // indirect 63 | github.com/opencontainers/image-spec v1.0.1 // indirect 64 | github.com/opencontainers/runc v1.0.0-rc9 // indirect 65 | github.com/opencontainers/runtime-spec v1.0.0 // indirect 66 | github.com/opencontainers/selinux v1.3.1-0.20190929122143-5215b1806f52 // indirect 67 | github.com/patrickmn/go-cache v2.1.0+incompatible 68 | github.com/pquerna/ffjson v0.0.0-20180717144149-af8b230fcd20 // indirect 69 | github.com/quobyte/api v0.1.2 // indirect 70 | github.com/robfig/cron v1.1.0 // indirect 71 | github.com/seccomp/libseccomp-golang v0.9.1 // indirect 72 | github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a // indirect 73 | github.com/spf13/jwalterweatherman v1.1.0 // indirect 74 | github.com/storageos/go-api v0.0.0-20180912212459-343b3eff91fc // indirect 75 | github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2 // indirect 76 | github.com/thecodeteam/goscaleio v0.1.0 // indirect 77 | github.com/urfave/negroni v1.0.0 // indirect 78 | github.com/vishvananda/netlink v1.0.0 // indirect 79 | github.com/vishvananda/netns v0.0.0-20171111001504-be1fbeda1936 // indirect 80 | github.com/xlab/handysort v0.0.0-20150421192137-fb3537ed64a1 // indirect 81 | gotest.tools/gotestsum v0.3.5 // indirect 82 | honnef.co/go/tools v0.0.1-2019.2.2 // indirect 83 | k8s.io/api v0.17.5 84 | k8s.io/apiextensions-apiserver v0.0.0 85 | k8s.io/apimachinery v0.17.5 
86 | k8s.io/cli-runtime v0.17.5 87 | k8s.io/client-go v0.17.5 88 | k8s.io/cloud-provider v0.17.5 89 | k8s.io/cluster-bootstrap v0.17.5 90 | k8s.io/code-generator v0.17.5 91 | k8s.io/component-base v0.17.5 92 | k8s.io/cri-api v0.17.5 93 | k8s.io/csi-translation-lib v0.17.5 94 | k8s.io/heapster v1.2.0-beta.1 // indirect 95 | k8s.io/klog v1.0.0 96 | k8s.io/kube-aggregator v0.17.5 97 | k8s.io/kube-controller-manager v0.17.5 98 | k8s.io/kube-proxy v0.17.5 99 | k8s.io/kube-scheduler v0.17.5 100 | k8s.io/kubectl v0.17.5 101 | k8s.io/kubelet v0.17.5 102 | k8s.io/kubernetes v1.17.5 103 | k8s.io/legacy-cloud-providers v0.17.5 104 | k8s.io/metrics v0.17.5 105 | k8s.io/repo-infra v0.0.1-alpha.1 // indirect 106 | k8s.io/sample-apiserver v0.17.5 107 | k8s.io/system-validators v1.0.4 // indirect 108 | vbom.ml/util v0.0.0-20160121211510-db5cfe13f5cc // indirect 109 | 110 | ) 111 | 112 | replace ( 113 | github.com/Sirupsen/logrus => github.com/sirupsen/logrus v1.4.1 114 | k8s.io/api => k8s.io/api v0.17.5 115 | k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.17.5 116 | k8s.io/apimachinery => k8s.io/apimachinery v0.17.5 117 | k8s.io/apiserver => k8s.io/apiserver v0.17.5 118 | k8s.io/cli-runtime => k8s.io/cli-runtime v0.17.5 119 | k8s.io/client-go => k8s.io/client-go v0.17.5 120 | k8s.io/cloud-provider => k8s.io/cloud-provider v0.17.5 121 | k8s.io/cluster-bootstrap => k8s.io/cluster-bootstrap v0.17.5 122 | k8s.io/code-generator => k8s.io/code-generator v0.17.5 123 | k8s.io/component-base => k8s.io/component-base v0.17.5 124 | k8s.io/cri-api => k8s.io/cri-api v0.17.5 125 | k8s.io/csi-translation-lib => k8s.io/csi-translation-lib v0.17.5 126 | k8s.io/klog => k8s.io/klog v1.0.0 127 | k8s.io/kube-aggregator => k8s.io/kube-aggregator v0.17.5 128 | k8s.io/kube-controller-manager => k8s.io/kube-controller-manager v0.17.5 129 | k8s.io/kube-proxy => k8s.io/kube-proxy v0.17.5 130 | k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.17.5 131 | k8s.io/kubectl => k8s.io/kubectl 
v0.17.5 132 | k8s.io/kubelet => k8s.io/kubelet v0.17.5 133 | k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.17.5 134 | k8s.io/metrics => k8s.io/metrics v0.17.5 135 | k8s.io/sample-apiserver => k8s.io/sample-apiserver v0.17.5 136 | ) 137 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /pkg/scheduler/controller/controller.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package controller 18 | 19 | import ( 20 | "reflect" 21 | "time" 22 | 23 | gochache "github.com/patrickmn/go-cache" 24 | "github.com/tenstack/batch-scheduler/pkg/util" 25 | v1 "k8s.io/api/core/v1" 26 | apierrs "k8s.io/apimachinery/pkg/api/errors" 27 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 | "k8s.io/apimachinery/pkg/labels" 29 | "k8s.io/apimachinery/pkg/types" 30 | "k8s.io/apimachinery/pkg/util/runtime" 31 | "k8s.io/apimachinery/pkg/util/wait" 32 | "k8s.io/client-go/kubernetes" 33 | "k8s.io/client-go/kubernetes/scheme" 34 | corev1 "k8s.io/client-go/kubernetes/typed/core/v1" 35 | "k8s.io/client-go/tools/cache" 36 | "k8s.io/client-go/tools/record" 37 | "k8s.io/client-go/util/workqueue" 38 | "k8s.io/klog" 39 | 40 | pgv1 "github.com/tenstack/batch-scheduler/pkg/apis/podgroup/v1" 41 | pgclientset "github.com/tenstack/batch-scheduler/pkg/generated/clientset/versioned" 42 | pginformer "github.com/tenstack/batch-scheduler/pkg/generated/informers/externalversions/podgroup/v1" 43 | pglister "github.com/tenstack/batch-scheduler/pkg/generated/listers/podgroup/v1" 44 | pgcache "github.com/tenstack/batch-scheduler/pkg/scheduler/cache" 45 | ) 46 | 47 | // PodGroupController is a controller that process pod groups using provided Handler interface 48 | type PodGroupController struct { 49 | client kubernetes.Interface 50 | eventRecorder record.EventRecorder 51 | pgQueue workqueue.RateLimitingInterface 52 | pgLister pglister.PodGroupLister 53 | pgListerSynced cache.InformerSynced 54 | pgClient *pgclientset.Clientset 55 | cache pgcache.Cache 56 | rejectPod func(types.UID) 57 | addToBackOff func(string) 58 | } 59 | 60 | // NewPodGroupController returns a new *PodGroupController 61 | func NewPodGroupController(client kubernetes.Interface, 62 | pgInformer pginformer.PodGroupInformer, 63 | pgRateLimiter workqueue.RateLimiter, 64 | pgClient *pgclientset.Clientset, 65 | pgCache pgcache.Cache, rejectPod func(types.UID), 66 | addToBackOff func(string)) 
*PodGroupController { 67 | broadcaster := record.NewBroadcaster() 68 | broadcaster.StartRecordingToSink(&corev1.EventSinkImpl{Interface: client.CoreV1().Events(v1.NamespaceAll)}) 69 | var eventRecorder record.EventRecorder 70 | eventRecorder = broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "batch-scheduling"}) 71 | 72 | ctrl := &PodGroupController{ 73 | client: client, 74 | eventRecorder: eventRecorder, 75 | pgQueue: workqueue.NewNamedRateLimitingQueue(pgRateLimiter, "batch-scheduling-queue"), 76 | cache: pgCache, 77 | rejectPod: rejectPod, 78 | addToBackOff: addToBackOff, 79 | } 80 | 81 | pgInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 82 | AddFunc: ctrl.pgAdded, 83 | UpdateFunc: ctrl.pgUpdated, 84 | DeleteFunc: ctrl.pgDelete, 85 | }) 86 | ctrl.pgLister = pgInformer.Lister() 87 | ctrl.pgListerSynced = pgInformer.Informer().HasSynced 88 | ctrl.pgClient = pgClient 89 | return ctrl 90 | } 91 | 92 | // Run starts listening on channel events 93 | func (ctrl *PodGroupController) Run(workers int, stopCh <-chan struct{}) { 94 | defer ctrl.pgQueue.ShutDown() 95 | 96 | klog.Infof("Starting batch scheduler") 97 | defer klog.Infof("Shutting batch scheduler") 98 | 99 | if !cache.WaitForCacheSync(stopCh, ctrl.pgListerSynced, ctrl.pgListerSynced) { 100 | klog.Errorf("Cannot sync caches") 101 | return 102 | } 103 | for i := 0; i < workers; i++ { 104 | go wait.Until(ctrl.sync, 0, stopCh) 105 | } 106 | 107 | <-stopCh 108 | } 109 | 110 | // pgAdded reacts to a PG creation 111 | func (ctrl *PodGroupController) pgAdded(obj interface{}) { 112 | key, err := cache.MetaNamespaceKeyFunc(obj) 113 | if err != nil { 114 | runtime.HandleError(err) 115 | return 116 | } 117 | pg := obj.(*pgv1.PodGroup) 118 | if pg.Status.Phase == pgv1.PodGroupFinished || pg.Status.Phase == pgv1.PodGroupFailed { 119 | return 120 | } 121 | // If startScheduleTime - createTime > 2days, do not enqueue again because pod may have be GC 122 | if pg.Status.Scheduled == 
pg.Spec.MinMember && pg.Status.Running == 0 && 123 | pg.Status.ScheduleStartTime.Sub(pg.CreationTimestamp.Time) > 48*time.Hour { 124 | return 125 | } 126 | klog.Info("enqueue ", "key ", key) 127 | ctrl.pgQueue.Add(key) 128 | } 129 | 130 | // pgUpdated reacts to a PG update 131 | func (ctrl *PodGroupController) pgUpdated(old, new interface{}) { 132 | ctrl.pgAdded(new) 133 | } 134 | 135 | // pgDelete reacts to a PG update 136 | func (ctrl *PodGroupController) pgDelete(new interface{}) { 137 | key, err := cache.MetaNamespaceKeyFunc(new) 138 | if err != nil { 139 | runtime.HandleError(err) 140 | return 141 | } 142 | klog.Info("enqueue ", "key ", key) 143 | ctrl.cache.Delete(key) 144 | klog.V(3).Infof("pg %q delete change", key) 145 | } 146 | 147 | // syncPG deals with one key off the queue. It returns false when it's time to quit. 148 | func (ctrl *PodGroupController) sync() { 149 | keyObj, quit := ctrl.pgQueue.Get() 150 | if quit { 151 | return 152 | } 153 | defer ctrl.pgQueue.Done(keyObj) 154 | 155 | key := keyObj.(string) 156 | namespace, pgName, err := cache.SplitMetaNamespaceKey(key) 157 | klog.V(4).Infof("Started PG processing %q", pgName) 158 | 159 | // get PG to process 160 | pg, err := ctrl.pgLister.PodGroups(namespace).Get(pgName) 161 | if err != nil { 162 | if apierrs.IsNotFound(err) { 163 | pg, err = ctrl.pgClient.BatchV1().PodGroups(namespace).Get(pgName, metav1.GetOptions{}) 164 | if err != nil && apierrs.IsNotFound(err) { 165 | // PG was deleted in the meantime, ignore. 
166 | klog.V(3).Infof("PG %q deleted", pgName) 167 | ctrl.cache.Delete(key) 168 | return 169 | } 170 | } 171 | klog.Errorf("Error getting PodGroup %q: %v", pgName, err) 172 | ctrl.pgQueue.AddRateLimited(keyObj) 173 | return 174 | } 175 | ctrl.syncHandler(pg) 176 | } 177 | 178 | // syncHandle syncs pod group and convert status 179 | func (ctrl *PodGroupController) syncHandler(pg *pgv1.PodGroup) { 180 | 181 | key, err := cache.MetaNamespaceKeyFunc(pg) 182 | if err != nil { 183 | runtime.HandleError(err) 184 | return 185 | } 186 | 187 | defer func() { 188 | if err != nil { 189 | ctrl.pgQueue.AddRateLimited(key) 190 | return 191 | } 192 | }() 193 | 194 | pgsObj := ctrl.cache.Get(key) 195 | if pgsObj == nil { 196 | pgsObj = ctrl.initPodGroupMatchStatus(pg, key) 197 | } 198 | pgCopy := pg.DeepCopy() 199 | if string(pgCopy.Status.Phase) == "" { 200 | pgCopy.Status.Phase = pgv1.PodGroupPending 201 | } else if pgCopy.Status.Phase == pgv1.PodGroupPending && !pgCopy.Status.ScheduleStartTime.IsZero() { 202 | // recover from abnormal exit 203 | selector := labels.Set(map[string]string{util.PodGroupLabel: pgCopy.Name}).AsSelector() 204 | options := metav1.ListOptions{LabelSelector: selector.String()} 205 | var pods *v1.PodList 206 | pods, err = ctrl.client.CoreV1().Pods(pgCopy.Namespace).List(options) 207 | if err != nil { 208 | return 209 | } 210 | pgCopy.Status.Scheduled = uint32(len(pods.Items)) 211 | if pgCopy.Status.Scheduled > 0 && !reflect.DeepEqual(pg, pgCopy) { 212 | patch, err := util.CreateMergePatch(pg, pgCopy) 213 | if err != nil { 214 | return 215 | } 216 | 217 | pg, err = ctrl.pgClient.BatchV1().PodGroups(pg.Namespace).Patch(pg.Name, types.MergePatchType, patch) 218 | if err != nil { 219 | return 220 | } 221 | } 222 | } 223 | 224 | pgs := pgsObj.(*pgcache.PodGroupMatchStatus) 225 | pgs.PodGroup.Status = pgCopy.Status 226 | ctrl.cache.Set(key, pgs) 227 | // If startScheduleTime - createTime > 2days, do not enqueue again because pod may have be GC 228 | if 
pgCopy.Status.Scheduled == pgCopy.Spec.MinMember && pgCopy.Status.Running == 0 && 229 | pgCopy.Status.ScheduleStartTime.Sub(pgCopy.CreationTimestamp.Time) > 48*time.Hour { 230 | return 231 | } 232 | 233 | // Add PodGroupScheduling batch to queue to avoid scheduling stop when postBind, 234 | // so we do not know how many pods have been scheduled 235 | if pgCopy.Status.Phase == pgv1.PodGroupScheduled || pgCopy.Status.Phase == pgv1.PodGroupRunning || pgCopy.Status. 236 | Phase == pgv1.PodGroupScheduling { 237 | selector := labels.Set(map[string]string{util.PodGroupLabel: pgCopy.Name}).AsSelector() 238 | options := metav1.ListOptions{LabelSelector: selector.String()} 239 | var pods *v1.PodList 240 | pods, err = ctrl.client.CoreV1().Pods(pgCopy.Namespace).List(options) 241 | if err != nil { 242 | return 243 | } 244 | pgs.CountLock.Lock() 245 | var notPending uint32 = 0 246 | var running uint32 = 0 247 | if pods != nil { 248 | for _, pod := range pods.Items { 249 | switch pod.Status.Phase { 250 | case v1.PodRunning: 251 | running++ 252 | case v1.PodSucceeded: 253 | pgs.Succeed[string(pod.UID)] = "" 254 | case v1.PodFailed: 255 | pgs.Failed[string(pod.UID)] = "" 256 | } 257 | 258 | // to avoid schedule stop when postBind, so when do not know how many pods have scheduled 259 | if pod.Status.Phase != v1.PodPending { 260 | notPending++ 261 | } 262 | 263 | } 264 | } 265 | pgCopy.Status.Failed = uint32(len(pgs.Failed)) 266 | pgCopy.Status.Succeeded = uint32(len(pgs.Succeed)) 267 | pgCopy.Status.Running = running 268 | 269 | if notPending > pgCopy.Status.Scheduled { 270 | pgCopy.Status.Scheduled = notPending 271 | 272 | } 273 | pgs.CountLock.Unlock() 274 | 275 | // recover from exit 276 | if notPending < pgCopy.Spec.MinMember && notPending != 0 { 277 | pgCopy.Status.Scheduled = notPending 278 | pgCopy.Status.Phase = pgv1.PodGroupScheduling 279 | } 280 | 281 | if pgCopy.Status.Succeeded+pgCopy.Status.Running >= pg.Spec.MinMember { 282 | pgCopy.Status.Phase = pgv1.PodGroupRunning 
283 | } 284 | // Final state of pod group 285 | if pgCopy.Status.Failed != 0 && pgCopy.Status.Failed+pgCopy.Status.Running+pgCopy.Status.Succeeded >= pg.Spec. 286 | MinMember { 287 | pgCopy.Status.Phase = pgv1.PodGroupFailed 288 | } 289 | if pgCopy.Status.Succeeded >= pg.Spec.MinMember { 290 | pgCopy.Status.Phase = pgv1.PodGroupFinished 291 | } 292 | } 293 | if !reflect.DeepEqual(pg, pgCopy) { 294 | var patch []byte 295 | patch, err = util.CreateMergePatch(pg, pgCopy) 296 | if err != nil { 297 | return 298 | } 299 | 300 | pg, err = ctrl.pgClient.BatchV1().PodGroups(pg.Namespace).Patch(pg.Name, types.MergePatchType, patch) 301 | if err != nil { 302 | return 303 | } 304 | if pg.Status.Phase == pgv1.PodGroupFinished || pg.Status.Phase == pgv1.PodGroupFailed { 305 | ctrl.cache.Delete(key) 306 | } 307 | pgs.PodGroup.Status = pg.Status 308 | ctrl.pgQueue.Forget(pg) 309 | } 310 | ctrl.pgQueue.AddRateLimited(key) 311 | } 312 | 313 | // initPodGroupMatchStatus init pod groups 314 | func (ctrl *PodGroupController) initPodGroupMatchStatus(pg *pgv1.PodGroup, key string) *pgcache.PodGroupMatchStatus { 315 | pgs := &pgcache.PodGroupMatchStatus{ 316 | PodGroup: pg, 317 | MatchedPodNodes: gochache.New(1*time.Minute, 2*time.Minute), 318 | PodNameUIDs: gochache.New(1*time.Minute, 2*time.Minute), 319 | Failed: make(map[string]string), 320 | Succeed: make(map[string]string), 321 | } 322 | pgs.PodNameUIDs.OnEvicted(func(s string, i interface{}) { 323 | klog.V(4).Infof("Evict triggered group %v", key) 324 | for podID := range pgs.MatchedPodNodes.Items() { 325 | klog.Infof("Foreach %v", podID) 326 | ctrl.rejectPod(types.UID(podID)) 327 | if pgs.MatchedPodNodes != nil { 328 | pgs.MatchedPodNodes.Delete(podID) 329 | } 330 | } 331 | pgs.PodNameUIDs.Flush() 332 | ctrl.addToBackOff(key) 333 | }) 334 | return pgs 335 | } 336 | -------------------------------------------------------------------------------- /pkg/scheduler/batch/batchscheduler.go: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package batch 18 | 19 | import ( 20 | "context" 21 | "encoding/json" 22 | "fmt" 23 | "os" 24 | "strings" 25 | "sync" 26 | "time" 27 | 28 | corev1 "k8s.io/api/core/v1" 29 | apiextensionsv1beta1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1" 30 | apiextensionsclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" 31 | apierrs "k8s.io/apimachinery/pkg/api/errors" 32 | v12 "k8s.io/apimachinery/pkg/apis/meta/v1" 33 | "k8s.io/apimachinery/pkg/runtime" 34 | "k8s.io/apimachinery/pkg/types" 35 | "k8s.io/client-go/kubernetes" 36 | "k8s.io/client-go/tools/clientcmd" 37 | "k8s.io/client-go/tools/leaderelection/resourcelock" 38 | "k8s.io/client-go/util/workqueue" 39 | "k8s.io/klog" 40 | framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" 41 | schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo" 42 | 43 | v1 "github.com/tenstack/batch-scheduler/pkg/apis/podgroup/v1" 44 | pgclientset "github.com/tenstack/batch-scheduler/pkg/generated/clientset/versioned" 45 | pgformers "github.com/tenstack/batch-scheduler/pkg/generated/informers/externalversions" 46 | "github.com/tenstack/batch-scheduler/pkg/scheduler/cache" 47 | "github.com/tenstack/batch-scheduler/pkg/scheduler/controller" 48 | 
"github.com/tenstack/batch-scheduler/pkg/scheduler/core" 49 | "github.com/tenstack/batch-scheduler/pkg/util" 50 | ) 51 | 52 | // batchScheduler support schedule batch pods 53 | type batchScheduler interface { 54 | // UpdateBatchCache update the pod cache which should be scheduled as a group 55 | UpdateBatchCache() 56 | // StartBatchSchedule receives the group name of pod to start scheduling 57 | StartBatchSchedule(string) 58 | } 59 | 60 | type batchSchedulingPlugin struct { 61 | frameworkHandler framework.FrameworkHandle 62 | operation *core.ScheduleOperation 63 | startChan chan string 64 | maxScheduleTime *time.Duration 65 | client pgclientset.Interface 66 | extension *batchSchedulingPluginExtension 67 | sync.RWMutex 68 | } 69 | 70 | // Configuration defines the config for batch scheduler 71 | type Configuration struct { 72 | KubeMaster string `json:"kube_master,omitempty"` 73 | KubeConfig string `json:"kube_config,omitempty"` 74 | MaxScheduleTime int64 `json:"max_schedule_time,omitempty"` 75 | } 76 | 77 | var _ batchScheduler = &batchSchedulingPlugin{} 78 | var _ framework.PreFilterPlugin = &batchSchedulingPlugin{} 79 | var _ framework.FilterPlugin = &batchSchedulingPlugin{} 80 | var _ framework.PermitPlugin = &batchSchedulingPlugin{} 81 | var _ framework.PostBindPlugin = &batchSchedulingPlugin{} 82 | var _ framework.QueueSortPlugin = &batchSchedulingPlugin{} 83 | 84 | // Name is the name of the plug used in Registry and configurations. 85 | const ( 86 | Name = "batch-scheduler" 87 | retryAllowLimit = 50 88 | getWaitPodLimit = 3 89 | ) 90 | 91 | // Name returns name of the plugin. It is used in logs, etc. 92 | func (bs *batchSchedulingPlugin) Name() string { 93 | return Name 94 | } 95 | 96 | type batchSchedulingPluginExtension struct { 97 | operation *core.ScheduleOperation 98 | } 99 | 100 | // PreFilter is called at the beginning of the scheduling cycle. All PreFilter 101 | // plugins must return success or the pod will be rejected. 
102 | func (bs *batchSchedulingPlugin) PreFilter(ctx context.Context, state *framework.CycleState, 103 | p *corev1.Pod) *framework.Status { 104 | if err := bs.operation.PreFilter(p); err != nil { 105 | return framework.NewStatus(framework.Unschedulable, err.Error()) 106 | } 107 | return framework.NewStatus(framework.Success, "") 108 | } 109 | 110 | // PreFilterExtensions returns a PreFilterExtensions interface if the plugin implements one, 111 | // or nil if it does not. A Pre-filter plugin can provide extensions to incrementally 112 | // modify its pre-processed info. The framework guarantees that the extensions 113 | // AddPod/RemovePod will only be called after PreFilter, possibly on a cloned 114 | // CycleState, and may call those functions more than once before calling 115 | // Filter again on a specific node. 116 | func (bs *batchSchedulingPlugin) PreFilterExtensions() framework.PreFilterExtensions { 117 | return bs.extension 118 | } 119 | 120 | // AddPod is called by the framework while trying to evaluate the impact 121 | // of adding podToAdd to the node while scheduling podToSchedule. 122 | func (bs *batchSchedulingPluginExtension) AddPod(ctx context.Context, state *framework.CycleState, 123 | podToSchedule *corev1.Pod, podToAdd *corev1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *framework.Status { 124 | if err := bs.operation.PreemptAddPod(podToAdd, nodeInfo.Node().Name); err != nil { 125 | return framework.NewStatus(framework.Unschedulable, err.Error()) 126 | } 127 | return framework.NewStatus(framework.Success, "") 128 | } 129 | 130 | // RemovePod is called by the framework while trying to evaluate the impact 131 | // of removing podToRemove from the node while scheduling podToSchedule. 
132 | func (bs *batchSchedulingPluginExtension) RemovePod(ctx context.Context, state *framework.CycleState, 133 | podToSchedule *corev1.Pod, podToRemove *corev1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *framework.Status { 134 | //klog.V(5).Infof("Batch scheduler try to remove pod %s/%s add pod %s/%s", 135 | // podToRemove.Namespace, podToRemove.Name, podToSchedule.Namespace, podToSchedule.Name) 136 | if err := bs.operation.PreemptRemovePod(podToSchedule, podToRemove); err != nil { 137 | if klog.V(5) { 138 | klog.Error(err) 139 | } 140 | return framework.NewStatus(framework.Unschedulable, err.Error()) 141 | } 142 | klog.V(5).Infof("batchSchedulingPluginExtension pass remove pod %v", podToRemove.Name) 143 | return framework.NewStatus(framework.Success, "") 144 | } 145 | 146 | // Filter is called by the scheduling framework. 147 | // All FilterPlugins should return "Success" to declare that 148 | // the given node fits the pod. If Filter doesn't return "Success", 149 | // please refer scheduler/algorithm/predicates/error.go 150 | // to set error message. 151 | func (bs *batchSchedulingPlugin) Filter(ctx context.Context, state *framework.CycleState, p *corev1.Pod, 152 | node *schedulernodeinfo.NodeInfo) *framework.Status { 153 | if err := bs.operation.Filter(p, node.Node().Name); err != nil { 154 | return framework.NewStatus(framework.Unschedulable, err.Error()) 155 | } 156 | return framework.NewStatus(framework.Success, "") 157 | } 158 | 159 | // Permit is called before binding a pod (and before prebind plugins). Permit 160 | // plugins are used to prevent or delay the binding of a Pod. A permit plugin 161 | // must return success or wait with timeout duration, or the pod will be rejected. 162 | // The pod will also be rejected if the wait timeout or the pod is rejected while 163 | // waiting. Note that if the plugin returns "wait", the framework will wait only 164 | // after running the remaining plugins given that no other plugin rejects the pod. 
165 | func (bs *batchSchedulingPlugin) Permit(ctx context.Context, state *framework.CycleState, p *corev1.Pod, nodeName string) (*framework.Status, time.Duration) { 166 | var pgs *cache.PodGroupMatchStatus 167 | fullName := "" 168 | ready, pgName, err := bs.operation.Permit(p, nodeName) 169 | if pgName != "" { 170 | fullName = fmt.Sprintf("%v/%v", p.Namespace, pgName) 171 | pgObj := bs.operation.PodGroupStatusCache().Get(fullName) 172 | if pgObj != nil { 173 | pgs = pgObj.(*cache.PodGroupMatchStatus) 174 | } 175 | } 176 | waitTime := util.DefaultWaitTime 177 | if pgs != nil { 178 | waitTime = util.GetWaitTimeDuration(pgs.PodGroup, bs.maxScheduleTime) 179 | } 180 | // add 1 second to keep the ttl cache would 181 | // expired before the the waiting deadline 182 | waitTime = waitTime + 1*time.Second 183 | if err != nil { 184 | if err == util.ErrorWaiting { 185 | klog.Infof("Pod: %v/%v is waiting to be scheduled to node: %v", p.Namespace, p.Name, nodeName) 186 | return framework.NewStatus(framework.Wait, ""), waitTime 187 | } 188 | // For pod not belongs to any groups 189 | if err == util.ErrorNotMatched { 190 | return framework.NewStatus(framework.Success, ""), 0 191 | 192 | } 193 | klog.Infof("bs.operation.Permit error %v", err) 194 | return framework.NewStatus(framework.Unschedulable, err.Error()), util.DefaultWaitTime 195 | } 196 | klog.V(5).Infof("Pod requires pgName %v", pgName) 197 | if ready { 198 | go bs.sendStartScheduleSignal(fullName) 199 | } 200 | 201 | return framework.NewStatus(framework.Wait, ""), waitTime 202 | } 203 | 204 | // PostBind is called after a pod is successfully bound. These plugins are 205 | // informational. A common application of this extension point is for cleaning 206 | // up. If a plugin needs to clean-up its state after a pod is scheduled and 207 | // bound, Postbind is the extension point that it should register. 
208 | func (bs *batchSchedulingPlugin) PostBind(ctx context.Context, state *framework.CycleState, p *corev1.Pod, nodeName string) { 209 | klog.V(5).Infof("PostBind pod: %v/%v", p.Namespace, p.Name) 210 | bs.operation.PostBind(p, nodeName) 211 | } 212 | 213 | // Less are used to sort pods in the scheduling queue. 214 | func (bs *batchSchedulingPlugin) Less(pi1 *framework.PodInfo, pi2 *framework.PodInfo) bool { 215 | return bs.operation.Compare(pi1, pi2) 216 | } 217 | 218 | // UpdateLocalCache upadte pgstatus cache 219 | func (bs *batchSchedulingPlugin) UpdateBatchCache() { 220 | // update pods cache 221 | bs.frameworkHandler.IterateOverWaitingPods(func(waitingPod framework.WaitingPod) { 222 | klog.V(5).Info("Start IterateOverWaitingPods") 223 | pod := waitingPod.GetPod() 224 | pgName, satisfied := util.VerifyPodLabelSatisfied(pod) 225 | klog.V(5).Infof("Start walking through pod %v/%v uid: %v", pod.Namespace, pod.Name, pod.UID) 226 | if satisfied && len(pgName) != 0 { 227 | fullName := fmt.Sprintf("%v/%v", pod.Namespace, pgName) 228 | pgsObj := bs.operation.PodGroupStatusCache().Get(fullName) 229 | if pgsObj == nil { 230 | return 231 | } 232 | pgs := pgsObj.(*cache.PodGroupMatchStatus) 233 | klog.V(5).Infof("Wanted cache pod %v/%v", pod.Namespace, pod.Name) 234 | oldUID, found := pgs.PodNameUIDs.Get(fmt.Sprintf("%v/%v", pod.Namespace, pod.Name)) 235 | // pod has been scheduled ever 236 | if found { 237 | if oldUID.(string) != string(pod.UID) { 238 | // delete the expired one 239 | pgs.MatchedPodNodes.Delete(oldUID.(string)) 240 | pgs.PodNameUIDs.Delete(fmt.Sprintf("%v/%v", pod.Namespace, pod.Name)) 241 | klog.V(3).Infof("Delete old cache data %v", oldUID) 242 | } 243 | } else { 244 | // newly add one 245 | klog.V(3).Infof("Add new cache data %v", pod.UID) 246 | } 247 | klog.V(6).Infof("Current pod group state %+v", pgs.PodNameUIDs.Items()) 248 | bs.operation.PodGroupStatusCache().Set(util.GetPodGroupFullName(pgs.PodGroup), pgs) 249 | } 250 | }) 251 | } 252 | 253 | 
// StartBatchSchedule receives the group name of pod to start scheduling 254 | func (bs *batchSchedulingPlugin) StartBatchSchedule(fullName string) { 255 | 256 | pgCache := bs.operation.PodGroupStatusCache().(*cache.PGStatusCache) 257 | pgs := pgCache.Get(fullName).(*cache.PodGroupMatchStatus) 258 | if pgs.PodGroup.Status.Phase != v1.PodGroupPreScheduling && pgs.PodGroup.Status.Phase != v1. 259 | PodGroupScheduling { 260 | return 261 | } 262 | 263 | // record time to avoid abnormal exit when bind 264 | if pgs.PodGroup.Status.Scheduled >= pgs.PodGroup.Spec.MinMember { 265 | pg, err := bs.client.BatchV1().PodGroups(pgs.PodGroup.Namespace).Get(pgs.PodGroup.Name, v12.GetOptions{}) 266 | defer func() { 267 | if err != nil { 268 | bs.startChan <- fullName 269 | } 270 | }() 271 | if err != nil { 272 | klog.Error(err) 273 | return 274 | } 275 | pgCopy := pg.DeepCopy() 276 | pgCopy.Status.ScheduleStartTime = v12.Now() 277 | 278 | var patch []byte 279 | patch, err = util.CreateMergePatch(pg, pgCopy) 280 | if err != nil { 281 | return 282 | } 283 | 284 | pg, err = bs.client.BatchV1().PodGroups(pg.Namespace).Patch(pg.Name, types.MergePatchType, patch) 285 | if err != nil { 286 | return 287 | } 288 | } 289 | 290 | // Start batch scheduling 291 | klog.V(4).Infof("Start batch scheduling %v", fullName) 292 | pendingPods := bs.operation.GetPodNodePairs(fullName) 293 | if pendingPods == nil { 294 | klog.V(4).Infof("Can not found pending pods for %v", fullName) 295 | return 296 | } 297 | pendingPodNameIDs := bs.operation.GetPodNameUIDs(fullName) 298 | if pendingPodNameIDs == nil { 299 | klog.V(4).Infof("Can not found pending IDs for %v", fullName) 300 | return 301 | } 302 | pendingPodsMap := pendingPods.Items() 303 | if uint32(len(pendingPodsMap)) < pgs.PodGroup.Spec.MinMember-pgs.PodGroup.Status.Scheduled { 304 | return 305 | } 306 | 307 | klog.V(5).Infof("Current pod group: %v state %+v, count: %d", fullName, pendingPodNameIDs, 308 | len(pendingPodsMap)) 309 | 310 | for uid, pair 
:= range pendingPodsMap { 311 | // double check 312 | var waitingPod framework.WaitingPod 313 | for i := 0; i < getWaitPodLimit; i++ { 314 | waitingPod = bs.frameworkHandler.GetWaitingPod(types.UID(uid)) 315 | if waitingPod == nil { 316 | if i == 2 { 317 | // to avoid sig send, but scheduler cache have not been flushed 318 | klog.V(4).Infof("Remove pod uid %v, pair %v", uid, pair.Object) 319 | pendingPods.Delete(uid) 320 | pnPair := pair.Object.(*cache.PodNodePair) 321 | pendingPodNameIDs.Delete(pnPair.PodName) 322 | return 323 | } 324 | time.Sleep(10 * time.Millisecond) 325 | continue 326 | 327 | } 328 | } 329 | for i := 0; i < retryAllowLimit; i++ { 330 | if success := waitingPod.Allow(Name); success { 331 | klog.V(5).Infof("Approved pod %+v", pair.Object) 332 | pendingPods.Delete(uid) 333 | pendingPodNameIDs.Delete(uid) 334 | break 335 | } 336 | time.Sleep(50 * time.Millisecond) 337 | if i == 50 { 338 | klog.Warningf("Approved pod %+v failed", pair.Object) 339 | return 340 | } 341 | 342 | } 343 | } 344 | } 345 | 346 | // rejectPod rejects pod in cache 347 | func (bs *batchSchedulingPlugin) rejectPod(uid types.UID) { 348 | waitingPod := bs.frameworkHandler.GetWaitingPod(uid) 349 | if success := waitingPod.Reject("Group failed"); success { 350 | return 351 | } 352 | klog.Warningf("Rejected pod %+v failed", uid) 353 | 354 | } 355 | 356 | // ReconcileStatus reconcile pod cache states and decide when to schedule 357 | func (bs *batchSchedulingPlugin) ReconcileStatus(stopChan <-chan struct{}) { 358 | for { 359 | select { 360 | case pgName := <-bs.startChan: 361 | bs.UpdateBatchCache() 362 | bs.StartBatchSchedule(pgName) 363 | case <-stopChan: 364 | klog.Info("Reconcile exit") 365 | return 366 | } 367 | } 368 | } 369 | 370 | // sendStartScheduleSignal send stat scheduling signal to scheduler 371 | func (bs *batchSchedulingPlugin) sendStartScheduleSignal(pgName string) { 372 | bs.startChan <- pgName 373 | klog.Info("Send StartScheduleSignal success") 374 | } 375 | 376 | 
// New initializes a new plugin and returns it. 377 | func New(configuration *runtime.Unknown, f framework.FrameworkHandle) (framework.Plugin, error) { 378 | var config Configuration 379 | // TODO: decode it in a better way 380 | if err := json.Unmarshal(configuration.Raw, &config); err != nil { 381 | klog.Errorf("Failed to decode %+v: %v", configuration.Raw, err) 382 | return nil, fmt.Errorf("failed to decode configuration: %v", err) 383 | } 384 | 385 | klog.V(4).Infof("Plugin %s's config: master(%s), kube-config(%s)", Name, config.KubeMaster, config.KubeConfig) 386 | // Init client and Informer 387 | c, err := clientcmd.BuildConfigFromFlags(config.KubeMaster, config.KubeConfig) 388 | if err != nil { 389 | return nil, fmt.Errorf("failed to init rest.Config: %v", err) 390 | } 391 | c.QPS = 10 392 | c.Burst = 20 393 | 394 | pgClient := pgclientset.NewForConfigOrDie(c) 395 | scheduleInformer := pgformers.NewSharedInformerFactory(pgClient, 0) 396 | pgInformer := scheduleInformer.Batch().V1().PodGroups() 397 | 398 | kubeClient := f.ClientSet() 399 | 400 | pgCache := cache.NewPGStatusCache() 401 | startChan := make(chan string) 402 | 403 | extAPIClient := apiextensionsclient.NewForConfigOrDie(c) 404 | 405 | //resourceCache := cache.NewNodeResourceCache() 406 | scheduleTimeDuration := time.Duration(config.MaxScheduleTime) * time.Minute 407 | pgOperation := core.NewScheduleOperation(pgClient, pgCache, f, &scheduleTimeDuration, pgInformer) 408 | plugin := &batchSchedulingPlugin{ 409 | frameworkHandler: f, 410 | operation: pgOperation, 411 | startChan: startChan, 412 | maxScheduleTime: &scheduleTimeDuration, 413 | client: pgClient, 414 | } 415 | 416 | crd := &apiextensionsv1beta1.CustomResourceDefinition{ 417 | ObjectMeta: v12.ObjectMeta{ 418 | Name: "podgroups.batch.scheduler.tencent.com", 419 | }, 420 | Spec: apiextensionsv1beta1.CustomResourceDefinitionSpec{ 421 | Group: "batch.scheduler.tencent.com", 422 | Version: "v1", 423 | Scope: 
apiextensionsv1beta1.NamespaceScoped, 424 | Names: apiextensionsv1beta1.CustomResourceDefinitionNames{ 425 | Kind: "PodGroup", 426 | Plural: "podgroups", 427 | ShortNames: []string{"pg", "pgs"}, 428 | }, 429 | }, 430 | } 431 | 432 | _, err = extAPIClient.ApiextensionsV1beta1().CustomResourceDefinitions().Create(crd) 433 | if err != nil && !apierrs.IsAlreadyExists(err) { 434 | klog.Errorf("Failed to create crd %v", err.Error()) 435 | return nil, err 436 | } 437 | 438 | ctx := context.TODO() 439 | go plugin.ReconcileStatus(ctx.Done()) 440 | 441 | rateLimiter := workqueue.NewItemExponentialFailureRateLimiter(time.Second, 10*time.Second) 442 | controller := controller.NewPodGroupController(kubeClient, pgInformer, rateLimiter, pgClient, pgCache, 443 | plugin.rejectPod, pgOperation.AddToDenyCache) 444 | scheduleInformer.Start(ctx.Done()) 445 | 446 | go tryRunController(kubeClient, controller, ctx.Done()) 447 | return plugin, nil 448 | } 449 | 450 | // tryRunController try to run the controller 451 | // need this func because we do not want to run controller if the scheduler is not leader 452 | func tryRunController(client kubernetes.Interface, groupController *controller.PodGroupController, 453 | stopCh <-chan struct{}) { 454 | ticker := time.NewTicker(time.Second) 455 | defer ticker.Stop() 456 | stopChPG := make(chan struct{}) 457 | started := false 458 | el := resourcelock.EndpointsLock{ 459 | EndpointsMeta: v12.ObjectMeta{ 460 | Name: "kube-scheduler", 461 | Namespace: "kube-system", 462 | }, 463 | Client: client.CoreV1(), 464 | } 465 | 466 | hostname, err := os.Hostname() 467 | if err != nil { 468 | panic(err) 469 | } 470 | 471 | for { 472 | select { 473 | case <-ticker.C: 474 | elRecord, _, err := el.Get() 475 | if err != nil || elRecord == nil { 476 | continue 477 | } 478 | 479 | if strings.Contains(elRecord.HolderIdentity, hostname+"_") { 480 | if !started && time.Now().Sub(elRecord.RenewTime.Time) < time.Duration(elRecord. 
481 | LeaseDurationSeconds)*time.Second { 482 | klog.Info("Group controller started") 483 | go groupController.Run(10, stopChPG) 484 | started = true 485 | } 486 | } else { 487 | if started { 488 | klog.Info("Group controller exit") 489 | started = false 490 | stopChPG <- struct{}{} 491 | } 492 | } 493 | case rev := <-stopCh: 494 | if started { 495 | started = false 496 | stopChPG <- rev 497 | } 498 | klog.Info("Group controller exit") 499 | return 500 | } 501 | } 502 | } 503 | -------------------------------------------------------------------------------- /pkg/scheduler/core/core.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package core 18 | 19 | import ( 20 | "fmt" 21 | "strings" 22 | "sync" 23 | "time" 24 | 25 | "github.com/gogo/protobuf/sortkeys" 26 | gochache "github.com/patrickmn/go-cache" 27 | corev1 "k8s.io/api/core/v1" 28 | v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 | "k8s.io/apimachinery/pkg/types" 30 | k8scache "k8s.io/client-go/tools/cache" 31 | "k8s.io/klog" 32 | podutil "k8s.io/kubernetes/pkg/api/v1/pod" 33 | "k8s.io/kubernetes/pkg/scheduler/algorithm/predicates" 34 | framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" 35 | "k8s.io/kubernetes/pkg/scheduler/nodeinfo" 36 | 37 | pgv1 "github.com/tenstack/batch-scheduler/pkg/apis/podgroup/v1" 38 | pgclientset "github.com/tenstack/batch-scheduler/pkg/generated/clientset/versioned" 39 | pginformer "github.com/tenstack/batch-scheduler/pkg/generated/informers/externalversions/podgroup/v1" 40 | pglister "github.com/tenstack/batch-scheduler/pkg/generated/listers/podgroup/v1" 41 | "github.com/tenstack/batch-scheduler/pkg/scheduler/cache" 42 | errors "github.com/tenstack/batch-scheduler/pkg/util" 43 | ) 44 | 45 | // Max score for Score 46 | const Max = 2147483647 47 | 48 | // ScheduleOperation defines the scheduling opeation called 49 | type ScheduleOperation struct { 50 | pgClient *pgclientset.Clientset 51 | frameworkHandler framework.FrameworkHandle 52 | podGroupStatusCache cache.Cache 53 | maxScheTime *time.Duration 54 | lastDeniedPG *gochache.Cache 55 | lastPermittedPod *gochache.Cache 56 | pgLister pglister.PodGroupLister 57 | pgListerSynced k8scache.InformerSynced 58 | maxFinishedPG string 59 | maxPGStatus *cache.PodGroupMatchStatus 60 | sync.RWMutex 61 | } 62 | 63 | // NewScheduleOperation create a new operation object 64 | func NewScheduleOperation(pgClient *pgclientset.Clientset, cache cache.Cache, 65 | frameworkHandler framework.FrameworkHandle, maxScheTime *time.Duration, pgInformer pginformer.PodGroupInformer) *ScheduleOperation { 66 | so := &ScheduleOperation{ 67 | pgClient: pgClient, 68 | 
podGroupStatusCache: cache, 69 | frameworkHandler: frameworkHandler, 70 | maxScheTime: maxScheTime, 71 | lastDeniedPG: gochache.New(30*time.Second, 3*time.Second), 72 | lastPermittedPod: gochache.New(3*time.Second, 3*time.Second), 73 | } 74 | so.pgLister = pgInformer.Lister() 75 | so.pgListerSynced = pgInformer.Informer().HasSynced 76 | return so 77 | } 78 | 79 | // PodGroupStatusCache returns the podGroupStatusCache 80 | func (sop *ScheduleOperation) PodGroupStatusCache() cache.Cache { 81 | return sop.podGroupStatusCache 82 | } 83 | 84 | // PreFilter pre-filter pod include: 85 | // 1. cluster resource 86 | // 2. last denied pod to increase schedule qps 87 | // 3. last permitted pod to increase schedule qps 88 | func (sop *ScheduleOperation) PreFilter(pod *corev1.Pod) error { 89 | pgName, satisfied := errors.VerifyPodLabelSatisfied(pod) 90 | if pgName == "" && !satisfied { 91 | return nil 92 | } 93 | fullName := fmt.Sprintf("%v/%v", pod.Namespace, pgName) 94 | 95 | _, ok := sop.lastPermittedPod.Get(string(pod.UID)) 96 | if ok { 97 | return nil 98 | } 99 | 100 | pgsObj := sop.podGroupStatusCache.Get(fullName) 101 | if pgsObj == nil { 102 | return fmt.Errorf("can not found pod group: %v", fullName) 103 | } 104 | 105 | _, ok = sop.lastDeniedPG.Get(fullName) 106 | if ok { 107 | err := fmt.Errorf("pod with pgName: %v last failed in 20s, deny", fullName) 108 | klog.V(6).Info(err) 109 | return err 110 | } 111 | 112 | pgs := pgsObj.(*cache.PodGroupMatchStatus) 113 | if err := sop.fillOccupiedObj(pgs, pod); err != nil { 114 | return err 115 | } 116 | 117 | // compute resource to check if sum of cluster resource can meet required resource 118 | pgCache := sop.podGroupStatusCache.(*cache.PGStatusCache) 119 | pgCache.RLock() 120 | maxFinishedPG, maxPGStatus, _ := findMaxPG(pgCache) 121 | sop.maxFinishedPG = maxFinishedPG 122 | sop.maxPGStatus = maxPGStatus 123 | pgCache.RUnlock() 124 | if maxPGStatus != nil { 125 | klog.V(6).Infof("group %v schedule status %+v", maxFinishedPG, 
maxPGStatus.PodNameUIDs.Items()) 126 | } 127 | if maxFinishedPG == "" || maxPGStatus == nil || maxPGStatus.PodGroup == nil { 128 | klog.V(5).Infof("Pod %v/%v has passed check", pod.Namespace, pod.Name) 129 | return nil 130 | } 131 | 132 | maxMatchedPodNodes := maxPGStatus.MatchedPodNodes.Items() 133 | 134 | // First scheduled pod group, set it to the current 135 | matched := len(maxMatchedPodNodes) 136 | if matched == 0 { 137 | maxPGStatus = pgs 138 | maxFinishedPG = fullName 139 | preAllocatedResource := getPreAllocatedResource(maxPGStatus, matched) 140 | if !sop.compareClusterResourceAndRequire(maxPGStatus.Pod, &preAllocatedResource, 1) { 141 | klog.V(5).Info("LeftResource can not satisfy preAllocationResource") 142 | sop.AddToDenyCache(fullName) 143 | return fmt.Errorf("cluster resource not enough") 144 | } 145 | klog.Warning("Returned for not found pod of PodGroupStatus") 146 | return nil 147 | } 148 | 149 | // max finished one, pass it 150 | if sop.maxFinishedPG == fullName { 151 | klog.V(5).Infof("Current pod group %v is the currently max one, pass it", fullName) 152 | klog.V(5).Infof("Current cluster resource %v", *(sop.computeClusterResource(sop.maxPGStatus.Pod))) 153 | 154 | return nil 155 | } 156 | 157 | preAllocatedResource := getPreAllocatedResource(maxPGStatus, matched) 158 | currentPodRequired := getPodResourceRequire(pod) 159 | preAllocatedResource.Add(currentPodRequired.ResourceList()) 160 | klog.V(4).Infof("Current need pre-allocated resource: %+v", preAllocatedResource) 161 | if !sop.compareClusterResourceAndRequire(maxPGStatus.Pod, &preAllocatedResource, 0.7) { 162 | klog.V(5).Info("LeftResource can not satisfy preAllocationResource") 163 | sop.AddToDenyCache(fullName) 164 | return fmt.Errorf("cluster resource not enough") 165 | } 166 | return nil 167 | } 168 | 169 | // Filter filter pod if not fit on the node 170 | func (sop *ScheduleOperation) Filter(pod *corev1.Pod, nodeName string) error { 171 | pgName, satisfied := 
errors.VerifyPodLabelSatisfied(pod) 172 | if pgName == "" && !satisfied { 173 | return nil 174 | } 175 | fullName := fmt.Sprintf("%v/%v", pod.Namespace, pgName) 176 | 177 | pgsObj := sop.podGroupStatusCache.Get(fullName) 178 | if pgsObj == nil { 179 | return fmt.Errorf("can not found pod group: %v", pgName) 180 | } 181 | pgs := pgsObj.(*cache.PodGroupMatchStatus) 182 | 183 | if err := sop.computeResourceSatisfied(pgs, pod, nodeName); err != nil { 184 | sop.AddToDenyCache(fullName) 185 | return err 186 | } 187 | // do not check it node have finished on one of node. 188 | sop.lastPermittedPod.Add(string(pod.UID), "", 2*time.Second) 189 | klog.V(6).Infof("Calling filter %v/%v, node %v, group %v", pod.Namespace, pod.Name, nodeName, pgName) 190 | return nil 191 | } 192 | 193 | // PreemptAddPod currently always return nil 194 | func (sop *ScheduleOperation) PreemptAddPod(podToAdd *corev1.Pod, nodeName string) error { 195 | return nil 196 | } 197 | 198 | // PreemptRemovePod requires: 199 | // 1. online resource could preempt online offline 200 | // 2. offline could preempt low process and not running offline 201 | // 3. online preempt offline 202 | // 4. offline preempt offline 203 | func (sop *ScheduleOperation) PreemptRemovePod(podToSchedule, podToRemove *corev1.Pod) error { 204 | pgNameToRemove, offlinePodToRemove := errors.VerifyPodLabelSatisfied(podToRemove) 205 | pgNameToSchedule, offlinePodToSchedule := errors.VerifyPodLabelSatisfied(podToSchedule) 206 | 207 | // 1. online resource could preempt online offline 208 | // 2. 
offline could preempt low process and not running offline 209 | 210 | // online preempt online 211 | if !offlinePodToSchedule && !offlinePodToRemove { 212 | return nil 213 | } 214 | 215 | // offline preempt online: forbid 216 | if offlinePodToSchedule && !offlinePodToRemove { 217 | return fmt.Errorf("offline pods %v are forbidden to preempt online %v", podToSchedule.Name, podToRemove.Name) 218 | } 219 | 220 | checkPreemption := func() (string, error) { 221 | fullNameToRemove := fmt.Sprintf("%v/%v", podToRemove.Namespace, pgNameToRemove) 222 | pgsObj := sop.podGroupStatusCache.Get(fullNameToRemove) 223 | if pgsObj == nil { 224 | return "", fmt.Errorf("can not found pod group: %v", fullNameToRemove) 225 | } 226 | // Todo: if we need forbid to preempt the max group 227 | // 228 | //maxFinishedPG, _, _ := findMaxPG(sop.podGroupStatusCache.(*cache.PGStatusCache)) 229 | //if fullNameToRemove == maxFinishedPG { 230 | // return "", fmt.Errorf("max finished pod group can not been preempted: %v", fullNameToRemove) 231 | //} 232 | // 233 | pgs := pgsObj.(*cache.PodGroupMatchStatus) 234 | 235 | if pgs.PodGroup.Status.Phase == pgv1.PodGroupScheduled || 236 | pgs.PodGroup.Status.Phase == pgv1.PodGroupRunning { 237 | return "", fmt.Errorf("pod belongs to Scheduled or Running pod group can not be scheduled") 238 | } 239 | return fullNameToRemove, nil 240 | } 241 | 242 | fullNameToRemove, err := checkPreemption() 243 | 244 | // online preempt offline 245 | if !offlinePodToSchedule && offlinePodToRemove { 246 | return err 247 | } 248 | 249 | // offline preempt offline 250 | fullNameToSchedule := fmt.Sprintf("%v/%v", podToSchedule.Namespace, pgNameToSchedule) 251 | if fullNameToRemove == fullNameToSchedule { 252 | return fmt.Errorf("podToSchedule and podToRemove belong to same pod group, do not preempt") 253 | } 254 | if err != nil { 255 | return err 256 | } 257 | klog.V(5).Infof("Try to preempt pod %v/%v, schedule pod %v/%v", podToRemove.Namespace, podToRemove.Name, 258 | 
podToSchedule.Namespace, podToSchedule.Name) 259 | return nil 260 | } 261 | 262 | // Score return the score, currently it is preserved. 263 | func (sop *ScheduleOperation) Score(pod *corev1.Pod, nodeName string) (int, error) { 264 | return Max, nil 265 | } 266 | 267 | // Permit permits a pod to run, if the minMember match, it would send a signal to chan. 268 | func (sop *ScheduleOperation) Permit(pod *corev1.Pod, nodeName string) (bool, string, error) { 269 | pgName, satisfied := errors.VerifyPodLabelSatisfied(pod) 270 | if pgName == "" && !satisfied { 271 | return true, pgName, errors.ErrorNotMatched 272 | } 273 | fullName := fmt.Sprintf("%v/%v", pod.Namespace, pgName) 274 | pgsObj := sop.podGroupStatusCache.Get(fullName) 275 | if pgsObj == nil { 276 | return false, pgName, fmt.Errorf("can not found pod group: %v", pgName) 277 | } 278 | pgs := pgsObj.(*cache.PodGroupMatchStatus) 279 | if pgs.PodGroup.Status.Phase == pgv1.PodGroupPending { 280 | pgs.PodGroup.Status.Phase = pgv1.PodGroupPreScheduling 281 | } 282 | klog.V(5).Infof("Desired group %v phase: %v", pgs.PodGroup.Name, pgs.PodGroup.Status.Phase) 283 | 284 | pair := cache.PodNodePair{ 285 | PodName: fmt.Sprintf("%v/%v", pod.Namespace, pod.Name), 286 | Node: nodeName, 287 | } 288 | 289 | waitTime := errors.GetWaitTimeDuration(pgs.PodGroup, sop.maxScheTime) 290 | pgs.MatchedPodNodes.Set(string(pod.UID), &pair, waitTime) 291 | oldUID, found := pgs.PodNameUIDs.Get(fmt.Sprintf("%v/%v", pod.Namespace, pod.Name)) 292 | // pod has been scheduled ever 293 | if found { 294 | // delete the expired one 295 | pgs.MatchedPodNodes.Delete(oldUID.(string)) 296 | } else { 297 | // newly add one 298 | klog.V(4).Infof("Add new cache data %v", pod.UID) 299 | } 300 | pgs.PodNameUIDs.Set(fmt.Sprintf("%v/%v", pod.Namespace, pod.Name), string(pod.UID), waitTime) 301 | 302 | klog.V(5).Infof("Current pod group state: %+v", pgs.PodGroup) 303 | ready := uint32(len(pgs.MatchedPodNodes.Items())) >= 
pgs.PodGroup.Spec.MinMember-pgs.PodGroup.Status.Scheduled 304 | if ready { 305 | pgs.Scheduled = true 306 | return true, pgName, nil 307 | } 308 | return false, pgName, errors.ErrorWaiting 309 | } 310 | 311 | // PostBind is used for send metrics and update to api if all finish 312 | func (sop *ScheduleOperation) PostBind(pod *corev1.Pod, nodeName string) { 313 | pgName, satisfied := errors.VerifyPodLabelSatisfied(pod) 314 | if pgName == "" && !satisfied { 315 | return 316 | } 317 | 318 | fullName := fmt.Sprintf("%v/%v", pod.Namespace, pgName) 319 | sop.Lock() 320 | defer sop.Unlock() 321 | pgsObj := sop.podGroupStatusCache.Get(fullName) 322 | if pgsObj == nil { 323 | return 324 | } 325 | pgs := pgsObj.(*cache.PodGroupMatchStatus) 326 | pgCopy := pgs.PodGroup.DeepCopy() 327 | pgCopy.Status.Scheduled++ 328 | 329 | if pgCopy.Status.Scheduled >= pgs.PodGroup.Spec.MinMember { 330 | pgCopy.Status.Phase = pgv1.PodGroupScheduled 331 | } else { 332 | pgCopy.Status.Phase = pgv1.PodGroupScheduling 333 | if pgCopy.Status.ScheduleStartTime.IsZero() { 334 | pgCopy.Status.ScheduleStartTime = v1.Time{Time: time.Now()} 335 | } 336 | } 337 | 338 | if pgCopy.Status.Phase != pgs.PodGroup.Status.Phase { 339 | 340 | pg, err := sop.pgClient.BatchV1().PodGroups(pgCopy.Namespace).Get(pgCopy.Name, v1.GetOptions{}) 341 | if err != nil { 342 | klog.Error(err) 343 | return 344 | } 345 | 346 | patch, err := errors.CreateMergePatch(pg, pgCopy) 347 | if err != nil { 348 | return 349 | } 350 | 351 | pg, err = sop.pgClient.BatchV1().PodGroups(pg.Namespace).Patch(pg.Name, types.MergePatchType, patch) 352 | if err != nil { 353 | return 354 | } 355 | 356 | pgs.PodGroup.Status.Phase = pg.Status.Phase 357 | } 358 | 359 | pgs.PodGroup.Status.Scheduled = pgCopy.Status.Scheduled 360 | 361 | return 362 | } 363 | 364 | // Compare returns the true or false, base on 365 | // 1. priority 366 | // 2. podGroup creation time 367 | // 3. 
pod creation time 368 | func (sop *ScheduleOperation) Compare(podInfo1, podInfo2 interface{}) bool { 369 | // Sort pod also record to progress 370 | pInfo1 := podInfo1.(*framework.PodInfo) 371 | pInfo2 := podInfo2.(*framework.PodInfo) 372 | prio1 := podutil.GetPodPriority(pInfo1.Pod) 373 | prio2 := podutil.GetPodPriority(pInfo2.Pod) 374 | 375 | pgName1, _ := errors.VerifyPodLabelSatisfied(pInfo1.Pod) 376 | pgName2, _ := errors.VerifyPodLabelSatisfied(pInfo2.Pod) 377 | 378 | compare := func() bool { 379 | if prio1 > prio2 { 380 | return true 381 | } 382 | 383 | if prio1 == prio2 { 384 | if pgName1 == "" && pgName2 == "" { 385 | return pInfo1.Timestamp.Before(pInfo2.Timestamp) 386 | } 387 | 388 | if pgName1 == "" { 389 | return true 390 | } 391 | if pgName2 == "" { 392 | return false 393 | } 394 | } 395 | pg1, err1 := sop.pgLister.PodGroups(pInfo1.Pod.Namespace).Get(pgName1) 396 | pg2, err2 := sop.pgLister.PodGroups(pInfo2.Pod.Namespace).Get(pgName2) 397 | if err1 != nil || err2 != nil { 398 | return false 399 | } 400 | if prio1 == prio2 && pg1.CreationTimestamp.Before(&pg2.CreationTimestamp) { 401 | return true 402 | } 403 | 404 | if prio1 == prio2 && pg1.CreationTimestamp.Equal(&pg2.CreationTimestamp) && pgName1 > pgName2 { 405 | return true 406 | } 407 | return prio1 == prio2 && pg1.CreationTimestamp.Equal(&pg2.CreationTimestamp) && pgName1 == pgName2 && pInfo1. 408 | Timestamp.Before(pInfo2.Timestamp) 409 | } 410 | return compare() 411 | } 412 | 413 | // GetPodNodePairs returns the pod-node paired to be scheduled. 
414 | func (sop *ScheduleOperation) GetPodNodePairs(fullName string) *gochache.Cache { 415 | pgs := sop.podGroupStatusCache.Get(fullName) 416 | if pgs == nil { 417 | return nil 418 | } 419 | return pgs.(*cache.PodGroupMatchStatus).MatchedPodNodes 420 | } 421 | 422 | // AddToDenyCache add podGroup to the back list cache 423 | func (sop *ScheduleOperation) AddToDenyCache(fullName string) { 424 | sop.lastDeniedPG.Add(fullName, "", 20*time.Second) 425 | } 426 | 427 | // GetPodNameUIDs returns podName and uid pairs. 428 | func (sop *ScheduleOperation) GetPodNameUIDs(fullName string) *gochache.Cache { 429 | pgs := sop.podGroupStatusCache.Get(fullName) 430 | if pgs == nil { 431 | return nil 432 | } 433 | return pgs.(*cache.PodGroupMatchStatus).PodNameUIDs 434 | } 435 | 436 | func (sop *ScheduleOperation) getLeftResource(nodeName string) *nodeinfo.Resource { 437 | snapShot := sop.frameworkHandler.SnapshotSharedLister() 438 | if snapShot == nil || snapShot.NodeInfos() == nil { 439 | return nil 440 | } 441 | klog.V(6).Infof("NodeInfoSnapshot() %+v", snapShot) 442 | info, err := snapShot.NodeInfos().Get(nodeName) 443 | if err != nil { 444 | return nil 445 | } 446 | 447 | if info == nil { 448 | return nil 449 | } 450 | 451 | var leftResource nodeinfo.Resource 452 | allocatable := info.AllocatableResource() 453 | requested := info.RequestedResource() 454 | 455 | podCount := requested.AllowedPodNumber 456 | if podCount == 0 { 457 | podCount = len(info.Pods()) 458 | } 459 | 460 | leftResource.MilliCPU = allocatable.MilliCPU - requested.MilliCPU 461 | leftResource.AllowedPodNumber = allocatable.AllowedPodNumber - podCount 462 | leftResource.Memory = allocatable.Memory - requested.Memory 463 | leftResource.EphemeralStorage = allocatable.EphemeralStorage - requested.EphemeralStorage 464 | 465 | leftResourceCopy := leftResource.Clone() 466 | for k, v1 := range leftResourceCopy.ScalarResources { 467 | v2, ok := allocatable.ScalarResources[k] 468 | if !ok { 469 | continue 470 | } 471 | 
leftResource.ScalarResources[k] = v1 - v2 472 | } 473 | 474 | return &leftResource 475 | } 476 | 477 | func (sop *ScheduleOperation) fillOccupiedObj(pgs *cache.PodGroupMatchStatus, pod *corev1.Pod) error { 478 | if pgs == nil || pgs.PodGroup == nil { 479 | return fmt.Errorf("PodGroupMatchStatus is nil") 480 | } 481 | fullName := fmt.Sprintf("%v/%v", pod.Namespace, pgs.PodGroup.Name) 482 | refs := make([]string, 0) 483 | for _, ownerRef := range pod.OwnerReferences { 484 | refs = append(refs, string(ownerRef.UID)) 485 | } 486 | if pgs.Pod == nil { 487 | pgs.Pod = pod 488 | } 489 | if pgs.PodGroup.Spec.MinResources == nil { 490 | // initialize pod group resource 491 | reql := getPodResourceRequire(pod).ResourceList() 492 | pgs.PodGroup.Spec.MinResources = &reql 493 | } 494 | if pgs.PodGroup.Status.OccupiedBy == "" { 495 | // if do not have refs or initialize, return nil 496 | if len(refs) != 0 { 497 | // initialize pod group 498 | sortkeys.Strings(refs) 499 | pgs.PodGroup.Status.OccupiedBy = strings.Join(refs, ",") 500 | } 501 | return nil 502 | } 503 | // if refs not match, return error 504 | if len(refs) == 0 { 505 | return fmt.Errorf("pod group %s has been occupied by %v", fullName, pgs.PodGroup.Status.OccupiedBy) 506 | } 507 | sortkeys.Strings(refs) 508 | if strings.Join(refs, ",") != pgs.PodGroup.Status.OccupiedBy { 509 | return fmt.Errorf("pod group has been occupied by %v", pgs.PodGroup.Status.OccupiedBy) 510 | } 511 | return nil 512 | } 513 | 514 | func (sop *ScheduleOperation) computeResourceSatisfied(podPGS *cache.PodGroupMatchStatus, pod *corev1.Pod, 515 | nodeName string) error { 516 | // sum resources 517 | // pre-filter would filter some pod according to cluster resource, so do not need check again. 518 | // case1. if pod is the current max finished group, turn true, else return false. 519 | // case2. if can satisfied max one + current one, return true 520 | // case3. 
if any one pod in the max finished group requires more resource than node, but node can satisfy 521 | // the current one, return true, else return false 522 | 523 | fullName := fmt.Sprintf("%v/%v", pod.Namespace, podPGS.PodGroup.Name) 524 | var maxSingleRequired *nodeinfo.Resource 525 | if sop.maxPGStatus.PodGroup.Spec.MinResources != nil { 526 | maxSingleRequired = &nodeinfo.Resource{} 527 | maxSingleRequired.Add(*sop.maxPGStatus.PodGroup.Spec.MinResources) 528 | } 529 | 530 | // case1 531 | if sop.maxFinishedPG == fullName { 532 | klog.V(5).Infof("Current pod group %v is the currently max one, pass it", podPGS.PodGroup.Name) 533 | 534 | return nil 535 | } 536 | 537 | if klog.V(5) { 538 | klog.Infof("Current cluster resource %v", *(sop.computeClusterResource(sop.maxPGStatus.Pod))) 539 | } 540 | 541 | // do not check current pod required, it has checked 542 | if maxSingleRequired == nil { 543 | return nil 544 | } 545 | nodeLeftResource := sop.getLeftResource(nodeName) 546 | if nodeLeftResource == nil { 547 | return fmt.Errorf("SnapShot not initialized") 548 | } 549 | 550 | // case2 551 | currentPodRequire := getPodResourceRequire(pod) 552 | currentPodRequire.Add(maxSingleRequired.ResourceList()) 553 | if compareResourceAndRequire(nodeLeftResource, currentPodRequire) { 554 | return nil 555 | } 556 | 557 | // case3 558 | if !compareResourceAndRequire(nodeLeftResource, maxSingleRequired) { 559 | klog.V(5).Info("LeftResource can not satisfy the pod of the max finished one, meet the current one") 560 | return nil 561 | } 562 | klog.V(5).Infof("Pod %v can not scheduled: resource not enough", pod.Name) 563 | return errors.ErrorResourceNotEnough 564 | } 565 | 566 | func (sop *ScheduleOperation) computeClusterResource(pod *corev1.Pod) *nodeinfo.Resource { 567 | snapShotList, err := sop.frameworkHandler.SnapshotSharedLister().NodeInfos().List() 568 | if err != nil || snapShotList == nil { 569 | return nil 570 | } 571 | klog.V(6).Infof("NodeInfoSnapshot() %+v", snapShotList) 
572 | var leftResources nodeinfo.Resource 573 | for k, info := range snapShotList { 574 | if info == nil { 575 | continue 576 | } 577 | 578 | if info.Node() == nil { 579 | klog.Warningf("Node: %v can not found", k) 580 | continue 581 | } 582 | 583 | if info.Node().Spec.Unschedulable { 584 | continue 585 | } 586 | 587 | leftResource := singleNodeResource(info, pod, 1) 588 | 589 | leftResources.Add(leftResource.ResourceList()) 590 | 591 | } 592 | return &leftResources 593 | } 594 | 595 | func (sop *ScheduleOperation) compareClusterResourceAndRequire(pod *corev1.Pod, 596 | reqResource *nodeinfo.Resource, percent float32) bool { 597 | snapShotList, err := sop.frameworkHandler.SnapshotSharedLister().NodeInfos().List() 598 | if err != nil || snapShotList == nil { 599 | return false 600 | } 601 | klog.V(6).Infof("NodeInfoSnapshot() %+v", snapShotList) 602 | var leftResources nodeinfo.Resource 603 | count := 0 604 | for k, info := range snapShotList { 605 | count++ 606 | if info == nil { 607 | continue 608 | } 609 | 610 | if info.Node() == nil { 611 | klog.Warningf("Node: %v can not found", k) 612 | continue 613 | } 614 | 615 | if info.Node().Spec.Unschedulable { 616 | continue 617 | } 618 | 619 | leftResource := singleNodeResource(info, pod, percent) 620 | 621 | leftResources.Add(leftResource.ResourceList()) 622 | 623 | if compareResourceAndRequire(&leftResources, reqResource) { 624 | klog.V(5).Infof("Current left %v nodes resources %+v \nRequired resource %+v", count, 625 | leftResources, *reqResource) 626 | return true 627 | } 628 | klog.V(5).Infof("Current left %v nodes resources %+v \nRequired resource %+v", count, 629 | leftResources, *reqResource) 630 | } 631 | return false 632 | } 633 | 634 | func singleNodeResource(info *nodeinfo.NodeInfo, pod *corev1.Pod, percent float32) *nodeinfo.Resource { 635 | leftResource := nodeinfo.Resource{ 636 | ScalarResources: make(map[corev1.ResourceName]int64), 637 | } 638 | satisfied := false 639 | if _, err := info.Taints(); err 
!= nil { 640 | return &leftResource 641 | } 642 | satisfied = checkFit(pod, info) 643 | if !satisfied { 644 | return &leftResource 645 | } 646 | 647 | allocatable := info.AllocatableResource() 648 | requested := info.RequestedResource() 649 | 650 | podCount := requested.AllowedPodNumber 651 | if podCount == 0 { 652 | podCount = len(info.Pods()) 653 | } 654 | 655 | // reserve `percent` resource 656 | leftResource.AllowedPodNumber = int(float32(allocatable.AllowedPodNumber)*percent) - podCount 657 | leftResource.MilliCPU = int64(float32(allocatable.MilliCPU)*percent) - requested.MilliCPU 658 | leftResource.Memory = int64(float32(allocatable.Memory)*percent) - requested.Memory 659 | leftResource.EphemeralStorage = int64(float32(allocatable.EphemeralStorage)*percent) - requested.EphemeralStorage 660 | 661 | // calculate extend resources 662 | for k, allocatableEx := range allocatable.ScalarResources { 663 | requestEx, ok := requested.ScalarResources[k] 664 | if !ok { 665 | continue 666 | } 667 | leftResource.ScalarResources[k] = int64(float32(allocatableEx)*percent) - requestEx 668 | } 669 | return &leftResource 670 | } 671 | 672 | func compareResourceAndRequire(leftResource, req *nodeinfo.Resource) bool { 673 | if leftResource.Memory < req.Memory { 674 | return false 675 | } 676 | if leftResource.MilliCPU < req.MilliCPU { 677 | return false 678 | } 679 | if leftResource.EphemeralStorage < req.EphemeralStorage { 680 | return false 681 | } 682 | // AllowedPodNumber seems not correct, so we need check again 683 | if leftResource.AllowedPodNumber < req.AllowedPodNumber { 684 | return false 685 | } 686 | for k, v1 := range req.ScalarResources { 687 | v2, ok := leftResource.ScalarResources[k] 688 | if !ok { 689 | if v1 != 0 { 690 | return false 691 | } 692 | continue 693 | } 694 | if v1 > v2 { 695 | return false 696 | } 697 | } 698 | return true 699 | } 700 | 701 | func findMaxPG(pgCache *cache.PGStatusCache) (maxFinishedPG string, 702 | maxPGStatus 
*cache.PodGroupMatchStatus, maxFinished uint32) { 703 | for pgName, pgs := range pgCache.PGStatusMap { 704 | // if we have meet min request, make other pods priority 705 | var finished uint32 = 0 706 | if pgs.Scheduled { 707 | continue 708 | } 709 | if pgs.Pod == nil { 710 | continue 711 | } 712 | if pgs.PodGroup.Spec.MinMember-pgs.PodGroup. 713 | Status.Scheduled <= 0 { 714 | finished = 0 715 | } else { 716 | finished = (uint32(len(pgs.MatchedPodNodes.Items())) + pgs. 717 | PodGroup.Status.Scheduled) * 1000 / (pgs.PodGroup.Spec.MinMember) 718 | klog.V(5).Infof("group %v schedule progress %d/100", pgName, finished/10) 719 | } 720 | 721 | if finished > maxFinished { 722 | maxFinished = finished 723 | maxFinishedPG = pgName 724 | maxPGStatus = pgs 725 | } else if finished == maxFinished { 726 | // avoid maxPG is always the finished one 727 | // 1. if max is nil, set the value 728 | // 2. if the max is scheduled and running, make the not scheduled first 729 | if maxPGStatus == nil || 730 | maxPGStatus.PodGroup.Status.Scheduled >= maxPGStatus.PodGroup.Spec.MinMember && 731 | pgs.PodGroup.Status.Scheduled == 0 { 732 | maxFinished = finished 733 | maxFinishedPG = pgName 734 | maxPGStatus = pgs 735 | } 736 | } 737 | } 738 | return 739 | } 740 | 741 | func checkFit(pod *corev1.Pod, info *nodeinfo.NodeInfo) bool { 742 | predicateFails := make([]predicates.PredicateFailureReason, 0) 743 | fit, reasons, err := predicates.PodMatchNodeSelector(pod, nil, info) 744 | if err != nil { 745 | return false 746 | } 747 | if !fit { 748 | predicateFails = append(predicateFails, reasons...) 749 | } 750 | 751 | fit, reasons, err = predicates.PodToleratesNodeTaints(pod, nil, info) 752 | if err != nil { 753 | return false 754 | } 755 | if !fit { 756 | predicateFails = append(predicateFails, reasons...) 
757 | } 758 | return len(predicateFails) == 0 759 | } 760 | 761 | func getPodResourceRequire(pod *corev1.Pod) *nodeinfo.Resource { 762 | var currentPodRequired nodeinfo.Resource 763 | // Compute current pod required 764 | for _, c := range pod.Spec.Containers { 765 | if c.Resources.Limits != nil { 766 | currentPodRequired.Add(c.Resources.Limits) 767 | } else { 768 | currentPodRequired.Add(c.Resources.Requests) 769 | } 770 | } 771 | return ¤tPodRequired 772 | } 773 | 774 | func getPreAllocatedResource(maxPGStatus *cache.PodGroupMatchStatus, matched int) (preAllocatedResource nodeinfo. 775 | Resource) { 776 | notFinished := 0 777 | scheduled := int(maxPGStatus.PodGroup.Status.Scheduled) 778 | if matched != 0 { 779 | notFinished = int(maxPGStatus.PodGroup.Spec.MinMember) - matched 780 | } else { 781 | // recover from stop 782 | notFinished = int(maxPGStatus.PodGroup.Spec.MinMember) - scheduled 783 | } 784 | for i := 0; i < notFinished; i++ { 785 | if maxPGStatus.PodGroup.Spec.MinResources != nil { 786 | preAllocatedResource.Add(*maxPGStatus.PodGroup.Spec.MinResources) 787 | } 788 | } 789 | if preAllocatedResource.AllowedPodNumber == 0 { 790 | preAllocatedResource.AllowedPodNumber = int(maxPGStatus.PodGroup.Spec.MinMember) + 1 791 | } 792 | return 793 | } 794 | --------------------------------------------------------------------------------