├── pkg ├── framework │ ├── volcano.go │ ├── doc.go │ └── kubescheduler.go ├── const.go ├── version │ ├── base.go │ ├── sharedcommand │ │ └── sharedcommand.go │ └── version.go ├── simulator │ ├── capacityestimation │ │ ├── podgenerator.go │ │ ├── simulator.go │ │ └── report.go │ ├── schedulersimulation │ │ ├── simulator.go │ │ └── report.go │ └── clustercompression │ │ ├── report.go │ │ ├── nodeFilter.go │ │ ├── options.go │ │ └── simulator.go ├── interface.go ├── status.go ├── plugins │ └── generic │ │ └── plugin.go └── utils │ ├── pod.go │ └── utils.go ├── docs └── images │ └── capacity-management-capacity-icon.jpeg ├── .github ├── ISSUE_TEMPLATE │ ├── custom.md │ ├── feature_request.md │ └── bug_report.md ├── workflows │ ├── release.yml │ └── ci.yml └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── hack ├── util.sh ├── verify-staticcheck.sh ├── verify-import-aliases.sh ├── .import-aliases └── tools │ └── preferredimports │ └── preferredimports.go ├── app ├── cmds │ ├── option.go │ ├── capacityestimation │ │ ├── options │ │ │ ├── namespacename.go │ │ │ └── capacityestimation.go │ │ └── capacityestimation.go │ ├── schedulersimulation │ │ ├── options │ │ │ └── schedulersimulation.go │ │ └── schedulersimulation.go │ └── clustercompression │ │ ├── options │ │ └── clustercompression.go │ │ └── clustercompression.go └── root.go ├── main.go ├── .krew.yaml ├── check_label.py ├── .goreleaser.yml ├── Makefile ├── README-ZH.md ├── go.mod ├── README.md └── LICENSE /pkg/framework/volcano.go: -------------------------------------------------------------------------------- 1 | package framework 2 | -------------------------------------------------------------------------------- /pkg/const.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | const ( 4 | PodProvisioner = "kc.k-cloud-labs.io/provisioned-by" 5 | SchedulerName = "simulator-scheduler" 6 | ) 7 | -------------------------------------------------------------------------------- /docs/images/capacity-management-capacity-icon.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k-cloud-labs/kluster-capacity/HEAD/docs/images/capacity-management-capacity-icon.jpeg -------------------------------------------------------------------------------- /pkg/framework/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | this folder is used to implement frameworks for all scheduler, e.g. kube-scheduler, volcano, YuniKorn 3 | */ 4 | 5 | package framework 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/custom.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Custom issue template 3 | about: Describe this issue template's purpose here. 
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 |
11 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Binaries for programs and plugins
2 | *.exe
3 | *.exe~
4 | *.dll
5 | *.so
6 | *.dylib
7 |
8 | # Test binary, built with `go test -c`
9 | *.test
10 | test.sh
11 |
12 | # Output of the go coverage tool, specifically when used with LiteIDE
13 | *.out
14 |
15 | # Dependency directories (remove the comment below to include it)
16 | vendor/
17 |
18 | # binary file
19 | kluster-capacity
20 |
21 | # conf
22 | pod*.yaml
23 | schedulerconfig
24 | kubeconfig
25 |
26 | # IDE
27 | .idea/
28 |
--------------------------------------------------------------------------------
/pkg/version/base.go:
--------------------------------------------------------------------------------
1 | package version
2 |
3 | // Base version information.
4 | //
5 | // This is the fallback data used when version information from git is not
6 | // provided via go ldflags. It provides an approximation of the kluster-capacity
7 | // version for ad-hoc builds (e.g. `go build`) that cannot get the version
8 | // information from git.
9 | var (
10 | 	gitVersion   = "v0.0.0-master"
11 | 	gitCommit    = "unknown" // sha1 from git, output of $(git rev-parse HEAD)
12 | 	gitTreeState = "unknown" // state of git tree, either "clean" or "dirty"
13 |
14 | 	buildDate = "unknown" // build date in ISO8601 format, output of $(date -u +'%Y-%m-%dT%H:%M:%SZ')
15 | )
16 |
--------------------------------------------------------------------------------
/hack/util.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -o errexit
4 | set -o nounset
5 | set -o pipefail
6 |
7 | # This script holds common bash variables and utility functions.
8 |
9 | # This function installs a Go tool via the 'go install' command.
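# It installs the tool into $(go env GOPATH)/bin and appends that directory to PATH.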
10 | # Parameters: 11 | # - $1: package name, such as "sigs.k8s.io/controller-tools/cmd/controller-gen" 12 | # - $2: package version, such as "v0.4.1" 13 | function util::install_tools() { 14 | local package="$1" 15 | local version="$2" 16 | echo "go install ${package}@${version}" 17 | GO111MODULE=on go install "${package}"@"${version}" 18 | GOPATH=$(go env GOPATH | awk -F ':' '{print $1}') 19 | export PATH=$PATH:$GOPATH/bin 20 | } -------------------------------------------------------------------------------- /app/cmds/option.go: -------------------------------------------------------------------------------- 1 | package cmds 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | ) 7 | 8 | type Options struct { 9 | SchedulerConfig string 10 | KubeConfig string 11 | Verbose bool 12 | OutputFormat string 13 | // file to load initial data instead of from k8s cluster 14 | Snapshot string 15 | // file to save the result 16 | SaveTo string 17 | ExcludeNodes []string 18 | MaxLimit int 19 | } 20 | 21 | func (o *Options) Default() { 22 | if len(o.KubeConfig) == 0 { 23 | config := os.Getenv("KUBECONFIG") 24 | if len(config) == 0 { 25 | config = filepath.Join(os.Getenv("HOME"), ".kube/config") 26 | } 27 | o.KubeConfig = config 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /hack/verify-staticcheck.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o nounset 5 | set -o pipefail 6 | 7 | REPO_ROOT=$(dirname "${BASH_SOURCE[0]}")/.. 8 | GOLANGCI_LINT_PKG="github.com/golangci/golangci-lint/cmd/golangci-lint" 9 | GOLANGCI_LINT_VER="v1.50.1" 10 | 11 | cd "${REPO_ROOT}" 12 | source "hack/util.sh" 13 | 14 | util::install_tools ${GOLANGCI_LINT_PKG} ${GOLANGCI_LINT_VER} 15 | 16 | if golangci-lint run --timeout=5m; then 17 | echo 'Congratulations! All Go source files have passed staticcheck.' 18 | else 19 | echo # print one empty line, separate from warning messages. 20 | echo 'Please review the above warnings.' 21 | echo 'If the above warnings do not make sense, feel free to file an issue.' 22 | exit 1 23 | fi -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 k-cloud-labs org 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at
7 |
8 |     http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | package main
17 |
18 | import "github.com/k-cloud-labs/kluster-capacity/app"
19 |
20 | func main() {
21 | 	app.Execute()
22 | }
23 |
--------------------------------------------------------------------------------
/pkg/simulator/capacityestimation/podgenerator.go:
--------------------------------------------------------------------------------
1 | package capacityestimation
2 |
3 | import (
4 | 	"fmt"
5 |
6 | 	corev1 "k8s.io/api/core/v1"
7 |
8 | 	"github.com/k-cloud-labs/kluster-capacity/pkg/utils"
9 | )
10 |
11 | type singlePodGenerator struct {
12 | 	counter     uint
13 | 	podTemplate *corev1.Pod
14 | }
15 |
16 | func NewSinglePodGenerator(podTemplate *corev1.Pod) PodGenerator {
17 | 	return &singlePodGenerator{
18 | 		counter:     0,
19 | 		podTemplate: podTemplate,
20 | 	}
21 | }
22 |
23 | func (g *singlePodGenerator) Generate() *corev1.Pod {
24 | 	pod := utils.InitPod(g.podTemplate)
25 | 	// use simulated pod name with an index to construct the name
26 | 	pod.ObjectMeta.Name = fmt.Sprintf("%v-%v", g.podTemplate.Name, g.counter)
27 |
28 | 	// Ensures uniqueness
29 | 	g.counter++
30 |
31 | 	return pod
32 | }
33 |
--------------------------------------------------------------------------------
/pkg/interface.go:
--------------------------------------------------------------------------------
1 | package pkg
2 |
3 | import (
4 | 	corev1 "k8s.io/api/core/v1"
5 | 	"k8s.io/apimachinery/pkg/runtime"
6 | )
7 |
8 | // Framework needs to be implemented by every scheduler framework
9 | type Framework interface {
10 | 	Run(init func() error) error
11 | 	Initialize(objs ...runtime.Object) error
12 | 	CreatePod(pod *corev1.Pod) error
13 | 	UpdateEstimationPods(pod ...*corev1.Pod)
14 | 	UpdateNodesToScaleDown(nodeName string)
15 | 	Status() *Status
16 | 	GetPodsByNode(nodeName string) ([]*corev1.Pod, error)
17 | 	Stop(reason string) error
18 | }
19 |
20 | // Simulator needs to be implemented by every simulator
21 | type Simulator interface {
22 | 	Run() error
23 | 	Initialize(objs ...runtime.Object) error
24 | 	Report() Printer
25 | }
26 |
27 | type Printer interface {
28 | 	Print(verbose bool, format string) error
29 | }
30 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: release
2 | on:
3 |   push:
4 |     tags:
5 |       - v*
6 | jobs:
7 |   goreleaser:
8 |     runs-on: ubuntu-latest
9 |     environment: release
10 |     steps:
11 |       - uses: actions/checkout@v3
12 |         with:
13 |           fetch-depth: 0
14 |       - run: git fetch --force --tags
15 |       - uses: actions/setup-go@v3
16 |         with:
17 |           go-version: '>=1.19'
18 |           cache: true
19 |       - name: GoReleaser
20 |         uses: goreleaser/goreleaser-action@v4
21 |         with:
22 |           version: latest
23 |           args: release --rm-dist
24 |         env:
25 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
26 |           TAP_GITHUB_TOKEN: ${{ secrets.TAP_GITHUB_TOKEN }}
27 |       - name: Update new version in krew-index
28 |         uses: rajatjindal/krew-release-bot@v0.0.43
--------------------------------------------------------------------------------
/pkg/version/sharedcommand/sharedcommand.go:
-------------------------------------------------------------------------------- 1 | package sharedcommand 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | 7 | "github.com/spf13/cobra" 8 | 9 | "github.com/k-cloud-labs/kluster-capacity/pkg/version" 10 | ) 11 | 12 | var ( 13 | versionShort = `Print the version information.` 14 | versionLong = `Print the version information.` 15 | versionExample = ` # Print %s command version 16 | %s version` 17 | ) 18 | 19 | // NewCmdVersion prints out the release version info for this command binary. 20 | // It is used as a subcommand of a parent command. 21 | func NewCmdVersion(out io.Writer, parentCommand string) *cobra.Command { 22 | cmd := &cobra.Command{ 23 | Use: "version", 24 | Short: versionShort, 25 | Long: versionLong, 26 | Example: fmt.Sprintf(versionExample, parentCommand, parentCommand), 27 | Run: func(cmd *cobra.Command, args []string) { 28 | fmt.Fprintf(out, "%s version: %s\n", parentCommand, version.Get()) 29 | }, 30 | } 31 | 32 | return cmd 33 | } 34 | -------------------------------------------------------------------------------- /pkg/status.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | import ( 4 | corev1 "k8s.io/api/core/v1" 5 | ) 6 | 7 | // Status capture all scheduled pods with reason why the estimation could not continue 8 | type Status struct { 9 | // all pods 10 | Pods []corev1.Pod `json:"pods"` 11 | // all nodes 12 | Nodes map[string]corev1.Node `json:"nodes"` 13 | // for ce 14 | PodsForEstimation []*corev1.Pod `json:"pods_for_estimation"` 15 | // for cc 16 | NodesToScaleDown []string `json:"nodes_to_scale_down"` 17 | SelectNodeCount int `json:"select_node_count"` 18 | SchedulerCount int `json:"scheduler_count"` 19 | FailedSchedulerCount int `json:"failed_scheduler_count"` 20 | // stop reason 21 | StopReason string `json:"stop_reason"` 22 | } 23 | 24 | func (s *Status) SelectNodeCountInc() { 25 | s.SelectNodeCount++ 26 | } 27 | 28 | func (s *Status) SchedulerCountInc() { 29 | s.SchedulerCount++ 30 | } 31 | 32 | func (s *Status) FailedSchedulerCountInc() { 33 | s.FailedSchedulerCount++ 34 | } 35 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 
39 | -------------------------------------------------------------------------------- /hack/verify-import-aliases.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o nounset 5 | set -o pipefail 6 | 7 | SCRIPT_ROOT=$(dirname "${BASH_SOURCE[0]}")/.. 8 | cd "${SCRIPT_ROOT}" 9 | ROOT_PATH=$(pwd) 10 | 11 | IMPORT_ALIASES_PATH="${ROOT_PATH}/hack/.import-aliases" 12 | INCLUDE_PATH="(${ROOT_PATH}/app|${ROOT_PATH}/pkg)" 13 | 14 | ret=0 15 | # We can't directly install preferredimports by `go install` due to the go.mod issue: 16 | # go install k8s.io/kubernetes/cmd/preferredimports@v1.21.3: k8s.io/kubernetes@v1.21.3 17 | # The go.mod file for the module providing named packages contains one or 18 | # more replace directives. It must not contain directives that would cause 19 | # it to be interpreted differently than if it were the main module. 20 | go run "${ROOT_PATH}/hack/tools/preferredimports/preferredimports.go" -import-aliases "${IMPORT_ALIASES_PATH}" -include-path "${INCLUDE_PATH}" "${ROOT_PATH}" || ret=$? 21 | if [[ $ret -ne 0 ]]; then 22 | echo "!!! Please see hack/.import-aliases for the preferred aliases for imports." >&2 23 | exit 1 24 | fi -------------------------------------------------------------------------------- /pkg/version/version.go: -------------------------------------------------------------------------------- 1 | package version 2 | 3 | import ( 4 | "fmt" 5 | "runtime" 6 | ) 7 | 8 | // Info contains versioning information. 9 | type Info struct { 10 | GitVersion string `json:"gitVersion"` 11 | GitCommit string `json:"gitCommit"` 12 | GitTreeState string `json:"gitTreeState"` 13 | BuildDate string `json:"buildDate"` 14 | GoVersion string `json:"goVersion"` 15 | Compiler string `json:"compiler"` 16 | Platform string `json:"platform"` 17 | } 18 | 19 | // String returns a Go-syntax representation of the Info. 20 | func (info Info) String() string { 21 | return fmt.Sprintf("%#v", info) 22 | } 23 | 24 | // Get returns the overall codebase version. It's for detecting 25 | // what code a binary was built from. 
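//
// A sketch of how these values get injected at build time (the module path is
// inferred from this repo's imports; the exact flags are illustrative, not the
// project's actual build script):
//
//	go build -ldflags "-X github.com/k-cloud-labs/kluster-capacity/pkg/version.gitVersion=$(git describe --tags) \
//	  -X github.com/k-cloud-labs/kluster-capacity/pkg/version.gitCommit=$(git rev-parse HEAD) \
//	  -X github.com/k-cloud-labs/kluster-capacity/pkg/version.buildDate=$(date -u +'%Y-%m-%dT%H:%M:%SZ')"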
26 | func Get() Info {
27 | 	return Info{
28 | 		GitVersion:   gitVersion,
29 | 		GitCommit:    gitCommit,
30 | 		GitTreeState: gitTreeState,
31 | 		BuildDate:    buildDate,
32 | 		GoVersion:    runtime.Version(),
33 | 		Compiler:     runtime.Compiler,
34 | 		Platform:     fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH),
35 | 	}
36 | }
37 |
--------------------------------------------------------------------------------
/app/cmds/capacityestimation/options/namespacename.go:
--------------------------------------------------------------------------------
1 | package options
2 |
3 | import (
4 | 	"errors"
5 | 	"fmt"
6 | 	"strings"
7 |
8 | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9 | )
10 |
11 | type NamespaceNames []NamespaceName
12 |
13 | type NamespaceName struct {
14 | 	Namespace string `json:"namespace"`
15 | 	Name      string `json:"name"`
16 | }
17 |
18 | func (n *NamespaceNames) Set(nns string) error {
19 | 	for _, nn := range strings.Split(nns, ",") {
20 | 		nnStrs := strings.Split(nn, "/")
21 | 		if len(nnStrs) == 1 {
22 | 			*n = append(*n, NamespaceName{
23 | 				Namespace: metav1.NamespaceDefault,
24 | 				Name:      nnStrs[0],
25 | 			})
26 | 		} else if len(nnStrs) == 2 {
27 | 			*n = append(*n, NamespaceName{
28 | 				Namespace: nnStrs[0],
29 | 				Name:      nnStrs[1],
30 | 			})
31 | 		} else {
32 | 			return errors.New("invalid format")
33 | 		}
34 | 	}
35 |
36 | 	return nil
37 | }
38 |
39 | func (n *NamespaceNames) String() string {
40 | 	strs := []string{}
41 | 	for _, nn := range *n {
42 | 		strs = append(strs, fmt.Sprintf("%s/%s", nn.Namespace, nn.Name))
43 | 	}
44 |
45 | 	return strings.Join(strs, ",")
46 | }
47 |
48 | func (n *NamespaceNames) Type() string {
49 | 	return "NamespaceNames"
50 | }
51 |
--------------------------------------------------------------------------------
/.krew.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: krew.googlecontainertools.github.com/v1alpha2
2 | kind: Plugin
3 | metadata:
4 |   name: kluster-capacity
5 | spec:
6 |   version: {{ .TagName }}
7 |   homepage: https://github.com/k-cloud-labs/kluster-capacity
8 |   shortDescription: Scheduler simulation for capacity analysis.
9 |   description: |
10 |     A simple CLI providing cluster capacity analysis with scheduler simulation.
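# Editorial note: judging from .github/workflows/release.yml (which runs
# rajatjindal/krew-release-bot), this file is a template; {{ .TagName }} and
# addURIAndSha are expanded at release time into the final krew manifest.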
11 | platforms:
12 |   - selector:
13 |       matchLabels:
14 |         os: darwin
15 |         arch: amd64
16 |     bin: kluster-capacity
17 |     {{addURIAndSha "https://github.com/k-cloud-labs/kluster-capacity/releases/download/{{ .TagName }}/kluster-capacity_{{ .TagName }}_darwin_x86_64.tar.gz" .TagName }}
18 |   - selector:
19 |       matchLabels:
20 |         os: darwin
21 |         arch: arm64
22 |     bin: kluster-capacity
23 |     {{addURIAndSha "https://github.com/k-cloud-labs/kluster-capacity/releases/download/{{ .TagName }}/kluster-capacity_{{ .TagName }}_darwin_arm64.tar.gz" .TagName }}
24 |   - selector:
25 |       matchLabels:
26 |         os: linux
27 |         arch: amd64
28 |     bin: kluster-capacity
29 |     {{addURIAndSha "https://github.com/k-cloud-labs/kluster-capacity/releases/download/{{ .TagName }}/kluster-capacity_{{ .TagName }}_linux_x86_64.tar.gz" .TagName }}
30 |   - selector:
31 |       matchLabels:
32 |         os: linux
33 |         arch: arm64
34 |     bin: kluster-capacity
35 |     {{addURIAndSha "https://github.com/k-cloud-labs/kluster-capacity/releases/download/{{ .TagName }}/kluster-capacity_{{ .TagName }}_linux_arm64.tar.gz" .TagName }}
--------------------------------------------------------------------------------
/check_label.py:
--------------------------------------------------------------------------------
1 | from github import Github
2 | import os
3 | import sys
4 |
5 | # GitHub API authentication
6 | g = Github(os.environ['K_GITHUB_TOKEN'])
7 | repo = g.get_repo(os.environ['GITHUB_REPOSITORY'])
8 |
9 | # Fetch all labels defined in the repository
10 | labels = [label.name for label in repo.get_labels()]
11 |
12 | if sys.argv[1] == 'issues':
13 |     issue_number = sys.argv[2]
14 |     issue = repo.get_issue(int(issue_number))
15 |     issue_labels = [label.name for label in issue.labels]
16 |
17 |     # Check whether the issue carries at least one of the repository labels
18 |     if not set(labels).intersection(set(issue_labels)):
19 |         message = "Please add a label from the following list: " + str(labels)
20 |         issue.create_comment(message)
21 |
22 | if sys.argv[1] == 'pull_request':
23 |     pull_request_number = sys.argv[2]
24 |     pull_request = repo.get_pull(int(pull_request_number))
25 |
26 |     # Automatically add a label indicating the size of the PR
27 |     size_labels = ["size/S", "size/M", "size/L", "size/XL"]
28 |     lines_of_code = pull_request.additions + pull_request.deletions
29 |     if lines_of_code <= 50:
30 |         pull_request.add_to_labels(size_labels[0])
31 |     elif lines_of_code <= 100:
32 |         pull_request.add_to_labels(size_labels[1])
33 |     elif lines_of_code <= 500:
34 |         pull_request.add_to_labels(size_labels[2])
35 |     else:
36 |         pull_request.add_to_labels(size_labels[3])
37 |
38 |
39 |     pull_request_labels = [label.name for label in pull_request.labels]
40 |
41 |     # Check whether the pull request carries at least one of the repository labels
42 |     if not set(labels).intersection(set(pull_request_labels)):
43 |         message = "Please add a label from the following list: " + str(labels)
44 |         pull_request.create_issue_comment(message)
45 |
46 |
47 |
--------------------------------------------------------------------------------
/.goreleaser.yml:
--------------------------------------------------------------------------------
1 | builds:
2 |   - env:
3 |       - CGO_ENABLED=0
4 |     goos:
5 |       - linux
6 |       - darwin
7 | #      - windows
8 |     goarch:
9 |       - arm64
10 |       - amd64
11 |       - 386
12 |     goarm:
13 |       - 6
14 |       - 7
15 | archives:
16 |   - name_template: |-
17 |       kluster-capacity_{{ .Tag }}_{{ .Os }}_{{ .Arch -}}
18 |       {{- with .Arm -}}
19 |       {{- if (eq . "6") -}}hf
20 |       {{- else -}}v{{- . -}}
21 |       {{- end -}}
22 |       {{- end -}}
23 |     replacements:
24 |       386: i386
25 |       amd64: x86_64
26 | #    format_overrides:
27 | #      - goos: windows
28 | #        format: zip
29 | checksum:
30 |   name_template: 'checksums.txt'
31 | snapshot:
32 |   name_template: "{{ .Tag }}-next"
33 | changelog:
34 |   skip: false
35 |   use: git
36 |   sort: asc
37 |   groups:
38 |     - title: Features
39 |       regexp: "^.*feat[(\\w)]*:+.*$"
40 |       order: 0
41 |     - title: 'Bug fixes'
42 |       regexp: "^.*fix[(\\w)]*:+.*$"
43 |       order: 1
44 |     - title: Others
45 |       order: 999
46 |   filters:
47 |     exclude:
48 |       - "^docs:"
49 |       - "^test:"
50 | brews:
51 |   - name: kluster-capacity
52 |     tap:
53 |       # The token determines the release type (Github/Gitlab).
54 |       owner: k-cloud-labs
55 |       name: homebrew-tap
56 |       token: "{{ .Env.TAP_GITHUB_TOKEN }}"
57 |     folder: Formula
58 |     # Brew fails from multiple 32-bit arm versions.
59 |     # Specify which version should be used.
60 |     goarm: 6
61 |     homepage: https://github.com/k-cloud-labs/kluster-capacity
62 |     license: apache-2.0
63 |     description: A simple CLI providing cluster capacity analysis with scheduler simulation.
64 |     dependencies:
65 |       - name: git
66 |     test: |
67 |       system "#{bin}/kluster-capacity version"
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: build
2 |
3 | on:
4 |   # Run this workflow every time a new commit pushed to upstream/fork repository.
5 |   # Run workflow on fork repository will help contributors find and resolve issues before sending a PR.
6 |   push:
7 |   pull_request:
8 | jobs:
9 |   golangci:
10 |     name: lint
11 |     runs-on: ubuntu-20.04
12 |     steps:
13 |       - name: checkout code
14 |         uses: actions/checkout@v3
15 |       - name: install Go
16 |         uses: actions/setup-go@v3
17 |         with:
18 |           go-version: 1.19.x
19 |       - name: lint
20 |         run: hack/verify-staticcheck.sh
21 |       - name: import alias
22 |         run: hack/verify-import-aliases.sh
23 |   fmt:
24 |     name: gofmt
25 |     runs-on: ubuntu-20.04
26 |     steps:
27 |       - name: checkout code
28 |         uses: actions/checkout@v3
29 |       - name: install Go
30 |         uses: actions/setup-go@v3
31 |         with:
32 |           go-version: 1.19.x
33 |       - name: go fmt check
34 |         run: make fmt-check
35 |   vet:
36 |     name: go vet
37 |     runs-on: ubuntu-20.04
38 |     steps:
39 |       - name: checkout code
40 |         uses: actions/checkout@v3
41 |       - name: install Go
42 |         uses: actions/setup-go@v3
43 |         with:
44 |           go-version: 1.19.x
45 |       - name: go vet
46 |         run: make vet
47 | #  test:
48 | #    name: unit test
49 | #    needs:
50 | #      - fmt
51 | #      - vet
52 | #    runs-on: ubuntu-18.04
53 | #    steps:
54 | #      - name: checkout code
55 | #        uses: actions/checkout@v2
56 | #      - name: install Go
57 | #        uses: actions/setup-go@v2
58 | #        with:
59 | #          go-version: 1.19.x
60 | #      - name: Run coverage
61 | #        run: ./script/test.sh
62 | #      - name: Codecov
63 | #        uses: codecov/codecov-action@v3.1.0
64 |
65 |
--------------------------------------------------------------------------------
/pkg/plugins/generic/plugin.go:
--------------------------------------------------------------------------------
1 | package generic
2 |
3 | import (
4 | 	"context"
5 | 	"fmt"
6 |
7 | 	corev1 "k8s.io/api/core/v1"
8 | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9 | 	"k8s.io/client-go/kubernetes"
10 | 	"k8s.io/klog/v2"
11 | 	"k8s.io/kubernetes/pkg/scheduler/framework"
12 |
13 | 	"github.com/k-cloud-labs/kluster-capacity/pkg"
14 | )
15 |
16 | const Name = "GenericBinder"
17 |
18 | type GenericBinder struct {
19 | 	client       kubernetes.Interface
20 | 	postBindHook func(*corev1.Pod) error
21 | 	status       *pkg.Status
22 | }
23 |
24 | func New(postBindHook func(*corev1.Pod) error, client kubernetes.Interface, status *pkg.Status) (framework.Plugin, error) {
25 | 	return &GenericBinder{
26 | 		postBindHook: postBindHook,
27 | 		client:       client,
28 | 		status:       status,
29 | 	}, nil
30 | }
31 |
32 | func (b *GenericBinder) Name() string {
33 | 	return Name
34 | }
35 |
36 | func (b *GenericBinder) Bind(ctx context.Context, state *framework.CycleState, p *corev1.Pod, nodeName string) *framework.Status {
37 | 	pod, err := b.client.CoreV1().Pods(p.Namespace).Get(context.TODO(), p.Name, metav1.GetOptions{})
38 | 	if err != nil {
39 | 		return framework.NewStatus(framework.Error, fmt.Sprintf("Unable to bind: %v", err))
40 | 	}
41 | 	updatedPod := pod.DeepCopy()
42 | 	updatedPod.Spec.NodeName = nodeName
43 | 	updatedPod.Status.Phase = corev1.PodRunning
44 |
45 | 	if _, err = b.client.CoreV1().Pods(pod.Namespace).Update(ctx, updatedPod, metav1.UpdateOptions{}); err != nil {
46 | 		return framework.NewStatus(framework.Error, fmt.Sprintf("Unable to update bound pod: %v", err))
47 | 	}
48 |
49 | 	return nil
50 | }
51 |
52 | func (b *GenericBinder) PreBind(ctx context.Context, state *framework.CycleState, p *corev1.Pod, nodeName string) *framework.Status {
53 | 	return nil
54 | }
55 |
56 | func (b *GenericBinder) PostBind(_ context.Context, _ *framework.CycleState, pod *corev1.Pod, _ string) {
57 | 	if b.postBindHook != nil {
58 | 		if !metav1.HasAnnotation(pod.ObjectMeta, pkg.PodProvisioner) {
59 | 			return
60 | 		}
61 | 		b.status.SchedulerCountInc()
62 |
63 | 		if err := b.postBindHook(pod); err != nil {
64 | 			klog.ErrorS(err, "Invoking postBindHook returned an error", "pod", klog.KObj(pod))
65 | 		}
66 | 	}
67 | }
68 |
--------------------------------------------------------------------------------
/hack/.import-aliases:
--------------------------------------------------------------------------------
1 | {
2 |   "k8s.io/api/admissionregistration/v1": "admissionregistrationv1",
3 |   "k8s.io/api/admissionregistration/v1beta1": "admissionregistrationv1beta1",
4 |   "k8s.io/api/admission/v1beta1": "admissionv1beta1",
5 |   "k8s.io/api/admission/v1": "admissionv1",
6 |   "k8s.io/api/apps/v1": "appsv1",
7 |   "k8s.io/api/apps/v1beta1": "appsv1beta1",
8 |   "k8s.io/api/apps/v1beta2": "appsv1beta2",
9 |   "k8s.io/api/authentication/v1": "authenticationv1",
10 |   "k8s.io/api/authentication/v1beta1": "authenticationv1beta1",
11 |   "k8s.io/api/authorization/v1": "authorizationv1",
12 |   "k8s.io/api/authorization/v1beta1": "authorizationv1beta1",
13 |   "k8s.io/api/autoscaling/v1": "autoscalingv1",
14 |   "k8s.io/api/batch/v1": "batchv1",
15 |   "k8s.io/api/batch/v1beta1": "batchv1beta1",
16 |   "k8s.io/api/certificates/v1beta1": "certificatesv1beta1",
17 |   "k8s.io/api/coordination/v1": "coordinationv1",
18 |   "k8s.io/api/coordination/v1beta1": "coordinationv1beta1",
19 |   "k8s.io/api/core/v1": "corev1",
20 |   "k8s.io/api/discovery/v1": "discoveryv1",
21 |   "k8s.io/api/events/v1": "eventsv1",
22 |   "k8s.io/api/events/v1beta1": "eventsv1beta1",
23 |   "k8s.io/api/extensions/v1beta1": "extensionsv1beta1",
24 |   "k8s.io/api/imagepolicy/v1alpha1": "imagepolicyv1alpha1",
25 |   "k8s.io/api/networking/v1": "networkingv1",
26 |   "k8s.io/api/networking/v1beta1": "networkingv1beta1",
27 |   "k8s.io/api/node/v1alpha1": "nodev1alpha1",
28 |   "k8s.io/api/node/v1beta1": "nodev1beta1",
29 |   "k8s.io/api/node/v1": "nodev1",
30 |   "k8s.io/api/policy/v1": "policyv1",
31 |   "k8s.io/api/policy/v1beta1": "policyv1beta1",
32 |   "k8s.io/api/rbac/v1": "rbacv1",
33 |   "k8s.io/api/rbac/v1alpha1": "rbacv1alpha1",
"k8s.io/api/rbac/v1alpha1": "rbacv1alpha1", 34 | "k8s.io/api/rbac/v1beta1": "rbacv1beta1", 35 | "k8s.io/api/scheduling/v1": "schedulingv1", 36 | "k8s.io/api/scheduling/v1alpha1": "schedulingv1alpha1", 37 | "k8s.io/api/scheduling/v1beta1": "schedulingv1beta1", 38 | "k8s.io/api/storage/v1": "storagev1", 39 | "k8s.io/api/storage/v1alpha1": "storagev1alpha1", 40 | "k8s.io/api/storage/v1beta1": "storagev1beta1", 41 | "k8s.io/apimachinery/pkg/api/errors": "apierrors", 42 | "k8s.io/apimachinery/pkg/apis/meta/v1": "metav1", 43 | 44 | "github.com/k-cloud-labs/pkg/apis/policy/v1alpha1": "policyv1alpha1" 45 | } -------------------------------------------------------------------------------- /app/cmds/schedulersimulation/options/schedulersimulation.go: -------------------------------------------------------------------------------- 1 | package options 2 | 3 | import ( 4 | "github.com/spf13/pflag" 5 | "k8s.io/apimachinery/pkg/runtime" 6 | 7 | "github.com/k-cloud-labs/kluster-capacity/app/cmds" 8 | ) 9 | 10 | const ( 11 | // FromCluster represent an existing cluster without pods 12 | FromCluster = "Cluster" 13 | // FromSnapshot represent a snapshot 14 | FromSnapshot = "Snapshot" 15 | 16 | // ExitWhenAllScheduled means exit when all pods have been scheduled once 17 | ExitWhenAllScheduled = "AllScheduled" 18 | // ExitWhenAllSucceed means exit when all pods have been scheduled successfully 19 | ExitWhenAllSucceed = "AllSucceed" 20 | ) 21 | 22 | type Snapshot struct { 23 | // key is gk 24 | Objects map[string][]runtime.Object `json:"objects"` 25 | } 26 | 27 | type SchedulerSimulationOptions struct { 28 | cmds.Options 29 | // Cluster, Snapshot 30 | SourceFrom string 31 | ExitCondition string 32 | IgnorePodsOnExcludeNodes bool 33 | } 34 | 35 | type SchedulerSimulationConfig struct { 36 | Options *SchedulerSimulationOptions 37 | InitObjs []runtime.Object 38 | } 39 | 40 | func NewSchedulerSimulationOptions() *SchedulerSimulationOptions { 41 | return &SchedulerSimulationOptions{} 42 | } 43 | 44 | func NewSchedulerSimulationConfig(option *SchedulerSimulationOptions) *SchedulerSimulationConfig { 45 | return &SchedulerSimulationConfig{ 46 | Options: option, 47 | } 48 | } 49 | 50 | func (s *SchedulerSimulationOptions) AddFlags(fs *pflag.FlagSet) { 51 | fs.StringVar(&s.KubeConfig, "kubeconfig", s.KubeConfig, "Path to the kubeconfig file to use for the analysis") 52 | fs.StringVar(&s.SchedulerConfig, "schedulerconfig", s.SchedulerConfig, "Path to JSON or YAML file containing scheduler configuration. Used when source-from is cluster") 53 | fs.StringVarP(&s.OutputFormat, "output", "o", s.OutputFormat, "Output format. One of: json|yaml") 54 | fs.StringSliceVar(&s.ExcludeNodes, "exclude-nodes", s.ExcludeNodes, "Exclude nodes to be scheduled") 55 | fs.BoolVarP(&s.IgnorePodsOnExcludeNodes, "ignore-pods-on-excludes-nodes", "i", true, "Whether ignore the pods on the excludes nodes. By default true") 56 | fs.StringVar(&s.Snapshot, "snapshot", s.Snapshot, "Path of snapshot to initialize the world. Used when source-from is snapshot") 57 | fs.StringVar(&s.SourceFrom, "source-from", "Cluster", "Source of the init data. One of: Cluster|Snapshot") 58 | fs.StringVar(&s.ExitCondition, "exit-condition", "AllSucceed", "Exit condition of the simulator. 
One of: AllScheduled|AllSucceed") 59 | fs.BoolVar(&s.Verbose, "verbose", s.Verbose, "Verbose mode") 60 | fs.StringVarP(&s.SaveTo, "save", "s", s.SaveTo, "File path to save the simulation result") 61 | } 62 | -------------------------------------------------------------------------------- /app/cmds/clustercompression/options/clustercompression.go: -------------------------------------------------------------------------------- 1 | package options 2 | 3 | import ( 4 | "github.com/spf13/pflag" 5 | 6 | "github.com/k-cloud-labs/kluster-capacity/app/cmds" 7 | ) 8 | 9 | type ClusterCompressionOptions struct { 10 | cmds.Options 11 | FilterNodeOptions FilterNodeOptions 12 | } 13 | 14 | type FilterNodeOptions struct { 15 | ExcludeNotReadyNode bool 16 | ExcludeTaintNode bool 17 | IgnoreStaticPod bool 18 | IgnoreMirrorPod bool 19 | IgnoreCloneSet bool 20 | IgnoreVolumePod bool 21 | } 22 | 23 | type ClusterCompressionConfig struct { 24 | Options *ClusterCompressionOptions 25 | } 26 | 27 | func NewClusterCompressionConfig(opt *ClusterCompressionOptions) *ClusterCompressionConfig { 28 | return &ClusterCompressionConfig{ 29 | Options: opt, 30 | } 31 | } 32 | 33 | func NewClusterCompressionOptions() *ClusterCompressionOptions { 34 | return &ClusterCompressionOptions{} 35 | } 36 | 37 | func (s *ClusterCompressionOptions) AddFlags(fs *pflag.FlagSet) { 38 | fs.StringVar(&s.KubeConfig, "kubeconfig", s.KubeConfig, "Path to the kubeconfig file to use for the analysis.") 39 | fs.StringVarP(&s.OutputFormat, "output", "o", s.OutputFormat, "Output format. One of: json|default (Note: output is not versioned or guaranteed to be stable across releases)") 40 | fs.StringVar(&s.SchedulerConfig, "schedulerconfig", s.SchedulerConfig, "Path to JSON or YAML file containing scheduler configuration.") 41 | fs.IntVar(&s.MaxLimit, "max-limit", 0, "Number of instances of node to be scale down after which analysis stops.. By default unlimited.") 42 | fs.BoolVar(&s.FilterNodeOptions.ExcludeTaintNode, "exclude-taint-node", true, "Whether to filter nodes with taint when selecting nodes. By default true.") 43 | fs.BoolVar(&s.FilterNodeOptions.ExcludeNotReadyNode, "exclude-not-ready-node", true, "Whether to filter nodes with not ready when selecting nodes. By default true.") 44 | fs.BoolVar(&s.FilterNodeOptions.IgnoreStaticPod, "ignore-static-pod", false, "Whether to ignore nodes with static pods when filtering nodes. By default true.") 45 | fs.BoolVar(&s.FilterNodeOptions.IgnoreMirrorPod, "ignore-mirror-pod", false, "Whether to ignore nodes with mirror pods when filtering nodes. By default false.") 46 | fs.BoolVar(&s.FilterNodeOptions.IgnoreCloneSet, "ignore-cloneset", false, "Whether to ignore nodes with cloneSet pods when filtering nodes. By default false.") 47 | fs.BoolVar(&s.FilterNodeOptions.IgnoreVolumePod, "ignore-volume-pod", false, "Whether to ignore nodes with volume pods when filtering nodes. 
By default false.") 48 | fs.StringSliceVar(&s.ExcludeNodes, "exclude-nodes", s.ExcludeNodes, "Exclude nodes to be scheduled") 49 | fs.BoolVar(&s.Verbose, "verbose", s.Verbose, "Verbose mode") 50 | } 51 | -------------------------------------------------------------------------------- /pkg/utils/pod.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | 6 | uuid "github.com/satori/go.uuid" 7 | corev1 "k8s.io/api/core/v1" 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | "k8s.io/apimachinery/pkg/types" 10 | apiv1 "k8s.io/kubernetes/pkg/apis/core/v1" 11 | 12 | "github.com/k-cloud-labs/kluster-capacity/pkg" 13 | ) 14 | 15 | // IsMirrorPod returns true if the pod is a Mirror Pod. 16 | func IsMirrorPod(pod *corev1.Pod) bool { 17 | _, ok := pod.Annotations[corev1.MirrorPodAnnotationKey] 18 | return ok 19 | } 20 | 21 | // IsPodTerminating returns true if the pod DeletionTimestamp is set. 22 | func IsPodTerminating(pod *corev1.Pod) bool { 23 | return pod.DeletionTimestamp != nil 24 | } 25 | 26 | // IsStaticPod returns true if the pod is a static pod. 27 | func IsStaticPod(pod *corev1.Pod) bool { 28 | source, err := GetPodSource(pod) 29 | return err == nil && source != "api" 30 | } 31 | 32 | // IsCloneSetPod returns true if the pod is a IsCloneSetPod. 33 | func IsCloneSetPod(ownerRefList []metav1.OwnerReference) bool { 34 | for _, ownerRef := range ownerRefList { 35 | if ownerRef.Kind == "CloneSet" { 36 | return true 37 | } 38 | } 39 | return false 40 | } 41 | 42 | // IsDaemonsetPod returns true if the pod is a IsDaemonsetPod. 43 | func IsDaemonsetPod(ownerRefList []metav1.OwnerReference) bool { 44 | for _, ownerRef := range ownerRefList { 45 | if ownerRef.Kind == "DaemonSet" { 46 | return true 47 | } 48 | } 49 | return false 50 | } 51 | 52 | // IsPodWithLocalStorage returns true if the pod has local storage. 53 | func IsPodWithLocalStorage(pod *corev1.Pod) bool { 54 | for _, volume := range pod.Spec.Volumes { 55 | if volume.HostPath != nil || volume.EmptyDir != nil { 56 | return true 57 | } 58 | } 59 | 60 | return false 61 | } 62 | 63 | // GetPodSource returns the source of the pod based on the annotation. 
64 | func GetPodSource(pod *corev1.Pod) (string, error) { 65 | if pod.Annotations != nil { 66 | if source, ok := pod.Annotations["kubernetes.io/config.source"]; ok { 67 | return source, nil 68 | } 69 | } 70 | return "", fmt.Errorf("cannot get source of pod %q", pod.UID) 71 | } 72 | 73 | func InitPod(podTemplate *corev1.Pod) *corev1.Pod { 74 | pod := podTemplate.DeepCopy() 75 | 76 | apiv1.SetObjectDefaults_Pod(pod) 77 | 78 | // reset pod 79 | pod.Spec.NodeName = "" 80 | pod.Spec.SchedulerName = pkg.SchedulerName 81 | pod.Namespace = podTemplate.Namespace 82 | if pod.Namespace == "" { 83 | pod.Namespace = metav1.NamespaceDefault 84 | } 85 | pod.Status = corev1.PodStatus{} 86 | 87 | // use simulated pod name with an index to construct the name 88 | pod.ObjectMeta.Name = podTemplate.Name 89 | pod.ObjectMeta.UID = types.UID(uuid.NewV4().String()) 90 | 91 | // Add pod provisioner annotation 92 | if pod.ObjectMeta.Annotations == nil { 93 | pod.ObjectMeta.Annotations = map[string]string{} 94 | } 95 | pod.ObjectMeta.Annotations[pkg.PodProvisioner] = pkg.SchedulerName 96 | 97 | return pod 98 | } 99 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 10 | 11 | #### What type of PR is this? 12 | 13 | 27 | 28 | #### What this PR does / why we need it: 29 | 30 | #### Which issue(s) this PR fixes: 31 | 36 | Fixes # 37 | 38 | #### Special notes for your reviewer: 39 | 40 | #### Does this PR introduce a user-facing change? 41 | 48 | ```release-note 49 | 50 | ``` 51 | 52 | #### Additional documentation e.g., KEPs (Kubernetes Enhancement Proposals), usage docs, etc.: 53 | 54 | 69 | ```docs 70 | 71 | ``` 72 | -------------------------------------------------------------------------------- /app/root.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 k-cloud-labs org 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */
16 | package app
17 |
18 | import (
19 | 	"fmt"
20 | 	"os"
21 |
22 | 	"github.com/spf13/cobra"
23 | 	"github.com/spf13/viper"
24 | 	_ "k8s.io/client-go/plugin/pkg/client/auth"
25 | 	"k8s.io/klog/v2"
26 |
27 | 	"github.com/k-cloud-labs/kluster-capacity/app/cmds/capacityestimation"
28 | 	"github.com/k-cloud-labs/kluster-capacity/app/cmds/clustercompression"
29 | 	"github.com/k-cloud-labs/kluster-capacity/app/cmds/schedulersimulation"
30 | 	"github.com/k-cloud-labs/kluster-capacity/pkg/version/sharedcommand"
31 | )
32 |
33 | var cfgFile string
34 |
35 | // rootCmd represents the base command when called without any subcommands
36 | var rootCmd = &cobra.Command{
37 | 	Use:   "kluster-capacity",
38 | 	Short: "A tool which supports capacity estimation, scheduler simulation, and cluster compression.",
39 | 	Long:  `A tool which supports capacity estimation, scheduler simulation, and cluster compression.`,
40 | 	// Uncomment the following line if your bare application
41 | 	// has an action associated with it:
42 | 	//Run: func(cmd *cobra.Command, args []string) {},
43 | }
44 |
45 | // Execute adds all child commands to the root command and sets flags appropriately.
46 | // This is called by main.main(). It only needs to happen once to the rootCmd.
47 | func Execute() {
48 | 	err := rootCmd.Execute()
49 | 	if err != nil {
50 | 		os.Exit(1)
51 | 	}
52 | }
53 |
54 | func init() {
55 | 	klog.InitFlags(nil)
56 |
57 | 	cobra.OnInitialize(initConfig)
58 |
59 | 	// Here you will define your flags and configuration settings.
60 | 	// Cobra supports persistent flags, which, if defined here,
61 | 	// will be global for your application.
62 |
63 | 	rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default is $HOME/.kluster-capacity.yaml)")
64 |
65 | 	// Cobra also supports local flags, which will only run
66 | 	// when this action is called directly.
67 | 	rootCmd.Flags().BoolP("toggle", "t", false, "Help message for toggle")
68 |
69 | 	rootCmd.AddCommand(capacityestimation.NewCapacityEstimationCmd(), schedulersimulation.NewSchedulerSimulationCmd(), clustercompression.NewClusterCompressionCmd())
70 | 	rootCmd.AddCommand(sharedcommand.NewCmdVersion(os.Stdout, "kluster-capacity"))
71 | }
72 |
73 | // initConfig reads in config file and ENV variables if set.
74 | func initConfig() {
75 | 	if cfgFile != "" {
76 | 		// Use config file from the flag.
77 | 		viper.SetConfigFile(cfgFile)
78 | 	} else {
79 | 		// Find home directory.
80 | 		home, err := os.UserHomeDir()
81 | 		cobra.CheckErr(err)
82 |
83 | 		// Search config in home directory with name ".kluster-capacity" (without extension).
84 | 		viper.AddConfigPath(home)
85 | 		viper.SetConfigType("yaml")
86 | 		viper.SetConfigName(".kluster-capacity")
87 | 	}
88 |
89 | 	viper.AutomaticEnv() // read in environment variables that match
90 |
91 | 	// If a config file is found, read it in.
92 | 	if err := viper.ReadInConfig(); err == nil {
93 | 		fmt.Fprintln(os.Stderr, "Using config file:", viper.ConfigFileUsed())
94 | 	}
95 | }
96 |
--------------------------------------------------------------------------------
/app/cmds/clustercompression/clustercompression.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright © 2023 k-cloud-labs org
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 |     http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package clustercompression
18 |
19 | import (
20 | 	"errors"
21 | 	"flag"
22 | 	"fmt"
23 |
24 | 	"github.com/lithammer/dedent"
25 | 	"github.com/spf13/cobra"
26 | 	cliflag "k8s.io/component-base/cli/flag"
27 | 	"k8s.io/klog/v2"
28 |
29 | 	"github.com/k-cloud-labs/kluster-capacity/app/cmds/clustercompression/options"
30 | 	"github.com/k-cloud-labs/kluster-capacity/pkg"
31 | 	"github.com/k-cloud-labs/kluster-capacity/pkg/simulator/clustercompression"
32 | )
33 |
34 | var clusterCompressionLong = dedent.Dedent(`
35 | 	The "cc" tool simulates an API server by copying the initial state from the Kubernetes environment,
36 | 	using the configuration specified in KUBECONFIG. It attempts to scale down the number of nodes to
37 | 	the limit specified by the --max-limit flag, and if this flag is not provided, it schedules pods
38 | 	onto as few nodes as possible and provides a list of nodes that can be taken offline.
39 | `)
40 |
41 | func NewClusterCompressionCmd() *cobra.Command {
42 | 	opt := options.NewClusterCompressionOptions()
43 |
44 | 	var cmd = &cobra.Command{
45 | 		Use:           "cc",
46 | 		Short:         "cc uses simulated scheduling to calculate the number of nodes that can be taken offline in the cluster",
47 | 		Long:          clusterCompressionLong,
48 | 		SilenceErrors: false,
49 | 		RunE: func(cmd *cobra.Command, args []string) error {
50 | 			flag.Parse()
51 |
52 | 			opt.Default()
53 | 			err := validateOptions(opt)
54 | 			if err != nil {
55 | 				return err
56 | 			}
57 |
58 | 			err = run(opt)
59 | 			if err != nil {
60 | 				return err
61 | 			}
62 |
63 | 			return nil
64 | 		},
65 | 	}
66 |
67 | 	flags := cmd.Flags()
68 | 	flags.SetNormalizeFunc(cliflag.WordSepNormalizeFunc)
69 | 	flags.AddGoFlagSet(flag.CommandLine)
70 | 	opt.AddFlags(flags)
71 |
72 | 	return cmd
73 | }
74 |
75 | func validateOptions(opt *options.ClusterCompressionOptions) error {
76 | 	if len(opt.KubeConfig) == 0 {
77 | 		return errors.New("kubeconfig is missing")
78 | 	}
79 |
80 | 	return nil
81 | }
82 |
83 | func run(opt *options.ClusterCompressionOptions) error {
84 | 	defer klog.Flush()
85 | 	conf := options.NewClusterCompressionConfig(opt)
86 |
87 | 	reports, err := runCCSimulator(conf)
88 | 	if err != nil {
89 | 		klog.Errorf("runCCSimulator err: %s\n", err.Error())
90 | 		return err
91 | 	}
92 |
93 | 	if err := reports.Print(conf.Options.Verbose, conf.Options.OutputFormat); err != nil {
94 | 		return fmt.Errorf("error while printing: %v", err)
95 | 	}
96 | 	return nil
97 | }
98 |
99 | func runCCSimulator(conf *options.ClusterCompressionConfig) (pkg.Printer, error) {
100 | 	s, err := clustercompression.NewCCSimulatorExecutor(conf)
101 | 	if err != nil {
102 | 		return nil, err
103 | 	}
104 |
105 | 	err = s.Initialize()
106 | 	if err != nil {
107 | 		return nil, err
108 | 	}
109 |
110 | 	err = s.Run()
111 | 	if err != nil {
112 | 		return nil, err
113 | 	}
114 |
115 | 	return s.Report(), nil
116 | }
117 |
--------------------------------------------------------------------------------
/pkg/simulator/schedulersimulation/simulator.go:
--------------------------------------------------------------------------------
1 | package schedulersimulation
2 |
3 | import (
4 | 	"fmt"
5 | 	"sync"
6 |
7 |
corev1 "k8s.io/api/core/v1" 8 | "k8s.io/client-go/informers" 9 | "k8s.io/client-go/tools/cache" 10 | 11 | "github.com/k-cloud-labs/kluster-capacity/app/cmds/schedulersimulation/options" 12 | "github.com/k-cloud-labs/kluster-capacity/pkg" 13 | "github.com/k-cloud-labs/kluster-capacity/pkg/framework" 14 | "github.com/k-cloud-labs/kluster-capacity/pkg/utils" 15 | ) 16 | 17 | type simulator struct { 18 | pkg.Framework 19 | 20 | exitCondition string 21 | } 22 | 23 | func NewSSSimulatorExecutor(conf *options.SchedulerSimulationConfig) (pkg.Simulator, error) { 24 | kubeSchedulerConfig, err := utils.BuildKubeSchedulerCompletedConfig(conf.Options.SchedulerConfig, conf.Options.KubeConfig) 25 | if err != nil { 26 | return nil, err 27 | } 28 | 29 | kubeConfig, err := utils.BuildRestConfig(conf.Options.KubeConfig) 30 | if err != nil { 31 | return nil, err 32 | } 33 | 34 | framework, err := framework.NewKubeSchedulerFramework(kubeSchedulerConfig, kubeConfig, 35 | framework.WithNodeImages(false), 36 | framework.WithScheduledPods(false), 37 | framework.WithTerminatingPods(false), 38 | framework.WithExcludeNodes(conf.Options.ExcludeNodes), 39 | framework.WithSaveTo(conf.Options.SaveTo)) 40 | if err != nil { 41 | return nil, err 42 | } 43 | 44 | s := &simulator{ 45 | Framework: framework, 46 | exitCondition: conf.Options.ExitCondition, 47 | } 48 | 49 | err = s.addEventHandlers(kubeSchedulerConfig.InformerFactory) 50 | if err != nil { 51 | return nil, err 52 | } 53 | 54 | return s, nil 55 | } 56 | 57 | func (s *simulator) Run() error { 58 | return s.Framework.Run(nil) 59 | } 60 | 61 | func (s *simulator) Report() pkg.Printer { 62 | return generateReport(s.Status()) 63 | } 64 | 65 | func (s *simulator) addEventHandlers(informerFactory informers.SharedInformerFactory) (err error) { 66 | succeedPodMap := sync.Map{} 67 | failedPodMap := sync.Map{} 68 | keyFunc := func(pod *corev1.Pod) string { 69 | return pod.Namespace + "/" + pod.Name 70 | } 71 | count := 0 72 | _, _ = informerFactory.Core().V1().Pods().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 73 | AddFunc: func(obj interface{}) { 74 | pod := obj.(*corev1.Pod) 75 | if len(pod.Spec.NodeName) > 0 { 76 | succeedPodMap.Store(keyFunc(pod), true) 77 | } 78 | count++ 79 | }, 80 | UpdateFunc: func(oldObj, newObj interface{}) { 81 | pod := newObj.(*corev1.Pod) 82 | key := keyFunc(pod) 83 | if len(pod.Spec.NodeName) > 0 { 84 | succeedPodMap.Store(key, true) 85 | if _, ok := failedPodMap.Load(key); ok { 86 | failedPodMap.Delete(key) 87 | } 88 | } else { 89 | for _, cond := range pod.Status.Conditions { 90 | if cond.Type == corev1.PodScheduled && cond.Status == corev1.ConditionFalse { 91 | failedPodMap.Store(key, true) 92 | } 93 | } 94 | } 95 | 96 | var ( 97 | succeedCount int 98 | failedCount int 99 | stop bool 100 | reason string 101 | ) 102 | succeedPodMap.Range(func(key, value any) bool { 103 | succeedCount++ 104 | return true 105 | }) 106 | failedPodMap.Range(func(key, value any) bool { 107 | failedCount++ 108 | return true 109 | }) 110 | 111 | if s.exitCondition == options.ExitWhenAllScheduled && succeedCount+failedCount == count { 112 | stop = true 113 | reason = "AllScheduled: %d pod(s) have been scheduled once." 114 | } else if s.exitCondition == options.ExitWhenAllSucceed && succeedCount == count { 115 | stop = true 116 | reason = "AllSucceed: %d pod(s) have been scheduled successfully." 
117 | } 118 | 119 | if stop { 120 | err = s.Stop(fmt.Sprintf(reason, count)) 121 | } 122 | }, 123 | }) 124 | 125 | return 126 | } 127 | -------------------------------------------------------------------------------- /pkg/simulator/clustercompression/report.go: -------------------------------------------------------------------------------- 1 | package clustercompression 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "time" 7 | 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | 10 | "github.com/k-cloud-labs/kluster-capacity/pkg" 11 | "github.com/k-cloud-labs/kluster-capacity/pkg/utils" 12 | ) 13 | 14 | type ClusterCompressionReview struct { 15 | metav1.TypeMeta 16 | Status ClusterCompressionReviewReviewStatus `json:"status"` 17 | } 18 | 19 | type ClusterCompressionReviewReviewStatus struct { 20 | CreationTimestamp time.Time `json:"creationTimestamp"` 21 | StopReason *ClusterCompressionReviewScheduleStopReason `json:"stopReason"` 22 | ScaleDownNodeNames []string `json:"scaleDownNodeNames"` 23 | SelectNodeCount int `json:"SelectNodeCount"` 24 | SchedulerCount int `json:"schedulerCount"` 25 | FailedSchedulerCount int `json:"failedSchedulerCount"` 26 | } 27 | 28 | type ClusterCompressionReviewScheduleStopReason struct { 29 | StopType string `json:"stopType"` 30 | StopMessage string `json:"stopMessage"` 31 | } 32 | 33 | func generateReport(status *pkg.Status) *ClusterCompressionReview { 34 | return &ClusterCompressionReview{ 35 | Status: getReviewStatus(status), 36 | } 37 | } 38 | 39 | func getReviewStatus(status *pkg.Status) ClusterCompressionReviewReviewStatus { 40 | return ClusterCompressionReviewReviewStatus{ 41 | CreationTimestamp: time.Now(), 42 | StopReason: getMainStopReason(status.StopReason), 43 | ScaleDownNodeNames: status.NodesToScaleDown, 44 | SelectNodeCount: status.SelectNodeCount, 45 | SchedulerCount: status.SchedulerCount, 46 | FailedSchedulerCount: status.FailedSchedulerCount, 47 | } 48 | } 49 | 50 | func getMainStopReason(message string) *ClusterCompressionReviewScheduleStopReason { 51 | slicedMessage := strings.Split(message, "\n") 52 | colon := strings.Index(slicedMessage[0], ":") 53 | 54 | reason := &ClusterCompressionReviewScheduleStopReason{ 55 | StopType: slicedMessage[0][:colon], 56 | StopMessage: strings.Trim(slicedMessage[0][colon+1:], " "), 57 | } 58 | return reason 59 | } 60 | 61 | func (r *ClusterCompressionReview) Print(verbose bool, format string) error { 62 | switch format { 63 | case "json": 64 | return utils.PrintJson(r) 65 | default: 66 | return clusterCapacityReviewDefaultPrint(r, verbose) 67 | } 68 | } 69 | 70 | func clusterCapacityReviewDefaultPrint(r *ClusterCompressionReview, verbose bool) error { 71 | if r != nil && len(r.Status.ScaleDownNodeNames) > 0 { 72 | if verbose { 73 | fmt.Printf("Select node %d times.\n", r.Status.SelectNodeCount) 74 | fmt.Printf("Scheduled pod %d times, with %d scheduling failure.\n", r.Status.SchedulerCount+r.Status.FailedSchedulerCount, r.Status.FailedSchedulerCount) 75 | fmt.Printf("%d node(s) in the cluster can be scaled down.\n", len(r.Status.ScaleDownNodeNames)) 76 | fmt.Printf("\nTermination reason: %v: %v\n", r.Status.StopReason.StopType, r.Status.StopReason.StopMessage) 77 | fmt.Printf("\nnodes selected to be scaled down:\n") 78 | 79 | for i := range r.Status.ScaleDownNodeNames { 80 | fmt.Printf("\t- %s\n", r.Status.ScaleDownNodeNames[i]) 81 | } 82 | } else { 83 | for i := range r.Status.ScaleDownNodeNames { 84 | fmt.Println(r.Status.ScaleDownNodeNames[i]) 85 | } 86 | } 87 | } else { 88 | fmt.Printf("Select node %d 
times.\n", r.Status.SelectNodeCount) 89 | fmt.Printf("Scheduled pod %d times, with %d scheduling failure.\n", r.Status.SchedulerCount+r.Status.FailedSchedulerCount, r.Status.FailedSchedulerCount) 90 | fmt.Println("No nodes in the cluster can be scaled down.") 91 | fmt.Printf("\nTermination reason: %v: %v\n", r.Status.StopReason.StopType, r.Status.StopReason.StopMessage) 92 | } 93 | 94 | return nil 95 | } 96 | -------------------------------------------------------------------------------- /app/cmds/schedulersimulation/schedulersimulation.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 k-cloud-labs org 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package schedulersimulation 18 | 19 | import ( 20 | "errors" 21 | "flag" 22 | "fmt" 23 | 24 | "github.com/lithammer/dedent" 25 | "github.com/spf13/cobra" 26 | cliflag "k8s.io/component-base/cli/flag" 27 | "k8s.io/klog/v2" 28 | 29 | "github.com/k-cloud-labs/kluster-capacity/app/cmds/schedulersimulation/options" 30 | "github.com/k-cloud-labs/kluster-capacity/pkg" 31 | "github.com/k-cloud-labs/kluster-capacity/pkg/simulator/schedulersimulation" 32 | ) 33 | 34 | var schedulerSimulationLong = dedent.Dedent(` 35 | ss simulates an API server with initial state copied from the Kubernetes environment 36 | with its configuration specified in KUBECONFIG. The simulated API server tries to schedule the number of 37 | pods from existing cluster. 
38 | `) 39 | 40 | func NewSchedulerSimulationCmd() *cobra.Command { 41 | opt := options.NewSchedulerSimulationOptions() 42 | 43 | // ssCmd represents the ss command 44 | var cmd = &cobra.Command{ 45 | Use: "ss", 46 | Short: "ss is used for simulating scheduling of pods", 47 | Long: schedulerSimulationLong, 48 | SilenceErrors: false, 49 | RunE: func(cmd *cobra.Command, args []string) error { 50 | flag.Parse() 51 | 52 | opt.Default() 53 | err := validate(opt) 54 | if err != nil { 55 | return err 56 | } 57 | 58 | err = run(opt) 59 | if err != nil { 60 | return err 61 | } 62 | 63 | return nil 64 | }, 65 | } 66 | 67 | flags := cmd.Flags() 68 | flags.SetNormalizeFunc(cliflag.WordSepNormalizeFunc) 69 | flags.AddGoFlagSet(flag.CommandLine) 70 | opt.AddFlags(flags) 71 | 72 | return cmd 73 | } 74 | 75 | func validate(opt *options.SchedulerSimulationOptions) error { 76 | if opt.SourceFrom == options.FromCluster && len(opt.KubeConfig) == 0 { 77 | return errors.New("kubeconfig must be specified when source-from is cluster") 78 | } 79 | 80 | if opt.SourceFrom == options.FromSnapshot && len(opt.Snapshot) == 0 { 81 | return errors.New("snapshot must be specified when source-from is snapshot") 82 | } 83 | 84 | if opt.ExitCondition != options.ExitWhenAllSucceed && opt.ExitCondition != options.ExitWhenAllScheduled { 85 | return errors.New("exit condition must be AllSucceed or AllScheduled") 86 | } 87 | 88 | if len(opt.KubeConfig) == 0 { 89 | return errors.New("kubeconfig is missing") 90 | } 91 | 92 | return nil 93 | } 94 | 95 | func run(opt *options.SchedulerSimulationOptions) error { 96 | defer klog.Flush() 97 | conf := options.NewSchedulerSimulationConfig(opt) 98 | 99 | // TODO: init simulator from snapshot 100 | //if opt.SourceFrom == options.FromSnapshot { 101 | //} 102 | 103 | reports, err := runSimulator(conf) 104 | if err != nil { 105 | return err 106 | } 107 | 108 | if err := reports.Print(opt.Verbose, opt.OutputFormat); err != nil { 109 | return fmt.Errorf("error while printing: %v", err) 110 | } 111 | 112 | return nil 113 | } 114 | 115 | func runSimulator(conf *options.SchedulerSimulationConfig) (pkg.Printer, error) { 116 | s, err := schedulersimulation.NewSSSimulatorExecutor(conf) 117 | if err != nil { 118 | return nil, err 119 | } 120 | 121 | err = s.Initialize(conf.InitObjs...) 122 | if err != nil { 123 | return nil, err 124 | } 125 | 126 | err = s.Run() 127 | if err != nil { 128 | return nil, err 129 | } 130 | 131 | return s.Report(), nil 132 | } 133 | -------------------------------------------------------------------------------- /app/cmds/capacityestimation/capacityestimation.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 k-cloud-labs org 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */
16 | 
17 | package capacityestimation
18 | 
19 | import (
20 | 	"errors"
21 | 	"flag"
22 | 	"fmt"
23 | 
24 | 	"github.com/lithammer/dedent"
25 | 	"github.com/spf13/cobra"
26 | 	cliflag "k8s.io/component-base/cli/flag"
27 | 	"k8s.io/klog/v2"
28 | 
29 | 	"github.com/k-cloud-labs/kluster-capacity/app/cmds/capacityestimation/options"
30 | 	"github.com/k-cloud-labs/kluster-capacity/pkg"
31 | 	"github.com/k-cloud-labs/kluster-capacity/pkg/simulator/capacityestimation"
32 | )
33 | 
34 | var capacityEstimationLong = dedent.Dedent(`
35 | 	ce simulates an API server with initial state copied from the Kubernetes environment
36 | 	with its configuration specified in KUBECONFIG. The simulated API server tries to schedule the number of
37 | 	pods specified by the --max-limit flag. If the --max-limit flag is not specified, pods are scheduled until
38 | 	the simulated API server runs out of resources.
39 | `)
40 | 
41 | func NewCapacityEstimationCmd() *cobra.Command {
42 | 	opt := options.NewCapacityEstimationOptions()
43 | 
44 | 	var cmd = &cobra.Command{
45 | 		Use:           "ce --kubeconfig KUBECONFIG --pods-from-template PODYAML | --pods-from-cluster Namespace/Name",
46 | 		Short:         "ce is used to estimate the remaining capacity for a specified pod",
47 | 		Long:          capacityEstimationLong,
48 | 		SilenceErrors: false,
49 | 		RunE: func(cmd *cobra.Command, args []string) error {
50 | 			flag.Parse()
51 | 
52 | 			opt.Default()
53 | 			err := validate(opt)
54 | 			if err != nil {
55 | 				return err
56 | 			}
57 | 
58 | 			err = run(opt)
59 | 			if err != nil {
60 | 				return err
61 | 			}
62 | 
63 | 			return nil
64 | 		},
65 | 	}
66 | 
67 | 	flags := cmd.Flags()
68 | 	flags.SetNormalizeFunc(cliflag.WordSepNormalizeFunc)
69 | 	flags.AddGoFlagSet(flag.CommandLine)
70 | 	opt.AddFlags(flags)
71 | 
72 | 	return cmd
73 | }
74 | 
75 | func validate(opt *options.CapacityEstimationOptions) error {
76 | 	if len(opt.PodsFromTemplate) == 0 && len(opt.PodsFromCluster) == 0 {
77 | 		return errors.New("one of --pods-from-template and --pods-from-cluster must be specified")
78 | 	}
79 | 
80 | 	if len(opt.PodsFromTemplate) != 0 && len(opt.PodsFromCluster) != 0 {
81 | 		return errors.New("--pods-from-template and --pods-from-cluster are mutually exclusive")
82 | 	}
83 | 
84 | 	if len(opt.KubeConfig) == 0 {
85 | 		return errors.New("kubeconfig is missing")
86 | 	}
87 | 
88 | 	return nil
89 | }
90 | 
91 | func run(opt *options.CapacityEstimationOptions) error {
92 | 	defer klog.Flush()
93 | 	conf := options.NewCapacityEstimationConfig(opt)
94 | 
95 | 	err := conf.ParseAPISpec()
96 | 	if err != nil {
97 | 		return fmt.Errorf("failed to parse pod spec file: %v", err)
98 | 	}
99 | 
100 | 	reports, err := runSimulator(conf)
101 | 	if err != nil {
102 | 		return err
103 | 	}
104 | 
105 | 	if err := reports.Print(conf.Options.Verbose, conf.Options.OutputFormat); err != nil {
106 | 		return fmt.Errorf("error while printing: %v", err)
107 | 	}
108 | 
109 | 	return nil
110 | }
111 | 
112 | func runSimulator(conf *options.CapacityEstimationConfig) (pkg.Printer, error) {
113 | 	s, err := capacityestimation.NewCESimulatorExecutor(conf)
114 | 	if err != nil {
115 | 		return nil, err
116 | 	}
117 | 
118 | 	err = s.Initialize(conf.InitObjs...)
119 | 	if err != nil {
120 | 		return nil, err
121 | 	}
122 | 
123 | 	err = s.Run()
124 | 	if err != nil {
125 | 		return nil, err
126 | 	}
127 | 
128 | 	return s.Report(), nil
129 | }
130 | 
--------------------------------------------------------------------------------
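Both command files above drive their simulators through the same two small contracts from the `pkg` package. `pkg/interface.go` is not part of this excerpt, so the sketch below is inferred from the call sites (`Initialize`, `Run`, `Report`, and `reports.Print`); it is not copied from the source and may differ in detail:

```go
// Inferred sketch only; the authoritative definitions live in
// pkg/interface.go, which is not shown in this listing.
package pkg

import "k8s.io/apimachinery/pkg/runtime"

// Printer renders a simulation report as plain text, JSON, or YAML,
// matching the reports.Print(verbose, format) calls above.
type Printer interface {
	Print(verbose bool, format string) error
}

// Simulator is the lifecycle the ce/cc/ss executors follow:
// seed the fake cluster, run the scheduling loop, emit a report.
type Simulator interface {
	Initialize(objs ...runtime.Object) error
	Run() error
	Report() Printer
}
```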
/app/cmds/capacityestimation/options/capacityestimation.go:
--------------------------------------------------------------------------------
1 | package options
2 | 
3 | import (
4 | 	"context"
5 | 	"fmt"
6 | 	"io"
7 | 	"net/http"
8 | 	"os"
9 | 	"path/filepath"
10 | 	"strings"
11 | 
12 | 	"github.com/spf13/pflag"
13 | 	corev1 "k8s.io/api/core/v1"
14 | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
15 | 	"k8s.io/apimachinery/pkg/runtime"
16 | 	"k8s.io/apimachinery/pkg/util/yaml"
17 | 	clientset "k8s.io/client-go/kubernetes"
18 | 
19 | 	"github.com/k-cloud-labs/kluster-capacity/app/cmds"
20 | 	"github.com/k-cloud-labs/kluster-capacity/pkg/utils"
21 | )
22 | 
23 | type CapacityEstimationOptions struct {
24 | 	cmds.Options
25 | 	PodsFromTemplate []string
26 | 	PodsFromCluster  NamespaceNames
27 | }
28 | 
29 | type CapacityEstimationConfig struct {
30 | 	Pods     []*corev1.Pod
31 | 	InitObjs []runtime.Object
32 | 	Options  *CapacityEstimationOptions
33 | }
34 | 
35 | func NewCapacityEstimationConfig(opt *CapacityEstimationOptions) *CapacityEstimationConfig {
36 | 	return &CapacityEstimationConfig{
37 | 		Options: opt,
38 | 	}
39 | }
40 | 
41 | func NewCapacityEstimationOptions() *CapacityEstimationOptions {
42 | 	return &CapacityEstimationOptions{}
43 | }
44 | 
45 | func (s *CapacityEstimationOptions) AddFlags(fs *pflag.FlagSet) {
46 | 	fs.StringVar(&s.KubeConfig, "kubeconfig", s.KubeConfig, "Path to the kubeconfig file to use for the analysis")
47 | 	fs.StringSliceVar(&s.PodsFromTemplate, "pods-from-template", s.PodsFromTemplate, "Path to JSON or YAML file containing pod definition. Comma separated and exclusive with --pods-from-cluster")
48 | 	fs.Var(&s.PodsFromCluster, "pods-from-cluster", "Namespace/Name of the pod from the existing cluster. Comma separated and exclusive with --pods-from-template")
49 | 	fs.IntVar(&s.MaxLimit, "max-limit", 0, "Number of instances of pod to be scheduled after which analysis stops. By default unlimited")
50 | 	fs.StringVar(&s.SchedulerConfig, "schedulerconfig", s.SchedulerConfig, "Path to JSON or YAML file containing scheduler configuration")
51 | 	fs.BoolVar(&s.Verbose, "verbose", s.Verbose, "Verbose mode")
52 | 	fs.StringVarP(&s.OutputFormat, "output", "o", s.OutputFormat, "Output format. One of: json|yaml (Note: output is not versioned or guaranteed to be stable across releases)")
53 | 	fs.StringSliceVar(&s.ExcludeNodes, "exclude-nodes", s.ExcludeNodes, "Nodes to be excluded from the simulated scheduling")
54 | }
55 | 
56 | func (s *CapacityEstimationConfig) ParseAPISpec() error {
57 | 	getPodFromTemplate := func(template string) (*corev1.Pod, error) {
58 | 		var (
59 | 			err          error
60 | 			versionedPod = &corev1.Pod{}
61 | 			spec         io.Reader
62 | 		)
63 | 
64 | 		if strings.HasPrefix(template, "http://") || strings.HasPrefix(template, "https://") {
65 | 			response, err := http.Get(template)
66 | 			if err != nil {
67 | 				return nil, err
68 | 			}
69 | 			defer response.Body.Close()
70 | 			if response.StatusCode != http.StatusOK {
71 | 				return nil, fmt.Errorf("unable to read URL %q, server reported %v, status code=%v", template, response.Status, response.StatusCode)
72 | 			}
73 | 			spec = response.Body
74 | 		} else {
75 | 			filename, _ := filepath.Abs(template)
76 | 			f, err := os.Open(filename)
77 | 			if err != nil {
78 | 				return nil, fmt.Errorf("failed to open config file: %v", err)
79 | 			}
80 | 			defer f.Close()
81 | 			spec = f
82 | 		}
83 | 
84 | 		decoder := yaml.NewYAMLOrJSONDecoder(spec, 4096)
85 | 		err = decoder.Decode(versionedPod)
86 | 		if err != nil {
87 | 			return nil, fmt.Errorf("failed to decode config file: %v", err)
88 | 		}
89 | 
90 | 		return versionedPod, nil
91 | 	}
92 | 
93 | 	if len(s.Options.PodsFromTemplate) != 0 {
94 | 		for _, template := range s.Options.PodsFromTemplate {
95 | 			pod, err := getPodFromTemplate(template)
96 | 			if err != nil {
97 | 				return err
98 | 			}
99 | 			s.Pods = append(s.Pods, pod)
100 | 		}
101 | 	} else {
102 | 		cfg, err := utils.BuildRestConfig(s.Options.KubeConfig)
103 | 		if err != nil {
104 | 			return err
105 | 		}
106 | 
107 | 		kubeClient, err := clientset.NewForConfig(cfg)
108 | 		if err != nil {
109 | 			return err
110 | 		}
111 | 
112 | 		for _, nn := range s.Options.PodsFromCluster {
113 | 			pod, err := kubeClient.CoreV1().Pods(nn.Namespace).Get(context.TODO(), nn.Name, metav1.GetOptions{ResourceVersion: "0"})
114 | 			if err != nil {
115 | 				return err
116 | 			}
117 | 			s.Pods = append(s.Pods, pod)
118 | 		}
119 | 	}
120 | 
121 | 	return nil
122 | }
123 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Go information
2 | GO ?= go
3 | GOFMT ?= gofmt "-s"
4 | GOOS ?= $(shell go env GOOS)
5 | GOARCH ?= $(shell go env GOARCH)
6 | SOURCES := $(shell find . -type f -name '*.go')
7 | 
8 | GOFILES := $(shell find . -name "*.go" | grep -v vendor)
9 | TESTFOLDER := $(shell $(GO) list ./... | grep -v examples)
10 | TESTTAGS ?= ""
11 | VETPACKAGES ?= $(shell $(GO) list ./... | grep -v /examples/)
12 | 
13 | # Git information
14 | GIT_VERSION ?= $(shell git describe --tags --dirty --always)
15 | GIT_COMMIT_HASH ?= $(shell git rev-parse HEAD)
16 | GIT_TREESTATE = "clean"
-eq 1 ]; then echo "1"; fi) 18 | ifeq ($(GIT_DIFF), 1) 19 | GIT_TREESTATE = "dirty" 20 | endif 21 | BUILDDATE = $(shell date -u +'%Y-%m-%dT%H:%M:%SZ') 22 | 23 | LDFLAGS := "-X github.com/k-cloud-labs/kluster-capacity/pkg/version.gitVersion=$(GIT_VERSION) \ 24 | -X github.com/k-cloud-labs/kluster-capacity/pkg/version.gitCommit=$(GIT_COMMIT_HASH) \ 25 | -X github.com/k-cloud-labs/kluster-capacity/pkg/version.gitTreeState=$(GIT_TREESTATE) \ 26 | -X github.com/k-cloud-labs/kluster-capacity/pkg/version.buildDate=$(BUILDDATE)" 27 | 28 | # Set your version by env or using latest tags from git 29 | VERSION?="" 30 | ifeq ($(VERSION), "") 31 | LATEST_TAG=$(shell git describe --tags --always) 32 | ifeq ($(LATEST_TAG),) 33 | # Forked repo may not sync tags from upstream, so give it a default tag to make CI happy. 34 | VERSION="unknown" 35 | else 36 | VERSION=$(LATEST_TAG) 37 | endif 38 | endif 39 | 40 | # Setting SHELL to bash allows bash commands to be executed by recipes. 41 | # This is a requirement for 'setup-envtest.sh' in the test target. 42 | # Options are set to exit when a recipe line exits non-zero or a piped command fails. 43 | SHELL = /usr/bin/env bash -o pipefail 44 | .SHELLFLAGS = -ec 45 | 46 | ##@ General 47 | 48 | # The help target prints out all targets with their descriptions organized 49 | # beneath their categories. The categories are represented by '##@' and the 50 | # target descriptions by '##'. The awk commands is responsible for reading the 51 | # entire set of makefiles included in this invocation, looking for lines of the 52 | # file as xyz: ## something, and then pretty-format the target and help. Then, 53 | # if there's a line with ##@ something, that gets pretty-printed as a category. 54 | # More info on the usage of ANSI control characters for terminal formatting: 55 | # https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters 56 | # More info on the awk command: 57 | # http://linuxcommand.org/lc3_adv_awk.php 58 | 59 | .PHONY: help 60 | help: ## Display this help. 
61 | 	@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n  make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf "  \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
62 | 
63 | .PHONY: checkall
64 | checkall: fmt-check vet ## Run all checks
65 | 	hack/verify-staticcheck.sh
66 | 	hack/verify-import-aliases.sh
67 | 
68 | .PHONY: build
69 | build: $(SOURCES) ## Build the kluster-capacity binary
70 | 	@CGO_ENABLED=0 GOOS=$(GOOS) go build \
71 | 		-ldflags $(LDFLAGS) \
72 | 		-o kluster-capacity \
73 | 		main.go
74 | 
75 | .PHONY: clean
76 | clean: ## Remove the kluster-capacity binary
77 | 	@rm -rf kluster-capacity
78 | 
79 | .PHONY: fmt
80 | fmt: ## Format project files
81 | 	@$(GOFMT) -w $(GOFILES)
82 | 
83 | .PHONY: fmt-check
84 | fmt-check: ## Check project files format info
85 | 	@diff=$$($(GOFMT) -d $(GOFILES)); \
86 | 	if [ -n "$$diff" ]; then \
87 | 		echo "Please run 'make fmt' and commit the result:"; \
88 | 		echo "$${diff}"; \
89 | 		exit 1; \
90 | 	fi;
91 | 
92 | .PHONY: vet
93 | vet:
94 | 	@$(GO) vet $(VETPACKAGES)
95 | 
96 | .PHONY: test
97 | test: fmt-check vet ## Run project unit test and generate coverage result
98 | 	echo "mode: count" > coverage.out
99 | 	for d in $(TESTFOLDER); do \
100 | 		$(GO) test -tags $(TESTTAGS) -v -covermode=count -coverprofile=profile.out $$d > tmp.out; \
101 | 		cat tmp.out; \
102 | 		if grep -q "^--- FAIL" tmp.out; then \
103 | 			rm tmp.out; \
104 | 			exit 1; \
105 | 		elif grep -q "build failed" tmp.out; then \
106 | 			rm tmp.out; \
107 | 			exit 1; \
108 | 		elif grep -q "setup failed" tmp.out; then \
109 | 			rm tmp.out; \
110 | 			exit 1; \
111 | 		fi; \
112 | 		if [ -f profile.out ]; then \
113 | 			cat profile.out | grep -v "mode:" >> coverage.out; \
114 | 			rm profile.out; \
115 | 		fi; \
116 | 	done
117 | 
--------------------------------------------------------------------------------
/pkg/simulator/schedulersimulation/report.go:
--------------------------------------------------------------------------------
1 | package schedulersimulation
2 | 
3 | import (
4 | 	"fmt"
5 | 
6 | 	corev1 "k8s.io/api/core/v1"
7 | 	"k8s.io/kubernetes/pkg/scheduler/framework"
8 | 
9 | 	"github.com/k-cloud-labs/kluster-capacity/pkg"
10 | 	"github.com/k-cloud-labs/kluster-capacity/pkg/utils"
11 | )
12 | 
13 | type SchedulerSimulationReview struct {
14 | 	UnschedulablePods []corev1.Pod     `json:"unschedulablePods"`
15 | 	Details           []ScheduleDetail `json:"details"`
16 | 	StopReason        string           `json:"stopReason"`
17 | }
18 | 
19 | type ScheduleDetail struct {
20 | 	NodeName        string              `json:"nodeName"`
21 | 	Replicas        int                 `json:"replicas"`
22 | 	NodeAllocatable corev1.ResourceList `json:"nodeAllocatable"`
23 | 	PodRequest      framework.Resource  `json:"podRequest"`
24 | 	OnlyDSPod       bool                `json:"onlyDSPod"`
25 | }
26 | 
27 | func (r *SchedulerSimulationReview) Print(verbose bool, format string) error {
28 | 	switch format {
29 | 	case "json":
30 | 		return utils.PrintJson(r)
31 | 	case "yaml":
32 | 		return utils.PrintYaml(r)
33 | 	case "":
34 | 		prettyPrint(r, verbose)
35 | 		return nil
36 | 	default:
37 | 		return fmt.Errorf("output format %q not recognized", format)
38 | 	}
39 | }
40 | 
41 | func prettyPrint(r *SchedulerSimulationReview, verbose bool) {
42 | 	fmt.Printf("Termination reason: %s\n\n", r.StopReason)
43 | 	if len(r.UnschedulablePods) > 0 {
44 | 		fmt.Printf("Unschedulable pods(%d):\n", len(r.UnschedulablePods))
45 | 	}
46 | 
47 | 	for _, pod := range r.UnschedulablePods {
48 | 		if verbose {
49 | 			fmt.Printf("- %v/%s, reason: %s\n", pod.Namespace, pod.Name, getUnschedulableReason(&pod))
50 | 		} else {
fmt.Printf("- %v/%s\n", pod.Namespace, pod.Name) 52 | } 53 | } 54 | 55 | if len(r.UnschedulablePods) > 0 { 56 | fmt.Printf("\n\n") 57 | } 58 | fmt.Printf("Pod distribution among %d nodes:\n", len(r.Details)) 59 | 60 | for _, detail := range r.Details { 61 | if verbose { 62 | var msg string 63 | if detail.OnlyDSPod { 64 | msg = "Only DaemonSet Pod" 65 | } 66 | fmt.Printf("\t- %v: %v instance(s) %s\n", detail.NodeName, detail.Replicas, msg) 67 | } else { 68 | fmt.Printf("\t- %v\n", detail.NodeName) 69 | } 70 | } 71 | } 72 | 73 | func getUnschedulableReason(pod *corev1.Pod) string { 74 | for _, podCondition := range pod.Status.Conditions { 75 | // Only for pending pods provisioned by ce 76 | if podCondition.Type == corev1.PodScheduled && podCondition.Status == corev1.ConditionFalse && 77 | podCondition.Reason == corev1.PodReasonUnschedulable { 78 | return podCondition.Message 79 | } 80 | } 81 | 82 | return "" 83 | } 84 | 85 | func generateReport(status *pkg.Status) *SchedulerSimulationReview { 86 | details := make([]ScheduleDetail, 0) 87 | unschedulablePods := make([]corev1.Pod, 0) 88 | nodePodMap := make(map[string][]corev1.Pod) 89 | 90 | for _, pod := range status.Pods { 91 | nodePodMap[pod.Spec.NodeName] = append(nodePodMap[pod.Spec.NodeName], pod) 92 | } 93 | 94 | for node, pods := range nodePodMap { 95 | if node == "" { 96 | unschedulablePods = append(unschedulablePods, pods...) 97 | continue 98 | } 99 | 100 | var request framework.Resource 101 | 102 | for _, pod := range pods { 103 | addResource(&request, utils.ComputePodResourceRequest(&pod)) 104 | } 105 | 106 | detail := ScheduleDetail{ 107 | NodeName: node, 108 | Replicas: len(nodePodMap[node]), 109 | PodRequest: request, 110 | OnlyDSPod: func(pods []corev1.Pod) bool { 111 | for i := range pods { 112 | if !utils.IsDaemonsetPod(pods[i].OwnerReferences) { 113 | return false 114 | } 115 | } 116 | 117 | return true 118 | }(nodePodMap[node]), 119 | } 120 | if node, ok := status.Nodes[node]; ok { 121 | detail.NodeAllocatable = node.Status.Allocatable 122 | } 123 | details = append(details, detail) 124 | } 125 | 126 | return &SchedulerSimulationReview{ 127 | UnschedulablePods: unschedulablePods, 128 | Details: details, 129 | StopReason: status.StopReason, 130 | } 131 | } 132 | 133 | func addResource(source *framework.Resource, res *framework.Resource) { 134 | source.MilliCPU += res.MilliCPU 135 | source.Memory += res.Memory 136 | source.EphemeralStorage += res.EphemeralStorage 137 | if source.ScalarResources == nil && len(res.ScalarResources) > 0 { 138 | source.ScalarResources = map[corev1.ResourceName]int64{} 139 | } 140 | for rName, rQuant := range res.ScalarResources { 141 | source.ScalarResources[rName] += rQuant 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /pkg/simulator/clustercompression/nodeFilter.go: -------------------------------------------------------------------------------- 1 | package clustercompression 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "strings" 7 | 8 | corev1 "k8s.io/api/core/v1" 9 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 10 | clientset "k8s.io/client-go/kubernetes" 11 | "k8s.io/client-go/util/workqueue" 12 | 13 | "github.com/k-cloud-labs/kluster-capacity/app/cmds/clustercompression/options" 14 | ) 15 | 16 | const ( 17 | NodeScaledDownFailedLabel = "kc.k-cloud-labs.io/node-scale-down-failed" 18 | NodeScaledDownSuccessLabel = "kc.k-cloud-labs.io/node-scale-down-success" 19 | KubernetesMasterNodeLabel = "node-role.kubernetes.io/master" 20 | 
/pkg/simulator/clustercompression/nodeFilter.go:
--------------------------------------------------------------------------------
1 | package clustercompression
2 | 
3 | import (
4 | 	"context"
5 | 	"fmt"
6 | 	"strings"
7 | 
8 | 	corev1 "k8s.io/api/core/v1"
9 | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10 | 	clientset "k8s.io/client-go/kubernetes"
11 | 	"k8s.io/client-go/util/workqueue"
12 | 
13 | 	"github.com/k-cloud-labs/kluster-capacity/app/cmds/clustercompression/options"
14 | )
15 | 
16 | const (
17 | 	NodeScaledDownFailedLabel  = "kc.k-cloud-labs.io/node-scale-down-failed"
18 | 	NodeScaledDownSuccessLabel = "kc.k-cloud-labs.io/node-scale-down-success"
19 | 	KubernetesMasterNodeLabel  = "node-role.kubernetes.io/master"
20 | 	NodeScaleDownDisableLabel  = "kc.k-cloud-labs.io/scale-down-disabled"
21 | )
22 | 
23 | type NodeFilter interface {
24 | 	SelectNode() *Status
25 | 	Done()
26 | }
27 | 
28 | func defaultFilterFunc() FilterFunc {
29 | 	return func(node *corev1.Node) *FilterStatus {
30 | 		if node.Labels != nil {
31 | 			_, ok := node.Labels[KubernetesMasterNodeLabel]
32 | 			if ok {
33 | 				return &FilterStatus{
34 | 					Success:   false,
35 | 					ErrReason: ErrReasonMasterNode,
36 | 				}
37 | 			}
38 | 
39 | 			_, ok = node.Labels[NodeScaledDownFailedLabel]
40 | 			if ok {
41 | 				return &FilterStatus{
42 | 					Success:   false,
43 | 					ErrReason: ErrReasonFailedScaleDown,
44 | 				}
45 | 			}
46 | 
47 | 			_, ok = node.Labels[NodeScaledDownSuccessLabel]
48 | 			if ok {
49 | 				return &FilterStatus{
50 | 					Success:   false,
51 | 					ErrReason: ErrReasonSuccessScaleDown,
52 | 				}
53 | 			}
54 | 
55 | 			v, ok := node.Labels[NodeScaleDownDisableLabel]
56 | 			if ok && v == "true" {
57 | 				return &FilterStatus{
58 | 					Success:   false,
59 | 					ErrReason: ErrReasonScaleDownDisabled,
60 | 				}
61 | 			}
62 | 		}
63 | 		return &FilterStatus{Success: true}
64 | 	}
65 | }
66 | 
67 | type singleNodeFilter struct {
68 | 	clientset      clientset.Interface
69 | 	nodeFilter     FilterFunc
70 | 	selectedCount  int
71 | 	candidateNode  []*corev1.Node
72 | 	candidateIndex int
73 | }
74 | 
75 | type Status struct {
76 | 	Node      *corev1.Node
77 | 	ErrReason string
78 | }
79 | 
80 | func NewNodeFilter(client clientset.Interface, getPodsByNode PodsByNodeFunc, excludeNodes []string, filterNodeOptions options.FilterNodeOptions) (NodeFilter, error) {
81 | 	excludeNodeMap := make(map[string]bool)
82 | 	for i := range excludeNodes {
83 | 		excludeNodeMap[excludeNodes[i]] = true
84 | 	}
85 | 
86 | 	nodeFilter := NewOptions().
87 | 		WithFilter(defaultFilterFunc()).
88 | 		WithExcludeNodes(excludeNodeMap).
89 | 		WithExcludeTaintNodes(filterNodeOptions.ExcludeTaintNode).
90 | 		WithExcludeNotReadyNodes(filterNodeOptions.ExcludeNotReadyNode).
91 | 		WithIgnoreStaticPod(filterNodeOptions.IgnoreStaticPod).
92 | 		WithIgnoreCloneSet(filterNodeOptions.IgnoreCloneSet).
93 | 		WithIgnoreMirrorPod(filterNodeOptions.IgnoreMirrorPod).
94 | 		WithIgnoreVolumePod(filterNodeOptions.IgnoreVolumePod).
95 | 		WithPodsByNodeFunc(getPodsByNode).
96 | 		BuildFilterFunc()
97 | 
98 | 	return &singleNodeFilter{
99 | 		clientset:  client,
100 | 		nodeFilter: nodeFilter,
101 | 	}, nil
102 | }
103 | 
104 | func (g *singleNodeFilter) SelectNode() *Status {
105 | 	if len(g.candidateNode) != 0 && g.candidateIndex <= len(g.candidateNode)-1 {
106 | 		selectNode := g.candidateNode[g.candidateIndex]
107 | 		g.candidateIndex++
108 | 		if g.candidateIndex == len(g.candidateNode) {
109 | 			g.candidateNode = nil
110 | 			g.candidateIndex = 0
111 | 		}
112 | 		return &Status{Node: selectNode}
113 | 	}
114 | 
115 | 	g.candidateNode = nil
116 | 	g.candidateIndex = 0
117 | 
118 | 	nodes, err := g.clientset.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
119 | 	if err != nil {
120 | 		return nil
121 | 	}
122 | 
123 | 	var (
124 | 		statuses []*FilterStatus
125 | 		result   = make([]interface{}, len(nodes.Items))
126 | 	)
127 | 
128 | 	workqueue.ParallelizeUntil(context.TODO(), 16, len(nodes.Items), func(index int) {
129 | 		node := &nodes.Items[index]
130 | 		status := g.nodeFilter(node)
131 | 		if status.Success {
132 | 			result[index] = node
133 | 		} else {
134 | 			result[index] = status
135 | 		}
136 | 	})
137 | 
138 | 	for i := 0; i < len(nodes.Items); i++ {
139 | 		switch v := result[i].(type) {
140 | 		case *FilterStatus:
141 | 			statuses = append(statuses, v)
142 | 		case *corev1.Node:
143 | 			g.candidateNode = append(g.candidateNode, v)
144 | 		}
145 | 	}
146 | 
147 | 	if len(g.candidateNode) == 0 {
148 | 		return convertFilterStatusesToStatus(statuses, g.selectedCount)
149 | 	}
150 | 
151 | 	g.candidateIndex++
152 | 
153 | 	return &Status{Node: g.candidateNode[0]}
154 | }
155 | 
156 | func (g *singleNodeFilter) Done() {
157 | 	g.selectedCount++
158 | }
159 | 
160 | func convertFilterStatusesToStatus(statuses []*FilterStatus, selectedCount int) *Status {
161 | 	statusMap := make(map[string]int)
162 | 
163 | 	for _, status := range statuses {
164 | 		statusMap[status.ErrReason]++
165 | 	}
166 | 
167 | 	// discount taints the simulator itself added to nodes it already scaled down
168 | 	if count, ok := statusMap[ErrReasonTaintNode]; ok {
169 | 		realCount := count - selectedCount
170 | 		if realCount == 0 {
171 | 			delete(statusMap, ErrReasonTaintNode)
172 | 		} else {
173 | 			statusMap[ErrReasonTaintNode] = realCount
174 | 		}
175 | 	}
176 | 
177 | 	sb := strings.Builder{}
178 | 	for reason, count := range statusMap {
179 | 		_, _ = sb.WriteString(fmt.Sprintf("%d %s; ", count, reason))
180 | 	}
181 | 
182 | 	return &Status{ErrReason: sb.String()}
183 | }
184 | 
--------------------------------------------------------------------------------
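The interplay between SelectNode and Done above is easiest to see in a driving loop. The real loop lives in clustercompression/simulator.go, which is not part of this excerpt, so the following is only an illustrative sketch; compressCluster and tryToScaleDown are invented stand-ins for the simulator's actual logic:

```go
package clustercompression

import (
	corev1 "k8s.io/api/core/v1"
)

// compressCluster is a hypothetical driver, for illustration only.
// It keeps asking the filter for the next candidate node and calls
// Done() whenever one more node is successfully "scaled down".
func compressCluster(f NodeFilter, tryToScaleDown func(*corev1.Node) bool) string {
	for {
		status := f.SelectNode()
		if status == nil {
			// SelectNode returns nil only when listing nodes failed.
			return "listing nodes failed"
		}
		if status.Node == nil {
			// No candidate survived filtering; ErrReason aggregates the
			// per-reason counts, e.g. "1 master node(s); 2 node(s) have taints; ".
			return status.ErrReason
		}
		if tryToScaleDown(status.Node) {
			// Lets the filter discount the taint it put on this node.
			f.Done()
		}
	}
}
```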
/pkg/simulator/capacityestimation/simulator.go:
--------------------------------------------------------------------------------
1 | package capacityestimation
2 | 
3 | import (
4 | 	"fmt"
5 | 
6 | 	"golang.org/x/sync/errgroup"
7 | 	corev1 "k8s.io/api/core/v1"
8 | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9 | 	"k8s.io/apimachinery/pkg/runtime"
10 | 	"k8s.io/client-go/informers"
11 | 	"k8s.io/client-go/tools/cache"
12 | 	"k8s.io/klog/v2"
13 | 
14 | 	"github.com/k-cloud-labs/kluster-capacity/app/cmds/capacityestimation/options"
15 | 	"github.com/k-cloud-labs/kluster-capacity/pkg"
16 | 	pkgframework "github.com/k-cloud-labs/kluster-capacity/pkg/framework"
17 | 	"github.com/k-cloud-labs/kluster-capacity/pkg/utils"
18 | )
19 | 
20 | type PodGenerator interface {
21 | 	Generate() *corev1.Pod
22 | }
23 | 
24 | // only one scheduler is supported for now; its profile name is pkg.SchedulerName ("simulator-scheduler")
25 | type simulator struct {
26 | 	pkg.Framework
27 | 
28 | 	podGenerator PodGenerator
29 | 	simulatedPod *corev1.Pod
30 | 	maxSimulated int
31 | 	simulated    int
32 | }
33 | 
34 | type multiSimulator struct {
35 | 	simulators []*simulator
36 | 	reports    pkg.Printer
37 | }
38 | 
39 | // NewCESimulatorExecutor creates a ce simulator. Scheduling runs against a fake client,
40 | // so the kubeconfig is only used to snapshot the initial cluster state.
41 | func NewCESimulatorExecutor(conf *options.CapacityEstimationConfig) (pkg.Simulator, error) {
42 | 	newSimulator := func(pod *corev1.Pod) (*simulator, error) {
43 | 		kubeSchedulerConfig, err := utils.BuildKubeSchedulerCompletedConfig(conf.Options.SchedulerConfig, conf.Options.KubeConfig)
44 | 		if err != nil {
45 | 			return nil, err
46 | 		}
47 | 
48 | 		kubeConfig, err := utils.BuildRestConfig(conf.Options.KubeConfig)
49 | 		if err != nil {
50 | 			return nil, err
51 | 		}
52 | 
53 | 		s := &simulator{
54 | 			podGenerator: NewSinglePodGenerator(pod),
55 | 			simulatedPod: pod,
56 | 			simulated:    0,
57 | 			maxSimulated: conf.Options.MaxLimit,
58 | 		}
59 | 
60 | 		err = s.addEventHandlers(kubeSchedulerConfig.InformerFactory)
61 | 		if err != nil {
62 | 			return nil, err
63 | 		}
64 | 
65 | 		framework, err := pkgframework.NewKubeSchedulerFramework(kubeSchedulerConfig, kubeConfig,
66 | 			pkgframework.WithExcludeNodes(conf.Options.ExcludeNodes),
67 | 			pkgframework.WithPostBindHook(s.postBindHook))
68 | 		if err != nil {
69 | 			return nil, err
70 | 		}
71 | 
72 | 		s.Framework = framework
73 | 
74 | 		return s, nil
75 | 	}
76 | 
77 | 	ms := &multiSimulator{
78 | 		simulators: make([]*simulator, 0),
79 | 	}
80 | 
81 | 	for _, pod := range conf.Pods {
82 | 		s, err := newSimulator(pod)
83 | 		if err != nil {
84 | 			return nil, err
85 | 		}
86 | 
87 | 		ms.simulators = append(ms.simulators, s)
88 | 	}
89 | 
90 | 	return ms, nil
91 | }
92 | 
93 | func (s *simulator) Run(func() error) error {
94 | 	return s.Framework.Run(s.createNextPod)
95 | }
96 | 
97 | func (s *simulator) Report() pkg.Printer {
98 | 	return generateReport([]*corev1.Pod{s.simulatedPod}, s.Status())
99 | }
100 | 
101 | func (ms *multiSimulator) Initialize(objs ...runtime.Object) error {
102 | 	for _, s := range ms.simulators {
103 | 		if err := s.Initialize(objs...); err != nil {
104 | 			return err
105 | 		}
106 | 	}
107 | 
108 | 	return nil
109 | }
110 | 
111 | func (ms *multiSimulator) Run() error {
112 | 	g := errgroup.Group{}
113 | 	reports := make(CapacityEstimationReviews, len(ms.simulators))
114 | 	for i, s := range ms.simulators {
115 | 		i := i
116 | 		s := s
117 | 		g.Go(func() error {
118 | 			err := s.Run(nil)
119 | 			if err != nil {
120 | 				return err
121 | 			}
122 | 			reports[i] = s.Report().(*CapacityEstimationReview)
123 | 			return nil
124 | 		})
125 | 	}
126 | 
127 | 	err := g.Wait()
128 | 	if err != nil {
129 | 		return err
130 | 	}
131 | 
132 | 	ms.reports = reports
133 | 
134 | 	return nil
135 | }
136 | 
137 | func (ms *multiSimulator) Report() pkg.Printer {
138 | 	return ms.reports
139 | }
140 | 
141 | func (s *simulator) postBindHook(bindPod *corev1.Pod) error {
142 | 	s.UpdateEstimationPods(bindPod)
143 | 
144 | 	if s.maxSimulated > 0 && s.simulated >= s.maxSimulated {
145 | 		return s.Stop(fmt.Sprintf("LimitReached: Maximum number of pods simulated: %v", s.maxSimulated))
146 | 	}
147 | 
148 | 	if err := s.createNextPod(); err != nil {
149 | 		return fmt.Errorf("unable to create next pod for simulated scheduling: %v", err)
150 | 	}
151 | 	return nil
152 | }
153 | 
154 | func (s *simulator) createNextPod() error {
155 | 	pod := s.podGenerator.Generate()
156 | 	s.simulated++
157 | 	klog.V(2).InfoS("create simulate pod", "count", s.simulated, "key", pod.Namespace+"/"+pod.Name)
158 | 
159 | 	return s.CreatePod(pod)
160 | }
161 | 
162 | func (s *simulator) addEventHandlers(informerFactory informers.SharedInformerFactory) (err error) {
163 | 	_, _ = informerFactory.Core().V1().Pods().Informer().AddEventHandler(
164 | 		cache.FilteringResourceEventHandler{
165 | 			FilterFunc: func(obj interface{}) bool {
166 | 				if pod, ok := obj.(*corev1.Pod); ok && pod.Spec.SchedulerName == pkg.SchedulerName &&
167 | 					metav1.HasAnnotation(pod.ObjectMeta, pkg.PodProvisioner) {
168 | 					return true
169 | 				}
170 | 				return false
171 | 			},
172 | 			Handler: cache.ResourceEventHandlerFuncs{
173 | 				UpdateFunc: func(oldObj, newObj interface{}) {
174 | 					if pod, ok := newObj.(*corev1.Pod); ok {
175 | 						for _, podCondition := range pod.Status.Conditions {
176 | 							// Only for pending pods provisioned by ce
177 | 							if podCondition.Type == corev1.PodScheduled && podCondition.Status == corev1.ConditionFalse &&
178 | 								podCondition.Reason == corev1.PodReasonUnschedulable {
179 | 								err = s.Stop(fmt.Sprintf("%v: %v", podCondition.Reason, podCondition.Message))
180 | 							}
181 | 						}
182 | 					}
183 | 				},
184 | 			},
185 | 		},
186 | 	)
187 | 
188 | 	return
189 | }
190 | 
--------------------------------------------------------------------------------
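NewSinglePodGenerator is referenced above but defined in podgenerator.go, which is not included in this excerpt. A minimal implementation satisfying the PodGenerator interface could look like the sketch below; the counter-suffix naming scheme is an assumption, not the repository's actual code:

```go
package capacityestimation

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// singlePodGenerator stamps out numbered copies of one template pod.
// Sketch only: the real implementation lives in podgenerator.go.
type singlePodGenerator struct {
	counter  int
	template *corev1.Pod
}

func NewSinglePodGenerator(template *corev1.Pod) PodGenerator {
	return &singlePodGenerator{template: template}
}

func (g *singlePodGenerator) Generate() *corev1.Pod {
	pod := g.template.DeepCopy()
	// Distinct names keep the simulated API server from rejecting
	// each new copy as a duplicate of the previous one.
	pod.Name = fmt.Sprintf("%s-%d", g.template.Name, g.counter)
	g.counter++
	return pod
}
```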
/pkg/simulator/clustercompression/options.go:
--------------------------------------------------------------------------------
1 | package clustercompression
2 | 
3 | import (
4 | 	corev1 "k8s.io/api/core/v1"
5 | 
6 | 	"github.com/k-cloud-labs/kluster-capacity/pkg/utils"
7 | )
8 | 
9 | const (
10 | 	ErrReasonFailedScaleDown   = "node(s) can't be scaled down because of insufficient resources on other nodes"
11 | 	ErrReasonSuccessScaleDown  = "node(s) have been successfully scaled down"
12 | 	ErrReasonScaleDownDisabled = "node(s) have the scale-down-disabled label"
13 | 	ErrReasonMasterNode        = "master node(s)"
14 | 	ErrReasonTaintNode         = "node(s) have taints"
15 | 	ErrReasonNotReadyNode      = "not ready node(s)"
16 | 	ErrReasonStaticPod         = "node(s) have static pods"
17 | 	ErrReasonMirrorPod         = "node(s) have mirror pods"
18 | 	ErrReasonCloneset          = "node(s) have in-place update pods"
19 | 	ErrReasonVolumePod         = "node(s) have pods using local storage"
20 | 	ErrReasonUnknown           = "node(s) have unknown error"
21 | 	ErrReasonExcludedNode      = "node(s) explicitly excluded"
22 | )
23 | 
24 | // FilterFunc is a filter for a node.
25 | type FilterFunc func(*corev1.Node) *FilterStatus
26 | type PodsByNodeFunc func(name string) ([]*corev1.Pod, error)
27 | 
28 | type FilterStatus struct {
29 | 	// Success being true means the node is a candidate for scale-down simulation
30 | 	Success   bool
31 | 	ErrReason string
32 | }
33 | 
34 | type Options struct {
35 | 	filter              FilterFunc
36 | 	getPodsByNode       PodsByNodeFunc
37 | 	excludeNodes        map[string]bool
38 | 	excludeTaintNode    bool
39 | 	excludeNotReadyNode bool
40 | 	ignoreStaticPod     bool
41 | 	ignoreMirrorPod     bool
42 | 	ignoreCloneSet      bool
43 | 	ignoreVolumePod     bool
44 | }
45 | 
46 | // NewOptions returns an empty Options.
47 | func NewOptions() *Options {
48 | 	return &Options{}
49 | }
50 | 
51 | // WithFilter sets a node filter.
52 | func (o *Options) WithFilter(filter FilterFunc) *Options {
53 | 	o.filter = filter
54 | 	return o
55 | }
56 | 
57 | // WithExcludeNodes sets the excluded nodes.
58 | func (o *Options) WithExcludeNodes(nodes map[string]bool) *Options {
59 | 	o.excludeNodes = nodes
60 | 	return o
61 | }
62 | 
63 | // WithExcludeTaintNodes sets the excludeTaintNode option.
64 | func (o *Options) WithExcludeTaintNodes(excludeTaintNode bool) *Options {
65 | 	o.excludeTaintNode = excludeTaintNode
66 | 	return o
67 | }
68 | 
69 | // WithExcludeNotReadyNodes sets the excludeNotReadyNode option.
70 | func (o *Options) WithExcludeNotReadyNodes(excludeNotReadyNode bool) *Options {
71 | 	o.excludeNotReadyNode = excludeNotReadyNode
72 | 	return o
73 | }
74 | 
75 | // WithIgnoreStaticPod sets the ignoreStaticPod option.
76 | func (o *Options) WithIgnoreStaticPod(ignoreStaticPod bool) *Options {
77 | 	o.ignoreStaticPod = ignoreStaticPod
78 | 	return o
79 | }
80 | 
81 | // WithIgnoreMirrorPod sets the ignoreMirrorPod option.
82 | func (o *Options) WithIgnoreMirrorPod(ignoreMirrorPod bool) *Options {
83 | 	o.ignoreMirrorPod = ignoreMirrorPod
84 | 	return o
85 | }
86 | 
87 | // WithIgnoreCloneSet sets the ignoreCloneSet option.
88 | func (o *Options) WithIgnoreCloneSet(ignoreCloneSet bool) *Options {
89 | 	o.ignoreCloneSet = ignoreCloneSet
90 | 	return o
91 | }
92 | 
93 | // WithIgnoreVolumePod sets the ignoreVolumePod option.
94 | func (o *Options) WithIgnoreVolumePod(ignoreVolumePod bool) *Options {
95 | 	o.ignoreVolumePod = ignoreVolumePod
96 | 	return o
97 | }
98 | 
99 | func (o *Options) WithPodsByNodeFunc(podsByNodeFunc PodsByNodeFunc) *Options {
100 | 	o.getPodsByNode = podsByNodeFunc
101 | 	return o
102 | }
103 | 
104 | // BuildFilterFunc builds a final FilterFunc based on Options.
105 | func (o *Options) BuildFilterFunc() FilterFunc {
106 | 	return func(node *corev1.Node) *FilterStatus {
107 | 		if o.filter != nil {
108 | 			status := o.filter(node)
109 | 			if status != nil && !status.Success {
110 | 				return status
111 | 			}
112 | 		}
113 | 
114 | 		if o.excludeNodes != nil && o.excludeNodes[node.Name] {
115 | 			return &FilterStatus{
116 | 				Success:   false,
117 | 				ErrReason: ErrReasonExcludedNode,
118 | 			}
119 | 		}
120 | 
121 | 		if o.excludeTaintNode && haveNodeTaint(node) {
122 | 			return &FilterStatus{
123 | 				Success:   false,
124 | 				ErrReason: ErrReasonTaintNode,
125 | 			}
126 | 		}
127 | 		if o.excludeNotReadyNode && isNodeNotReady(node) {
128 | 			return &FilterStatus{
129 | 				Success:   false,
130 | 				ErrReason: ErrReasonNotReadyNode,
131 | 			}
132 | 		}
133 | 
134 | 		podList, err := o.getPodsByNode(node.Name)
135 | 		if err != nil {
136 | 			return &FilterStatus{
137 | 				Success:   false,
138 | 				ErrReason: ErrReasonUnknown,
139 | 			}
140 | 		}
141 | 
142 | 		for i := range podList {
143 | 			if o.ignoreStaticPod && utils.IsStaticPod(podList[i]) {
144 | 				return &FilterStatus{
145 | 					Success:   false,
146 | 					ErrReason: ErrReasonStaticPod,
147 | 				}
148 | 			}
149 | 
150 | 			if o.ignoreMirrorPod && utils.IsMirrorPod(podList[i]) {
151 | 				return &FilterStatus{
152 | 					Success:   false,
153 | 					ErrReason: ErrReasonMirrorPod,
154 | 				}
155 | 			}
156 | 
157 | 			if o.ignoreVolumePod && utils.IsPodWithLocalStorage(podList[i]) {
158 | 				return &FilterStatus{
159 | 					Success:   false,
160 | 					ErrReason: ErrReasonVolumePod,
161 | 				}
162 | 			}
163 | 
164 | 			if o.ignoreCloneSet && utils.IsCloneSetPod(podList[i].OwnerReferences) {
165 | 				return &FilterStatus{
166 | 					Success:   false,
167 | 					ErrReason: ErrReasonCloneset,
168 | 				}
169 | 			}
170 | 		}
171 | 		return &FilterStatus{Success: true}
172 | 	}
173 | }
174 | 
175 | func haveNodeTaint(node *corev1.Node) bool {
176 | 	return len(node.Spec.Taints) != 0
177 | }
178 | 
179 | func isNodeNotReady(node *corev1.Node) bool {
180 | 	for _, cond := range node.Status.Conditions {
181 | 		// We consider a node ready only when its NodeReady condition status is ConditionTrue.
182 | 		if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue {
183 | 			return false
184 | 		}
185 | 	}
186 | 	return true
187 | }
188 | 
--------------------------------------------------------------------------------
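For comparison with NewNodeFilter in nodeFilter.go above, here is a standalone composition of the same builder. It is illustrative only; the option values are made up for the example:

```go
package clustercompression

// exampleFilter composes a FilterFunc the way NewNodeFilter does:
// skip master/labeled nodes (via the default filter), two explicitly
// excluded nodes, tainted nodes, and nodes running static pods.
func exampleFilter(getPodsByNode PodsByNodeFunc) FilterFunc {
	return NewOptions().
		WithFilter(defaultFilterFunc()).
		WithExcludeNodes(map[string]bool{"node-a": true, "node-b": true}).
		WithExcludeTaintNodes(true).
		WithIgnoreStaticPod(true).
		WithPodsByNodeFunc(getPodsByNode).
		BuildFilterFunc()
}
```

A node that fails any of these checks comes back as a FilterStatus with Success set to false and ErrReason carrying one of the ErrReason constants above; those reasons are what convertFilterStatusesToStatus later aggregates into the termination message.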
/README-ZH.md:
--------------------------------------------------------------------------------
1 | # kluster-capacity
2 | [[English](./README.md)]
3 | 
4 | ![kluster-capacity-logo](docs/images/capacity-management-capacity-icon.jpeg)
5 | 
6 | [![Build Status](https://github.com/k-cloud-labs/kluster-capacity/actions/workflows/ci.yml/badge.svg)](https://github.com/k-cloud-labs/kluster-capacity/actions?query=workflow%3Abuild)
7 | [![Go Report Card](https://goreportcard.com/badge/github.com/k-cloud-labs/kluster-capacity)](https://goreportcard.com/report/github.com/k-cloud-labs/kluster-capacity)
8 | [![Go doc](https://img.shields.io/badge/go.dev-reference-brightgreen?logo=go&logoColor=white&style=flat)](https://pkg.go.dev/github.com/k-cloud-labs/kluster-capacity)
9 | 
10 | A cluster capacity analysis tool that supports capacity estimation, scheduler simulation, cluster compression, and more.
11 | This repository was inspired by https://github.com/kubernetes-sigs/cluster-capacity.
12 | 
13 | ## Installation
14 | ### Homebrew
15 | Install via [Homebrew](https://brew.sh/):
16 | ```
17 | brew tap k-cloud-labs/tap
18 | brew install k-cloud-labs/tap/kluster-capacity
19 | ```
20 | 
21 | ### Krew
22 | Install via [Krew](https://github.com/GoogleContainerTools/krew):
23 | ```
24 | kubectl krew install kluster-capacity
25 | ```
26 | 
27 | ### Build from source
28 | Build the whole program:
29 | 
30 | ```sh
31 | $ cd $GOPATH/src/github.com/k-cloud-labs/
32 | $ git clone https://github.com/k-cloud-labs/kluster-capacity
33 | $ cd kluster-capacity
34 | $ make build
35 | ```
36 | 
37 | Three subcommands are available: ce, cc, and ss, which stand for capacity estimation, cluster compression, and scheduler simulation respectively.
38 | 
39 | ## Capacity Estimation
40 | ### Introduction
41 | As new pods get scheduled on nodes in a cluster, more and more resources get consumed. Monitoring available resources in the cluster is very important, so that operators can increase the current resources in time before all of them get exhausted, or take different steps to increase the available resources.
42 | 
43 | Cluster capacity consists of the capacities of individual cluster nodes. Capacity covers CPU, memory, disk space, and other resources.
44 | 
45 | The overall remaining allocatable capacity is an estimate. The goal is to analyze the remaining allocatable resources and estimate the available capacity, that is, the number of instances of a pod with given resource requirements that can still be scheduled in the cluster.
46 | 
47 | ### Enhancements
48 | The following enhancements were made on top of the original cluster-capacity:
49 | 
50 | - Support using an existing pod from the cluster directly as the pod template.
51 | - Support batch simulation with different pod templates.
52 | 
53 | ### Run
54 | 
55 | ```sh
56 | # use the specified pod template directly
57 | $ ./kluster-capacity ce --pods-from-template <pod-template>
58 | # use a specified pod in the cluster as the template
59 | $ ./kluster-capacity ce --pods-from-cluster <namespace/name>
60 | ```
61 | For more parameters and features, run the following command:
62 | 
63 | ```sh
64 | $ ./kluster-capacity ce --help
65 | ```
66 | 
67 | ### Demo
68 | Assume a cluster running 4 nodes and 1 master node, where each node has 2 CPUs and 4GB of memory, and each pod requires 150m CPU and 100Mi of memory.
69 | 
70 | ```sh
71 | $ ./kluster-capacity ce --pods-from-template --verbose
72 | Pod requirements:
73 | 	- cpu: 150m
74 | 	- memory: 100Mi
75 | 
76 | The cluster can schedule 52 instance(s) of the pod.
77 | Termination reason: FailedScheduling: pod (small-pod-52) failed to fit in any node
78 | fit failure on node (kube-node-1): Insufficient cpu
79 | fit failure on node (kube-node-4): Insufficient cpu
80 | fit failure on node (kube-node-2): Insufficient cpu
81 | fit failure on node (kube-node-3): Insufficient cpu
82 | 
83 | 
84 | Pod distribution among nodes:
85 | 	- kube-node-1: 13 instance(s)
86 | 	- kube-node-4: 13 instance(s)
87 | 	- kube-node-2: 13 instance(s)
88 | 	- kube-node-3: 13 instance(s)
89 | ```
90 | 
91 | As the number of pods running in the cluster grows, fewer pods can be scheduled when the analysis is run again.
92 | 
93 | ```sh
94 | $ ./kluster-capacity ce --pods-from-template --verbose
95 | Pod requirements:
96 | 	- cpu: 150m
97 | 	- memory: 100Mi
98 | 
99 | The cluster can schedule 46 instance(s) of the pod.
100 | Termination reason: FailedScheduling: pod (small-pod-46) failed to fit in any node
101 | fit failure on node (kube-node-1): Insufficient cpu
102 | fit failure on node (kube-node-4): Insufficient cpu
103 | fit failure on node (kube-node-2): Insufficient cpu
104 | fit failure on node (kube-node-3): Insufficient cpu
105 | 
106 | 
107 | Pod distribution among nodes:
108 | 	- kube-node-1: 11 instance(s)
109 | 	- kube-node-4: 12 instance(s)
110 | 	- kube-node-2: 11 instance(s)
111 | 	- kube-node-3: 12 instance(s)
112 | ```
113 | 
114 | ### Output format
115 | The `ce` command has an `--output (-o)` flag to format its output as json or yaml.
116 | 
117 | ```sh
118 | $ ./kluster-capacity ce --pods-from-template -o json|yaml
119 | ```
120 | 
121 | ## Scheduler Simulation
122 | ### Introduction
123 | 
124 | Scheduler simulation takes all the nodes, pods, and other related resources in the current cluster as input, and simulates the process of going from no pods at all to creating and scheduling all of them. It can be used to calculate the cluster compression ratio, to evaluate scheduling results, or to measure the quality of the scheduling algorithm.
125 | 
126 | Compared with cluster compression, its result is more aggressive and idealized.
127 | 
128 | ### Run
129 | 
130 | ```shell
131 | ./kluster-capacity ss
132 | ```
133 | For more parameters and features, run the following command:
134 | 
135 | ```sh
136 | $ ./kluster-capacity ss --help
137 | ```
138 | It supports two exit conditions: `AllSucceed` and `AllScheduled`. The former means the program exits after all pods have been scheduled successfully; the latter means it exits once every pod has been scheduled at least once. The default is `AllSucceed`. The exit condition can be set with the `--exit-condition` flag.
139 | 
140 | ### Demo
141 | 
142 | Assume a cluster running 4 nodes and 1 master node, where each node has 2 CPUs and 4GB of memory, and 40 pods, each requesting 100m CPU and 200Mi of memory, need to be scheduled.
143 | 
144 | If the scheduler uses the `LeastAllocated` strategy, the scheduling result may look like this:
145 | 
146 | ```sh
147 | $ ./kluster-capacity ss --verbose
148 | Termination reason: AllSucceed: 40 pod(s) have been scheduled successfully.
149 | 
150 | Pod distribution among nodes:
151 | 	- kube-node-1: 10 instance(s)
152 | 	- kube-node-2: 10 instance(s)
153 | 	- kube-node-3: 10 instance(s)
154 | 	- kube-node-4: 10 instance(s)
155 | ```
156 | 
157 | If the scheduler is switched to the `MostAllocated` strategy, the scheduling result may look like this:
158 | 
159 | ```sh
160 | $ ./kluster-capacity ss --verbose
161 | Termination reason: AllSucceed: 40 pod(s) have been scheduled successfully.
162 | 
163 | Pod distribution among nodes:
164 | 	- kube-node-1: 20 instance(s)
165 | 	- kube-node-2: 20 instance(s)
166 | ```
167 | 
168 | The scheduling results above can be analyzed to evaluate the effectiveness of the scheduling policy and the cluster capacity compression ratio. For example, the result above indicates a cluster compression ratio of 2, which means 50% of the resources are wasted in the ideal case.
169 | 
170 | 
171 | ## Cluster Compression
172 | ### Introduction
173 | Cluster compression takes the current state of the cluster, including all nodes, pods, and other related resources, as input and simulates compressing the cluster by removing nodes. It can be used to calculate the compression ratio of the cluster, a measure of how efficiently resources are utilized.
174 | 
175 | Compared with scheduler simulation, the result of cluster compression is usually more realistic and more actionable.
176 | 
177 | ### Run
178 | 
179 | ```shell
180 | ./kluster-capacity cc --verbose
181 | ```
182 | For more parameters and features, run the following command:
183 | 
184 | ```sh
185 | $ ./kluster-capacity cc --help
186 | ```
187 | 
188 | ### Demo
189 | 
190 | Assume a cluster running 4 nodes and 1 master node, where each node has 2 CPUs and 4GB of memory, with 40 pods running, each requesting 100m CPU and 200Mi of memory.
191 | 
192 | ```shell
193 | ./kluster-capacity cc --verbose
194 | 2 node(s) in the cluster can be scaled down.
195 | 
196 | Termination reason: FailedSelectNode: could not find a node that satisfies the condition, 1 master node(s); 2 node(s) can't be scaled down because of insufficient resources on other nodes;
197 | 
198 | nodes selected to be scaled down:
199 | 	- kube-node-1
200 | 	- kube-node-3
201 | ```
202 | 
203 | The result above shows that, given the resource requirements of the 40 pods, the cluster can remove 2 nodes while still guaranteeing that all pods can be scheduled, a compression ratio of 2, which means 50% of the resources are wasted.
204 | 
205 | ## Features
206 | - [x] Cluster compression
207 | - [x] Capacity estimation
208 | - [x] Scheduler simulation
209 | - [ ] Snapshot-based simulation
210 | - [ ] Resource fragmentation analysis
211 | 
212 | Feel free to try it out and share your valuable feedback. Thanks!
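213 | 
214 | A quick sanity check of the numbers in the two demos above: each node offers 2000m CPU and roughly 4096Mi of memory, while each pod asks for 100m CPU and 200Mi of memory, so a single node fits min(2000/100, 4096/200) = 20 pods. The 40 pods therefore pack onto exactly 2 of the 4 nodes, which is where the compression ratio of 4/2 = 2 (50% idle resources) comes from.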
-------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/k-cloud-labs/kluster-capacity 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/ghodss/yaml v1.0.0 7 | github.com/jedib0t/go-pretty/v6 v6.4.4 8 | github.com/lithammer/dedent v1.1.0 9 | github.com/satori/go.uuid v1.2.0 10 | github.com/spf13/cobra v1.6.1 11 | github.com/spf13/pflag v1.0.5 12 | github.com/spf13/viper v1.14.0 13 | golang.org/x/sync v0.1.0 14 | golang.org/x/term v0.3.0 15 | k8s.io/api v0.26.1 16 | k8s.io/apimachinery v0.26.1 17 | k8s.io/apiserver v0.26.0 18 | k8s.io/client-go v0.26.1 19 | k8s.io/component-base v0.26.1 20 | k8s.io/klog/v2 v2.80.1 21 | k8s.io/kube-scheduler v0.0.0 22 | k8s.io/kubernetes v1.26.0 23 | sigs.k8s.io/controller-runtime v0.14.2 24 | ) 25 | 26 | require ( 27 | github.com/NYTimes/gziphandler v1.1.1 // indirect 28 | github.com/antlr/antlr4/runtime/Go/antlr v1.4.10 // indirect 29 | github.com/beorn7/perks v1.0.1 // indirect 30 | github.com/blang/semver/v4 v4.0.0 // indirect 31 | github.com/cenkalti/backoff/v4 v4.1.3 // indirect 32 | github.com/cespare/xxhash/v2 v2.1.2 // indirect 33 | github.com/coreos/go-semver v0.3.0 // indirect 34 | github.com/coreos/go-systemd/v22 v22.3.2 // indirect 35 | github.com/davecgh/go-spew v1.1.1 // indirect 36 | github.com/docker/distribution v2.8.1+incompatible // indirect 37 | github.com/emicklei/go-restful/v3 v3.9.0 // indirect 38 | github.com/evanphx/json-patch v4.12.0+incompatible // indirect 39 | github.com/felixge/httpsnoop v1.0.3 // indirect 40 | github.com/fsnotify/fsnotify v1.6.0 // indirect 41 | github.com/go-logr/logr v1.2.3 // indirect 42 | github.com/go-logr/stdr v1.2.2 // indirect 43 | github.com/go-openapi/jsonpointer v0.19.5 // indirect 44 | github.com/go-openapi/jsonreference v0.20.0 // indirect 45 | github.com/go-openapi/swag v0.19.14 // indirect 46 | github.com/gogo/protobuf v1.3.2 // indirect 47 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect 48 | github.com/golang/protobuf v1.5.2 // indirect 49 | github.com/google/cel-go v0.12.5 // indirect 50 | github.com/google/gnostic v0.5.7-v3refs // indirect 51 | github.com/google/go-cmp v0.5.9 // indirect 52 | github.com/google/gofuzz v1.1.0 // indirect 53 | github.com/google/uuid v1.1.2 // indirect 54 | github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect 55 | github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect 56 | github.com/hashicorp/hcl v1.0.0 // indirect 57 | github.com/imdario/mergo v0.3.6 // indirect 58 | github.com/inconshreveable/mousetrap v1.0.1 // indirect 59 | github.com/josharian/intern v1.0.0 // indirect 60 | github.com/json-iterator/go v1.1.12 // indirect 61 | github.com/magiconair/properties v1.8.6 // indirect 62 | github.com/mailru/easyjson v0.7.6 // indirect 63 | github.com/mattn/go-runewidth v0.0.13 // indirect 64 | github.com/matttproud/golang_protobuf_extensions v1.0.2 // indirect 65 | github.com/mitchellh/mapstructure v1.5.0 // indirect 66 | github.com/moby/sys/mountinfo v0.6.2 // indirect 67 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 68 | github.com/modern-go/reflect2 v1.0.2 // indirect 69 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 70 | github.com/opencontainers/go-digest v1.0.0 // indirect 71 | github.com/opencontainers/selinux v1.10.0 // indirect 72 | github.com/pelletier/go-toml v1.9.5 // indirect 73 | 
github.com/pelletier/go-toml/v2 v2.0.5 // indirect 74 | github.com/pkg/errors v0.9.1 // indirect 75 | github.com/prometheus/client_golang v1.14.0 // indirect 76 | github.com/prometheus/client_model v0.3.0 // indirect 77 | github.com/prometheus/common v0.37.0 // indirect 78 | github.com/prometheus/procfs v0.8.0 // indirect 79 | github.com/rivo/uniseg v0.2.0 // indirect 80 | github.com/spf13/afero v1.9.2 // indirect 81 | github.com/spf13/cast v1.5.0 // indirect 82 | github.com/spf13/jwalterweatherman v1.1.0 // indirect 83 | github.com/stoewer/go-strcase v1.2.0 // indirect 84 | github.com/subosito/gotenv v1.4.1 // indirect 85 | go.etcd.io/etcd/api/v3 v3.5.5 // indirect 86 | go.etcd.io/etcd/client/pkg/v3 v3.5.5 // indirect 87 | go.etcd.io/etcd/client/v3 v3.5.5 // indirect 88 | go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.35.0 // indirect 89 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.35.0 // indirect 90 | go.opentelemetry.io/otel v1.10.0 // indirect 91 | go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.10.0 // indirect 92 | go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.10.0 // indirect 93 | go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.10.0 // indirect 94 | go.opentelemetry.io/otel/metric v0.31.0 // indirect 95 | go.opentelemetry.io/otel/sdk v1.10.0 // indirect 96 | go.opentelemetry.io/otel/trace v1.10.0 // indirect 97 | go.opentelemetry.io/proto/otlp v0.19.0 // indirect 98 | go.uber.org/atomic v1.9.0 // indirect 99 | go.uber.org/multierr v1.8.0 // indirect 100 | go.uber.org/zap v1.24.0 // indirect 101 | golang.org/x/crypto v0.1.0 // indirect 102 | golang.org/x/net v0.3.1-0.20221206200815-1e63c2f08a10 // indirect 103 | golang.org/x/oauth2 v0.0.0-20221014153046-6fdb5e3db783 // indirect 104 | golang.org/x/sys v0.3.0 // indirect 105 | golang.org/x/text v0.5.0 // indirect 106 | golang.org/x/time v0.3.0 // indirect 107 | google.golang.org/appengine v1.6.7 // indirect 108 | google.golang.org/genproto v0.0.0-20221024183307-1bc688fe9f3e // indirect 109 | google.golang.org/grpc v1.50.1 // indirect 110 | google.golang.org/protobuf v1.28.1 // indirect 111 | gopkg.in/inf.v0 v0.9.1 // indirect 112 | gopkg.in/ini.v1 v1.67.0 // indirect 113 | gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect 114 | gopkg.in/yaml.v2 v2.4.0 // indirect 115 | gopkg.in/yaml.v3 v3.0.1 // indirect 116 | k8s.io/cloud-provider v0.0.0 // indirect 117 | k8s.io/component-helpers v0.26.0 // indirect 118 | k8s.io/csi-translation-lib v0.0.0 // indirect 119 | k8s.io/dynamic-resource-allocation v0.0.0 // indirect 120 | k8s.io/kms v0.26.0 // indirect 121 | k8s.io/kube-openapi v0.0.0-20221012153701-172d655c2280 // indirect 122 | k8s.io/mount-utils v0.0.0 // indirect 123 | k8s.io/utils v0.0.0-20221128185143-99ec85e7a448 // indirect 124 | sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.33 // indirect 125 | sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 // indirect 126 | sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect 127 | sigs.k8s.io/yaml v1.3.0 // indirect 128 | ) 129 | 130 | replace ( 131 | k8s.io/cloud-provider => k8s.io/cloud-provider v0.26.0 132 | k8s.io/csi-translation-lib => k8s.io/csi-translation-lib v0.26.0 133 | k8s.io/dynamic-resource-allocation => k8s.io/dynamic-resource-allocation v0.26.0 134 | k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.26.0 135 | k8s.io/mount-utils => k8s.io/mount-utils v0.26.0 136 | ) 137 | -------------------------------------------------------------------------------- 
/pkg/utils/utils.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "os" 7 | 8 | "github.com/ghodss/yaml" 9 | corev1 "k8s.io/api/core/v1" 10 | "k8s.io/client-go/informers" 11 | fakeclientset "k8s.io/client-go/kubernetes/fake" 12 | restclient "k8s.io/client-go/rest" 13 | "k8s.io/client-go/tools/clientcmd" 14 | "k8s.io/client-go/tools/events" 15 | configv1alpha1 "k8s.io/component-base/config/v1alpha1" 16 | "k8s.io/component-base/logs" 17 | kubeschedulerconfigv1 "k8s.io/kube-scheduler/config/v1" 18 | schedconfig "k8s.io/kubernetes/cmd/kube-scheduler/app/config" 19 | kubescheduleroptions "k8s.io/kubernetes/cmd/kube-scheduler/app/options" 20 | kubeschedulerconfig "k8s.io/kubernetes/pkg/scheduler/apis/config" 21 | "k8s.io/kubernetes/pkg/scheduler/apis/config/latest" 22 | kubeschedulerscheme "k8s.io/kubernetes/pkg/scheduler/apis/config/scheme" 23 | "k8s.io/kubernetes/pkg/scheduler/apis/config/validation" 24 | "k8s.io/kubernetes/pkg/scheduler/framework" 25 | 26 | "github.com/k-cloud-labs/kluster-capacity/pkg" 27 | ) 28 | 29 | const ( 30 | DefaultQPS = 10000 31 | DefaultBurst = 20000 32 | ) 33 | 34 | func BuildRestConfig(config string) (*restclient.Config, error) { 35 | if len(config) != 0 { 36 | master, err := getMasterFromKubeConfig(config) 37 | if err != nil { 38 | return nil, fmt.Errorf("failed to parse kubeconfig file: %v ", err) 39 | } 40 | 41 | cfg, err := clientcmd.BuildConfigFromFlags(master, config) 42 | if err != nil { 43 | return nil, fmt.Errorf("unable to build config: %v", err) 44 | } 45 | 46 | cfg.QPS = DefaultQPS 47 | cfg.Burst = DefaultBurst 48 | 49 | return cfg, nil 50 | } else { 51 | cfg, err := restclient.InClusterConfig() 52 | if err != nil { 53 | return nil, fmt.Errorf("unable to build in cluster config: %v", err) 54 | } 55 | 56 | cfg.QPS = DefaultQPS 57 | cfg.Burst = DefaultBurst 58 | 59 | return cfg, nil 60 | } 61 | } 62 | 63 | func BuildKubeSchedulerCompletedConfig(config, kubeconfig string) (*schedconfig.CompletedConfig, error) { 64 | var kcfg *kubeschedulerconfig.KubeSchedulerConfiguration 65 | if len(config) > 0 { 66 | cfg, err := loadConfigFromFile(config) 67 | if err != nil { 68 | return nil, err 69 | } 70 | if err := validation.ValidateKubeSchedulerConfiguration(cfg); err != nil { 71 | return nil, err 72 | } 73 | kcfg = cfg 74 | } else { 75 | cfg, err := latest.Default() 76 | if err != nil { 77 | return nil, err 78 | } 79 | kcfg = cfg 80 | } 81 | 82 | if len(kcfg.ClientConnection.Kubeconfig) == 0 && len(kubeconfig) > 0 { 83 | kcfg.ClientConnection.Kubeconfig = kubeconfig 84 | } 85 | 86 | cc, err := buildKubeSchedulerCompletedConfig(kcfg) 87 | if err != nil { 88 | return nil, fmt.Errorf("failed to init kube scheduler configuration: %v ", err) 89 | } 90 | 91 | return cc, nil 92 | } 93 | 94 | func PrintJson(r pkg.Printer) error { 95 | jsonBytes, err := json.Marshal(r) 96 | if err != nil { 97 | return fmt.Errorf("failed to create json: %v", err) 98 | } 99 | fmt.Println(string(jsonBytes)) 100 | return nil 101 | } 102 | 103 | func PrintYaml(r pkg.Printer) error { 104 | yamlBytes, err := yaml.Marshal(r) 105 | if err != nil { 106 | return fmt.Errorf("failed to create yaml: %v", err) 107 | } 108 | fmt.Print(string(yamlBytes)) 109 | return nil 110 | } 111 | 112 | func ComputePodResourceRequest(pod *corev1.Pod) *framework.Resource { 113 | result := &framework.Resource{} 114 | 115 | for _, container := range pod.Spec.Containers { 116 | result.Add(container.Resources.Requests) 
117 | 	}
118 | 
119 | 	// take max_resource(sum_pod, any_init_container)
120 | 	for _, container := range pod.Spec.InitContainers {
121 | 		result.SetMaxResource(container.Resources.Requests)
122 | 	}
123 | 
124 | 	// If Overhead is being utilized, add to the total requests for the pod
125 | 	if pod.Spec.Overhead != nil {
126 | 		result.Add(pod.Spec.Overhead)
127 | 	}
128 | 	return result
129 | }
130 | 
131 | func buildKubeSchedulerCompletedConfig(kcfg *kubeschedulerconfig.KubeSchedulerConfiguration) (*schedconfig.CompletedConfig, error) {
132 | 	if kcfg == nil {
133 | 		kcfg = &kubeschedulerconfig.KubeSchedulerConfiguration{}
134 | 		versionedCfg := kubeschedulerconfigv1.KubeSchedulerConfiguration{}
135 | 		versionedCfg.DebuggingConfiguration = *configv1alpha1.NewRecommendedDebuggingConfiguration()
136 | 
137 | 		kubeschedulerscheme.Scheme.Default(&versionedCfg)
138 | 		if err := kubeschedulerscheme.Scheme.Convert(&versionedCfg, kcfg, nil); err != nil {
139 | 			return nil, err
140 | 		}
141 | 	}
142 | 
143 | 	// inject scheduler config
144 | 	if len(kcfg.Profiles) == 0 {
145 | 		kcfg.Profiles = []kubeschedulerconfig.KubeSchedulerProfile{
146 | 			{},
147 | 		}
148 | 	}
149 | 
150 | 	kcfg.Profiles[0].SchedulerName = pkg.SchedulerName
151 | 	if kcfg.Profiles[0].Plugins == nil {
152 | 		kcfg.Profiles[0].Plugins = &kubeschedulerconfig.Plugins{}
153 | 	}
154 | 
155 | 	opts := &kubescheduleroptions.Options{
156 | 		ComponentConfig: kcfg,
157 | 		Logs:            logs.NewOptions(),
158 | 	}
159 | 
160 | 	c := &schedconfig.Config{}
161 | 	// clear out all unnecessary options so no port is bound
162 | 	// to allow running multiple instances in a row
163 | 	opts.Deprecated = nil
164 | 	opts.SecureServing = nil
165 | 	if err := opts.ApplyTo(c); err != nil {
166 | 		return nil, fmt.Errorf("unable to apply options to scheduler config: %v", err)
167 | 	}
168 | 
169 | 	// Get the completed config
170 | 	cc := c.Complete()
171 | 
172 | 	// completely ignore the events
173 | 	cc.EventBroadcaster = events.NewEventBroadcasterAdapter(fakeclientset.NewSimpleClientset())
174 | 
175 | 	// back the scheduler with a fake client and informer factory so the simulation never touches the real cluster
176 | 	cc.Client = fakeclientset.NewSimpleClientset()
177 | 	cc.InformerFactory = informers.NewSharedInformerFactory(cc.Client, 0)
178 | 
179 | 	return &cc, nil
180 | }
181 | 
182 | func loadConfigFromFile(file string) (*kubeschedulerconfig.KubeSchedulerConfiguration, error) {
183 | 	data, err := os.ReadFile(file)
184 | 	if err != nil {
185 | 		return nil, err
186 | 	}
187 | 	return loadConfig(data)
188 | }
189 | 
190 | func loadConfig(data []byte) (*kubeschedulerconfig.KubeSchedulerConfiguration, error) {
191 | 	// The UniversalDecoder runs defaulting and returns the internal type by default.
192 | 	obj, gvk, err := kubeschedulerscheme.Codecs.UniversalDecoder().Decode(data, nil, nil)
193 | 	if err != nil {
194 | 		return nil, err
195 | 	}
196 | 	if cfgObj, ok := obj.(*kubeschedulerconfig.KubeSchedulerConfiguration); ok {
197 | 		// We don't set this field in pkg/scheduler/apis/config/{version}/conversion.go
198 | 		// because the field will be cleared later by API machinery during
199 | 		// conversion. See KubeSchedulerConfiguration internal type definition for
200 | 		// more details.
201 | 		cfgObj.TypeMeta.APIVersion = gvk.GroupVersion().String()
202 | 		return cfgObj, nil
203 | 	}
204 | 	return nil, fmt.Errorf("couldn't decode as KubeSchedulerConfiguration, got %s", gvk)
205 | }
206 | 
207 | func getMasterFromKubeConfig(filename string) (string, error) {
208 | 	config, err := clientcmd.LoadFromFile(filename)
209 | 	if err != nil {
210 | 		return "", fmt.Errorf("cannot load kubeconfig file: %v", err)
211 | 	}
212 | 
213 | 	context, ok := config.Contexts[config.CurrentContext]
214 | 	if !ok {
215 | 		return "", fmt.Errorf("failed to get master address from kubeconfig")
216 | 	}
217 | 
218 | 	if val, ok := config.Clusters[context.Cluster]; ok {
219 | 		return val.Server, nil
220 | 	}
221 | 	return "", fmt.Errorf("failed to get master address from kubeconfig")
222 | }
223 | 
--------------------------------------------------------------------------------
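ComputePodResourceRequest above follows the usual kube-scheduler accounting rule: effective request = max(sum of app containers, largest init container) + pod overhead, because init containers run one at a time before the app containers start. A test-style worked example (values illustrative, not part of the repository):

```go
package utils

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// Example_computePodResourceRequest pins down the
// max(sum(containers), any init container) + overhead rule.
func Example_computePodResourceRequest() {
	req := func(cpu string) corev1.ResourceList {
		return corev1.ResourceList{corev1.ResourceCPU: resource.MustParse(cpu)}
	}

	pod := &corev1.Pod{
		Spec: corev1.PodSpec{
			// Two app containers: 150m + 100m = 250m in total.
			Containers: []corev1.Container{
				{Resources: corev1.ResourceRequirements{Requests: req("150m")}},
				{Resources: corev1.ResourceRequirements{Requests: req("100m")}},
			},
			// One init container asking for 300m: it runs alone, so the
			// effective request becomes max(250m, 300m) = 300m.
			InitContainers: []corev1.Container{
				{Resources: corev1.ResourceRequirements{Requests: req("300m")}},
			},
			// Runtime overhead is always added on top: 300m + 10m.
			Overhead: req("10m"),
		},
	}

	fmt.Println(ComputePodResourceRequest(pod).MilliCPU)
	// Output: 310
}
```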
80 | fs, err := parser.ParseDir(a.fset, dir, nil, parser.AllErrors|parser.ParseComments) 81 | 82 | if err != nil { 83 | fmt.Fprintln(os.Stderr, "ERROR(syntax)", logPrefix, err) 84 | a.failed = true 85 | return 86 | } 87 | 88 | for _, p := range fs { 89 | // returns first error, but a.handleError deals with it 90 | files := a.filterFiles(p.Files) 91 | for _, file := range files { 92 | replacements := make(map[string]string) 93 | pathToFile := a.fset.File(file.Pos()).Name() 94 | for _, imp := range file.Imports { 95 | importPath := strings.Replace(imp.Path.Value, "\"", "", -1) 96 | pathSegments := strings.Split(importPath, "/") 97 | importName := pathSegments[len(pathSegments)-1] 98 | if imp.Name != nil { 99 | importName = imp.Name.Name 100 | } 101 | if alias, ok := aliases[importPath]; ok { 102 | if alias != importName { 103 | if !*confirm { 104 | fmt.Fprintf(os.Stderr, "%sERROR wrong alias for import \"%s\" should be %s in file %s\n", logPrefix, importPath, alias, pathToFile) 105 | a.failed = true 106 | } 107 | replacements[importName] = alias 108 | if imp.Name != nil { 109 | imp.Name.Name = alias 110 | } else { 111 | imp.Name = ast.NewIdent(alias) 112 | } 113 | } 114 | } 115 | } 116 | 117 | if len(replacements) > 0 { 118 | if *confirm { 119 | fmt.Printf("%sReplacing imports with aliases in file %s\n", logPrefix, pathToFile) 120 | for key, value := range replacements { 121 | renameImportUsages(file, key, value) 122 | } 123 | ast.SortImports(a.fset, file) 124 | var buffer bytes.Buffer 125 | if err = format.Node(&buffer, a.fset, file); err != nil { 126 | panic(fmt.Sprintf("Error formatting ast node after rewriting import.\n%s\n", err.Error())) 127 | } 128 | 129 | fileInfo, err := os.Stat(pathToFile) 130 | if err != nil { 131 | panic(fmt.Sprintf("Error stat'ing file: %s\n%s\n", pathToFile, err.Error())) 132 | } 133 | 134 | err = os.WriteFile(pathToFile, buffer.Bytes(), fileInfo.Mode()) 135 | if err != nil { 136 | panic(fmt.Sprintf("Error writing file: %s\n%s\n", pathToFile, err.Error())) 137 | } 138 | } 139 | } 140 | } 141 | } 142 | } 143 | 144 | func renameImportUsages(f *ast.File, old, new string) { 145 | // use this to avoid renaming the package declaration, eg: 146 | // given: package foo; import foo "bar"; foo.Baz, rename foo->qux 147 | // yield: package foo; import qux "bar"; qux.Baz 148 | var pkg *ast.Ident 149 | 150 | // Rename top-level old to new, both unresolved names 151 | // (probably defined in another file) and names that resolve 152 | // to a declaration we renamed. 153 | ast.Inspect(f, func(node ast.Node) bool { 154 | if node == nil { 155 | return false 156 | } 157 | switch id := node.(type) { 158 | case *ast.File: 159 | pkg = id.Name 160 | case *ast.Ident: 161 | if pkg != nil && id == pkg { 162 | return false 163 | } 164 | if id.Name == old { 165 | id.Name = new 166 | } 167 | } 168 | return true 169 | }) 170 | } 171 | 172 | func (a *analyzer) filterFiles(fs map[string]*ast.File) []*ast.File { 173 | var files []*ast.File 174 | for _, f := range fs { 175 | files = append(files, f) 176 | } 177 | return files 178 | } 179 | 180 | type collector struct { 181 | dirs []string 182 | regex *regexp.Regexp 183 | } 184 | 185 | // handlePath walks the filesystem recursively, collecting directories, 186 | // ignoring some unneeded directories (hidden/vendored) that are handled 187 | // specially later. 
188 | func (c *collector) handlePath(path string, info os.FileInfo, err error) error { 189 | if err != nil { 190 | return err 191 | } 192 | if info.IsDir() { 193 | // Ignore hidden directories (.git, .cache, etc) 194 | if len(path) > 1 && path[0] == '.' || 195 | // Staging code is symlinked from vendor/k8s.io, and uses import 196 | // paths as if it were inside of vendor/. It fails typechecking 197 | // inside of staging/, but works when typechecked as part of vendor/. 198 | path == "staging" || 199 | // OS-specific vendor code tends to be imported by OS-specific 200 | // packages. We recursively typecheck imported vendored packages for 201 | // each OS, but don't typecheck everything for every OS. 202 | path == "vendor" || 203 | path == "_output" || 204 | // This is a weird one. /testdata/ is *mostly* ignored by Go, 205 | // and this translates to kubernetes/vendor not working. 206 | // edit/record.go doesn't compile without gopkg.in/yaml.v2 207 | // in $GOSRC/$GOROOT (both typecheck and the shell script). 208 | path == "pkg/kubectl/cmd/testdata/edit" { 209 | return filepath.SkipDir 210 | } 211 | if c.regex.MatchString(path) { 212 | c.dirs = append(c.dirs, path) 213 | } 214 | } 215 | return nil 216 | } 217 | 218 | func main() { 219 | flag.Parse() 220 | args := flag.Args() 221 | 222 | if len(args) == 0 { 223 | args = append(args, ".") 224 | } 225 | 226 | regex, err := regexp.Compile(*regex) 227 | if err != nil { 228 | log.Fatalf("Error compiling regex: %v", err) 229 | } 230 | c := collector{regex: regex} 231 | for _, arg := range args { 232 | err := filepath.Walk(arg, c.handlePath) 233 | if err != nil { 234 | log.Fatalf("Error walking: %v", err) 235 | } 236 | } 237 | sort.Strings(c.dirs) 238 | 239 | if len(*importAliases) > 0 { 240 | bytes, err := os.ReadFile(*importAliases) 241 | if err != nil { 242 | log.Fatalf("Error reading import aliases: %v", err) 243 | } 244 | err = json.Unmarshal(bytes, &aliases) 245 | if err != nil { 246 | log.Fatalf("Error loading aliases: %v", err) 247 | } 248 | } 249 | if isTerminal { 250 | logPrefix = "\r" // clear status bar when printing 251 | } 252 | fmt.Println("checking-imports: ") 253 | 254 | a := newAnalyzer() 255 | for _, dir := range c.dirs { 256 | if isTerminal { 257 | fmt.Printf("\r\033[0m %-80s\n", dir) 258 | } 259 | a.collect(dir) 260 | } 261 | fmt.Println() 262 | if a.failed { 263 | os.Exit(1) 264 | } 265 | } 266 | -------------------------------------------------------------------------------- /pkg/simulator/capacityestimation/report.go: -------------------------------------------------------------------------------- 1 | package capacityestimation 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "strings" 7 | "time" 8 | 9 | "github.com/jedib0t/go-pretty/v6/table" 10 | corev1 "k8s.io/api/core/v1" 11 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 12 | "k8s.io/apimachinery/pkg/labels" 13 | "k8s.io/kubernetes/pkg/scheduler/framework" 14 | 15 | "github.com/k-cloud-labs/kluster-capacity/pkg" 16 | "github.com/k-cloud-labs/kluster-capacity/pkg/utils" 17 | ) 18 | 19 | type CapacityEstimationReview struct { 20 | metav1.TypeMeta 21 | Spec CapacityEstimationReviewSpec `json:"spec"` 22 | Status CapacityEstimationReviewStatus `json:"status"` 23 | } 24 | 25 | type CapacityEstimationReviews []*CapacityEstimationReview 26 | 27 | type CapacityEstimationReviewSpec struct { 28 | // the pod desired for scheduling 29 | Templates []corev1.Pod `json:"templates"` 30 | PodRequirements []*Requirements `json:"podRequirements"` 31 | } 32 | 33 | type 
CapacityEstimationReviewStatus struct { 34 | CreationTimestamp time.Time `json:"creationTimestamp"` 35 | // actual number of replicas that could schedule 36 | Replicas int32 `json:"replicas"` 37 | StopReason *CapacityEstimationReviewScheduleStopReason `json:"stopReason"` 38 | // per node information about the scheduling simulation 39 | Pods []*CapacityEstimationReviewResult `json:"pods"` 40 | } 41 | 42 | type CapacityEstimationReviewResult struct { 43 | PodName string `json:"podName"` 44 | // numbers of replicas on nodes 45 | ReplicasOnNodes []*ReplicasOnNode `json:"replicasOnNodes"` 46 | // reason why no more pods could schedule (if any on this node) 47 | Summary []StopReasonSummary `json:"summary"` 48 | } 49 | 50 | type ReplicasOnNode struct { 51 | NodeName string `json:"nodeName"` 52 | Replicas int `json:"replicas"` 53 | } 54 | 55 | type StopReasonSummary struct { 56 | Reason string `json:"reason"` 57 | Count int `json:"count"` 58 | } 59 | 60 | type Resources struct { 61 | PrimaryResources corev1.ResourceList `json:"primaryResources"` 62 | ScalarResources map[corev1.ResourceName]int64 `json:"scalarResources"` 63 | } 64 | 65 | type Requirements struct { 66 | PodName string `json:"podName"` 67 | Resources *framework.Resource `json:"resources"` 68 | NodeSelectors map[string]string `json:"nodeSelectors"` 69 | } 70 | 71 | type CapacityEstimationReviewScheduleStopReason struct { 72 | StopType string `json:"stopType"` 73 | StopMessage string `json:"stopMessage"` 74 | } 75 | 76 | func (r *CapacityEstimationReview) Print(verbose bool, format string) error { 77 | switch format { 78 | case "json": 79 | return utils.PrintJson(r) 80 | case "yaml": 81 | return utils.PrintYaml(r) 82 | case "": 83 | capacityEstimationReviewPrettyPrint(r, verbose) 84 | return nil 85 | default: 86 | return fmt.Errorf("output format %q not recognized", format) 87 | } 88 | } 89 | 90 | func (r CapacityEstimationReviews) Print(verbose bool, format string) error { 91 | t := table.NewWriter() 92 | t.AppendHeader(table.Row{"spec", "replicas"}) 93 | for i, review := range r { 94 | if i > 0 && (format != "" || verbose) { 95 | fmt.Println("---------------------------------------------------------------") 96 | } 97 | switch format { 98 | case "json": 99 | err := utils.PrintJson(review) 100 | if err != nil { 101 | return err 102 | } 103 | case "yaml": 104 | err := utils.PrintYaml(review) 105 | if err != nil { 106 | return err 107 | } 108 | case "": 109 | if verbose { 110 | capacityEstimationReviewPrettyPrint(review, verbose) 111 | } else { 112 | output, err := json.Marshal(review.Spec.PodRequirements[0]) 113 | if err != nil { 114 | return err 115 | } 116 | t.AppendRow(table.Row{string(output), review.Status.Replicas}) 117 | } 118 | default: 119 | return fmt.Errorf("output format %q not recognized", format) 120 | } 121 | } 122 | 123 | if format == "" && !verbose { 124 | fmt.Println(t.Render()) 125 | } 126 | 127 | return nil 128 | } 129 | 130 | func generateReport(pods []*corev1.Pod, status *pkg.Status) *CapacityEstimationReview { 131 | return &CapacityEstimationReview{ 132 | Spec: getReviewSpec(pods), 133 | Status: getReviewStatus(pods, status), 134 | } 135 | } 136 | 137 | func getMainStopReason(message string) *CapacityEstimationReviewScheduleStopReason { 138 | slicedMessage := strings.Split(message, "\n") 139 | colon := strings.Index(slicedMessage[0], ":") 140 | 141 | reason := &CapacityEstimationReviewScheduleStopReason{ 142 | StopType: slicedMessage[0][:colon], 143 | StopMessage: strings.Trim(slicedMessage[0][colon+1:], " "), 144 
| } 145 | return reason 146 | } 147 | 148 | func parsePodsReview(templatePods []*corev1.Pod, status *pkg.Status) []*CapacityEstimationReviewResult { 149 | templatesCount := len(templatePods) 150 | result := make([]*CapacityEstimationReviewResult, 0) 151 | 152 | for i := 0; i < templatesCount; i++ { 153 | result = append(result, &CapacityEstimationReviewResult{ 154 | ReplicasOnNodes: make([]*ReplicasOnNode, 0), 155 | PodName: templatePods[i].Name, 156 | }) 157 | } 158 | 159 | for i, pod := range status.PodsForEstimation { 160 | nodeName := pod.Spec.NodeName 161 | first := true 162 | for _, sum := range result[i%templatesCount].ReplicasOnNodes { 163 | if sum.NodeName == nodeName { 164 | sum.Replicas++ 165 | first = false 166 | } 167 | } 168 | if first { 169 | result[i%templatesCount].ReplicasOnNodes = append(result[i%templatesCount].ReplicasOnNodes, &ReplicasOnNode{ 170 | NodeName: nodeName, 171 | Replicas: 1, 172 | }) 173 | } 174 | } 175 | 176 | slicedMessage := strings.Split(status.StopReason, "\n") 177 | if len(slicedMessage) == 1 { 178 | return result 179 | } 180 | 181 | return result 182 | } 183 | 184 | func getReviewSpec(podTemplates []*corev1.Pod) CapacityEstimationReviewSpec { 185 | podCopies := make([]corev1.Pod, len(podTemplates)) 186 | deepCopyPods(podTemplates, podCopies) 187 | return CapacityEstimationReviewSpec{ 188 | Templates: podCopies, 189 | PodRequirements: getPodsRequirements(podTemplates), 190 | } 191 | } 192 | 193 | func getReviewStatus(pods []*corev1.Pod, status *pkg.Status) CapacityEstimationReviewStatus { 194 | return CapacityEstimationReviewStatus{ 195 | CreationTimestamp: time.Now(), 196 | Replicas: int32(len(status.PodsForEstimation)), 197 | StopReason: getMainStopReason(status.StopReason), 198 | Pods: parsePodsReview(pods, status), 199 | } 200 | } 201 | 202 | func deepCopyPods(in []*corev1.Pod, out []corev1.Pod) { 203 | for i, pod := range in { 204 | out[i] = *pod.DeepCopy() 205 | } 206 | } 207 | 208 | func getPodsRequirements(pods []*corev1.Pod) []*Requirements { 209 | result := make([]*Requirements, 0) 210 | for _, pod := range pods { 211 | podRequirements := &Requirements{ 212 | PodName: pod.Name, 213 | Resources: utils.ComputePodResourceRequest(pod), 214 | NodeSelectors: pod.Spec.NodeSelector, 215 | } 216 | result = append(result, podRequirements) 217 | } 218 | return result 219 | } 220 | 221 | func instancesSum(replicasOnNodes []*ReplicasOnNode) int { 222 | result := 0 223 | for _, v := range replicasOnNodes { 224 | result += v.Replicas 225 | } 226 | return result 227 | } 228 | 229 | func capacityEstimationReviewPrettyPrint(r *CapacityEstimationReview, verbose bool) { 230 | if verbose { 231 | for _, req := range r.Spec.PodRequirements { 232 | fmt.Printf("%v pod requirements:\n", req.PodName) 233 | fmt.Printf("\t- CPU(m): %v\n", req.Resources.MilliCPU) 234 | fmt.Printf("\t- Memory(B): %v\n", req.Resources.Memory) 235 | if req.Resources.ScalarResources != nil { 236 | fmt.Printf("\t- ScalarResources: %v\n", req.Resources.ScalarResources) 237 | } 238 | 239 | if req.NodeSelectors != nil { 240 | fmt.Printf("\t- NodeSelector: %v\n", labels.SelectorFromSet(req.NodeSelectors).String()) 241 | } 242 | fmt.Printf("\n") 243 | } 244 | } 245 | 246 | for _, pod := range r.Status.Pods { 247 | if verbose { 248 | fmt.Printf("The cluster can schedule %v instance(s) of the pod %v.\n", instancesSum(pod.ReplicasOnNodes), pod.PodName) 249 | } else { 250 | fmt.Printf("%v\n", instancesSum(pod.ReplicasOnNodes)) 251 | } 252 | } 253 | 254 | if verbose { 255 | fmt.Printf("\nTermination 
reason: %v: %v\n", r.Status.StopReason.StopType, r.Status.StopReason.StopMessage)
256 | }
257 | 
258 | if verbose && r.Status.Replicas > 0 {
259 | for _, pod := range r.Status.Pods {
260 | if pod.Summary != nil {
261 | fmt.Printf("fit failure summary on nodes: ")
262 | for _, fs := range pod.Summary {
263 | fmt.Printf("%v (%v), ", fs.Reason, fs.Count)
264 | }
265 | fmt.Printf("\n")
266 | }
267 | }
268 | fmt.Printf("\nPod distribution among nodes:\n")
269 | for _, pod := range r.Status.Pods {
270 | fmt.Printf("%v\n", pod.PodName)
271 | for _, ron := range pod.ReplicasOnNodes {
272 | fmt.Printf("\t- %v: %v instance(s)\n", ron.NodeName, ron.Replicas)
273 | }
274 | }
275 | }
276 | }
277 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # kluster-capacity
2 | [[中文](./README-ZH.md)]
3 | 
4 | ![kluster-capacity-logo](docs/images/capacity-management-capacity-icon.jpeg)
5 | 
6 | [![Build Status](https://github.com/k-cloud-labs/kluster-capacity/actions/workflows/ci.yml/badge.svg)](https://github.com/k-cloud-labs/kluster-capacity/actions?query=workflow%3Abuild)
7 | [![Go Report Card](https://goreportcard.com/badge/github.com/k-cloud-labs/kluster-capacity)](https://goreportcard.com/report/github.com/k-cloud-labs/kluster-capacity)
8 | [![Go doc](https://img.shields.io/badge/go.dev-reference-brightgreen?logo=go&logoColor=white&style=flat)](https://pkg.go.dev/github.com/k-cloud-labs/kluster-capacity)
9 | 
10 | 
11 | Cluster capacity tool supports capacity estimation, scheduler simulation, and cluster compression.
12 | This repository was inspired by https://github.com/kubernetes-sigs/cluster-capacity.
13 | 
14 | ## Install
15 | Go binaries are automatically built with each release by [GoReleaser](https://github.com/goreleaser/goreleaser). They can be downloaded from this project's GitHub [releases page](https://github.com/k-cloud-labs/kluster-capacity/releases).
16 | 
17 | ### Homebrew
18 | This project can be installed with [Homebrew](https://brew.sh/):
19 | ```
20 | brew tap k-cloud-labs/tap
21 | brew install k-cloud-labs/tap/kluster-capacity
22 | ```
23 | 
24 | ### Krew
25 | This project can be installed with [Krew](https://github.com/GoogleContainerTools/krew):
26 | ```
27 | kubectl krew install kluster-capacity
28 | ```
29 | 
30 | ### From Source Code
31 | Build the framework:
32 | 
33 | ```sh
34 | $ cd $GOPATH/src/github.com/k-cloud-labs/
35 | $ git clone https://github.com/k-cloud-labs/kluster-capacity
36 | $ cd kluster-capacity
37 | $ make build
38 | ```
39 | 
40 | There are three available sub-commands: ce, cc, and ss, which represent capacity estimation, cluster compression, and scheduler simulation, respectively.
41 | 
42 | ## Capacity Estimation
43 | ### Intro
44 | As new pods get scheduled on nodes in a cluster, more resources get consumed. Monitoring the available resources in the cluster is very important, as operators can then increase the current resources in time before all of them are exhausted, or take other steps that increase the available resources.
45 | 
46 | Cluster capacity consists of the capacities of the individual cluster nodes. Capacity covers CPU, memory, disk space, and other resources.
47 | 
48 | Overall remaining allocatable capacity is an estimate. The goal is to analyze the remaining allocatable resources and estimate the available capacity that can still be consumed, in terms of the number of pod instances with given requirements that can still be scheduled in the cluster.
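For intuition, here is a deliberately naive sketch of the arithmetic behind such an estimate. It is illustrative only and is not how kluster-capacity computes the number: the tool runs a real kube-scheduler against a fake client, so taints, affinity, ports, volumes, and other predicates are all honored. The node and pod figures below are made up.

```go
package main

import "fmt"

// resources tracks only CPU (milli-cores) and memory (bytes) for brevity.
type resources struct {
	milliCPU int64
	memory   int64
}

// replicasThatFit greedily counts how many identical pod replicas fit into
// the free allocatable space of each node, limited by the scarcest resource.
func replicasThatFit(freePerNode []resources, req resources) int {
	total := 0
	for _, free := range freePerNode {
		byCPU := free.milliCPU / req.milliCPU
		byMem := free.memory / req.memory
		if byCPU < byMem {
			total += int(byCPU)
		} else {
			total += int(byMem)
		}
	}
	return total
}

func main() {
	// Four nodes, each with 1500m CPU and 3GiB of memory still free.
	free := make([]resources, 4)
	for i := range free {
		free[i] = resources{milliCPU: 1500, memory: 3 << 30}
	}
	// Pod template requesting 150m of CPU and 100Mi of memory.
	req := resources{milliCPU: 150, memory: 100 << 20}
	fmt.Println(replicasThatFit(free, req)) // prints 40: CPU is the scarce resource here
}
```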
49 | 
50 | ### Enhancement
51 | Here are some enhancements to the cluster capacity mentioned above.
52 | - Support using an existing pod from the cluster directly as the pod template.
53 | - Support batch simulation for different pod templates.
54 | 
55 | ### Run
56 | run the analysis:
57 | 
58 | ```sh
59 | # use a specified pod yaml file as the pod template
60 | $ ./kluster-capacity ce --pods-from-template
61 | # use an existing pod from the cluster as the pod template
62 | $ ./kluster-capacity ce --pods-from-cluster
63 | ```
64 | For more information about available options run:
65 | 
66 | ```sh
67 | $ ./kluster-capacity ce --help
68 | ```
69 | 
70 | ### Demonstration
71 | 
72 | Assume a cluster running with 1 master and 4 nodes, each node with 2 CPUs and 4GB of memory,
73 | and a pod template requesting `150m` of CPU and `100Mi` of memory.
74 | 
75 | ```sh
76 | $ ./kluster-capacity ce --pods-from-template --verbose
77 | Pod requirements:
78 | - cpu: 150m
79 | - memory: 100Mi
80 | 
81 | The cluster can schedule 52 instance(s) of the pod.
82 | Termination reason: FailedScheduling: pod (small-pod-52) failed to fit in any node
83 | fit failure on node (kube-node-1): Insufficient cpu
84 | fit failure on node (kube-node-4): Insufficient cpu
85 | fit failure on node (kube-node-2): Insufficient cpu
86 | fit failure on node (kube-node-3): Insufficient cpu
87 | 
88 | 
89 | Pod distribution among nodes:
90 | - kube-node-1: 13 instance(s)
91 | - kube-node-4: 13 instance(s)
92 | - kube-node-2: 13 instance(s)
93 | - kube-node-3: 13 instance(s)
94 | ```
95 | 
96 | Once the number of running pods in the cluster grows and the analysis is run again,
97 | the number of schedulable pods decreases accordingly:
98 | 
99 | ```sh
100 | $ ./kluster-capacity ce --pods-from-template --verbose
101 | Pod requirements:
102 | - cpu: 150m
103 | - memory: 100Mi
104 | 
105 | The cluster can schedule 46 instance(s) of the pod.
106 | Termination reason: FailedScheduling: pod (small-pod-46) failed to fit in any node
107 | fit failure on node (kube-node-1): Insufficient cpu
108 | fit failure on node (kube-node-4): Insufficient cpu
109 | fit failure on node (kube-node-2): Insufficient cpu
110 | fit failure on node (kube-node-3): Insufficient cpu
111 | 
112 | 
113 | Pod distribution among nodes:
114 | - kube-node-1: 11 instance(s)
115 | - kube-node-4: 12 instance(s)
116 | - kube-node-2: 11 instance(s)
117 | - kube-node-3: 12 instance(s)
118 | ```
119 | 
120 | ### Output format
121 | The `ce` command has a flag `--output` (`-o`) to format its output as json or yaml.
122 | 
123 | ```sh
124 | $ ./kluster-capacity ce --pods-from-template -o json|yaml
125 | ```
126 | 
127 | The json or yaml output is not versioned and is not guaranteed to be stable across releases.
128 | 
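Since the JSON output mirrors the exported `CapacityEstimationReview` type defined in `pkg/simulator/capacityestimation/report.go`, it can be consumed programmatically. Below is a minimal, unofficial sketch: it mirrors only the fields it needs (with the json tags from that file) rather than importing the package, precisely because the output shape is not guaranteed to be stable. The `report.json` filename is an assumption.

```go
package main

import (
	"encoding/json"
	"fmt"
	"os"
)

// review mirrors just the fields this sketch needs from the
// CapacityEstimationReview type in pkg/simulator/capacityestimation.
type review struct {
	Status struct {
		Replicas   int32 `json:"replicas"`
		StopReason struct {
			StopType    string `json:"stopType"`
			StopMessage string `json:"stopMessage"`
		} `json:"stopReason"`
	} `json:"status"`
}

func main() {
	// e.g. produced by: ./kluster-capacity ce --pods-from-template -o json > report.json
	data, err := os.ReadFile("report.json")
	if err != nil {
		panic(err)
	}
	var r review
	if err := json.Unmarshal(data, &r); err != nil {
		panic(err)
	}
	fmt.Printf("schedulable replicas: %d (stopped by %s: %s)\n",
		r.Status.Replicas, r.Status.StopReason.StopType, r.Status.StopReason.StopMessage)
}
```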
129 | ## Scheduler Simulation
130 | ### Intro
131 | The scheduler simulation takes all nodes, pods, and other related resources in the current cluster as input and simulates the process of starting from a cluster with no pods and then creating and scheduling all of them. This can be used to calculate the cluster compression ratio, to evaluate the effectiveness of the scheduling, or to measure the quality of the scheduling algorithm.
132 | 
133 | Compared to cluster compression, its results are more extreme and idealized.
134 | 
135 | ### Run
136 | run the analysis:
137 | 
138 | ```shell
139 | ./kluster-capacity ss
140 | ```
141 | For more information about available options run:
142 | 
143 | ```sh
144 | $ ./kluster-capacity ss --help
145 | ```
146 | It supports two termination conditions: `AllSucceed` and `AllScheduled`. The former means the program ends when all pods are successfully scheduled, while the latter means it exits after all pods have been scheduled at least once. The default is `AllSucceed`. The exit condition can be set using the `--exit-condition` flag.
147 | 
148 | ### Demonstration
149 | 
150 | Assume a cluster running with 1 master and 4 nodes, each node with 2 CPUs and 4GB of memory,
151 | and 40 pods to schedule, each requesting `100m` of CPU and `200Mi` of memory.
152 | 
153 | If the scheduler uses the `LeastAllocated` strategy, the scheduling result may be as follows:
154 | 
155 | ```sh
156 | $ ./kluster-capacity ss --verbose
157 | Termination reason: AllSucceed: 40 pod(s) have been scheduled successfully.
158 | 
159 | Pod distribution among nodes:
160 | - kube-node-1: 10 instance(s)
161 | - kube-node-2: 10 instance(s)
162 | - kube-node-3: 10 instance(s)
163 | - kube-node-4: 10 instance(s)
164 | ```
165 | 
166 | If the scheduler uses the `MostAllocated` strategy instead, the scheduling result may be as follows:
167 | 
168 | ```sh
169 | $ ./kluster-capacity ss --verbose
170 | Termination reason: AllSucceed: 40 pod(s) have been scheduled successfully.
171 | 
172 | Pod distribution among nodes:
173 | - kube-node-1: 20 instance(s)
174 | - kube-node-2: 20 instance(s)
175 | ```
176 | 
177 | The scheduling result above can be analyzed to evaluate the effectiveness of the scheduling strategy and the cluster capacity compression ratio. For example, the above result represents a cluster compression ratio of 2, which means that there is 50% resource waste in an ideal situation.
178 | 
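The arithmetic behind that claim can be made explicit. The following helper is illustrative only and is not part of the tool:

```go
package main

import "fmt"

// compressionRatio compares the nodes a cluster has with the nodes the
// simulation actually needed; the idle fraction is the implied waste.
func compressionRatio(totalNodes, neededNodes int) (ratio, waste float64) {
	ratio = float64(totalNodes) / float64(neededNodes)
	waste = 1 - 1/ratio
	return
}

func main() {
	// MostAllocated packed the 40 pods onto 2 of the 4 nodes.
	ratio, waste := compressionRatio(4, 2)
	fmt.Printf("compression ratio: %.1f, resource waste: %.0f%%\n", ratio, waste*100)
}
```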
179 | 
180 | ## Cluster Compression
181 | ### Intro
182 | Cluster compression takes the current state of the cluster, including all nodes, pods, and other relevant resources, as input, and simulates the process of compressing the cluster by removing nodes. It can be used to calculate the compression ratio of the cluster, which is a measure of how efficiently the resources are being utilized.
183 | 
184 | Compared to scheduler simulation, the results of cluster compression are generally more realistic.
185 | 
186 | ### Run
187 | run the analysis:
188 | 
189 | ```shell
190 | ./kluster-capacity cc --verbose
191 | ```
192 | For more information about available options run:
193 | 
194 | ```sh
195 | $ ./kluster-capacity cc --help
196 | ```
197 | 
198 | ### Demonstration
199 | 
200 | Assume a cluster running with 1 master and 4 nodes, each node with 2 CPUs and 4GB of memory,
201 | and 40 pods bound to the 4 nodes, each requesting `100m` of CPU and `200Mi` of memory.
202 | 
203 | ```shell
204 | ./kluster-capacity cc --verbose
205 | 2 node(s) in the cluster can be scaled down.
206 | 
207 | Termination reason: FailedSelectNode: could not find a node that satisfies the condition, 1 master node(s); 2 node(s) can't be scale down because of insufficient resource in other nodes;
208 | 
209 | nodes selected to be scaled down:
210 | - kube-node-1
211 | - kube-node-3
212 | ```
213 | 
214 | The above result indicates that, given the resource requirements of the 40 pods, the cluster can remove 2 nodes while still guaranteeing that all pods can be scheduled, resulting in a compression ratio of 2, which means there is 50% resource waste.
215 | 
216 | ## Feature
217 | - [x] cluster compression
218 | - [x] capacity estimation
219 | - [x] scheduler simulation
220 | - [ ] snapshot based simulation
221 | - [ ] fragmentation rate analysis
222 | 
223 | Enjoy it, and feel free to share your opinions. Thanks!
-------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
1 | 
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 | 
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 | 
8 | 1. Definitions.
9 | 
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 | 
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 | 
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 | 
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 | 
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 | 
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 | 
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 | 
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | 
-------------------------------------------------------------------------------- /pkg/simulator/clustercompression/simulator.go: --------------------------------------------------------------------------------
1 | package clustercompression
2 | 
3 | import (
4 | "context"
5 | "errors"
6 | "fmt"
7 | 
8 | corev1 "k8s.io/api/core/v1"
9 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10 | "k8s.io/client-go/informers"
11 | clientset "k8s.io/client-go/kubernetes"
12 | "k8s.io/client-go/tools/cache"
13 | "k8s.io/klog/v2"
14 | 
15 | "github.com/k-cloud-labs/kluster-capacity/app/cmds/clustercompression/options"
16 | "github.com/k-cloud-labs/kluster-capacity/pkg"
17 | pkgframework "github.com/k-cloud-labs/kluster-capacity/pkg/framework"
18 | "github.com/k-cloud-labs/kluster-capacity/pkg/utils"
19 | )
20 | 
21 | const FailedSelectNode = "FailedSelectNode: could not find a node that satisfies the condition"
22 | 
23 | // only one scheduler is supported for now; its name is pkg.SchedulerName ("simulator-scheduler")
24 | type simulator struct {
25 | pkg.Framework
26 | 
27 | maxSimulated int
28 | simulated int
29 | fakeClient clientset.Interface
30 | createdPods []*corev1.Pod
31 | createPodIndex int
32 | currentNode string
33 | currentNodeUnschedulable bool
34 | bindSuccessPodCount int
35 | nodeFilter NodeFilter
36 | }
37 | 
38 | // NewCCSimulatorExecutor creates a cc simulator which is completely independent of the apiserver, so there is no need
39 | // for a kubeconfig nor for an apiserver url
40 | func NewCCSimulatorExecutor(conf *options.ClusterCompressionConfig) (pkg.Simulator, error) {
41 | cc, err := utils.BuildKubeSchedulerCompletedConfig(conf.Options.SchedulerConfig, conf.Options.KubeConfig)
42 | if err != nil {
43 | return nil, err
44 | }
45 | 
46 | kubeConfig, err := utils.BuildRestConfig(conf.Options.KubeConfig)
47 | if err != nil {
48 | return nil, err
49 | }
50 | 
51 | s := &simulator{
52 | simulated: 0,
53 | bindSuccessPodCount: 0,
54 | createPodIndex: 0,
55 | maxSimulated: conf.Options.MaxLimit,
56 | }
57 | 
58 | // add your custom event handlers
59 | err = s.addEventHandlers(cc.InformerFactory)
60 | if err != nil {
61 | return nil, err
62 | }
63 | 
64 | framework, err := pkgframework.NewKubeSchedulerFramework(cc, kubeConfig,
65 | pkgframework.WithExcludeNodes(conf.Options.ExcludeNodes),
66 | pkgframework.WithPostBindHook(s.postBindHook),
67 | )
68 | if err != nil {
69 | return nil, err
70 | }
71 | 
72 | s.Framework = framework
73 | s.fakeClient = cc.Client
74 | nodeFilter, err := NewNodeFilter(s.fakeClient, s.GetPodsByNode, conf.Options.ExcludeNodes, conf.Options.FilterNodeOptions)
75 | if err != nil {
76 | return nil, err
77 | }
78 | 
79 | s.nodeFilter = nodeFilter
80 | return s, nil
81 | }
82 | 
83 | func (s *simulator) Run() error {
84 | return s.Framework.Run(s.selectNextNode)
85 | }
86 | 
87 | func (s *simulator) Report() pkg.Printer {
88 | klog.V(2).Infof("the following nodes can be offline to save resources: %v", s.Status().NodesToScaleDown)
89 | klog.V(2).Infof("the clusterCompression StopReason: %s", s.Status().StopReason)
90 | return generateReport(s.Status())
91 | }
92 | 
93 | func (s *simulator) postBindHook(bindPod *corev1.Pod) error {
94 | 
95 | if s.maxSimulated > 0 && s.simulated >= s.maxSimulated {
96 | return s.Stop(fmt.Sprintf("LimitReached: maximum number of nodes simulated: %v", s.maxSimulated))
97 | }
98 | 
99 | s.bindSuccessPodCount++
100 | if len(s.createdPods) > 0 && s.createPodIndex < len(s.createdPods) {
101 | klog.V(2).Infof("create %d pod: %s", s.createPodIndex,
s.createdPods[s.createPodIndex].Namespace+"/"+s.createdPods[s.createPodIndex].Name) 102 | _, err := s.fakeClient.CoreV1().Pods(s.createdPods[s.createPodIndex].Namespace).Create(context.TODO(), utils.InitPod(s.createdPods[s.createPodIndex]), metav1.CreateOptions{}) 103 | if err != nil { 104 | return err 105 | } 106 | s.createPodIndex++ 107 | } else if s.bindSuccessPodCount == len(s.createdPods) { 108 | klog.V(2).Infof("add node %s to simulator status", s.currentNode) 109 | s.UpdateNodesToScaleDown(s.currentNode) 110 | 111 | err := s.addLabelToNode(s.currentNode, NodeScaledDownSuccessLabel, "true") 112 | if err != nil { 113 | _ = s.Stop("FailedAddLabelToNode: " + err.Error()) 114 | } 115 | 116 | s.simulated++ 117 | s.nodeFilter.Done() 118 | 119 | err = s.selectNextNode() 120 | if err != nil { 121 | return s.Stop(fmt.Sprintf("%s, %s", FailedSelectNode, err.Error())) 122 | } 123 | } 124 | 125 | return nil 126 | } 127 | 128 | func (s *simulator) selectNextNode() error { 129 | s.Status().SelectNodeCountInc() 130 | status := s.nodeFilter.SelectNode() 131 | if status != nil && status.Node == nil { 132 | return errors.New(status.ErrReason) 133 | } 134 | node := status.Node 135 | klog.V(2).Infof("select node %s to simulate\n", node.Name) 136 | 137 | s.createdPods = nil 138 | s.bindSuccessPodCount = 0 139 | s.createPodIndex = 0 140 | s.currentNode = node.Name 141 | s.currentNodeUnschedulable = node.Spec.Unschedulable 142 | 143 | err := s.cordon(node) 144 | if err != nil { 145 | return err 146 | } 147 | 148 | err = s.deletePodsByNode(node) 149 | if err != nil { 150 | return err 151 | } 152 | klog.V(2).Infof("node %s needs to create %d pods\n", node.Name, len(s.createdPods)) 153 | 154 | if len(s.createdPods) > 0 { 155 | _, err = s.fakeClient.CoreV1().Pods(s.createdPods[s.createPodIndex].Namespace).Create(context.TODO(), utils.InitPod(s.createdPods[s.createPodIndex]), metav1.CreateOptions{}) 156 | klog.V(2).Infof("create %d pod: %s", s.createPodIndex, s.createdPods[s.createPodIndex].Namespace+"/"+s.createdPods[s.createPodIndex].Name) 157 | if err != nil { 158 | return err 159 | } 160 | s.createPodIndex++ 161 | } else { 162 | klog.V(2).Infof("add node %s to simulator status", s.currentNode) 163 | s.UpdateNodesToScaleDown(s.currentNode) 164 | 165 | err := s.addLabelToNode(s.currentNode, NodeScaledDownSuccessLabel, "true") 166 | if err != nil { 167 | _ = s.Stop("FailedAddLabelToNode: " + err.Error()) 168 | } 169 | 170 | s.simulated++ 171 | s.nodeFilter.Done() 172 | return s.selectNextNode() 173 | } 174 | 175 | return nil 176 | } 177 | 178 | func (s *simulator) cordon(node *corev1.Node) error { 179 | node, err := s.fakeClient.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) 180 | if err != nil { 181 | return err 182 | } 183 | 184 | copy := node.DeepCopy() 185 | 186 | taints := []corev1.Taint{} 187 | unScheduleTaint := corev1.Taint{ 188 | Key: corev1.TaintNodeUnschedulable, 189 | Effect: corev1.TaintEffectNoSchedule, 190 | } 191 | taints = append(taints, unScheduleTaint) 192 | 193 | for i := range copy.Spec.Taints { 194 | if copy.Spec.Taints[i].Key != corev1.TaintNodeUnschedulable { 195 | taints = append(taints, copy.Spec.Taints[i]) 196 | } 197 | } 198 | copy.Spec.Taints = taints 199 | 200 | _, err = s.fakeClient.CoreV1().Nodes().Update(context.TODO(), copy, metav1.UpdateOptions{}) 201 | if err != nil { 202 | return err 203 | } 204 | klog.V(2).Infof("cordon node %s successfully\n", node.Name) 205 | return nil 206 | } 207 | 208 | func (s *simulator) unCordon(nodeName string) error { 209 | 
node, err := s.fakeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) 210 | if err != nil { 211 | return err 212 | } 213 | 214 | copy := node.DeepCopy() 215 | 216 | taints := []corev1.Taint{} 217 | for i := range copy.Spec.Taints { 218 | if copy.Spec.Taints[i].Key != corev1.TaintNodeUnschedulable { 219 | taints = append(taints, copy.Spec.Taints[i]) 220 | } 221 | } 222 | copy.Spec.Taints = taints 223 | 224 | _, err = s.fakeClient.CoreV1().Nodes().Update(context.TODO(), copy, metav1.UpdateOptions{}) 225 | if err != nil { 226 | return err 227 | } 228 | klog.V(2).Infof("unCordon node %s successfully\n", nodeName) 229 | return nil 230 | 231 | } 232 | 233 | func (s *simulator) addLabelToNode(nodeName string, labelKey string, labelValue string) error { 234 | node, err := s.fakeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) 235 | if err != nil { 236 | return err 237 | } 238 | 239 | copy := node.DeepCopy() 240 | 241 | copy.Labels[labelKey] = labelValue 242 | _, err = s.fakeClient.CoreV1().Nodes().Update(context.TODO(), copy, metav1.UpdateOptions{}) 243 | if err != nil { 244 | return err 245 | } 246 | klog.V(2).Infof("add node %s failed scale down label successfully\n", node.Name) 247 | return nil 248 | } 249 | 250 | func (s *simulator) updatePodsFromCreatedPods() error { 251 | podList := s.createdPods 252 | for index := s.createPodIndex - 1; index >= 0; index-- { 253 | err := s.fakeClient.CoreV1().Pods(podList[index].Namespace).Delete(context.TODO(), podList[index].Name, metav1.DeleteOptions{}) 254 | if err != nil { 255 | return err 256 | } 257 | klog.V(2).Infof("delete %d pod: %s", index, s.createdPods[index].Namespace+"/"+s.createdPods[index].Name) 258 | } 259 | 260 | for index := range s.createdPods { 261 | _, err := s.fakeClient.CoreV1().Pods(s.createdPods[index].Namespace).Create(context.TODO(), s.createdPods[index], metav1.CreateOptions{}) 262 | if err != nil { 263 | return err 264 | } 265 | klog.V(2).Infof("create %d pod: %s", index, s.createdPods[index].Namespace+"/"+s.createdPods[index].Name) 266 | } 267 | 268 | return nil 269 | } 270 | 271 | func (s *simulator) deletePodsByNode(node *corev1.Node) error { 272 | podList, err := s.getPodsByNode(node) 273 | if err != nil { 274 | return err 275 | } 276 | 277 | var createdPods []*corev1.Pod 278 | for i := range podList { 279 | if !utils.IsDaemonsetPod(podList[i].OwnerReferences) && podList[i].DeletionTimestamp == nil { 280 | createdPods = append(createdPods, podList[i]) 281 | err := s.fakeClient.CoreV1().Pods(podList[i].Namespace).Delete(context.TODO(), podList[i].Name, metav1.DeleteOptions{}) 282 | if err != nil { 283 | return err 284 | } 285 | } 286 | } 287 | 288 | s.createdPods = createdPods 289 | return nil 290 | } 291 | 292 | func (s *simulator) addEventHandlers(informerFactory informers.SharedInformerFactory) (err error) { 293 | 294 | _, _ = informerFactory.Core().V1().Pods().Informer().AddEventHandler( 295 | cache.FilteringResourceEventHandler{ 296 | FilterFunc: func(obj interface{}) bool { 297 | if pod, ok := obj.(*corev1.Pod); ok && pod.Spec.SchedulerName == pkg.SchedulerName && 298 | metav1.HasAnnotation(pod.ObjectMeta, pkg.PodProvisioner) { 299 | return true 300 | } 301 | return false 302 | }, 303 | Handler: cache.ResourceEventHandlerFuncs{ 304 | UpdateFunc: func(oldObj, newObj interface{}) { 305 | if pod, ok := newObj.(*corev1.Pod); ok { 306 | for _, podCondition := range pod.Status.Conditions { 307 | // Only for pending pods provisioned by cc 308 | if podCondition.Type == 
corev1.PodScheduled && podCondition.Status == corev1.ConditionFalse && 309 | podCondition.Reason == corev1.PodReasonUnschedulable { 310 | s.Status().FailedSchedulerCountInc() 311 | // 1. Empty all Pods created by fake before 312 | // 2. Uncordon this node if needed 313 | // 3. Type the flags that cannot be filtered, clear the flags that prohibit scheduling, add failed scale down label, then selectNextNode 314 | klog.V(2).Infof("Failed scheduling pod %s, reason: %s, message: %s\n", pod.Namespace+"/"+pod.Name, podCondition.Reason, podCondition.Message) 315 | err = s.updatePodsFromCreatedPods() 316 | if err != nil { 317 | err = s.Stop("FailedDeletePodsFromCreatedPods: " + err.Error()) 318 | } 319 | 320 | if !s.currentNodeUnschedulable { 321 | err = s.unCordon(s.currentNode) 322 | if err != nil { 323 | err = s.Stop("FailedUnCordon: " + err.Error()) 324 | } 325 | } 326 | 327 | err = s.addLabelToNode(s.currentNode, NodeScaledDownFailedLabel, "true") 328 | if err != nil { 329 | err = s.Stop("FailedAddLabelToNode: " + err.Error()) 330 | } 331 | 332 | err = s.selectNextNode() 333 | if err != nil { 334 | _ = s.Stop(fmt.Sprintf("%s, %s", FailedSelectNode, err.Error())) 335 | } 336 | } 337 | } 338 | } 339 | }, 340 | }, 341 | }, 342 | ) 343 | 344 | return 345 | } 346 | 347 | func (s *simulator) getPodsByNode(node *corev1.Node) ([]*corev1.Pod, error) { 348 | podList, err := s.GetPodsByNode(node.Name) 349 | if err != nil { 350 | return nil, err 351 | } 352 | 353 | klog.V(2).Infof("node %s has %d pods\n", node.Name, len(podList)) 354 | return podList, nil 355 | } 356 | -------------------------------------------------------------------------------- /pkg/framework/kubescheduler.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 k-cloud-labs org 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package framework 18 | 19 | import ( 20 | "context" 21 | "encoding/json" 22 | "errors" 23 | "fmt" 24 | "os" 25 | "sync" 26 | "time" 27 | 28 | appsv1 "k8s.io/api/apps/v1" 29 | corev1 "k8s.io/api/core/v1" 30 | resourcev1alpha1 "k8s.io/api/resource/v1alpha1" 31 | storagev1 "k8s.io/api/storage/v1" 32 | apierrors "k8s.io/apimachinery/pkg/api/errors" 33 | "k8s.io/apimachinery/pkg/api/meta" 34 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 35 | "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" 36 | "k8s.io/apimachinery/pkg/runtime" 37 | "k8s.io/apimachinery/pkg/runtime/schema" 38 | "k8s.io/apimachinery/pkg/util/sets" 39 | utilfeature "k8s.io/apiserver/pkg/util/feature" 40 | "k8s.io/client-go/dynamic" 41 | "k8s.io/client-go/dynamic/dynamicinformer" 42 | "k8s.io/client-go/informers" 43 | coreinformers "k8s.io/client-go/informers/core/v1" 44 | clientset "k8s.io/client-go/kubernetes" 45 | restclient "k8s.io/client-go/rest" 46 | "k8s.io/client-go/testing" 47 | "k8s.io/client-go/tools/cache" 48 | "k8s.io/client-go/tools/events" 49 | "k8s.io/klog/v2" 50 | schedconfig "k8s.io/kubernetes/cmd/kube-scheduler/app/config" 51 | "k8s.io/kubernetes/pkg/api/legacyscheme" 52 | "k8s.io/kubernetes/pkg/features" 53 | "k8s.io/kubernetes/pkg/scheduler" 54 | kubeschedulerconfig "k8s.io/kubernetes/pkg/scheduler/apis/config" 55 | "k8s.io/kubernetes/pkg/scheduler/framework" 56 | "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder" 57 | "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption" 58 | "k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumebinding" 59 | frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime" 60 | "k8s.io/kubernetes/pkg/scheduler/profile" 61 | "sigs.k8s.io/controller-runtime/pkg/client/apiutil" 62 | 63 | "github.com/k-cloud-labs/kluster-capacity/pkg" 64 | "github.com/k-cloud-labs/kluster-capacity/pkg/plugins/generic" 65 | "github.com/k-cloud-labs/kluster-capacity/pkg/utils" 66 | ) 67 | 68 | func init() { 69 | if err := corev1.AddToScheme(legacyscheme.Scheme); err != nil { 70 | fmt.Printf("err: %v\n", err) 71 | } 72 | // add your own scheme here to use dynamic informer factory when you have some custom filter plugins 73 | // which uses other resources than defined in scheduler. 
74 | // for details, refer to k8s.io/kubernetes/pkg/scheduler/eventhandlers.go 75 | } 76 | 77 | var ( 78 | initResources = map[schema.GroupVersionKind]func() runtime.Object{ 79 | corev1.SchemeGroupVersion.WithKind("Namespace"): func() runtime.Object { return &corev1.Namespace{} }, 80 | corev1.SchemeGroupVersion.WithKind("Pod"): func() runtime.Object { return &corev1.Pod{} }, 81 | corev1.SchemeGroupVersion.WithKind("Node"): func() runtime.Object { return &corev1.Node{} }, 82 | corev1.SchemeGroupVersion.WithKind("PersistentVolume"): func() runtime.Object { return &corev1.PersistentVolume{} }, 83 | corev1.SchemeGroupVersion.WithKind("PersistentVolumeClaim"): func() runtime.Object { return &corev1.PersistentVolumeClaim{} }, 84 | corev1.SchemeGroupVersion.WithKind("Service"): func() runtime.Object { return &corev1.Service{} }, 85 | corev1.SchemeGroupVersion.WithKind("ReplicationController"): func() runtime.Object { return &corev1.ReplicationController{} }, 86 | appsv1.SchemeGroupVersion.WithKind("StatefulSet"): func() runtime.Object { return &appsv1.StatefulSet{} }, 87 | appsv1.SchemeGroupVersion.WithKind("ReplicaSet"): func() runtime.Object { return &appsv1.ReplicaSet{} }, 88 | storagev1.SchemeGroupVersion.WithKind("StorageClass"): func() runtime.Object { return &storagev1.StorageClass{} }, 89 | storagev1.SchemeGroupVersion.WithKind("CSINode"): func() runtime.Object { return &storagev1.CSINode{} }, 90 | storagev1.SchemeGroupVersion.WithKind("CSIDriver"): func() runtime.Object { return &storagev1.CSIDriver{} }, 91 | storagev1.SchemeGroupVersion.WithKind("CSIStorageCapacity"): func() runtime.Object { return &storagev1.CSIStorageCapacity{} }, 92 | resourcev1alpha1.SchemeGroupVersion.WithKind("PodScheduling"): func() runtime.Object { 93 | if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) { 94 | return &resourcev1alpha1.PodScheduling{} 95 | } 96 | 97 | return nil 98 | }, 99 | resourcev1alpha1.SchemeGroupVersion.WithKind("ResourceClaim"): func() runtime.Object { 100 | if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) { 101 | return &resourcev1alpha1.ResourceClaim{} 102 | } 103 | 104 | return nil 105 | }, 106 | } 107 | once sync.Once 108 | initObjects []runtime.Object 109 | ) 110 | 111 | type kubeschedulerFramework struct { 112 | // fake clientset used by scheduler 113 | fakeClient clientset.Interface 114 | // fake informer factory used by scheduler 115 | fakeInformerFactory informers.SharedInformerFactory 116 | // TODO: follow kubernetes master branch code 117 | dynInformerFactory dynamicinformer.DynamicSharedInformerFactory 118 | restMapper meta.RESTMapper 119 | // real dynamic client to init the world 120 | dynamicClient *dynamic.DynamicClient 121 | 122 | // scheduler 123 | scheduler *scheduler.Scheduler 124 | excludeNodes sets.Set[string] 125 | withScheduledPods bool 126 | withNodeImages bool 127 | ignorePodsOnExcludesNode bool 128 | // deletionTimestamp is not nil and phase is not succeed or failed 129 | withTerminatingPods bool 130 | outOfTreeRegistry frameworkruntime.Registry 131 | customBind kubeschedulerconfig.PluginSet 132 | customPreBind kubeschedulerconfig.PluginSet 133 | customPostBind kubeschedulerconfig.PluginSet 134 | customEventHandlers []func() 135 | postBindHook func(*corev1.Pod) error 136 | 137 | // for scheduler and informer 138 | informerCh chan struct{} 139 | schedulerCh chan struct{} 140 | 141 | // for simulator 142 | stopCh chan struct{} 143 | stopMux sync.Mutex 144 | stopped bool 145 | 146 | // final status 147 | status 
*pkg.Status 148 | // save status to this file if specified 149 | saveTo string 150 | } 151 | 152 | type Option func(*kubeschedulerFramework) 153 | 154 | func WithExcludeNodes(excludeNodes []string) Option { 155 | return func(s *kubeschedulerFramework) { 156 | s.excludeNodes = sets.New[string](excludeNodes...) 157 | } 158 | } 159 | 160 | func WithOutOfTreeRegistry(registry frameworkruntime.Registry) Option { 161 | return func(s *kubeschedulerFramework) { 162 | s.outOfTreeRegistry = registry 163 | } 164 | } 165 | 166 | func WithCustomBind(plugins kubeschedulerconfig.PluginSet) Option { 167 | return func(s *kubeschedulerFramework) { 168 | s.customBind = plugins 169 | } 170 | } 171 | 172 | func WithCustomPreBind(plugins kubeschedulerconfig.PluginSet) Option { 173 | return func(s *kubeschedulerFramework) { 174 | s.customPreBind = plugins 175 | } 176 | } 177 | 178 | func WithCustomPostBind(plugins kubeschedulerconfig.PluginSet) Option { 179 | return func(s *kubeschedulerFramework) { 180 | s.customPostBind = plugins 181 | } 182 | } 183 | 184 | func WithCustomEventHandlers(handlers []func()) Option { 185 | return func(s *kubeschedulerFramework) { 186 | s.customEventHandlers = handlers 187 | } 188 | } 189 | 190 | func WithNodeImages(with bool) Option { 191 | return func(s *kubeschedulerFramework) { 192 | s.withNodeImages = with 193 | } 194 | } 195 | 196 | func WithScheduledPods(with bool) Option { 197 | return func(s *kubeschedulerFramework) { 198 | s.withScheduledPods = with 199 | } 200 | } 201 | 202 | func WithIgnorePodsOnExcludesNode(with bool) Option { 203 | return func(s *kubeschedulerFramework) { 204 | s.ignorePodsOnExcludesNode = with 205 | } 206 | } 207 | 208 | func WithPostBindHook(postBindHook func(*corev1.Pod) error) Option { 209 | return func(s *kubeschedulerFramework) { 210 | s.postBindHook = postBindHook 211 | } 212 | } 213 | 214 | func WithSaveTo(to string) Option { 215 | return func(s *kubeschedulerFramework) { 216 | s.saveTo = to 217 | } 218 | } 219 | 220 | func WithTerminatingPods(with bool) Option { 221 | return func(s *kubeschedulerFramework) { 222 | s.withTerminatingPods = with 223 | } 224 | } 225 | 226 | // NewKubeSchedulerFramework create a generic simulator for ce, cc, ss simulator which is completely independent of apiserver so no need 227 | // for kubeconfig nor for apiserver url 228 | func NewKubeSchedulerFramework(kubeSchedulerConfig *schedconfig.CompletedConfig, restConfig *restclient.Config, options ...Option) (pkg.Framework, error) { 229 | kubeSchedulerConfig.InformerFactory.InformerFor(&corev1.Pod{}, newPodInformer) 230 | 231 | dynamicClient := dynamic.NewForConfigOrDie(restConfig) 232 | restMapper, err := apiutil.NewDynamicRESTMapper(restConfig) 233 | if err != nil { 234 | return nil, err 235 | } 236 | 237 | s := &kubeschedulerFramework{ 238 | fakeClient: kubeSchedulerConfig.Client, 239 | dynamicClient: dynamicClient, 240 | restMapper: restMapper, 241 | stopCh: make(chan struct{}), 242 | fakeInformerFactory: kubeSchedulerConfig.InformerFactory, 243 | informerCh: make(chan struct{}), 244 | schedulerCh: make(chan struct{}), 245 | withScheduledPods: true, 246 | ignorePodsOnExcludesNode: false, 247 | withNodeImages: true, 248 | withTerminatingPods: true, 249 | status: &pkg.Status{}, 250 | } 251 | for _, option := range options { 252 | option(s) 253 | } 254 | 255 | // only for latest k8s version 256 | if restConfig != nil { 257 | dynClient := dynamic.NewForConfigOrDie(restConfig) 258 | s.dynInformerFactory = 
// NewKubeSchedulerFramework creates a generic framework for the ce, cc and ss simulators.
// Scheduling itself runs entirely against a fake client and fake informers, independent of
// the apiserver; the rest config is only used to snapshot the initial state of a running cluster.
func NewKubeSchedulerFramework(kubeSchedulerConfig *schedconfig.CompletedConfig, restConfig *restclient.Config, options ...Option) (pkg.Framework, error) {
	kubeSchedulerConfig.InformerFactory.InformerFor(&corev1.Pod{}, newPodInformer)

	dynamicClient := dynamic.NewForConfigOrDie(restConfig)
	restMapper, err := apiutil.NewDynamicRESTMapper(restConfig)
	if err != nil {
		return nil, err
	}

	s := &kubeschedulerFramework{
		fakeClient:               kubeSchedulerConfig.Client,
		dynamicClient:            dynamicClient,
		restMapper:               restMapper,
		stopCh:                   make(chan struct{}),
		fakeInformerFactory:      kubeSchedulerConfig.InformerFactory,
		informerCh:               make(chan struct{}),
		schedulerCh:              make(chan struct{}),
		withScheduledPods:        true,
		ignorePodsOnExcludesNode: false,
		withNodeImages:           true,
		withTerminatingPods:      true,
		status:                   &pkg.Status{},
	}
	for _, option := range options {
		option(s)
	}

	// only for the latest k8s versions; reuse the dynamic client created above
	if restConfig != nil {
		s.dynInformerFactory = dynamicinformer.NewFilteredDynamicSharedInformerFactory(dynamicClient, 0, corev1.NamespaceAll, nil)
	}

	sched, err := s.createScheduler(kubeSchedulerConfig)
	if err != nil {
		return nil, err
	}

	s.scheduler = sched

	return s, nil
}

func (s *kubeschedulerFramework) GetPodsByNode(nodeName string) ([]*corev1.Pod, error) {
	dump := s.scheduler.Cache.Dump()
	if dump == nil || dump.Nodes[nodeName] == nil {
		return nil, fmt.Errorf("cannot get pods on node %q because it is missing from the cache dump", nodeName)
	}

	var res []*corev1.Pod
	for _, podInfo := range dump.Nodes[nodeName].Pods {
		if podInfo.Pod != nil {
			res = append(res, podInfo.Pod)
		}
	}

	return res, nil
}

// Initialize uses the given objects, or the default init resources read from the
// running cluster, to initialize the scheduler. The objects passed in must be typed;
// unstructured objects are rejected.
func (s *kubeschedulerFramework) Initialize(objs ...runtime.Object) error {
	if len(objs) == 0 {
		// no objects given: snapshot the running cluster via the dynamic client
		klog.V(2).InfoS("Init the world from running cluster")
		initObjects := getInitObjects(s.restMapper, s.dynamicClient)
		for _, unstructuredObj := range initObjects {
			obj := initResources[unstructuredObj.GetObjectKind().GroupVersionKind()]()
			if err := runtime.DefaultUnstructuredConverter.FromUnstructured(unstructuredObj.(*unstructured.Unstructured).UnstructuredContent(), obj); err != nil {
				return err
			}
			if needAdd, obj := s.preAdd(obj); needAdd {
				if err := s.fakeClient.(testing.FakeClient).Tracker().Add(obj); err != nil {
					return err
				}
			}
		}
	} else {
		klog.V(2).InfoS("Init the world from snapshot")
		for _, obj := range objs {
			if _, ok := obj.(runtime.Unstructured); ok {
				return errors.New("type of objs used to init the world must not be unstructured")
			}
			if needAdd, obj := s.preAdd(obj); needAdd {
				if err := s.fakeClient.(testing.FakeClient).Tracker().Add(obj); err != nil {
					return err
				}
			}
		}
	}

	return nil
}
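// A sketch of initializing the world from typed objects (e.g. a previously saved
// snapshot) instead of the running cluster; the objects below are illustrative,
// and they must be typed since Initialize rejects unstructured input:
//
//	objs := []runtime.Object{
//		&corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}},
//		&corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod-1", Namespace: "default"}},
//	}
//	if err := fw.Initialize(objs...); err != nil {
//		return err
//	}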
func (s *kubeschedulerFramework) UpdateEstimationPods(pod ...*corev1.Pod) {
	s.status.PodsForEstimation = append(s.status.PodsForEstimation, pod...)
}

func (s *kubeschedulerFramework) UpdateNodesToScaleDown(nodeName string) {
	s.status.NodesToScaleDown = append(s.status.NodesToScaleDown, nodeName)
}

func (s *kubeschedulerFramework) Status() *pkg.Status {
	return s.status
}

func (s *kubeschedulerFramework) Stop(reason string) error {
	s.stopMux.Lock()
	defer s.stopMux.Unlock()

	if s.stopped {
		return nil
	}

	nodeMap := make(map[string]corev1.Node)
	nodeList, _ := s.fakeClient.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{ResourceVersion: "0"})
	for _, node := range nodeList.Items {
		nodeMap[node.Name] = node
	}
	s.status.Nodes = nodeMap

	podList, _ := s.fakeClient.CoreV1().Pods(metav1.NamespaceAll).List(context.TODO(), metav1.ListOptions{ResourceVersion: "0"})
	s.status.Pods = podList.Items

	s.status.StopReason = reason

	if len(s.saveTo) > 0 {
		// truncate any existing file so stale bytes from a longer previous status
		// cannot survive past the end of the new content
		file, err := os.OpenFile(s.saveTo, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
		if err != nil {
			return err
		}
		defer file.Close()

		bytes, err := json.Marshal(s.status)
		if err != nil {
			return err
		}

		_, err = file.Write(bytes)
		if err != nil {
			return err
		}
	}

	defer func() {
		close(s.stopCh)
		close(s.informerCh)
		close(s.schedulerCh)
	}()

	s.stopped = true

	return nil
}

func (s *kubeschedulerFramework) CreatePod(pod *corev1.Pod) error {
	_, err := s.fakeClient.CoreV1().Pods(pod.Namespace).Create(context.TODO(), pod, metav1.CreateOptions{})
	return err
}

func (s *kubeschedulerFramework) Run(init func() error) error {
	// start the informers and wait for all caches to sync
	s.fakeInformerFactory.Start(s.informerCh)
	if s.dynInformerFactory != nil {
		s.dynInformerFactory.Start(s.informerCh)
	}
	start := time.Now()

	s.fakeInformerFactory.WaitForCacheSync(s.informerCh)
	if s.dynInformerFactory != nil {
		s.dynInformerFactory.WaitForCacheSync(s.informerCh)
	}

	klog.V(4).InfoS("wait sync", "cost", time.Since(start).Milliseconds())

	if init != nil {
		if err := init(); err != nil {
			return s.Stop(fmt.Sprintf("FailedRunInit: %s", err.Error()))
		}
	}

	go s.scheduler.Run(context.TODO())

	<-s.stopCh

	return nil
}
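// A sketch of driving a simulation: the init callback runs after all caches have
// synced and typically seeds the pods to schedule, and Stop is expected to be
// called later (e.g. from a post-bind hook) once the simulation reaches its goal.
// The pod variable here is illustrative:
//
//	if err := fw.Run(func() error {
//		return fw.CreatePod(podToEstimate)
//	}); err != nil {
//		return err
//	}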
func (s *kubeschedulerFramework) createScheduler(cc *schedconfig.CompletedConfig) (*scheduler.Scheduler, error) {
	// run custom event handlers
	for _, handler := range s.customEventHandlers {
		handler()
	}

	// register the default generic plugin
	if s.outOfTreeRegistry == nil {
		s.outOfTreeRegistry = make(frameworkruntime.Registry)
	}
	err := s.outOfTreeRegistry.Register(generic.Name, func(configuration runtime.Object, f framework.Handle) (framework.Plugin, error) {
		return generic.New(s.postBindHook, s.fakeClient, s.status)
	})
	if err != nil {
		return nil, err
	}

	// wire the generic plugin into the first profile and disable the plugins it
	// replaces: real volume binding, the default binder and default preemption
	cc.ComponentConfig.Profiles[0].Plugins.PreBind.Enabled = append(cc.ComponentConfig.Profiles[0].Plugins.PreBind.Enabled, kubeschedulerconfig.Plugin{Name: generic.Name})
	cc.ComponentConfig.Profiles[0].Plugins.PreBind.Disabled = append(cc.ComponentConfig.Profiles[0].Plugins.PreBind.Disabled, kubeschedulerconfig.Plugin{Name: volumebinding.Name})
	cc.ComponentConfig.Profiles[0].Plugins.Bind.Enabled = append(cc.ComponentConfig.Profiles[0].Plugins.Bind.Enabled, kubeschedulerconfig.Plugin{Name: generic.Name})
	cc.ComponentConfig.Profiles[0].Plugins.Bind.Disabled = append(cc.ComponentConfig.Profiles[0].Plugins.Bind.Disabled, kubeschedulerconfig.Plugin{Name: defaultbinder.Name})
	cc.ComponentConfig.Profiles[0].Plugins.PostBind.Enabled = append(cc.ComponentConfig.Profiles[0].Plugins.PostBind.Enabled, kubeschedulerconfig.Plugin{Name: generic.Name})
	cc.ComponentConfig.Profiles[0].Plugins.PostFilter.Disabled = append(cc.ComponentConfig.Profiles[0].Plugins.PostFilter.Disabled, kubeschedulerconfig.Plugin{Name: defaultpreemption.Name})

	// custom bind plugins
	cc.ComponentConfig.Profiles[0].Plugins.PreBind.Enabled = append(cc.ComponentConfig.Profiles[0].Plugins.PreBind.Enabled, s.customPreBind.Enabled...)
	cc.ComponentConfig.Profiles[0].Plugins.PreBind.Disabled = append(cc.ComponentConfig.Profiles[0].Plugins.PreBind.Disabled, s.customPreBind.Disabled...)
	cc.ComponentConfig.Profiles[0].Plugins.Bind.Enabled = append(cc.ComponentConfig.Profiles[0].Plugins.Bind.Enabled, s.customBind.Enabled...)
	cc.ComponentConfig.Profiles[0].Plugins.Bind.Disabled = append(cc.ComponentConfig.Profiles[0].Plugins.Bind.Disabled, s.customBind.Disabled...)
	cc.ComponentConfig.Profiles[0].Plugins.PostBind.Enabled = append(cc.ComponentConfig.Profiles[0].Plugins.PostBind.Enabled, s.customPostBind.Enabled...)
	cc.ComponentConfig.Profiles[0].Plugins.PostBind.Disabled = append(cc.ComponentConfig.Profiles[0].Plugins.PostBind.Disabled, s.customPostBind.Disabled...)

	// create the scheduler
	return scheduler.New(
		s.fakeClient,
		s.fakeInformerFactory,
		s.dynInformerFactory,
		getRecorderFactory(cc),
		s.schedulerCh,
		scheduler.WithComponentConfigVersion(cc.ComponentConfig.TypeMeta.APIVersion),
		scheduler.WithKubeConfig(cc.KubeConfig),
		scheduler.WithProfiles(cc.ComponentConfig.Profiles...),
		scheduler.WithPercentageOfNodesToScore(cc.ComponentConfig.PercentageOfNodesToScore),
		scheduler.WithFrameworkOutOfTreeRegistry(s.outOfTreeRegistry),
		scheduler.WithPodMaxBackoffSeconds(cc.ComponentConfig.PodMaxBackoffSeconds),
		scheduler.WithPodInitialBackoffSeconds(cc.ComponentConfig.PodInitialBackoffSeconds),
		scheduler.WithExtenders(cc.ComponentConfig.Extenders...),
		scheduler.WithParallelism(cc.ComponentConfig.Parallelism),
	)
}
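// A sketch of a post-bind hook as consumed by the generic plugin registered in
// createScheduler; the counter and target below are hypothetical, but stopping
// the framework from the hook is the intended use of Stop:
//
//	scheduled := 0
//	hook := func(pod *corev1.Pod) error {
//		scheduled++
//		if scheduled >= target {
//			return fw.Stop("LimitReached")
//		}
//		return nil
//	}
//	// pass via WithPostBindHook(hook) when constructing the framework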
func (s *kubeschedulerFramework) preAdd(obj runtime.Object) (bool, runtime.Object) {
	// filter out excluded nodes and their pods, and normalize pod/node spec and status
	if pod, ok := obj.(*corev1.Pod); ok {
		// on excluded nodes, drop daemonset pods always, and drop all pods when
		// ignorePodsOnExcludesNode is set
		if s.excludeNodes != nil {
			if _, ok := s.excludeNodes[pod.Spec.NodeName]; ok {
				if s.ignorePodsOnExcludesNode || (pod.OwnerReferences != nil && utils.IsDaemonsetPod(pod.OwnerReferences)) {
					return false, nil
				}
			}
		}

		if pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed {
			return false, nil
		}

		if pod.DeletionTimestamp != nil && !s.withTerminatingPods {
			return false, nil
		}

		if !s.withScheduledPods && !utils.IsDaemonsetPod(pod.OwnerReferences) {
			pod := utils.InitPod(pod)
			pod.Status.Phase = corev1.PodPending

			return true, pod
		}
	} else if node, ok := obj.(*corev1.Node); ok {
		if s.excludeNodes != nil {
			if _, ok := s.excludeNodes[node.Name]; ok {
				return false, nil
			}
		}
		// strip images regardless of whether any nodes are excluded
		if !s.withNodeImages {
			node.Status.Images = nil

			return true, node
		}
	}

	return true, obj
}

func newPodInformer(cs clientset.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer {
	// pods that have already finished are irrelevant to scheduling
	selector := fmt.Sprintf("status.phase!=%v,status.phase!=%v", corev1.PodSucceeded, corev1.PodFailed)
	tweakListOptions := func(options *metav1.ListOptions) {
		options.FieldSelector = selector
	}
	return coreinformers.NewFilteredPodInformer(cs, metav1.NamespaceAll, resyncPeriod, nil, tweakListOptions)
}

func getRecorderFactory(cc *schedconfig.CompletedConfig) profile.RecorderFactory {
	return func(name string) events.EventRecorder {
		return cc.EventBroadcaster.NewRecorder(name)
	}
}

// getInitObjects returns all objects that need to be added to the scheduler.
// The result is cached at package scope so that multiple framework instances
// do not hit the real kube-apiserver more than once.
func getInitObjects(restMapper meta.RESTMapper, dynClient dynamic.Interface) []runtime.Object {
	once.Do(func() {
		// each item is an UnstructuredList
		for gvk := range initResources {
			restMapping, err := restMapper.RESTMapping(gvk.GroupKind(), gvk.Version)
			if err != nil && !meta.IsNoMatchError(err) {
				fmt.Printf("unable to get rest mapping for %s, error: %s\n", gvk.String(), err.Error())
				os.Exit(1)
			}

			if restMapping != nil {
				var (
					list *unstructured.UnstructuredList
					err  error
				)
				if restMapping.Scope.Name() == meta.RESTScopeNameRoot {
					list, err = dynClient.Resource(restMapping.Resource).List(context.TODO(), metav1.ListOptions{ResourceVersion: "0"})
					if err != nil && !apierrors.IsNotFound(err) {
						fmt.Printf("unable to list %s, error: %s\n", gvk.String(), err.Error())
						os.Exit(1)
					}
				} else {
					if restMapping.Resource.Resource == "pods" {
						// filter out finished pods server side, matching newPodInformer
						list, err = dynClient.Resource(restMapping.Resource).Namespace(metav1.NamespaceAll).List(context.TODO(), metav1.ListOptions{
							ResourceVersion: "0",
							FieldSelector:   fmt.Sprintf("status.phase!=%v,status.phase!=%v", corev1.PodSucceeded, corev1.PodFailed),
						})
					} else {
						list, err = dynClient.Resource(restMapping.Resource).Namespace(metav1.NamespaceAll).List(context.TODO(), metav1.ListOptions{ResourceVersion: "0"})
					}
					if err != nil && !apierrors.IsNotFound(err) {
						fmt.Printf("unable to list %s, error: %s\n", gvk.String(), err.Error())
						os.Exit(1)
					}
				}

				// list may be nil when the resource was not found
				if list != nil {
					_ = list.EachListItem(func(object runtime.Object) error {
						initObjects = append(initObjects, object)
						return nil
					})
				}
			}
		}
	})

	return initObjects
}
--------------------------------------------------------------------------------