├── pkg ├── framework │ ├── volcano.go │ ├── doc.go │ └── kubescheduler.go ├── const.go ├── version │ ├── base.go │ ├── sharedcommand │ │ └── sharedcommand.go │ └── version.go ├── simulator │ ├── capacityestimation │ │ ├── podgenerator.go │ │ ├── simulator.go │ │ └── report.go │ ├── schedulersimulation │ │ ├── simulator.go │ │ └── report.go │ └── clustercompression │ │ ├── report.go │ │ ├── nodeFilter.go │ │ ├── options.go │ │ └── simulator.go ├── interface.go ├── status.go ├── plugins │ └── generic │ │ └── plugin.go └── utils │ ├── pod.go │ └── utils.go ├── docs └── images │ └── capacity-management-capacity-icon.jpeg ├── .github ├── ISSUE_TEMPLATE │ ├── custom.md │ ├── feature_request.md │ └── bug_report.md ├── workflows │ ├── release.yml │ └── ci.yml └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── hack ├── util.sh ├── verify-staticcheck.sh ├── verify-import-aliases.sh ├── .import-aliases └── tools │ └── preferredimports │ └── preferredimports.go ├── app ├── cmds │ ├── option.go │ ├── capacityestimation │ │ ├── options │ │ │ ├── namespacename.go │ │ │ └── capacityestimation.go │ │ └── capacityestimation.go │ ├── schedulersimulation │ │ ├── options │ │ │ └── schedulersimulation.go │ │ └── schedulersimulation.go │ └── clustercompression │ │ ├── options │ │ └── clustercompression.go │ │ └── clustercompression.go └── root.go ├── main.go ├── .krew.yaml ├── check_label.py ├── .goreleaser.yml ├── Makefile ├── README-ZH.md ├── go.mod ├── README.md └── LICENSE /pkg/framework/volcano.go: -------------------------------------------------------------------------------- 1 | package framework 2 | -------------------------------------------------------------------------------- /pkg/const.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | const ( 4 | PodProvisioner = "kc.k-cloud-labs.io/provisioned-by" 5 | SchedulerName = "simulator-scheduler" 6 | ) 7 | -------------------------------------------------------------------------------- /docs/images/capacity-management-capacity-icon.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k-cloud-labs/kluster-capacity/HEAD/docs/images/capacity-management-capacity-icon.jpeg -------------------------------------------------------------------------------- /pkg/framework/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | this folder is used to implement frameworks for all scheduler, e.g. kube-scheduler, volcano, YuniKorn 3 | */ 4 | 5 | package framework 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/custom.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Custom issue template 3 | about: Describe this issue template's purpose here. 
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 |
11 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Binaries for programs and plugins
2 | *.exe
3 | *.exe~
4 | *.dll
5 | *.so
6 | *.dylib
7 |
8 | # Test binary, built with `go test -c`
9 | *.test
10 | test.sh
11 |
12 | # Output of the go coverage tool, specifically when used with LiteIDE
13 | *.out
14 |
15 | # Dependency directories (remove the comment below to include it)
16 | vendor/
17 |
18 | # binary file
19 | kluster-capacity
20 |
21 | # conf
22 | pod*.yaml
23 | schedulerconfig
24 | kubeconfig
25 |
26 | # IDE
27 | .idea/
28 |
--------------------------------------------------------------------------------
/pkg/version/base.go:
--------------------------------------------------------------------------------
1 | package version
2 |
3 | // Base version information.
4 | //
5 | // This is the fallback data used when version information from git is not
6 | // provided via go ldflags. It provides an approximation of the kluster-capacity
7 | // version for ad-hoc builds (e.g. `go build`) that cannot get the version
8 | // information from git.
9 | var (
10 | 	gitVersion   = "v0.0.0-master"
11 | 	gitCommit    = "unknown" // sha1 from git, output of $(git rev-parse HEAD)
12 | 	gitTreeState = "unknown" // state of git tree, either "clean" or "dirty"
13 |
14 | 	buildDate = "unknown" // build date in ISO8601 format, output of $(date -u +'%Y-%m-%dT%H:%M:%SZ')
15 | )
16 |
--------------------------------------------------------------------------------
/hack/util.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -o errexit
4 | set -o nounset
5 | set -o pipefail
6 |
7 | # This script holds common bash variables and utility functions.
8 |
9 | # This function installs a Go tool via the 'go install' command.
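# It installs the tool into $(go env GOPATH)/bin and appends that directory to PATH.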
10 | # Parameters: 11 | # - $1: package name, such as "sigs.k8s.io/controller-tools/cmd/controller-gen" 12 | # - $2: package version, such as "v0.4.1" 13 | function util::install_tools() { 14 | local package="$1" 15 | local version="$2" 16 | echo "go install ${package}@${version}" 17 | GO111MODULE=on go install "${package}"@"${version}" 18 | GOPATH=$(go env GOPATH | awk -F ':' '{print $1}') 19 | export PATH=$PATH:$GOPATH/bin 20 | } -------------------------------------------------------------------------------- /app/cmds/option.go: -------------------------------------------------------------------------------- 1 | package cmds 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | ) 7 | 8 | type Options struct { 9 | SchedulerConfig string 10 | KubeConfig string 11 | Verbose bool 12 | OutputFormat string 13 | // file to load initial data instead of from k8s cluster 14 | Snapshot string 15 | // file to save the result 16 | SaveTo string 17 | ExcludeNodes []string 18 | MaxLimit int 19 | } 20 | 21 | func (o *Options) Default() { 22 | if len(o.KubeConfig) == 0 { 23 | config := os.Getenv("KUBECONFIG") 24 | if len(config) == 0 { 25 | config = filepath.Join(os.Getenv("HOME"), ".kube/config") 26 | } 27 | o.KubeConfig = config 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /hack/verify-staticcheck.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o nounset 5 | set -o pipefail 6 | 7 | REPO_ROOT=$(dirname "${BASH_SOURCE[0]}")/.. 8 | GOLANGCI_LINT_PKG="github.com/golangci/golangci-lint/cmd/golangci-lint" 9 | GOLANGCI_LINT_VER="v1.50.1" 10 | 11 | cd "${REPO_ROOT}" 12 | source "hack/util.sh" 13 | 14 | util::install_tools ${GOLANGCI_LINT_PKG} ${GOLANGCI_LINT_VER} 15 | 16 | if golangci-lint run --timeout=5m; then 17 | echo 'Congratulations! All Go source files have passed staticcheck.' 18 | else 19 | echo # print one empty line, separate from warning messages. 20 | echo 'Please review the above warnings.' 21 | echo 'If the above warnings do not make sense, feel free to file an issue.' 22 | exit 1 23 | fi -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 k-cloud-labs org 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at
7 |
8 |     http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | package main
17 |
18 | import "github.com/k-cloud-labs/kluster-capacity/app"
19 |
20 | func main() {
21 | 	app.Execute()
22 | }
23 |
--------------------------------------------------------------------------------
/pkg/simulator/capacityestimation/podgenerator.go:
--------------------------------------------------------------------------------
1 | package capacityestimation
2 |
3 | import (
4 | 	"fmt"
5 |
6 | 	corev1 "k8s.io/api/core/v1"
7 |
8 | 	"github.com/k-cloud-labs/kluster-capacity/pkg/utils"
9 | )
10 |
11 | type singlePodGenerator struct {
12 | 	counter     uint
13 | 	podTemplate *corev1.Pod
14 | }
15 |
16 | func NewSinglePodGenerator(podTemplate *corev1.Pod) PodGenerator {
17 | 	return &singlePodGenerator{
18 | 		counter:     0,
19 | 		podTemplate: podTemplate,
20 | 	}
21 | }
22 |
23 | func (g *singlePodGenerator) Generate() *corev1.Pod {
24 | 	pod := utils.InitPod(g.podTemplate)
25 | 	// use simulated pod name with an index to construct the name
26 | 	pod.ObjectMeta.Name = fmt.Sprintf("%v-%v", g.podTemplate.Name, g.counter)
27 |
28 | 	// Ensures uniqueness
29 | 	g.counter++
30 |
31 | 	return pod
32 | }
33 |
--------------------------------------------------------------------------------
/pkg/interface.go:
--------------------------------------------------------------------------------
1 | package pkg
2 |
3 | import (
4 | 	corev1 "k8s.io/api/core/v1"
5 | 	"k8s.io/apimachinery/pkg/runtime"
6 | )
7 |
8 | // Framework needs to be implemented by every scheduler framework
9 | type Framework interface {
10 | 	Run(init func() error) error
11 | 	Initialize(objs ...runtime.Object) error
12 | 	CreatePod(pod *corev1.Pod) error
13 | 	UpdateEstimationPods(pod ...*corev1.Pod)
14 | 	UpdateNodesToScaleDown(nodeName string)
15 | 	Status() *Status
16 | 	GetPodsByNode(nodeName string) ([]*corev1.Pod, error)
17 | 	Stop(reason string) error
18 | }
19 |
20 | // Simulator needs to be implemented by every simulator
21 | type Simulator interface {
22 | 	Run() error
23 | 	Initialize(objs ...runtime.Object) error
24 | 	Report() Printer
25 | }
26 |
27 | type Printer interface {
28 | 	Print(verbose bool, format string) error
29 | }
30 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: release
2 | on:
3 |   push:
4 |     tags:
5 |       - v*
6 | jobs:
7 |   goreleaser:
8 |     runs-on: ubuntu-latest
9 |     environment: release
10 |     steps:
11 |       - uses: actions/checkout@v3
12 |         with:
13 |           fetch-depth: 0
14 |       - run: git fetch --force --tags
15 |       - uses: actions/setup-go@v3
16 |         with:
17 |           go-version: '>=1.19'
18 |           cache: true
19 |       - name: GoReleaser
20 |         uses: goreleaser/goreleaser-action@v4
21 |         with:
22 |           version: latest
23 |           args: release --rm-dist
24 |         env:
25 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
26 |           TAP_GITHUB_TOKEN: ${{ secrets.TAP_GITHUB_TOKEN }}
27 |       - name: Update new version in krew-index
28 |         uses: rajatjindal/krew-release-bot@v0.0.43
--------------------------------------------------------------------------------
/pkg/version/sharedcommand/sharedcommand.go:
-------------------------------------------------------------------------------- 1 | package sharedcommand 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | 7 | "github.com/spf13/cobra" 8 | 9 | "github.com/k-cloud-labs/kluster-capacity/pkg/version" 10 | ) 11 | 12 | var ( 13 | versionShort = `Print the version information.` 14 | versionLong = `Print the version information.` 15 | versionExample = ` # Print %s command version 16 | %s version` 17 | ) 18 | 19 | // NewCmdVersion prints out the release version info for this command binary. 20 | // It is used as a subcommand of a parent command. 21 | func NewCmdVersion(out io.Writer, parentCommand string) *cobra.Command { 22 | cmd := &cobra.Command{ 23 | Use: "version", 24 | Short: versionShort, 25 | Long: versionLong, 26 | Example: fmt.Sprintf(versionExample, parentCommand, parentCommand), 27 | Run: func(cmd *cobra.Command, args []string) { 28 | fmt.Fprintf(out, "%s version: %s\n", parentCommand, version.Get()) 29 | }, 30 | } 31 | 32 | return cmd 33 | } 34 | -------------------------------------------------------------------------------- /pkg/status.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | import ( 4 | corev1 "k8s.io/api/core/v1" 5 | ) 6 | 7 | // Status capture all scheduled pods with reason why the estimation could not continue 8 | type Status struct { 9 | // all pods 10 | Pods []corev1.Pod `json:"pods"` 11 | // all nodes 12 | Nodes map[string]corev1.Node `json:"nodes"` 13 | // for ce 14 | PodsForEstimation []*corev1.Pod `json:"pods_for_estimation"` 15 | // for cc 16 | NodesToScaleDown []string `json:"nodes_to_scale_down"` 17 | SelectNodeCount int `json:"select_node_count"` 18 | SchedulerCount int `json:"scheduler_count"` 19 | FailedSchedulerCount int `json:"failed_scheduler_count"` 20 | // stop reason 21 | StopReason string `json:"stop_reason"` 22 | } 23 | 24 | func (s *Status) SelectNodeCountInc() { 25 | s.SelectNodeCount++ 26 | } 27 | 28 | func (s *Status) SchedulerCountInc() { 29 | s.SchedulerCount++ 30 | } 31 | 32 | func (s *Status) FailedSchedulerCountInc() { 33 | s.FailedSchedulerCount++ 34 | } 35 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 
39 | -------------------------------------------------------------------------------- /hack/verify-import-aliases.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o nounset 5 | set -o pipefail 6 | 7 | SCRIPT_ROOT=$(dirname "${BASH_SOURCE[0]}")/.. 8 | cd "${SCRIPT_ROOT}" 9 | ROOT_PATH=$(pwd) 10 | 11 | IMPORT_ALIASES_PATH="${ROOT_PATH}/hack/.import-aliases" 12 | INCLUDE_PATH="(${ROOT_PATH}/app|${ROOT_PATH}/pkg)" 13 | 14 | ret=0 15 | # We can't directly install preferredimports by `go install` due to the go.mod issue: 16 | # go install k8s.io/kubernetes/cmd/preferredimports@v1.21.3: k8s.io/kubernetes@v1.21.3 17 | # The go.mod file for the module providing named packages contains one or 18 | # more replace directives. It must not contain directives that would cause 19 | # it to be interpreted differently than if it were the main module. 20 | go run "${ROOT_PATH}/hack/tools/preferredimports/preferredimports.go" -import-aliases "${IMPORT_ALIASES_PATH}" -include-path "${INCLUDE_PATH}" "${ROOT_PATH}" || ret=$? 21 | if [[ $ret -ne 0 ]]; then 22 | echo "!!! Please see hack/.import-aliases for the preferred aliases for imports." >&2 23 | exit 1 24 | fi -------------------------------------------------------------------------------- /pkg/version/version.go: -------------------------------------------------------------------------------- 1 | package version 2 | 3 | import ( 4 | "fmt" 5 | "runtime" 6 | ) 7 | 8 | // Info contains versioning information. 9 | type Info struct { 10 | GitVersion string `json:"gitVersion"` 11 | GitCommit string `json:"gitCommit"` 12 | GitTreeState string `json:"gitTreeState"` 13 | BuildDate string `json:"buildDate"` 14 | GoVersion string `json:"goVersion"` 15 | Compiler string `json:"compiler"` 16 | Platform string `json:"platform"` 17 | } 18 | 19 | // String returns a Go-syntax representation of the Info. 20 | func (info Info) String() string { 21 | return fmt.Sprintf("%#v", info) 22 | } 23 | 24 | // Get returns the overall codebase version. It's for detecting 25 | // what code a binary was built from. 
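//
// A sketch of how these values get injected at build time (the module path is
// inferred from this repo's imports; the exact flags are illustrative, not the
// project's actual build script):
//
//	go build -ldflags "-X github.com/k-cloud-labs/kluster-capacity/pkg/version.gitVersion=$(git describe --tags) \
//	  -X github.com/k-cloud-labs/kluster-capacity/pkg/version.gitCommit=$(git rev-parse HEAD) \
//	  -X github.com/k-cloud-labs/kluster-capacity/pkg/version.buildDate=$(date -u +'%Y-%m-%dT%H:%M:%SZ')"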
26 | func Get() Info {
27 | 	return Info{
28 | 		GitVersion:   gitVersion,
29 | 		GitCommit:    gitCommit,
30 | 		GitTreeState: gitTreeState,
31 | 		BuildDate:    buildDate,
32 | 		GoVersion:    runtime.Version(),
33 | 		Compiler:     runtime.Compiler,
34 | 		Platform:     fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH),
35 | 	}
36 | }
37 |
--------------------------------------------------------------------------------
/app/cmds/capacityestimation/options/namespacename.go:
--------------------------------------------------------------------------------
1 | package options
2 |
3 | import (
4 | 	"errors"
5 | 	"fmt"
6 | 	"strings"
7 |
8 | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9 | )
10 |
11 | type NamespaceNames []NamespaceName
12 |
13 | type NamespaceName struct {
14 | 	Namespace string `json:"namespace"`
15 | 	Name      string `json:"name"`
16 | }
17 |
18 | func (n *NamespaceNames) Set(nns string) error {
19 | 	for _, nn := range strings.Split(nns, ",") {
20 | 		nnStrs := strings.Split(nn, "/")
21 | 		if len(nnStrs) == 1 {
22 | 			*n = append(*n, NamespaceName{
23 | 				Namespace: metav1.NamespaceDefault,
24 | 				Name:      nnStrs[0],
25 | 			})
26 | 		} else if len(nnStrs) == 2 {
27 | 			*n = append(*n, NamespaceName{
28 | 				Namespace: nnStrs[0],
29 | 				Name:      nnStrs[1],
30 | 			})
31 | 		} else {
32 | 			return errors.New("invalid format")
33 | 		}
34 | 	}
35 |
36 | 	return nil
37 | }
38 |
39 | func (n *NamespaceNames) String() string {
40 | 	strs := []string{}
41 | 	for _, nn := range *n {
42 | 		strs = append(strs, fmt.Sprintf("%s/%s", nn.Namespace, nn.Name))
43 | 	}
44 |
45 | 	return strings.Join(strs, ",")
46 | }
47 |
48 | func (n *NamespaceNames) Type() string {
49 | 	return "NamespaceNames"
50 | }
51 |
--------------------------------------------------------------------------------
/.krew.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: krew.googlecontainertools.github.com/v1alpha2
2 | kind: Plugin
3 | metadata:
4 |   name: kluster-capacity
5 | spec:
6 |   version: {{ .TagName }}
7 |   homepage: https://github.com/k-cloud-labs/kluster-capacity
8 |   shortDescription: Scheduler simulation for capacity analysis.
9 |   description: |
10 |     A simple CLI providing cluster capacity analysis with scheduler simulation.
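# Editorial note: judging from .github/workflows/release.yml (which runs
# rajatjindal/krew-release-bot), this file is a template; {{ .TagName }} and
# addURIAndSha are expanded at release time into the final krew manifest.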
11 | platforms:
12 |   - selector:
13 |       matchLabels:
14 |         os: darwin
15 |         arch: amd64
16 |     bin: kluster-capacity
17 |     {{addURIAndSha "https://github.com/k-cloud-labs/kluster-capacity/releases/download/{{ .TagName }}/kluster-capacity_{{ .TagName }}_darwin_x86_64.tar.gz" .TagName }}
18 |   - selector:
19 |       matchLabels:
20 |         os: darwin
21 |         arch: arm64
22 |     bin: kluster-capacity
23 |     {{addURIAndSha "https://github.com/k-cloud-labs/kluster-capacity/releases/download/{{ .TagName }}/kluster-capacity_{{ .TagName }}_darwin_arm64.tar.gz" .TagName }}
24 |   - selector:
25 |       matchLabels:
26 |         os: linux
27 |         arch: amd64
28 |     bin: kluster-capacity
29 |     {{addURIAndSha "https://github.com/k-cloud-labs/kluster-capacity/releases/download/{{ .TagName }}/kluster-capacity_{{ .TagName }}_linux_x86_64.tar.gz" .TagName }}
30 |   - selector:
31 |       matchLabels:
32 |         os: linux
33 |         arch: arm64
34 |     bin: kluster-capacity
35 |     {{addURIAndSha "https://github.com/k-cloud-labs/kluster-capacity/releases/download/{{ .TagName }}/kluster-capacity_{{ .TagName }}_linux_arm64.tar.gz" .TagName }}
--------------------------------------------------------------------------------
/check_label.py:
--------------------------------------------------------------------------------
1 | from github import Github
2 | import os
3 | import sys
4 |
5 | # GitHub API authentication
6 | g = Github(os.environ['K_GITHUB_TOKEN'])
7 | repo = g.get_repo(os.environ['GITHUB_REPOSITORY'])
8 |
9 | # Fetch all labels defined in the repository
10 | labels = [label.name for label in repo.get_labels()]
11 |
12 | if sys.argv[1] == 'issues':
13 |     issue_number = sys.argv[2]
14 |     issue = repo.get_issue(int(issue_number))
15 |     issue_labels = [label.name for label in issue.labels]
16 |
17 |     # Check whether the issue carries at least one of the repository labels
18 |     if not set(labels).intersection(set(issue_labels)):
19 |         message = "Please add a label from the following list: " + str(labels)
20 |         issue.create_comment(message)
21 |
22 | if sys.argv[1] == 'pull_request':
23 |     pull_request_number = sys.argv[2]
24 |     pull_request = repo.get_pull(int(pull_request_number))
25 |
26 |     # Automatically add a label indicating the size of the PR
27 |     size_labels = ["size/S", "size/M", "size/L", "size/XL"]
28 |     lines_of_code = pull_request.additions + pull_request.deletions
29 |     if lines_of_code <= 50:
30 |         pull_request.add_to_labels(size_labels[0])
31 |     elif lines_of_code <= 100:
32 |         pull_request.add_to_labels(size_labels[1])
33 |     elif lines_of_code <= 500:
34 |         pull_request.add_to_labels(size_labels[2])
35 |     else:
36 |         pull_request.add_to_labels(size_labels[3])
37 |
38 |
39 |     pull_request_labels = [label.name for label in pull_request.labels]
40 |
41 |     # Check whether the pull request carries at least one of the repository labels
42 |     if not set(labels).intersection(set(pull_request_labels)):
43 |         message = "Please add a label from the following list: " + str(labels)
44 |         pull_request.create_issue_comment(message)
45 |
46 |
47 |
--------------------------------------------------------------------------------
/.goreleaser.yml:
--------------------------------------------------------------------------------
1 | builds:
2 |   - env:
3 |       - CGO_ENABLED=0
4 |     goos:
5 |       - linux
6 |       - darwin
7 | #      - windows
8 |     goarch:
9 |       - arm64
10 |       - amd64
11 |       - 386
12 |     goarm:
13 |       - 6
14 |       - 7
15 | archives:
16 |   - name_template: |-
17 |       kluster-capacity_{{ .Tag }}_{{ .Os }}_{{ .Arch -}}
18 |       {{- with .Arm -}}
19 |       {{- if (eq . "6") -}}hf
20 |       {{- else -}}v{{- . -}}
21 |       {{- end -}}
22 |       {{- end -}}
23 |     replacements:
24 |       386: i386
25 |       amd64: x86_64
26 | #    format_overrides:
27 | #      - goos: windows
28 | #        format: zip
29 | checksum:
30 |   name_template: 'checksums.txt'
31 | snapshot:
32 |   name_template: "{{ .Tag }}-next"
33 | changelog:
34 |   skip: false
35 |   use: git
36 |   sort: asc
37 |   groups:
38 |     - title: Features
39 |       regexp: "^.*feat[(\\w)]*:+.*$"
40 |       order: 0
41 |     - title: 'Bug fixes'
42 |       regexp: "^.*fix[(\\w)]*:+.*$"
43 |       order: 1
44 |     - title: Others
45 |       order: 999
46 |   filters:
47 |     exclude:
48 |       - "^docs:"
49 |       - "^test:"
50 | brews:
51 |   - name: kluster-capacity
52 |     tap:
53 |       # The token determines the release type (Github/Gitlab).
54 |       owner: k-cloud-labs
55 |       name: homebrew-tap
56 |       token: "{{ .Env.TAP_GITHUB_TOKEN }}"
57 |     folder: Formula
58 |     # Brew fails from multiple 32-bit arm versions.
59 |     # Specify which version should be used.
60 |     goarm: 6
61 |     homepage: https://github.com/k-cloud-labs/kluster-capacity
62 |     license: apache-2.0
63 |     description: A simple CLI providing cluster capacity analysis with scheduler simulation.
64 |     dependencies:
65 |       - name: git
66 |     test: |
67 |       system "#{bin}/kluster-capacity version"
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: build
2 |
3 | on:
4 |   # Run this workflow every time a new commit pushed to upstream/fork repository.
5 |   # Run workflow on fork repository will help contributors find and resolve issues before sending a PR.
6 |   push:
7 |   pull_request:
8 | jobs:
9 |   golangci:
10 |     name: lint
11 |     runs-on: ubuntu-20.04
12 |     steps:
13 |       - name: checkout code
14 |         uses: actions/checkout@v3
15 |       - name: install Go
16 |         uses: actions/setup-go@v3
17 |         with:
18 |           go-version: 1.19.x
19 |       - name: lint
20 |         run: hack/verify-staticcheck.sh
21 |       - name: import alias
22 |         run: hack/verify-import-aliases.sh
23 |   fmt:
24 |     name: gofmt
25 |     runs-on: ubuntu-20.04
26 |     steps:
27 |       - name: checkout code
28 |         uses: actions/checkout@v3
29 |       - name: install Go
30 |         uses: actions/setup-go@v3
31 |         with:
32 |           go-version: 1.19.x
33 |       - name: go fmt check
34 |         run: make fmt-check
35 |   vet:
36 |     name: go vet
37 |     runs-on: ubuntu-20.04
38 |     steps:
39 |       - name: checkout code
40 |         uses: actions/checkout@v3
41 |       - name: install Go
42 |         uses: actions/setup-go@v3
43 |         with:
44 |           go-version: 1.19.x
45 |       - name: go vet
46 |         run: make vet
47 | #  test:
48 | #    name: unit test
49 | #    needs:
50 | #      - fmt
51 | #      - vet
52 | #    runs-on: ubuntu-18.04
53 | #    steps:
54 | #      - name: checkout code
55 | #        uses: actions/checkout@v2
56 | #      - name: install Go
57 | #        uses: actions/setup-go@v2
58 | #        with:
59 | #          go-version: 1.19.x
60 | #      - name: Run coverage
61 | #        run: ./script/test.sh
62 | #      - name: Codecov
63 | #        uses: codecov/codecov-action@v3.1.0
64 |
65 |
--------------------------------------------------------------------------------
/pkg/plugins/generic/plugin.go:
--------------------------------------------------------------------------------
1 | package generic
2 |
3 | import (
4 | 	"context"
5 | 	"fmt"
6 |
7 | 	corev1 "k8s.io/api/core/v1"
8 | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9 | 	"k8s.io/client-go/kubernetes"
10 | 	"k8s.io/klog/v2"
11 | 	"k8s.io/kubernetes/pkg/scheduler/framework"
12 |
13 | 	"github.com/k-cloud-labs/kluster-capacity/pkg"
14 | )
15 |
16 | const Name = "GenericBinder"
17 |
18 | type GenericBinder struct {
19 | 	client       kubernetes.Interface
20 | 	postBindHook func(*corev1.Pod) error
21 | 	status       *pkg.Status
22 | }
23 |
24 | func New(postBindHook func(*corev1.Pod) error, client kubernetes.Interface, status *pkg.Status) (framework.Plugin, error) {
25 | 	return &GenericBinder{
26 | 		postBindHook: postBindHook,
27 | 		client:       client,
28 | 		status:       status,
29 | 	}, nil
30 | }
31 |
32 | func (b *GenericBinder) Name() string {
33 | 	return Name
34 | }
35 |
36 | func (b *GenericBinder) Bind(ctx context.Context, state *framework.CycleState, p *corev1.Pod, nodeName string) *framework.Status {
37 | 	pod, err := b.client.CoreV1().Pods(p.Namespace).Get(context.TODO(), p.Name, metav1.GetOptions{})
38 | 	if err != nil {
39 | 		return framework.NewStatus(framework.Error, fmt.Sprintf("Unable to bind: %v", err))
40 | 	}
41 | 	updatedPod := pod.DeepCopy()
42 | 	updatedPod.Spec.NodeName = nodeName
43 | 	updatedPod.Status.Phase = corev1.PodRunning
44 |
45 | 	if _, err = b.client.CoreV1().Pods(pod.Namespace).Update(ctx, updatedPod, metav1.UpdateOptions{}); err != nil {
46 | 		return framework.NewStatus(framework.Error, fmt.Sprintf("Unable to update bound pod: %v", err))
47 | 	}
48 |
49 | 	return nil
50 | }
51 |
52 | func (b *GenericBinder) PreBind(ctx context.Context, state *framework.CycleState, p *corev1.Pod, nodeName string) *framework.Status {
53 | 	return nil
54 | }
55 |
56 | func (b *GenericBinder) PostBind(_ context.Context, _ *framework.CycleState, pod *corev1.Pod, _ string) {
57 | 	if b.postBindHook != nil {
58 | 		if !metav1.HasAnnotation(pod.ObjectMeta, pkg.PodProvisioner) {
59 | 			return
60 | 		}
61 | 		b.status.SchedulerCountInc()
62 |
63 | 		if err := b.postBindHook(pod); err != nil {
64 | 			klog.ErrorS(err, "Invoking postBindHook returned an error", "pod", klog.KObj(pod))
65 | 		}
66 | 	}
67 | }
68 |
--------------------------------------------------------------------------------
/hack/.import-aliases:
--------------------------------------------------------------------------------
1 | {
2 |   "k8s.io/api/admissionregistration/v1": "admissionregistrationv1",
3 |   "k8s.io/api/admissionregistration/v1beta1": "admissionregistrationv1beta1",
4 |   "k8s.io/api/admission/v1beta1": "admissionv1beta1",
5 |   "k8s.io/api/admission/v1": "admissionv1",
6 |   "k8s.io/api/apps/v1": "appsv1",
7 |   "k8s.io/api/apps/v1beta1": "appsv1beta1",
8 |   "k8s.io/api/apps/v1beta2": "appsv1beta2",
9 |   "k8s.io/api/authentication/v1": "authenticationv1",
10 |   "k8s.io/api/authentication/v1beta1": "authenticationv1beta1",
11 |   "k8s.io/api/authorization/v1": "authorizationv1",
12 |   "k8s.io/api/authorization/v1beta1": "authorizationv1beta1",
13 |   "k8s.io/api/autoscaling/v1": "autoscalingv1",
14 |   "k8s.io/api/batch/v1": "batchv1",
15 |   "k8s.io/api/batch/v1beta1": "batchv1beta1",
16 |   "k8s.io/api/certificates/v1beta1": "certificatesv1beta1",
17 |   "k8s.io/api/coordination/v1": "coordinationv1",
18 |   "k8s.io/api/coordination/v1beta1": "coordinationv1beta1",
19 |   "k8s.io/api/core/v1": "corev1",
20 |   "k8s.io/api/discovery/v1": "discoveryv1",
21 |   "k8s.io/api/events/v1": "eventsv1",
22 |   "k8s.io/api/events/v1beta1": "eventsv1beta1",
23 |   "k8s.io/api/extensions/v1beta1": "extensionsv1beta1",
24 |   "k8s.io/api/imagepolicy/v1alpha1": "imagepolicyv1alpha1",
25 |   "k8s.io/api/networking/v1": "networkingv1",
26 |   "k8s.io/api/networking/v1beta1": "networkingv1beta1",
27 |   "k8s.io/api/node/v1alpha1": "nodev1alpha1",
28 |   "k8s.io/api/node/v1beta1": "nodev1beta1",
29 |   "k8s.io/api/node/v1": "nodev1",
30 |   "k8s.io/api/policy/v1": "policyv1",
31 |   "k8s.io/api/policy/v1beta1": "policyv1beta1",
32 |   "k8s.io/api/rbac/v1": "rbacv1",
33 |   "k8s.io/api/rbac/v1alpha1": "rbacv1alpha1",
"k8s.io/api/rbac/v1alpha1": "rbacv1alpha1", 34 | "k8s.io/api/rbac/v1beta1": "rbacv1beta1", 35 | "k8s.io/api/scheduling/v1": "schedulingv1", 36 | "k8s.io/api/scheduling/v1alpha1": "schedulingv1alpha1", 37 | "k8s.io/api/scheduling/v1beta1": "schedulingv1beta1", 38 | "k8s.io/api/storage/v1": "storagev1", 39 | "k8s.io/api/storage/v1alpha1": "storagev1alpha1", 40 | "k8s.io/api/storage/v1beta1": "storagev1beta1", 41 | "k8s.io/apimachinery/pkg/api/errors": "apierrors", 42 | "k8s.io/apimachinery/pkg/apis/meta/v1": "metav1", 43 | 44 | "github.com/k-cloud-labs/pkg/apis/policy/v1alpha1": "policyv1alpha1" 45 | } -------------------------------------------------------------------------------- /app/cmds/schedulersimulation/options/schedulersimulation.go: -------------------------------------------------------------------------------- 1 | package options 2 | 3 | import ( 4 | "github.com/spf13/pflag" 5 | "k8s.io/apimachinery/pkg/runtime" 6 | 7 | "github.com/k-cloud-labs/kluster-capacity/app/cmds" 8 | ) 9 | 10 | const ( 11 | // FromCluster represent an existing cluster without pods 12 | FromCluster = "Cluster" 13 | // FromSnapshot represent a snapshot 14 | FromSnapshot = "Snapshot" 15 | 16 | // ExitWhenAllScheduled means exit when all pods have been scheduled once 17 | ExitWhenAllScheduled = "AllScheduled" 18 | // ExitWhenAllSucceed means exit when all pods have been scheduled successfully 19 | ExitWhenAllSucceed = "AllSucceed" 20 | ) 21 | 22 | type Snapshot struct { 23 | // key is gk 24 | Objects map[string][]runtime.Object `json:"objects"` 25 | } 26 | 27 | type SchedulerSimulationOptions struct { 28 | cmds.Options 29 | // Cluster, Snapshot 30 | SourceFrom string 31 | ExitCondition string 32 | IgnorePodsOnExcludeNodes bool 33 | } 34 | 35 | type SchedulerSimulationConfig struct { 36 | Options *SchedulerSimulationOptions 37 | InitObjs []runtime.Object 38 | } 39 | 40 | func NewSchedulerSimulationOptions() *SchedulerSimulationOptions { 41 | return &SchedulerSimulationOptions{} 42 | } 43 | 44 | func NewSchedulerSimulationConfig(option *SchedulerSimulationOptions) *SchedulerSimulationConfig { 45 | return &SchedulerSimulationConfig{ 46 | Options: option, 47 | } 48 | } 49 | 50 | func (s *SchedulerSimulationOptions) AddFlags(fs *pflag.FlagSet) { 51 | fs.StringVar(&s.KubeConfig, "kubeconfig", s.KubeConfig, "Path to the kubeconfig file to use for the analysis") 52 | fs.StringVar(&s.SchedulerConfig, "schedulerconfig", s.SchedulerConfig, "Path to JSON or YAML file containing scheduler configuration. Used when source-from is cluster") 53 | fs.StringVarP(&s.OutputFormat, "output", "o", s.OutputFormat, "Output format. One of: json|yaml") 54 | fs.StringSliceVar(&s.ExcludeNodes, "exclude-nodes", s.ExcludeNodes, "Exclude nodes to be scheduled") 55 | fs.BoolVarP(&s.IgnorePodsOnExcludeNodes, "ignore-pods-on-excludes-nodes", "i", true, "Whether ignore the pods on the excludes nodes. By default true") 56 | fs.StringVar(&s.Snapshot, "snapshot", s.Snapshot, "Path of snapshot to initialize the world. Used when source-from is snapshot") 57 | fs.StringVar(&s.SourceFrom, "source-from", "Cluster", "Source of the init data. One of: Cluster|Snapshot") 58 | fs.StringVar(&s.ExitCondition, "exit-condition", "AllSucceed", "Exit condition of the simulator. 
One of: AllScheduled|AllSucceed") 59 | fs.BoolVar(&s.Verbose, "verbose", s.Verbose, "Verbose mode") 60 | fs.StringVarP(&s.SaveTo, "save", "s", s.SaveTo, "File path to save the simulation result") 61 | } 62 | -------------------------------------------------------------------------------- /app/cmds/clustercompression/options/clustercompression.go: -------------------------------------------------------------------------------- 1 | package options 2 | 3 | import ( 4 | "github.com/spf13/pflag" 5 | 6 | "github.com/k-cloud-labs/kluster-capacity/app/cmds" 7 | ) 8 | 9 | type ClusterCompressionOptions struct { 10 | cmds.Options 11 | FilterNodeOptions FilterNodeOptions 12 | } 13 | 14 | type FilterNodeOptions struct { 15 | ExcludeNotReadyNode bool 16 | ExcludeTaintNode bool 17 | IgnoreStaticPod bool 18 | IgnoreMirrorPod bool 19 | IgnoreCloneSet bool 20 | IgnoreVolumePod bool 21 | } 22 | 23 | type ClusterCompressionConfig struct { 24 | Options *ClusterCompressionOptions 25 | } 26 | 27 | func NewClusterCompressionConfig(opt *ClusterCompressionOptions) *ClusterCompressionConfig { 28 | return &ClusterCompressionConfig{ 29 | Options: opt, 30 | } 31 | } 32 | 33 | func NewClusterCompressionOptions() *ClusterCompressionOptions { 34 | return &ClusterCompressionOptions{} 35 | } 36 | 37 | func (s *ClusterCompressionOptions) AddFlags(fs *pflag.FlagSet) { 38 | fs.StringVar(&s.KubeConfig, "kubeconfig", s.KubeConfig, "Path to the kubeconfig file to use for the analysis.") 39 | fs.StringVarP(&s.OutputFormat, "output", "o", s.OutputFormat, "Output format. One of: json|default (Note: output is not versioned or guaranteed to be stable across releases)") 40 | fs.StringVar(&s.SchedulerConfig, "schedulerconfig", s.SchedulerConfig, "Path to JSON or YAML file containing scheduler configuration.") 41 | fs.IntVar(&s.MaxLimit, "max-limit", 0, "Number of instances of node to be scale down after which analysis stops.. By default unlimited.") 42 | fs.BoolVar(&s.FilterNodeOptions.ExcludeTaintNode, "exclude-taint-node", true, "Whether to filter nodes with taint when selecting nodes. By default true.") 43 | fs.BoolVar(&s.FilterNodeOptions.ExcludeNotReadyNode, "exclude-not-ready-node", true, "Whether to filter nodes with not ready when selecting nodes. By default true.") 44 | fs.BoolVar(&s.FilterNodeOptions.IgnoreStaticPod, "ignore-static-pod", false, "Whether to ignore nodes with static pods when filtering nodes. By default true.") 45 | fs.BoolVar(&s.FilterNodeOptions.IgnoreMirrorPod, "ignore-mirror-pod", false, "Whether to ignore nodes with mirror pods when filtering nodes. By default false.") 46 | fs.BoolVar(&s.FilterNodeOptions.IgnoreCloneSet, "ignore-cloneset", false, "Whether to ignore nodes with cloneSet pods when filtering nodes. By default false.") 47 | fs.BoolVar(&s.FilterNodeOptions.IgnoreVolumePod, "ignore-volume-pod", false, "Whether to ignore nodes with volume pods when filtering nodes. 
By default false.") 48 | fs.StringSliceVar(&s.ExcludeNodes, "exclude-nodes", s.ExcludeNodes, "Exclude nodes to be scheduled") 49 | fs.BoolVar(&s.Verbose, "verbose", s.Verbose, "Verbose mode") 50 | } 51 | -------------------------------------------------------------------------------- /pkg/utils/pod.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | 6 | uuid "github.com/satori/go.uuid" 7 | corev1 "k8s.io/api/core/v1" 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | "k8s.io/apimachinery/pkg/types" 10 | apiv1 "k8s.io/kubernetes/pkg/apis/core/v1" 11 | 12 | "github.com/k-cloud-labs/kluster-capacity/pkg" 13 | ) 14 | 15 | // IsMirrorPod returns true if the pod is a Mirror Pod. 16 | func IsMirrorPod(pod *corev1.Pod) bool { 17 | _, ok := pod.Annotations[corev1.MirrorPodAnnotationKey] 18 | return ok 19 | } 20 | 21 | // IsPodTerminating returns true if the pod DeletionTimestamp is set. 22 | func IsPodTerminating(pod *corev1.Pod) bool { 23 | return pod.DeletionTimestamp != nil 24 | } 25 | 26 | // IsStaticPod returns true if the pod is a static pod. 27 | func IsStaticPod(pod *corev1.Pod) bool { 28 | source, err := GetPodSource(pod) 29 | return err == nil && source != "api" 30 | } 31 | 32 | // IsCloneSetPod returns true if the pod is a IsCloneSetPod. 33 | func IsCloneSetPod(ownerRefList []metav1.OwnerReference) bool { 34 | for _, ownerRef := range ownerRefList { 35 | if ownerRef.Kind == "CloneSet" { 36 | return true 37 | } 38 | } 39 | return false 40 | } 41 | 42 | // IsDaemonsetPod returns true if the pod is a IsDaemonsetPod. 43 | func IsDaemonsetPod(ownerRefList []metav1.OwnerReference) bool { 44 | for _, ownerRef := range ownerRefList { 45 | if ownerRef.Kind == "DaemonSet" { 46 | return true 47 | } 48 | } 49 | return false 50 | } 51 | 52 | // IsPodWithLocalStorage returns true if the pod has local storage. 53 | func IsPodWithLocalStorage(pod *corev1.Pod) bool { 54 | for _, volume := range pod.Spec.Volumes { 55 | if volume.HostPath != nil || volume.EmptyDir != nil { 56 | return true 57 | } 58 | } 59 | 60 | return false 61 | } 62 | 63 | // GetPodSource returns the source of the pod based on the annotation. 
64 | func GetPodSource(pod *corev1.Pod) (string, error) { 65 | if pod.Annotations != nil { 66 | if source, ok := pod.Annotations["kubernetes.io/config.source"]; ok { 67 | return source, nil 68 | } 69 | } 70 | return "", fmt.Errorf("cannot get source of pod %q", pod.UID) 71 | } 72 | 73 | func InitPod(podTemplate *corev1.Pod) *corev1.Pod { 74 | pod := podTemplate.DeepCopy() 75 | 76 | apiv1.SetObjectDefaults_Pod(pod) 77 | 78 | // reset pod 79 | pod.Spec.NodeName = "" 80 | pod.Spec.SchedulerName = pkg.SchedulerName 81 | pod.Namespace = podTemplate.Namespace 82 | if pod.Namespace == "" { 83 | pod.Namespace = metav1.NamespaceDefault 84 | } 85 | pod.Status = corev1.PodStatus{} 86 | 87 | // use simulated pod name with an index to construct the name 88 | pod.ObjectMeta.Name = podTemplate.Name 89 | pod.ObjectMeta.UID = types.UID(uuid.NewV4().String()) 90 | 91 | // Add pod provisioner annotation 92 | if pod.ObjectMeta.Annotations == nil { 93 | pod.ObjectMeta.Annotations = map[string]string{} 94 | } 95 | pod.ObjectMeta.Annotations[pkg.PodProvisioner] = pkg.SchedulerName 96 | 97 | return pod 98 | } 99 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 10 | 11 | #### What type of PR is this? 12 | 13 | 27 | 28 | #### What this PR does / why we need it: 29 | 30 | #### Which issue(s) this PR fixes: 31 | 36 | Fixes # 37 | 38 | #### Special notes for your reviewer: 39 | 40 | #### Does this PR introduce a user-facing change? 41 | 48 | ```release-note 49 | 50 | ``` 51 | 52 | #### Additional documentation e.g., KEPs (Kubernetes Enhancement Proposals), usage docs, etc.: 53 | 54 | 69 | ```docs 70 | 71 | ``` 72 | -------------------------------------------------------------------------------- /app/root.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 k-cloud-labs org 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */
16 | package app
17 |
18 | import (
19 | 	"fmt"
20 | 	"os"
21 |
22 | 	"github.com/spf13/cobra"
23 | 	"github.com/spf13/viper"
24 | 	_ "k8s.io/client-go/plugin/pkg/client/auth"
25 | 	"k8s.io/klog/v2"
26 |
27 | 	"github.com/k-cloud-labs/kluster-capacity/app/cmds/capacityestimation"
28 | 	"github.com/k-cloud-labs/kluster-capacity/app/cmds/clustercompression"
29 | 	"github.com/k-cloud-labs/kluster-capacity/app/cmds/schedulersimulation"
30 | 	"github.com/k-cloud-labs/kluster-capacity/pkg/version/sharedcommand"
31 | )
32 |
33 | var cfgFile string
34 |
35 | // rootCmd represents the base command when called without any subcommands
36 | var rootCmd = &cobra.Command{
37 | 	Use:   "kluster-capacity",
38 | 	Short: "A tool which supports capacity estimation, scheduler simulation, and cluster compression.",
39 | 	Long:  `A tool which supports capacity estimation, scheduler simulation, and cluster compression.`,
40 | 	// Uncomment the following line if your bare application
41 | 	// has an action associated with it:
42 | 	//Run: func(cmd *cobra.Command, args []string) {},
43 | }
44 |
45 | // Execute adds all child commands to the root command and sets flags appropriately.
46 | // This is called by main.main(). It only needs to happen once to the rootCmd.
47 | func Execute() {
48 | 	err := rootCmd.Execute()
49 | 	if err != nil {
50 | 		os.Exit(1)
51 | 	}
52 | }
53 |
54 | func init() {
55 | 	klog.InitFlags(nil)
56 |
57 | 	cobra.OnInitialize(initConfig)
58 |
59 | 	// Here you will define your flags and configuration settings.
60 | 	// Cobra supports persistent flags, which, if defined here,
61 | 	// will be global for your application.
62 |
63 | 	rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default is $HOME/.kluster-capacity.yaml)")
64 |
65 | 	// Cobra also supports local flags, which will only run
66 | 	// when this action is called directly.
67 | 	rootCmd.Flags().BoolP("toggle", "t", false, "Help message for toggle")
68 |
69 | 	rootCmd.AddCommand(capacityestimation.NewCapacityEstimationCmd(), schedulersimulation.NewSchedulerSimulationCmd(), clustercompression.NewClusterCompressionCmd())
70 | 	rootCmd.AddCommand(sharedcommand.NewCmdVersion(os.Stdout, "kluster-capacity"))
71 | }
72 |
73 | // initConfig reads in config file and ENV variables if set.
74 | func initConfig() {
75 | 	if cfgFile != "" {
76 | 		// Use config file from the flag.
77 | 		viper.SetConfigFile(cfgFile)
78 | 	} else {
79 | 		// Find home directory.
80 | 		home, err := os.UserHomeDir()
81 | 		cobra.CheckErr(err)
82 |
83 | 		// Search config in home directory with name ".kluster-capacity" (without extension).
84 | 		viper.AddConfigPath(home)
85 | 		viper.SetConfigType("yaml")
86 | 		viper.SetConfigName(".kluster-capacity")
87 | 	}
88 |
89 | 	viper.AutomaticEnv() // read in environment variables that match
90 |
91 | 	// If a config file is found, read it in.
92 | 	if err := viper.ReadInConfig(); err == nil {
93 | 		fmt.Fprintln(os.Stderr, "Using config file:", viper.ConfigFileUsed())
94 | 	}
95 | }
96 |
--------------------------------------------------------------------------------
/app/cmds/clustercompression/clustercompression.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright © 2023 k-cloud-labs org
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 |     http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package clustercompression
18 |
19 | import (
20 | 	"errors"
21 | 	"flag"
22 | 	"fmt"
23 |
24 | 	"github.com/lithammer/dedent"
25 | 	"github.com/spf13/cobra"
26 | 	cliflag "k8s.io/component-base/cli/flag"
27 | 	"k8s.io/klog/v2"
28 |
29 | 	"github.com/k-cloud-labs/kluster-capacity/app/cmds/clustercompression/options"
30 | 	"github.com/k-cloud-labs/kluster-capacity/pkg"
31 | 	"github.com/k-cloud-labs/kluster-capacity/pkg/simulator/clustercompression"
32 | )
33 |
34 | var clusterCompressionLong = dedent.Dedent(`
35 | 	The "cc" tool simulates an API server by copying the initial state from the Kubernetes environment,
36 | 	using the configuration specified in KUBECONFIG. It attempts to scale down the number of nodes to
37 | 	the limit specified by the --max-limit flag, and if this flag is not provided, it schedules pods
38 | 	onto as few nodes as possible and provides a list of nodes that can be taken offline.
39 | `)
40 |
41 | func NewClusterCompressionCmd() *cobra.Command {
42 | 	opt := options.NewClusterCompressionOptions()
43 |
44 | 	var cmd = &cobra.Command{
45 | 		Use:           "cc",
46 | 		Short:         "cc uses simulated scheduling to calculate the number of nodes that can be taken offline in the cluster",
47 | 		Long:          clusterCompressionLong,
48 | 		SilenceErrors: false,
49 | 		RunE: func(cmd *cobra.Command, args []string) error {
50 | 			flag.Parse()
51 |
52 | 			opt.Default()
53 | 			err := validateOptions(opt)
54 | 			if err != nil {
55 | 				return err
56 | 			}
57 |
58 | 			err = run(opt)
59 | 			if err != nil {
60 | 				return err
61 | 			}
62 |
63 | 			return nil
64 | 		},
65 | 	}
66 |
67 | 	flags := cmd.Flags()
68 | 	flags.SetNormalizeFunc(cliflag.WordSepNormalizeFunc)
69 | 	flags.AddGoFlagSet(flag.CommandLine)
70 | 	opt.AddFlags(flags)
71 |
72 | 	return cmd
73 | }
74 |
75 | func validateOptions(opt *options.ClusterCompressionOptions) error {
76 | 	if len(opt.KubeConfig) == 0 {
77 | 		return errors.New("kubeconfig is missing")
78 | 	}
79 |
80 | 	return nil
81 | }
82 |
83 | func run(opt *options.ClusterCompressionOptions) error {
84 | 	defer klog.Flush()
85 | 	conf := options.NewClusterCompressionConfig(opt)
86 |
87 | 	reports, err := runCCSimulator(conf)
88 | 	if err != nil {
89 | 		klog.Errorf("runCCSimulator err: %s\n", err.Error())
90 | 		return err
91 | 	}
92 |
93 | 	if err := reports.Print(conf.Options.Verbose, conf.Options.OutputFormat); err != nil {
94 | 		return fmt.Errorf("error while printing: %v", err)
95 | 	}
96 | 	return nil
97 | }
98 |
99 | func runCCSimulator(conf *options.ClusterCompressionConfig) (pkg.Printer, error) {
100 | 	s, err := clustercompression.NewCCSimulatorExecutor(conf)
101 | 	if err != nil {
102 | 		return nil, err
103 | 	}
104 |
105 | 	err = s.Initialize()
106 | 	if err != nil {
107 | 		return nil, err
108 | 	}
109 |
110 | 	err = s.Run()
111 | 	if err != nil {
112 | 		return nil, err
113 | 	}
114 |
115 | 	return s.Report(), nil
116 | }
117 |
--------------------------------------------------------------------------------
/pkg/simulator/schedulersimulation/simulator.go:
--------------------------------------------------------------------------------
1 | package schedulersimulation
2 |
3 | import (
4 | 	"fmt"
5 | 	"sync"
6 |
7 |
corev1 "k8s.io/api/core/v1" 8 | "k8s.io/client-go/informers" 9 | "k8s.io/client-go/tools/cache" 10 | 11 | "github.com/k-cloud-labs/kluster-capacity/app/cmds/schedulersimulation/options" 12 | "github.com/k-cloud-labs/kluster-capacity/pkg" 13 | "github.com/k-cloud-labs/kluster-capacity/pkg/framework" 14 | "github.com/k-cloud-labs/kluster-capacity/pkg/utils" 15 | ) 16 | 17 | type simulator struct { 18 | pkg.Framework 19 | 20 | exitCondition string 21 | } 22 | 23 | func NewSSSimulatorExecutor(conf *options.SchedulerSimulationConfig) (pkg.Simulator, error) { 24 | kubeSchedulerConfig, err := utils.BuildKubeSchedulerCompletedConfig(conf.Options.SchedulerConfig, conf.Options.KubeConfig) 25 | if err != nil { 26 | return nil, err 27 | } 28 | 29 | kubeConfig, err := utils.BuildRestConfig(conf.Options.KubeConfig) 30 | if err != nil { 31 | return nil, err 32 | } 33 | 34 | framework, err := framework.NewKubeSchedulerFramework(kubeSchedulerConfig, kubeConfig, 35 | framework.WithNodeImages(false), 36 | framework.WithScheduledPods(false), 37 | framework.WithTerminatingPods(false), 38 | framework.WithExcludeNodes(conf.Options.ExcludeNodes), 39 | framework.WithSaveTo(conf.Options.SaveTo)) 40 | if err != nil { 41 | return nil, err 42 | } 43 | 44 | s := &simulator{ 45 | Framework: framework, 46 | exitCondition: conf.Options.ExitCondition, 47 | } 48 | 49 | err = s.addEventHandlers(kubeSchedulerConfig.InformerFactory) 50 | if err != nil { 51 | return nil, err 52 | } 53 | 54 | return s, nil 55 | } 56 | 57 | func (s *simulator) Run() error { 58 | return s.Framework.Run(nil) 59 | } 60 | 61 | func (s *simulator) Report() pkg.Printer { 62 | return generateReport(s.Status()) 63 | } 64 | 65 | func (s *simulator) addEventHandlers(informerFactory informers.SharedInformerFactory) (err error) { 66 | succeedPodMap := sync.Map{} 67 | failedPodMap := sync.Map{} 68 | keyFunc := func(pod *corev1.Pod) string { 69 | return pod.Namespace + "/" + pod.Name 70 | } 71 | count := 0 72 | _, _ = informerFactory.Core().V1().Pods().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 73 | AddFunc: func(obj interface{}) { 74 | pod := obj.(*corev1.Pod) 75 | if len(pod.Spec.NodeName) > 0 { 76 | succeedPodMap.Store(keyFunc(pod), true) 77 | } 78 | count++ 79 | }, 80 | UpdateFunc: func(oldObj, newObj interface{}) { 81 | pod := newObj.(*corev1.Pod) 82 | key := keyFunc(pod) 83 | if len(pod.Spec.NodeName) > 0 { 84 | succeedPodMap.Store(key, true) 85 | if _, ok := failedPodMap.Load(key); ok { 86 | failedPodMap.Delete(key) 87 | } 88 | } else { 89 | for _, cond := range pod.Status.Conditions { 90 | if cond.Type == corev1.PodScheduled && cond.Status == corev1.ConditionFalse { 91 | failedPodMap.Store(key, true) 92 | } 93 | } 94 | } 95 | 96 | var ( 97 | succeedCount int 98 | failedCount int 99 | stop bool 100 | reason string 101 | ) 102 | succeedPodMap.Range(func(key, value any) bool { 103 | succeedCount++ 104 | return true 105 | }) 106 | failedPodMap.Range(func(key, value any) bool { 107 | failedCount++ 108 | return true 109 | }) 110 | 111 | if s.exitCondition == options.ExitWhenAllScheduled && succeedCount+failedCount == count { 112 | stop = true 113 | reason = "AllScheduled: %d pod(s) have been scheduled once." 114 | } else if s.exitCondition == options.ExitWhenAllSucceed && succeedCount == count { 115 | stop = true 116 | reason = "AllSucceed: %d pod(s) have been scheduled successfully." 
117 | } 118 | 119 | if stop { 120 | err = s.Stop(fmt.Sprintf(reason, count)) 121 | } 122 | }, 123 | }) 124 | 125 | return 126 | } 127 | -------------------------------------------------------------------------------- /pkg/simulator/clustercompression/report.go: -------------------------------------------------------------------------------- 1 | package clustercompression 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "time" 7 | 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | 10 | "github.com/k-cloud-labs/kluster-capacity/pkg" 11 | "github.com/k-cloud-labs/kluster-capacity/pkg/utils" 12 | ) 13 | 14 | type ClusterCompressionReview struct { 15 | metav1.TypeMeta 16 | Status ClusterCompressionReviewReviewStatus `json:"status"` 17 | } 18 | 19 | type ClusterCompressionReviewReviewStatus struct { 20 | CreationTimestamp time.Time `json:"creationTimestamp"` 21 | StopReason *ClusterCompressionReviewScheduleStopReason `json:"stopReason"` 22 | ScaleDownNodeNames []string `json:"scaleDownNodeNames"` 23 | SelectNodeCount int `json:"SelectNodeCount"` 24 | SchedulerCount int `json:"schedulerCount"` 25 | FailedSchedulerCount int `json:"failedSchedulerCount"` 26 | } 27 | 28 | type ClusterCompressionReviewScheduleStopReason struct { 29 | StopType string `json:"stopType"` 30 | StopMessage string `json:"stopMessage"` 31 | } 32 | 33 | func generateReport(status *pkg.Status) *ClusterCompressionReview { 34 | return &ClusterCompressionReview{ 35 | Status: getReviewStatus(status), 36 | } 37 | } 38 | 39 | func getReviewStatus(status *pkg.Status) ClusterCompressionReviewReviewStatus { 40 | return ClusterCompressionReviewReviewStatus{ 41 | CreationTimestamp: time.Now(), 42 | StopReason: getMainStopReason(status.StopReason), 43 | ScaleDownNodeNames: status.NodesToScaleDown, 44 | SelectNodeCount: status.SelectNodeCount, 45 | SchedulerCount: status.SchedulerCount, 46 | FailedSchedulerCount: status.FailedSchedulerCount, 47 | } 48 | } 49 | 50 | func getMainStopReason(message string) *ClusterCompressionReviewScheduleStopReason { 51 | slicedMessage := strings.Split(message, "\n") 52 | colon := strings.Index(slicedMessage[0], ":") 53 | 54 | reason := &ClusterCompressionReviewScheduleStopReason{ 55 | StopType: slicedMessage[0][:colon], 56 | StopMessage: strings.Trim(slicedMessage[0][colon+1:], " "), 57 | } 58 | return reason 59 | } 60 | 61 | func (r *ClusterCompressionReview) Print(verbose bool, format string) error { 62 | switch format { 63 | case "json": 64 | return utils.PrintJson(r) 65 | default: 66 | return clusterCapacityReviewDefaultPrint(r, verbose) 67 | } 68 | } 69 | 70 | func clusterCapacityReviewDefaultPrint(r *ClusterCompressionReview, verbose bool) error { 71 | if r != nil && len(r.Status.ScaleDownNodeNames) > 0 { 72 | if verbose { 73 | fmt.Printf("Select node %d times.\n", r.Status.SelectNodeCount) 74 | fmt.Printf("Scheduled pod %d times, with %d scheduling failure.\n", r.Status.SchedulerCount+r.Status.FailedSchedulerCount, r.Status.FailedSchedulerCount) 75 | fmt.Printf("%d node(s) in the cluster can be scaled down.\n", len(r.Status.ScaleDownNodeNames)) 76 | fmt.Printf("\nTermination reason: %v: %v\n", r.Status.StopReason.StopType, r.Status.StopReason.StopMessage) 77 | fmt.Printf("\nnodes selected to be scaled down:\n") 78 | 79 | for i := range r.Status.ScaleDownNodeNames { 80 | fmt.Printf("\t- %s\n", r.Status.ScaleDownNodeNames[i]) 81 | } 82 | } else { 83 | for i := range r.Status.ScaleDownNodeNames { 84 | fmt.Println(r.Status.ScaleDownNodeNames[i]) 85 | } 86 | } 87 | } else { 88 | fmt.Printf("Select node %d 
times.\n", r.Status.SelectNodeCount) 89 | fmt.Printf("Scheduled pod %d times, with %d scheduling failure.\n", r.Status.SchedulerCount+r.Status.FailedSchedulerCount, r.Status.FailedSchedulerCount) 90 | fmt.Println("No nodes in the cluster can be scaled down.") 91 | fmt.Printf("\nTermination reason: %v: %v\n", r.Status.StopReason.StopType, r.Status.StopReason.StopMessage) 92 | } 93 | 94 | return nil 95 | } 96 | -------------------------------------------------------------------------------- /app/cmds/schedulersimulation/schedulersimulation.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 k-cloud-labs org 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package schedulersimulation 18 | 19 | import ( 20 | "errors" 21 | "flag" 22 | "fmt" 23 | 24 | "github.com/lithammer/dedent" 25 | "github.com/spf13/cobra" 26 | cliflag "k8s.io/component-base/cli/flag" 27 | "k8s.io/klog/v2" 28 | 29 | "github.com/k-cloud-labs/kluster-capacity/app/cmds/schedulersimulation/options" 30 | "github.com/k-cloud-labs/kluster-capacity/pkg" 31 | "github.com/k-cloud-labs/kluster-capacity/pkg/simulator/schedulersimulation" 32 | ) 33 | 34 | var schedulerSimulationLong = dedent.Dedent(` 35 | ss simulates an API server with initial state copied from the Kubernetes environment 36 | with its configuration specified in KUBECONFIG. The simulated API server tries to schedule the number of 37 | pods from existing cluster. 
38 | `) 39 | 40 | func NewSchedulerSimulationCmd() *cobra.Command { 41 | opt := options.NewSchedulerSimulationOptions() 42 | 43 | // ssCmd represents the ss command 44 | var cmd = &cobra.Command{ 45 | Use: "ss", 46 | Short: "ss is used for simulating scheduling of pods", 47 | Long: schedulerSimulationLong, 48 | SilenceErrors: false, 49 | RunE: func(cmd *cobra.Command, args []string) error { 50 | flag.Parse() 51 | 52 | opt.Default() 53 | err := validate(opt) 54 | if err != nil { 55 | return err 56 | } 57 | 58 | err = run(opt) 59 | if err != nil { 60 | return err 61 | } 62 | 63 | return nil 64 | }, 65 | } 66 | 67 | flags := cmd.Flags() 68 | flags.SetNormalizeFunc(cliflag.WordSepNormalizeFunc) 69 | flags.AddGoFlagSet(flag.CommandLine) 70 | opt.AddFlags(flags) 71 | 72 | return cmd 73 | } 74 | 75 | func validate(opt *options.SchedulerSimulationOptions) error { 76 | if opt.SourceFrom == options.FromCluster && len(opt.KubeConfig) == 0 { 77 | return errors.New("kubeconfig must be specified when source-from is cluster") 78 | } 79 | 80 | if opt.SourceFrom == options.FromSnapshot && len(opt.Snapshot) == 0 { 81 | return errors.New("snapshot must be specified when source-from is snapshot") 82 | } 83 | 84 | if opt.ExitCondition != options.ExitWhenAllSucceed && opt.ExitCondition != options.ExitWhenAllScheduled { 85 | return errors.New("exit condition must be AllSucceed or AllScheduled") 86 | } 87 | 88 | if len(opt.KubeConfig) == 0 { 89 | return errors.New("kubeconfig is missing") 90 | } 91 | 92 | return nil 93 | } 94 | 95 | func run(opt *options.SchedulerSimulationOptions) error { 96 | defer klog.Flush() 97 | conf := options.NewSchedulerSimulationConfig(opt) 98 | 99 | // TODO: init simulator from snapshot 100 | //if opt.SourceFrom == options.FromSnapshot { 101 | //} 102 | 103 | reports, err := runSimulator(conf) 104 | if err != nil { 105 | return err 106 | } 107 | 108 | if err := reports.Print(opt.Verbose, opt.OutputFormat); err != nil { 109 | return fmt.Errorf("error while printing: %v", err) 110 | } 111 | 112 | return nil 113 | } 114 | 115 | func runSimulator(conf *options.SchedulerSimulationConfig) (pkg.Printer, error) { 116 | s, err := schedulersimulation.NewSSSimulatorExecutor(conf) 117 | if err != nil { 118 | return nil, err 119 | } 120 | 121 | err = s.Initialize(conf.InitObjs...) 122 | if err != nil { 123 | return nil, err 124 | } 125 | 126 | err = s.Run() 127 | if err != nil { 128 | return nil, err 129 | } 130 | 131 | return s.Report(), nil 132 | } 133 | -------------------------------------------------------------------------------- /app/cmds/capacityestimation/capacityestimation.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 k-cloud-labs org 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */
16 | 
17 | package capacityestimation
18 | 
19 | import (
20 | 	"errors"
21 | 	"flag"
22 | 	"fmt"
23 | 
24 | 	"github.com/lithammer/dedent"
25 | 	"github.com/spf13/cobra"
26 | 	cliflag "k8s.io/component-base/cli/flag"
27 | 	"k8s.io/klog/v2"
28 | 
29 | 	"github.com/k-cloud-labs/kluster-capacity/app/cmds/capacityestimation/options"
30 | 	"github.com/k-cloud-labs/kluster-capacity/pkg"
31 | 	"github.com/k-cloud-labs/kluster-capacity/pkg/simulator/capacityestimation"
32 | )
33 | 
34 | var capacityEstimationLong = dedent.Dedent(`
35 | 	ce simulates an API server with initial state copied from the Kubernetes environment
36 | 	with its configuration specified in KUBECONFIG. The simulated API server tries to schedule the number of
37 | 	pods specified by the --max-limit flag. If the --max-limit flag is not specified, pods are scheduled until
38 | 	the simulated API server runs out of resources.
39 | `)
40 | 
41 | func NewCapacityEstimationCmd() *cobra.Command {
42 | 	opt := options.NewCapacityEstimationOptions()
43 | 
44 | 	var cmd = &cobra.Command{
45 | 		Use:           "ce --kubeconfig KUBECONFIG --pods-from-template PODYAML | --pods-from-cluster Namespace/Name",
46 | 		Short:         "ce is used to estimate the remaining capacity for a specified pod",
47 | 		Long:          capacityEstimationLong,
48 | 		SilenceErrors: false,
49 | 		RunE: func(cmd *cobra.Command, args []string) error {
50 | 			flag.Parse()
51 | 
52 | 			opt.Default()
53 | 			err := validate(opt)
54 | 			if err != nil {
55 | 				return err
56 | 			}
57 | 
58 | 			err = run(opt)
59 | 			if err != nil {
60 | 				return err
61 | 			}
62 | 
63 | 			return nil
64 | 		},
65 | 	}
66 | 
67 | 	flags := cmd.Flags()
68 | 	flags.SetNormalizeFunc(cliflag.WordSepNormalizeFunc)
69 | 	flags.AddGoFlagSet(flag.CommandLine)
70 | 	opt.AddFlags(flags)
71 | 
72 | 	return cmd
73 | }
74 | 
75 | func validate(opt *options.CapacityEstimationOptions) error {
76 | 	if len(opt.PodsFromTemplate) == 0 && len(opt.PodsFromCluster) == 0 {
77 | 		return errors.New("one of --pods-from-template and --pods-from-cluster must be specified")
78 | 	}
79 | 
80 | 	if len(opt.PodsFromTemplate) != 0 && len(opt.PodsFromCluster) != 0 {
81 | 		return errors.New("--pods-from-template and --pods-from-cluster are mutually exclusive")
82 | 	}
83 | 
84 | 	if len(opt.KubeConfig) == 0 {
85 | 		return errors.New("kubeconfig is missing")
86 | 	}
87 | 
88 | 	return nil
89 | }
90 | 
91 | func run(opt *options.CapacityEstimationOptions) error {
92 | 	defer klog.Flush()
93 | 	conf := options.NewCapacityEstimationConfig(opt)
94 | 
95 | 	err := conf.ParseAPISpec()
96 | 	if err != nil {
97 | 		return fmt.Errorf("failed to parse pod spec file: %v", err)
98 | 	}
99 | 
100 | 	reports, err := runSimulator(conf)
101 | 	if err != nil {
102 | 		return err
103 | 	}
104 | 
105 | 	if err := reports.Print(conf.Options.Verbose, conf.Options.OutputFormat); err != nil {
106 | 		return fmt.Errorf("error while printing: %v", err)
107 | 	}
108 | 
109 | 	return nil
110 | }
111 | 
112 | func runSimulator(conf *options.CapacityEstimationConfig) (pkg.Printer, error) {
113 | 	s, err := capacityestimation.NewCESimulatorExecutor(conf)
114 | 	if err != nil {
115 | 		return nil, err
116 | 	}
117 | 
118 | 	err = s.Initialize(conf.InitObjs...)
119 | 	if err != nil {
120 | 		return nil, err
121 | 	}
122 | 
123 | 	err = s.Run()
124 | 	if err != nil {
125 | 		return nil, err
126 | 	}
127 | 
128 | 	return s.Report(), nil
129 | }
130 | 
--------------------------------------------------------------------------------
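Both command files above drive their simulators through the same two small contracts from the `pkg` package. `pkg/interface.go` is not part of this excerpt, so the sketch below is inferred from the call sites (`Initialize`, `Run`, `Report`, and `reports.Print`); it is not copied from the source and may differ in detail:

```go
// Inferred sketch only; the authoritative definitions live in
// pkg/interface.go, which is not shown in this listing.
package pkg

import "k8s.io/apimachinery/pkg/runtime"

// Printer renders a simulation report as plain text, JSON, or YAML,
// matching the reports.Print(verbose, format) calls above.
type Printer interface {
	Print(verbose bool, format string) error
}

// Simulator is the lifecycle the ce/cc/ss executors follow:
// seed the fake cluster, run the scheduling loop, emit a report.
type Simulator interface {
	Initialize(objs ...runtime.Object) error
	Run() error
	Report() Printer
}
```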
/app/cmds/capacityestimation/options/capacityestimation.go:
--------------------------------------------------------------------------------
1 | package options
2 | 
3 | import (
4 | 	"context"
5 | 	"fmt"
6 | 	"io"
7 | 	"net/http"
8 | 	"os"
9 | 	"path/filepath"
10 | 	"strings"
11 | 
12 | 	"github.com/spf13/pflag"
13 | 	corev1 "k8s.io/api/core/v1"
14 | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
15 | 	"k8s.io/apimachinery/pkg/runtime"
16 | 	"k8s.io/apimachinery/pkg/util/yaml"
17 | 	clientset "k8s.io/client-go/kubernetes"
18 | 
19 | 	"github.com/k-cloud-labs/kluster-capacity/app/cmds"
20 | 	"github.com/k-cloud-labs/kluster-capacity/pkg/utils"
21 | )
22 | 
23 | type CapacityEstimationOptions struct {
24 | 	cmds.Options
25 | 	PodsFromTemplate []string
26 | 	PodsFromCluster  NamespaceNames
27 | }
28 | 
29 | type CapacityEstimationConfig struct {
30 | 	Pods     []*corev1.Pod
31 | 	InitObjs []runtime.Object
32 | 	Options  *CapacityEstimationOptions
33 | }
34 | 
35 | func NewCapacityEstimationConfig(opt *CapacityEstimationOptions) *CapacityEstimationConfig {
36 | 	return &CapacityEstimationConfig{
37 | 		Options: opt,
38 | 	}
39 | }
40 | 
41 | func NewCapacityEstimationOptions() *CapacityEstimationOptions {
42 | 	return &CapacityEstimationOptions{}
43 | }
44 | 
45 | func (s *CapacityEstimationOptions) AddFlags(fs *pflag.FlagSet) {
46 | 	fs.StringVar(&s.KubeConfig, "kubeconfig", s.KubeConfig, "Path to the kubeconfig file to use for the analysis")
47 | 	fs.StringSliceVar(&s.PodsFromTemplate, "pods-from-template", s.PodsFromTemplate, "Path to JSON or YAML file containing pod definition. Comma separated and exclusive with --pods-from-cluster")
48 | 	fs.Var(&s.PodsFromCluster, "pods-from-cluster", "Namespace/Name of the pod from the existing cluster. Comma separated and exclusive with --pods-from-template")
49 | 	fs.IntVar(&s.MaxLimit, "max-limit", 0, "Number of instances of pod to be scheduled after which analysis stops. By default unlimited")
50 | 	fs.StringVar(&s.SchedulerConfig, "schedulerconfig", s.SchedulerConfig, "Path to JSON or YAML file containing scheduler configuration")
51 | 	fs.BoolVar(&s.Verbose, "verbose", s.Verbose, "Verbose mode")
52 | 	fs.StringVarP(&s.OutputFormat, "output", "o", s.OutputFormat, "Output format. One of: json|yaml (Note: output is not versioned or guaranteed to be stable across releases)")
53 | 	fs.StringSliceVar(&s.ExcludeNodes, "exclude-nodes", s.ExcludeNodes, "Nodes to be excluded from the simulated scheduling")
54 | }
55 | 
56 | func (s *CapacityEstimationConfig) ParseAPISpec() error {
57 | 	getPodFromTemplate := func(template string) (*corev1.Pod, error) {
58 | 		var (
59 | 			err          error
60 | 			versionedPod = &corev1.Pod{}
61 | 			spec         io.Reader
62 | 		)
63 | 
64 | 		if strings.HasPrefix(template, "http://") || strings.HasPrefix(template, "https://") {
65 | 			response, err := http.Get(template)
66 | 			if err != nil {
67 | 				return nil, err
68 | 			}
69 | 			defer response.Body.Close()
70 | 			if response.StatusCode != http.StatusOK {
71 | 				return nil, fmt.Errorf("unable to read URL %q, server reported %v, status code=%v", template, response.Status, response.StatusCode)
72 | 			}
73 | 			spec = response.Body
74 | 		} else {
75 | 			filename, _ := filepath.Abs(template)
76 | 			f, err := os.Open(filename)
77 | 			if err != nil {
78 | 				return nil, fmt.Errorf("failed to open config file: %v", err)
79 | 			}
80 | 			defer f.Close()
81 | 			spec = f
82 | 		}
83 | 
84 | 		decoder := yaml.NewYAMLOrJSONDecoder(spec, 4096)
85 | 		err = decoder.Decode(versionedPod)
86 | 		if err != nil {
87 | 			return nil, fmt.Errorf("failed to decode config file: %v", err)
88 | 		}
89 | 
90 | 		return versionedPod, nil
91 | 	}
92 | 
93 | 	if len(s.Options.PodsFromTemplate) != 0 {
94 | 		for _, template := range s.Options.PodsFromTemplate {
95 | 			pod, err := getPodFromTemplate(template)
96 | 			if err != nil {
97 | 				return err
98 | 			}
99 | 			s.Pods = append(s.Pods, pod)
100 | 		}
101 | 	} else {
102 | 		cfg, err := utils.BuildRestConfig(s.Options.KubeConfig)
103 | 		if err != nil {
104 | 			return err
105 | 		}
106 | 
107 | 		kubeClient, err := clientset.NewForConfig(cfg)
108 | 		if err != nil {
109 | 			return err
110 | 		}
111 | 
112 | 		for _, nn := range s.Options.PodsFromCluster {
113 | 			pod, err := kubeClient.CoreV1().Pods(nn.Namespace).Get(context.TODO(), nn.Name, metav1.GetOptions{ResourceVersion: "0"})
114 | 			if err != nil {
115 | 				return err
116 | 			}
117 | 			s.Pods = append(s.Pods, pod)
118 | 		}
119 | 	}
120 | 
121 | 	return nil
122 | }
123 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Go information
2 | GO ?= go
3 | GOFMT ?= gofmt "-s"
4 | GOOS ?= $(shell go env GOOS)
5 | GOARCH ?= $(shell go env GOARCH)
6 | SOURCES := $(shell find . -type f -name '*.go')
7 | 
8 | GOFILES := $(shell find . -name "*.go" | grep -v vendor)
9 | TESTFOLDER := $(shell $(GO) list ./... | grep -v examples)
10 | TESTTAGS ?= ""
11 | VETPACKAGES ?= $(shell $(GO) list ./... | grep -v /examples/)
12 | 
13 | # Git information
14 | GIT_VERSION ?= $(shell git describe --tags --dirty --always)
15 | GIT_COMMIT_HASH ?= $(shell git rev-parse HEAD)
16 | GIT_TREESTATE = "clean"
-eq 1 ]; then echo "1"; fi) 18 | ifeq ($(GIT_DIFF), 1) 19 | GIT_TREESTATE = "dirty" 20 | endif 21 | BUILDDATE = $(shell date -u +'%Y-%m-%dT%H:%M:%SZ') 22 | 23 | LDFLAGS := "-X github.com/k-cloud-labs/kluster-capacity/pkg/version.gitVersion=$(GIT_VERSION) \ 24 | -X github.com/k-cloud-labs/kluster-capacity/pkg/version.gitCommit=$(GIT_COMMIT_HASH) \ 25 | -X github.com/k-cloud-labs/kluster-capacity/pkg/version.gitTreeState=$(GIT_TREESTATE) \ 26 | -X github.com/k-cloud-labs/kluster-capacity/pkg/version.buildDate=$(BUILDDATE)" 27 | 28 | # Set your version by env or using latest tags from git 29 | VERSION?="" 30 | ifeq ($(VERSION), "") 31 | LATEST_TAG=$(shell git describe --tags --always) 32 | ifeq ($(LATEST_TAG),) 33 | # Forked repo may not sync tags from upstream, so give it a default tag to make CI happy. 34 | VERSION="unknown" 35 | else 36 | VERSION=$(LATEST_TAG) 37 | endif 38 | endif 39 | 40 | # Setting SHELL to bash allows bash commands to be executed by recipes. 41 | # This is a requirement for 'setup-envtest.sh' in the test target. 42 | # Options are set to exit when a recipe line exits non-zero or a piped command fails. 43 | SHELL = /usr/bin/env bash -o pipefail 44 | .SHELLFLAGS = -ec 45 | 46 | ##@ General 47 | 48 | # The help target prints out all targets with their descriptions organized 49 | # beneath their categories. The categories are represented by '##@' and the 50 | # target descriptions by '##'. The awk commands is responsible for reading the 51 | # entire set of makefiles included in this invocation, looking for lines of the 52 | # file as xyz: ## something, and then pretty-format the target and help. Then, 53 | # if there's a line with ##@ something, that gets pretty-printed as a category. 54 | # More info on the usage of ANSI control characters for terminal formatting: 55 | # https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters 56 | # More info on the awk command: 57 | # http://linuxcommand.org/lc3_adv_awk.php 58 | 59 | .PHONY: help 60 | help: ## Display this help. 
61 | 	@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n  make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf "  \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
62 | 
63 | .PHONY: checkall
64 | checkall: fmt-check vet ## Run all checks
65 | 	hack/verify-staticcheck.sh
66 | 	hack/verify-import-aliases.sh
67 | 
68 | .PHONY: build
69 | build: $(SOURCES) ## Build the kluster-capacity binary
70 | 	@CGO_ENABLED=0 GOOS=$(GOOS) go build \
71 | 		-ldflags $(LDFLAGS) \
72 | 		-o kluster-capacity \
73 | 		main.go
74 | 
75 | .PHONY: clean
76 | clean: ## Remove the kluster-capacity binary
77 | 	@rm -rf kluster-capacity
78 | 
79 | .PHONY: fmt
80 | fmt: ## Format project files
81 | 	@$(GOFMT) -w $(GOFILES)
82 | 
83 | .PHONY: fmt-check
84 | fmt-check: ## Check project files format info
85 | 	@diff=$$($(GOFMT) -d $(GOFILES)); \
86 | 	if [ -n "$$diff" ]; then \
87 | 		echo "Please run 'make fmt' and commit the result:"; \
88 | 		echo "$${diff}"; \
89 | 		exit 1; \
90 | 	fi;
91 | 
92 | .PHONY: vet
93 | vet:
94 | 	@$(GO) vet $(VETPACKAGES)
95 | 
96 | .PHONY: test
97 | test: fmt-check vet ## Run project unit test and generate coverage result
98 | 	echo "mode: count" > coverage.out
99 | 	for d in $(TESTFOLDER); do \
100 | 		$(GO) test -tags $(TESTTAGS) -v -covermode=count -coverprofile=profile.out $$d > tmp.out; \
101 | 		cat tmp.out; \
102 | 		if grep -q "^--- FAIL" tmp.out; then \
103 | 			rm tmp.out; \
104 | 			exit 1; \
105 | 		elif grep -q "build failed" tmp.out; then \
106 | 			rm tmp.out; \
107 | 			exit 1; \
108 | 		elif grep -q "setup failed" tmp.out; then \
109 | 			rm tmp.out; \
110 | 			exit 1; \
111 | 		fi; \
112 | 		if [ -f profile.out ]; then \
113 | 			cat profile.out | grep -v "mode:" >> coverage.out; \
114 | 			rm profile.out; \
115 | 		fi; \
116 | 	done
117 | 
--------------------------------------------------------------------------------
/pkg/simulator/schedulersimulation/report.go:
--------------------------------------------------------------------------------
1 | package schedulersimulation
2 | 
3 | import (
4 | 	"fmt"
5 | 
6 | 	corev1 "k8s.io/api/core/v1"
7 | 	"k8s.io/kubernetes/pkg/scheduler/framework"
8 | 
9 | 	"github.com/k-cloud-labs/kluster-capacity/pkg"
10 | 	"github.com/k-cloud-labs/kluster-capacity/pkg/utils"
11 | )
12 | 
13 | type SchedulerSimulationReview struct {
14 | 	UnschedulablePods []corev1.Pod     `json:"unschedulablePods"`
15 | 	Details           []ScheduleDetail `json:"details"`
16 | 	StopReason        string           `json:"stopReason"`
17 | }
18 | 
19 | type ScheduleDetail struct {
20 | 	NodeName        string              `json:"nodeName"`
21 | 	Replicas        int                 `json:"replicas"`
22 | 	NodeAllocatable corev1.ResourceList `json:"nodeAllocatable"`
23 | 	PodRequest      framework.Resource  `json:"podRequest"`
24 | 	OnlyDSPod       bool                `json:"onlyDSPod"`
25 | }
26 | 
27 | func (r *SchedulerSimulationReview) Print(verbose bool, format string) error {
28 | 	switch format {
29 | 	case "json":
30 | 		return utils.PrintJson(r)
31 | 	case "yaml":
32 | 		return utils.PrintYaml(r)
33 | 	case "":
34 | 		prettyPrint(r, verbose)
35 | 		return nil
36 | 	default:
37 | 		return fmt.Errorf("output format %q not recognized", format)
38 | 	}
39 | }
40 | 
41 | func prettyPrint(r *SchedulerSimulationReview, verbose bool) {
42 | 	fmt.Printf("Termination reason: %s\n\n", r.StopReason)
43 | 	if len(r.UnschedulablePods) > 0 {
44 | 		fmt.Printf("Unschedulable pods(%d):\n", len(r.UnschedulablePods))
45 | 	}
46 | 
47 | 	for _, pod := range r.UnschedulablePods {
48 | 		if verbose {
49 | 			fmt.Printf("- %v/%s, reason: %s\n", pod.Namespace, pod.Name, getUnschedulableReason(&pod))
50 | 		} else {
fmt.Printf("- %v/%s\n", pod.Namespace, pod.Name) 52 | } 53 | } 54 | 55 | if len(r.UnschedulablePods) > 0 { 56 | fmt.Printf("\n\n") 57 | } 58 | fmt.Printf("Pod distribution among %d nodes:\n", len(r.Details)) 59 | 60 | for _, detail := range r.Details { 61 | if verbose { 62 | var msg string 63 | if detail.OnlyDSPod { 64 | msg = "Only DaemonSet Pod" 65 | } 66 | fmt.Printf("\t- %v: %v instance(s) %s\n", detail.NodeName, detail.Replicas, msg) 67 | } else { 68 | fmt.Printf("\t- %v\n", detail.NodeName) 69 | } 70 | } 71 | } 72 | 73 | func getUnschedulableReason(pod *corev1.Pod) string { 74 | for _, podCondition := range pod.Status.Conditions { 75 | // Only for pending pods provisioned by ce 76 | if podCondition.Type == corev1.PodScheduled && podCondition.Status == corev1.ConditionFalse && 77 | podCondition.Reason == corev1.PodReasonUnschedulable { 78 | return podCondition.Message 79 | } 80 | } 81 | 82 | return "" 83 | } 84 | 85 | func generateReport(status *pkg.Status) *SchedulerSimulationReview { 86 | details := make([]ScheduleDetail, 0) 87 | unschedulablePods := make([]corev1.Pod, 0) 88 | nodePodMap := make(map[string][]corev1.Pod) 89 | 90 | for _, pod := range status.Pods { 91 | nodePodMap[pod.Spec.NodeName] = append(nodePodMap[pod.Spec.NodeName], pod) 92 | } 93 | 94 | for node, pods := range nodePodMap { 95 | if node == "" { 96 | unschedulablePods = append(unschedulablePods, pods...) 97 | continue 98 | } 99 | 100 | var request framework.Resource 101 | 102 | for _, pod := range pods { 103 | addResource(&request, utils.ComputePodResourceRequest(&pod)) 104 | } 105 | 106 | detail := ScheduleDetail{ 107 | NodeName: node, 108 | Replicas: len(nodePodMap[node]), 109 | PodRequest: request, 110 | OnlyDSPod: func(pods []corev1.Pod) bool { 111 | for i := range pods { 112 | if !utils.IsDaemonsetPod(pods[i].OwnerReferences) { 113 | return false 114 | } 115 | } 116 | 117 | return true 118 | }(nodePodMap[node]), 119 | } 120 | if node, ok := status.Nodes[node]; ok { 121 | detail.NodeAllocatable = node.Status.Allocatable 122 | } 123 | details = append(details, detail) 124 | } 125 | 126 | return &SchedulerSimulationReview{ 127 | UnschedulablePods: unschedulablePods, 128 | Details: details, 129 | StopReason: status.StopReason, 130 | } 131 | } 132 | 133 | func addResource(source *framework.Resource, res *framework.Resource) { 134 | source.MilliCPU += res.MilliCPU 135 | source.Memory += res.Memory 136 | source.EphemeralStorage += res.EphemeralStorage 137 | if source.ScalarResources == nil && len(res.ScalarResources) > 0 { 138 | source.ScalarResources = map[corev1.ResourceName]int64{} 139 | } 140 | for rName, rQuant := range res.ScalarResources { 141 | source.ScalarResources[rName] += rQuant 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /pkg/simulator/clustercompression/nodeFilter.go: -------------------------------------------------------------------------------- 1 | package clustercompression 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "strings" 7 | 8 | corev1 "k8s.io/api/core/v1" 9 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 10 | clientset "k8s.io/client-go/kubernetes" 11 | "k8s.io/client-go/util/workqueue" 12 | 13 | "github.com/k-cloud-labs/kluster-capacity/app/cmds/clustercompression/options" 14 | ) 15 | 16 | const ( 17 | NodeScaledDownFailedLabel = "kc.k-cloud-labs.io/node-scale-down-failed" 18 | NodeScaledDownSuccessLabel = "kc.k-cloud-labs.io/node-scale-down-success" 19 | KubernetesMasterNodeLabel = "node-role.kubernetes.io/master" 20 | 
/pkg/simulator/clustercompression/nodeFilter.go:
--------------------------------------------------------------------------------
1 | package clustercompression
2 | 
3 | import (
4 | 	"context"
5 | 	"fmt"
6 | 	"strings"
7 | 
8 | 	corev1 "k8s.io/api/core/v1"
9 | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10 | 	clientset "k8s.io/client-go/kubernetes"
11 | 	"k8s.io/client-go/util/workqueue"
12 | 
13 | 	"github.com/k-cloud-labs/kluster-capacity/app/cmds/clustercompression/options"
14 | )
15 | 
16 | const (
17 | 	NodeScaledDownFailedLabel  = "kc.k-cloud-labs.io/node-scale-down-failed"
18 | 	NodeScaledDownSuccessLabel = "kc.k-cloud-labs.io/node-scale-down-success"
19 | 	KubernetesMasterNodeLabel  = "node-role.kubernetes.io/master"
20 | 	NodeScaleDownDisableLabel  = "kc.k-cloud-labs.io/scale-down-disabled"
21 | )
22 | 
23 | type NodeFilter interface {
24 | 	SelectNode() *Status
25 | 	Done()
26 | }
27 | 
28 | func defaultFilterFunc() FilterFunc {
29 | 	return func(node *corev1.Node) *FilterStatus {
30 | 		if node.Labels != nil {
31 | 			_, ok := node.Labels[KubernetesMasterNodeLabel]
32 | 			if ok {
33 | 				return &FilterStatus{
34 | 					Success:   false,
35 | 					ErrReason: ErrReasonMasterNode,
36 | 				}
37 | 			}
38 | 
39 | 			_, ok = node.Labels[NodeScaledDownFailedLabel]
40 | 			if ok {
41 | 				return &FilterStatus{
42 | 					Success:   false,
43 | 					ErrReason: ErrReasonFailedScaleDown,
44 | 				}
45 | 			}
46 | 
47 | 			_, ok = node.Labels[NodeScaledDownSuccessLabel]
48 | 			if ok {
49 | 				return &FilterStatus{
50 | 					Success:   false,
51 | 					ErrReason: ErrReasonSuccessScaleDown,
52 | 				}
53 | 			}
54 | 
55 | 			v, ok := node.Labels[NodeScaleDownDisableLabel]
56 | 			if ok && v == "true" {
57 | 				return &FilterStatus{
58 | 					Success:   false,
59 | 					ErrReason: ErrReasonScaleDownDisabled,
60 | 				}
61 | 			}
62 | 		}
63 | 		return &FilterStatus{Success: true}
64 | 	}
65 | }
66 | 
67 | type singleNodeFilter struct {
68 | 	clientset      clientset.Interface
69 | 	nodeFilter     FilterFunc
70 | 	selectedCount  int
71 | 	candidateNode  []*corev1.Node
72 | 	candidateIndex int
73 | }
74 | 
75 | type Status struct {
76 | 	Node      *corev1.Node
77 | 	ErrReason string
78 | }
79 | 
80 | func NewNodeFilter(client clientset.Interface, getPodsByNode PodsByNodeFunc, excludeNodes []string, filterNodeOptions options.FilterNodeOptions) (NodeFilter, error) {
81 | 	excludeNodeMap := make(map[string]bool)
82 | 	for i := range excludeNodes {
83 | 		excludeNodeMap[excludeNodes[i]] = true
84 | 	}
85 | 
86 | 	nodeFilter := NewOptions().
87 | 		WithFilter(defaultFilterFunc()).
88 | 		WithExcludeNodes(excludeNodeMap).
89 | 		WithExcludeTaintNodes(filterNodeOptions.ExcludeTaintNode).
90 | 		WithExcludeNotReadyNodes(filterNodeOptions.ExcludeNotReadyNode).
91 | 		WithIgnoreStaticPod(filterNodeOptions.IgnoreStaticPod).
92 | 		WithIgnoreCloneSet(filterNodeOptions.IgnoreCloneSet).
93 | 		WithIgnoreMirrorPod(filterNodeOptions.IgnoreMirrorPod).
94 | 		WithIgnoreVolumePod(filterNodeOptions.IgnoreVolumePod).
95 | 		WithPodsByNodeFunc(getPodsByNode).
96 | 		BuildFilterFunc()
97 | 
98 | 	return &singleNodeFilter{
99 | 		clientset:  client,
100 | 		nodeFilter: nodeFilter,
101 | 	}, nil
102 | }
103 | 
104 | func (g *singleNodeFilter) SelectNode() *Status {
105 | 	if len(g.candidateNode) != 0 && g.candidateIndex <= len(g.candidateNode)-1 {
106 | 		selectNode := g.candidateNode[g.candidateIndex]
107 | 		g.candidateIndex++
108 | 		if g.candidateIndex == len(g.candidateNode) {
109 | 			g.candidateNode = nil
110 | 			g.candidateIndex = 0
111 | 		}
112 | 		return &Status{Node: selectNode}
113 | 	}
114 | 
115 | 	g.candidateNode = nil
116 | 	g.candidateIndex = 0
117 | 
118 | 	nodes, err := g.clientset.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
119 | 	if err != nil {
120 | 		return nil
121 | 	}
122 | 
123 | 	var (
124 | 		statuses []*FilterStatus
125 | 		result   = make([]interface{}, len(nodes.Items))
126 | 	)
127 | 
128 | 	workqueue.ParallelizeUntil(context.TODO(), 16, len(nodes.Items), func(index int) {
129 | 		node := &nodes.Items[index]
130 | 		status := g.nodeFilter(node)
131 | 		if status.Success {
132 | 			result[index] = node
133 | 		} else {
134 | 			result[index] = status
135 | 		}
136 | 	})
137 | 
138 | 	for i := 0; i < len(nodes.Items); i++ {
139 | 		switch v := result[i].(type) {
140 | 		case *FilterStatus:
141 | 			statuses = append(statuses, v)
142 | 		case *corev1.Node:
143 | 			g.candidateNode = append(g.candidateNode, v)
144 | 		}
145 | 	}
146 | 
147 | 	if len(g.candidateNode) == 0 {
148 | 		return convertFilterStatusesToStatus(statuses, g.selectedCount)
149 | 	}
150 | 
151 | 	g.candidateIndex++
152 | 
153 | 	return &Status{Node: g.candidateNode[0]}
154 | }
155 | 
156 | func (g *singleNodeFilter) Done() {
157 | 	g.selectedCount++
158 | }
159 | 
160 | func convertFilterStatusesToStatus(statuses []*FilterStatus, selectedCount int) *Status {
161 | 	statusMap := make(map[string]int)
162 | 
163 | 	for _, status := range statuses {
164 | 		statusMap[status.ErrReason]++
165 | 	}
166 | 
167 | 	// discount taints the simulator itself added to nodes it already scaled down
168 | 	if count, ok := statusMap[ErrReasonTaintNode]; ok {
169 | 		realCount := count - selectedCount
170 | 		if realCount == 0 {
171 | 			delete(statusMap, ErrReasonTaintNode)
172 | 		} else {
173 | 			statusMap[ErrReasonTaintNode] = realCount
174 | 		}
175 | 	}
176 | 
177 | 	sb := strings.Builder{}
178 | 	for reason, count := range statusMap {
179 | 		_, _ = sb.WriteString(fmt.Sprintf("%d %s; ", count, reason))
180 | 	}
181 | 
182 | 	return &Status{ErrReason: sb.String()}
183 | }
184 | 
--------------------------------------------------------------------------------
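The interplay between SelectNode and Done above is easiest to see in a driving loop. The real loop lives in clustercompression/simulator.go, which is not part of this excerpt, so the following is only an illustrative sketch; compressCluster and tryToScaleDown are invented stand-ins for the simulator's actual logic:

```go
package clustercompression

import (
	corev1 "k8s.io/api/core/v1"
)

// compressCluster is a hypothetical driver, for illustration only.
// It keeps asking the filter for the next candidate node and calls
// Done() whenever one more node is successfully "scaled down".
func compressCluster(f NodeFilter, tryToScaleDown func(*corev1.Node) bool) string {
	for {
		status := f.SelectNode()
		if status == nil {
			// SelectNode returns nil only when listing nodes failed.
			return "listing nodes failed"
		}
		if status.Node == nil {
			// No candidate survived filtering; ErrReason aggregates the
			// per-reason counts, e.g. "1 master node(s); 2 node(s) have taints; ".
			return status.ErrReason
		}
		if tryToScaleDown(status.Node) {
			// Lets the filter discount the taint it put on this node.
			f.Done()
		}
	}
}
```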
/pkg/simulator/capacityestimation/simulator.go:
--------------------------------------------------------------------------------
1 | package capacityestimation
2 | 
3 | import (
4 | 	"fmt"
5 | 
6 | 	"golang.org/x/sync/errgroup"
7 | 	corev1 "k8s.io/api/core/v1"
8 | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9 | 	"k8s.io/apimachinery/pkg/runtime"
10 | 	"k8s.io/client-go/informers"
11 | 	"k8s.io/client-go/tools/cache"
12 | 	"k8s.io/klog/v2"
13 | 
14 | 	"github.com/k-cloud-labs/kluster-capacity/app/cmds/capacityestimation/options"
15 | 	"github.com/k-cloud-labs/kluster-capacity/pkg"
16 | 	pkgframework "github.com/k-cloud-labs/kluster-capacity/pkg/framework"
17 | 	"github.com/k-cloud-labs/kluster-capacity/pkg/utils"
18 | )
19 | 
20 | type PodGenerator interface {
21 | 	Generate() *corev1.Pod
22 | }
23 | 
24 | // only one scheduler is supported for now; its profile name is pkg.SchedulerName ("simulator-scheduler")
25 | type simulator struct {
26 | 	pkg.Framework
27 | 
28 | 	podGenerator PodGenerator
29 | 	simulatedPod *corev1.Pod
30 | 	maxSimulated int
31 | 	simulated    int
32 | }
33 | 
34 | type multiSimulator struct {
35 | 	simulators []*simulator
36 | 	reports    pkg.Printer
37 | }
38 | 
39 | // NewCESimulatorExecutor creates a ce simulator. Scheduling runs against a fake client,
40 | // so the kubeconfig is only used to snapshot the initial cluster state.
41 | func NewCESimulatorExecutor(conf *options.CapacityEstimationConfig) (pkg.Simulator, error) {
42 | 	newSimulator := func(pod *corev1.Pod) (*simulator, error) {
43 | 		kubeSchedulerConfig, err := utils.BuildKubeSchedulerCompletedConfig(conf.Options.SchedulerConfig, conf.Options.KubeConfig)
44 | 		if err != nil {
45 | 			return nil, err
46 | 		}
47 | 
48 | 		kubeConfig, err := utils.BuildRestConfig(conf.Options.KubeConfig)
49 | 		if err != nil {
50 | 			return nil, err
51 | 		}
52 | 
53 | 		s := &simulator{
54 | 			podGenerator: NewSinglePodGenerator(pod),
55 | 			simulatedPod: pod,
56 | 			simulated:    0,
57 | 			maxSimulated: conf.Options.MaxLimit,
58 | 		}
59 | 
60 | 		err = s.addEventHandlers(kubeSchedulerConfig.InformerFactory)
61 | 		if err != nil {
62 | 			return nil, err
63 | 		}
64 | 
65 | 		framework, err := pkgframework.NewKubeSchedulerFramework(kubeSchedulerConfig, kubeConfig,
66 | 			pkgframework.WithExcludeNodes(conf.Options.ExcludeNodes),
67 | 			pkgframework.WithPostBindHook(s.postBindHook))
68 | 		if err != nil {
69 | 			return nil, err
70 | 		}
71 | 
72 | 		s.Framework = framework
73 | 
74 | 		return s, nil
75 | 	}
76 | 
77 | 	ms := &multiSimulator{
78 | 		simulators: make([]*simulator, 0),
79 | 	}
80 | 
81 | 	for _, pod := range conf.Pods {
82 | 		s, err := newSimulator(pod)
83 | 		if err != nil {
84 | 			return nil, err
85 | 		}
86 | 
87 | 		ms.simulators = append(ms.simulators, s)
88 | 	}
89 | 
90 | 	return ms, nil
91 | }
92 | 
93 | func (s *simulator) Run(func() error) error {
94 | 	return s.Framework.Run(s.createNextPod)
95 | }
96 | 
97 | func (s *simulator) Report() pkg.Printer {
98 | 	return generateReport([]*corev1.Pod{s.simulatedPod}, s.Status())
99 | }
100 | 
101 | func (ms *multiSimulator) Initialize(objs ...runtime.Object) error {
102 | 	for _, s := range ms.simulators {
103 | 		if err := s.Initialize(objs...); err != nil {
104 | 			return err
105 | 		}
106 | 	}
107 | 
108 | 	return nil
109 | }
110 | 
111 | func (ms *multiSimulator) Run() error {
112 | 	g := errgroup.Group{}
113 | 	reports := make(CapacityEstimationReviews, len(ms.simulators))
114 | 	for i, s := range ms.simulators {
115 | 		i := i
116 | 		s := s
117 | 		g.Go(func() error {
118 | 			err := s.Run(nil)
119 | 			if err != nil {
120 | 				return err
121 | 			}
122 | 			reports[i] = s.Report().(*CapacityEstimationReview)
123 | 			return nil
124 | 		})
125 | 	}
126 | 
127 | 	err := g.Wait()
128 | 	if err != nil {
129 | 		return err
130 | 	}
131 | 
132 | 	ms.reports = reports
133 | 
134 | 	return nil
135 | }
136 | 
137 | func (ms *multiSimulator) Report() pkg.Printer {
138 | 	return ms.reports
139 | }
140 | 
141 | func (s *simulator) postBindHook(bindPod *corev1.Pod) error {
142 | 	s.UpdateEstimationPods(bindPod)
143 | 
144 | 	if s.maxSimulated > 0 && s.simulated >= s.maxSimulated {
145 | 		return s.Stop(fmt.Sprintf("LimitReached: Maximum number of pods simulated: %v", s.maxSimulated))
146 | 	}
147 | 
148 | 	if err := s.createNextPod(); err != nil {
149 | 		return fmt.Errorf("unable to create next pod for simulated scheduling: %v", err)
150 | 	}
151 | 	return nil
152 | }
153 | 
154 | func (s *simulator) createNextPod() error {
155 | 	pod := s.podGenerator.Generate()
156 | 	s.simulated++
157 | 	klog.V(2).InfoS("create simulate pod", "count", s.simulated, "key", pod.Namespace+"/"+pod.Name)
158 | 
159 | 	return s.CreatePod(pod)
160 | }
161 | 
162 | func (s *simulator) addEventHandlers(informerFactory informers.SharedInformerFactory) (err error) {
163 | 	_, _ = informerFactory.Core().V1().Pods().Informer().AddEventHandler(
164 | 		cache.FilteringResourceEventHandler{
165 | 			FilterFunc: func(obj interface{}) bool {
166 | 				if pod, ok := obj.(*corev1.Pod); ok && pod.Spec.SchedulerName == pkg.SchedulerName &&
167 | 					metav1.HasAnnotation(pod.ObjectMeta, pkg.PodProvisioner) {
168 | 					return true
169 | 				}
170 | 				return false
171 | 			},
172 | 			Handler: cache.ResourceEventHandlerFuncs{
173 | 				UpdateFunc: func(oldObj, newObj interface{}) {
174 | 					if pod, ok := newObj.(*corev1.Pod); ok {
175 | 						for _, podCondition := range pod.Status.Conditions {
176 | 							// Only for pending pods provisioned by ce
177 | 							if podCondition.Type == corev1.PodScheduled && podCondition.Status == corev1.ConditionFalse &&
178 | 								podCondition.Reason == corev1.PodReasonUnschedulable {
179 | 								err = s.Stop(fmt.Sprintf("%v: %v", podCondition.Reason, podCondition.Message))
180 | 							}
181 | 						}
182 | 					}
183 | 				},
184 | 			},
185 | 		},
186 | 	)
187 | 
188 | 	return
189 | }
190 | 
--------------------------------------------------------------------------------
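NewSinglePodGenerator is referenced above but defined in podgenerator.go, which is not included in this excerpt. A minimal implementation satisfying the PodGenerator interface could look like the sketch below; the counter-suffix naming scheme is an assumption, not the repository's actual code:

```go
package capacityestimation

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// singlePodGenerator stamps out numbered copies of one template pod.
// Sketch only: the real implementation lives in podgenerator.go.
type singlePodGenerator struct {
	counter  int
	template *corev1.Pod
}

func NewSinglePodGenerator(template *corev1.Pod) PodGenerator {
	return &singlePodGenerator{template: template}
}

func (g *singlePodGenerator) Generate() *corev1.Pod {
	pod := g.template.DeepCopy()
	// Distinct names keep the simulated API server from rejecting
	// each new copy as a duplicate of the previous one.
	pod.Name = fmt.Sprintf("%s-%d", g.template.Name, g.counter)
	g.counter++
	return pod
}
```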
/pkg/simulator/clustercompression/options.go:
--------------------------------------------------------------------------------
1 | package clustercompression
2 | 
3 | import (
4 | 	corev1 "k8s.io/api/core/v1"
5 | 
6 | 	"github.com/k-cloud-labs/kluster-capacity/pkg/utils"
7 | )
8 | 
9 | const (
10 | 	ErrReasonFailedScaleDown   = "node(s) can't be scaled down because of insufficient resources on other nodes"
11 | 	ErrReasonSuccessScaleDown  = "node(s) have been successfully scaled down"
12 | 	ErrReasonScaleDownDisabled = "node(s) have the scale-down-disabled label"
13 | 	ErrReasonMasterNode        = "master node(s)"
14 | 	ErrReasonTaintNode         = "node(s) have taints"
15 | 	ErrReasonNotReadyNode      = "not ready node(s)"
16 | 	ErrReasonStaticPod         = "node(s) have static pods"
17 | 	ErrReasonMirrorPod         = "node(s) have mirror pods"
18 | 	ErrReasonCloneset          = "node(s) have in-place update pods"
19 | 	ErrReasonVolumePod         = "node(s) have pods using local storage"
20 | 	ErrReasonUnknown           = "node(s) have unknown error"
21 | 	ErrReasonExcludedNode      = "node(s) explicitly excluded"
22 | )
23 | 
24 | // FilterFunc is a filter for a node.
25 | type FilterFunc func(*corev1.Node) *FilterStatus
26 | type PodsByNodeFunc func(name string) ([]*corev1.Pod, error)
27 | 
28 | type FilterStatus struct {
29 | 	// Success being true means the node is a candidate for scale-down simulation
30 | 	Success   bool
31 | 	ErrReason string
32 | }
33 | 
34 | type Options struct {
35 | 	filter              FilterFunc
36 | 	getPodsByNode       PodsByNodeFunc
37 | 	excludeNodes        map[string]bool
38 | 	excludeTaintNode    bool
39 | 	excludeNotReadyNode bool
40 | 	ignoreStaticPod     bool
41 | 	ignoreMirrorPod     bool
42 | 	ignoreCloneSet      bool
43 | 	ignoreVolumePod     bool
44 | }
45 | 
46 | // NewOptions returns an empty Options.
47 | func NewOptions() *Options {
48 | 	return &Options{}
49 | }
50 | 
51 | // WithFilter sets a node filter.
52 | func (o *Options) WithFilter(filter FilterFunc) *Options {
53 | 	o.filter = filter
54 | 	return o
55 | }
56 | 
57 | // WithExcludeNodes sets the excluded nodes.
58 | func (o *Options) WithExcludeNodes(nodes map[string]bool) *Options {
59 | 	o.excludeNodes = nodes
60 | 	return o
61 | }
62 | 
63 | // WithExcludeTaintNodes sets the excludeTaintNode option.
64 | func (o *Options) WithExcludeTaintNodes(excludeTaintNode bool) *Options {
65 | 	o.excludeTaintNode = excludeTaintNode
66 | 	return o
67 | }
68 | 
69 | // WithExcludeNotReadyNodes sets the excludeNotReadyNode option.
70 | func (o *Options) WithExcludeNotReadyNodes(excludeNotReadyNode bool) *Options {
71 | 	o.excludeNotReadyNode = excludeNotReadyNode
72 | 	return o
73 | }
74 | 
75 | // WithIgnoreStaticPod sets the ignoreStaticPod option.
76 | func (o *Options) WithIgnoreStaticPod(ignoreStaticPod bool) *Options {
77 | 	o.ignoreStaticPod = ignoreStaticPod
78 | 	return o
79 | }
80 | 
81 | // WithIgnoreMirrorPod sets the ignoreMirrorPod option.
82 | func (o *Options) WithIgnoreMirrorPod(ignoreMirrorPod bool) *Options {
83 | 	o.ignoreMirrorPod = ignoreMirrorPod
84 | 	return o
85 | }
86 | 
87 | // WithIgnoreCloneSet sets the ignoreCloneSet option.
88 | func (o *Options) WithIgnoreCloneSet(ignoreCloneSet bool) *Options {
89 | 	o.ignoreCloneSet = ignoreCloneSet
90 | 	return o
91 | }
92 | 
93 | // WithIgnoreVolumePod sets the ignoreVolumePod option.
94 | func (o *Options) WithIgnoreVolumePod(ignoreVolumePod bool) *Options {
95 | 	o.ignoreVolumePod = ignoreVolumePod
96 | 	return o
97 | }
98 | 
99 | func (o *Options) WithPodsByNodeFunc(podsByNodeFunc PodsByNodeFunc) *Options {
100 | 	o.getPodsByNode = podsByNodeFunc
101 | 	return o
102 | }
103 | 
104 | // BuildFilterFunc builds a final FilterFunc based on Options.
105 | func (o *Options) BuildFilterFunc() FilterFunc {
106 | 	return func(node *corev1.Node) *FilterStatus {
107 | 		if o.filter != nil {
108 | 			status := o.filter(node)
109 | 			if status != nil && !status.Success {
110 | 				return status
111 | 			}
112 | 		}
113 | 
114 | 		if o.excludeNodes != nil && o.excludeNodes[node.Name] {
115 | 			return &FilterStatus{
116 | 				Success:   false,
117 | 				ErrReason: ErrReasonExcludedNode,
118 | 			}
119 | 		}
120 | 
121 | 		if o.excludeTaintNode && haveNodeTaint(node) {
122 | 			return &FilterStatus{
123 | 				Success:   false,
124 | 				ErrReason: ErrReasonTaintNode,
125 | 			}
126 | 		}
127 | 		if o.excludeNotReadyNode && isNodeNotReady(node) {
128 | 			return &FilterStatus{
129 | 				Success:   false,
130 | 				ErrReason: ErrReasonNotReadyNode,
131 | 			}
132 | 		}
133 | 
134 | 		podList, err := o.getPodsByNode(node.Name)
135 | 		if err != nil {
136 | 			return &FilterStatus{
137 | 				Success:   false,
138 | 				ErrReason: ErrReasonUnknown,
139 | 			}
140 | 		}
141 | 
142 | 		for i := range podList {
143 | 			if o.ignoreStaticPod && utils.IsStaticPod(podList[i]) {
144 | 				return &FilterStatus{
145 | 					Success:   false,
146 | 					ErrReason: ErrReasonStaticPod,
147 | 				}
148 | 			}
149 | 
150 | 			if o.ignoreMirrorPod && utils.IsMirrorPod(podList[i]) {
151 | 				return &FilterStatus{
152 | 					Success:   false,
153 | 					ErrReason: ErrReasonMirrorPod,
154 | 				}
155 | 			}
156 | 
157 | 			if o.ignoreVolumePod && utils.IsPodWithLocalStorage(podList[i]) {
158 | 				return &FilterStatus{
159 | 					Success:   false,
160 | 					ErrReason: ErrReasonVolumePod,
161 | 				}
162 | 			}
163 | 
164 | 			if o.ignoreCloneSet && utils.IsCloneSetPod(podList[i].OwnerReferences) {
165 | 				return &FilterStatus{
166 | 					Success:   false,
167 | 					ErrReason: ErrReasonCloneset,
168 | 				}
169 | 			}
170 | 		}
171 | 		return &FilterStatus{Success: true}
172 | 	}
173 | }
174 | 
175 | func haveNodeTaint(node *corev1.Node) bool {
176 | 	return len(node.Spec.Taints) != 0
177 | }
178 | 
179 | func isNodeNotReady(node *corev1.Node) bool {
180 | 	for _, cond := range node.Status.Conditions {
181 | 		// We consider a node ready only when its NodeReady condition status is ConditionTrue.
182 | 		if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue {
183 | 			return false
184 | 		}
185 | 	}
186 | 	return true
187 | }
188 | 
--------------------------------------------------------------------------------
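For comparison with NewNodeFilter in nodeFilter.go above, here is a standalone composition of the same builder. It is illustrative only; the option values are made up for the example:

```go
package clustercompression

// exampleFilter composes a FilterFunc the way NewNodeFilter does:
// skip master/labeled nodes (via the default filter), two explicitly
// excluded nodes, tainted nodes, and nodes running static pods.
func exampleFilter(getPodsByNode PodsByNodeFunc) FilterFunc {
	return NewOptions().
		WithFilter(defaultFilterFunc()).
		WithExcludeNodes(map[string]bool{"node-a": true, "node-b": true}).
		WithExcludeTaintNodes(true).
		WithIgnoreStaticPod(true).
		WithPodsByNodeFunc(getPodsByNode).
		BuildFilterFunc()
}
```

A node that fails any of these checks comes back as a FilterStatus with Success set to false and ErrReason carrying one of the ErrReason constants above; those reasons are what convertFilterStatusesToStatus later aggregates into the termination message.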
/README-ZH.md:
--------------------------------------------------------------------------------
1 | # kluster-capacity
2 | [[English](./README.md)]
3 | 
4 | ![kluster-capacity-logo](docs/images/capacity-management-capacity-icon.jpeg)
5 | 
6 | [![Build Status](https://github.com/k-cloud-labs/kluster-capacity/actions/workflows/ci.yml/badge.svg)](https://github.com/k-cloud-labs/kluster-capacity/actions?query=workflow%3Abuild)
7 | [![Go Report Card](https://goreportcard.com/badge/github.com/k-cloud-labs/kluster-capacity)](https://goreportcard.com/report/github.com/k-cloud-labs/kluster-capacity)
8 | [![Go doc](https://img.shields.io/badge/go.dev-reference-brightgreen?logo=go&logoColor=white&style=flat)](https://pkg.go.dev/github.com/k-cloud-labs/kluster-capacity)
9 | 
10 | A cluster capacity analysis tool that supports capacity estimation, scheduler simulation, cluster compression, and more.
11 | This repository was inspired by https://github.com/kubernetes-sigs/cluster-capacity.
12 | 
13 | ## Installation
14 | ### Homebrew
15 | Install via [Homebrew](https://brew.sh/):
16 | ```
17 | brew tap k-cloud-labs/tap
18 | brew install k-cloud-labs/tap/kluster-capacity
19 | ```
20 | 
21 | ### Krew
22 | Install via [Krew](https://github.com/GoogleContainerTools/krew):
23 | ```
24 | kubectl krew install kluster-capacity
25 | ```
26 | 
27 | ### Build from source
28 | Build the whole program:
29 | 
30 | ```sh
31 | $ cd $GOPATH/src/github.com/k-cloud-labs/
32 | $ git clone https://github.com/k-cloud-labs/kluster-capacity
33 | $ cd kluster-capacity
34 | $ make build
35 | ```
36 | 
37 | Three subcommands are available: ce, cc, and ss, which stand for capacity estimation, cluster compression, and scheduler simulation respectively.
38 | 
39 | ## Capacity Estimation
40 | ### Introduction
41 | As new pods get scheduled on nodes in a cluster, more and more resources get consumed. Monitoring available resources in the cluster is very important, so that operators can increase the current resources in time before all of them get exhausted, or take different steps to increase the available resources.
42 | 
43 | Cluster capacity consists of the capacities of individual cluster nodes. Capacity covers CPU, memory, disk space, and other resources.
44 | 
45 | The overall remaining allocatable capacity is an estimate. The goal is to analyze the remaining allocatable resources and estimate the available capacity, that is, the number of instances of a pod with given resource requirements that can still be scheduled in the cluster.
46 | 
47 | ### Enhancements
48 | The following enhancements were made on top of the original cluster-capacity:
49 | 
50 | - Support using an existing pod from the cluster directly as the pod template.
51 | - Support batch simulation with different pod templates.
52 | 
53 | ### Run
54 | 
55 | ```sh
56 | # use the specified pod template directly
57 | $ ./kluster-capacity ce --pods-from-template <pod-template>
58 | # use a specified pod in the cluster as the template
59 | $ ./kluster-capacity ce --pods-from-cluster <namespace/name>
60 | ```
61 | For more parameters and features, run the following command:
62 | 
63 | ```sh
64 | $ ./kluster-capacity ce --help
65 | ```
66 | 
67 | ### Demo
68 | Assume a cluster running 4 nodes and 1 master node, where each node has 2 CPUs and 4GB of memory, and each pod requires 150m CPU and 100Mi of memory.
69 | 
70 | ```sh
71 | $ ./kluster-capacity ce --pods-from-template --verbose
72 | Pod requirements:
73 | 	- cpu: 150m
74 | 	- memory: 100Mi
75 | 
76 | The cluster can schedule 52 instance(s) of the pod.
77 | Termination reason: FailedScheduling: pod (small-pod-52) failed to fit in any node
78 | fit failure on node (kube-node-1): Insufficient cpu
79 | fit failure on node (kube-node-4): Insufficient cpu
80 | fit failure on node (kube-node-2): Insufficient cpu
81 | fit failure on node (kube-node-3): Insufficient cpu
82 | 
83 | 
84 | Pod distribution among nodes:
85 | 	- kube-node-1: 13 instance(s)
86 | 	- kube-node-4: 13 instance(s)
87 | 	- kube-node-2: 13 instance(s)
88 | 	- kube-node-3: 13 instance(s)
89 | ```
90 | 
91 | As the number of pods running in the cluster grows, fewer pods can be scheduled when the analysis is run again.
92 | 
93 | ```sh
94 | $ ./kluster-capacity ce --pods-from-template --verbose
95 | Pod requirements:
96 | 	- cpu: 150m
97 | 	- memory: 100Mi
98 | 
99 | The cluster can schedule 46 instance(s) of the pod.
100 | Termination reason: FailedScheduling: pod (small-pod-46) failed to fit in any node
101 | fit failure on node (kube-node-1): Insufficient cpu
102 | fit failure on node (kube-node-4): Insufficient cpu
103 | fit failure on node (kube-node-2): Insufficient cpu
104 | fit failure on node (kube-node-3): Insufficient cpu
105 | 
106 | 
107 | Pod distribution among nodes:
108 | 	- kube-node-1: 11 instance(s)
109 | 	- kube-node-4: 12 instance(s)
110 | 	- kube-node-2: 11 instance(s)
111 | 	- kube-node-3: 12 instance(s)
112 | ```
113 | 
114 | ### Output format
115 | The `ce` command has an `--output (-o)` flag to format its output as json or yaml.
116 | 
117 | ```sh
118 | $ ./kluster-capacity ce --pods-from-template -o json|yaml
119 | ```
120 | 
121 | ## Scheduler Simulation
122 | ### Introduction
123 | 
124 | Scheduler simulation takes all the nodes, pods, and other related resources in the current cluster as input, and simulates the process of going from no pods at all to creating and scheduling all of them. It can be used to calculate the cluster compression ratio, to evaluate scheduling results, or to measure the quality of the scheduling algorithm.
125 | 
126 | Compared with cluster compression, its result is more aggressive and idealized.
127 | 
128 | ### Run
129 | 
130 | ```shell
131 | ./kluster-capacity ss
132 | ```
133 | For more parameters and features, run the following command:
134 | 
135 | ```sh
136 | $ ./kluster-capacity ss --help
137 | ```
138 | It supports two exit conditions: `AllSucceed` and `AllScheduled`. The former means the program exits after all pods have been scheduled successfully; the latter means it exits once every pod has been scheduled at least once. The default is `AllSucceed`. The exit condition can be set with the `--exit-condition` flag.
139 | 
140 | ### Demo
141 | 
142 | Assume a cluster running 4 nodes and 1 master node, where each node has 2 CPUs and 4GB of memory, and 40 pods, each requesting 100m CPU and 200Mi of memory, need to be scheduled.
143 | 
144 | If the scheduler uses the `LeastAllocated` strategy, the scheduling result may look like this:
145 | 
146 | ```sh
147 | $ ./kluster-capacity ss --verbose
148 | Termination reason: AllSucceed: 40 pod(s) have been scheduled successfully.
149 | 
150 | Pod distribution among nodes:
151 | 	- kube-node-1: 10 instance(s)
152 | 	- kube-node-2: 10 instance(s)
153 | 	- kube-node-3: 10 instance(s)
154 | 	- kube-node-4: 10 instance(s)
155 | ```
156 | 
157 | If the scheduler is switched to the `MostAllocated` strategy, the scheduling result may look like this:
158 | 
159 | ```sh
160 | $ ./kluster-capacity ss --verbose
161 | Termination reason: AllSucceed: 40 pod(s) have been scheduled successfully.
162 | 
163 | Pod distribution among nodes:
164 | 	- kube-node-1: 20 instance(s)
165 | 	- kube-node-2: 20 instance(s)
166 | ```
167 | 
168 | The scheduling results above can be analyzed to evaluate the effectiveness of the scheduling policy and the cluster capacity compression ratio. For example, the result above indicates a cluster compression ratio of 2, which means 50% of the resources are wasted in the ideal case.
169 | 
170 | 
171 | ## Cluster Compression
172 | ### Introduction
173 | Cluster compression takes the current state of the cluster, including all nodes, pods, and other related resources, as input and simulates compressing the cluster by removing nodes. It can be used to calculate the compression ratio of the cluster, a measure of how efficiently resources are utilized.
174 | 
175 | Compared with scheduler simulation, the result of cluster compression is usually more realistic and more actionable.
176 | 
177 | ### Run
178 | 
179 | ```shell
180 | ./kluster-capacity cc --verbose
181 | ```
182 | For more parameters and features, run the following command:
183 | 
184 | ```sh
185 | $ ./kluster-capacity cc --help
186 | ```
187 | 
188 | ### Demo
189 | 
190 | Assume a cluster running 4 nodes and 1 master node, where each node has 2 CPUs and 4GB of memory, with 40 pods running, each requesting 100m CPU and 200Mi of memory.
191 | 
192 | ```shell
193 | ./kluster-capacity cc --verbose
194 | 2 node(s) in the cluster can be scaled down.
195 | 
196 | Termination reason: FailedSelectNode: could not find a node that satisfies the condition, 1 master node(s); 2 node(s) can't be scaled down because of insufficient resources on other nodes;
197 | 
198 | nodes selected to be scaled down:
199 | 	- kube-node-1
200 | 	- kube-node-3
201 | ```
202 | 
203 | The result above shows that, given the resource requirements of the 40 pods, the cluster can remove 2 nodes while still guaranteeing that all pods can be scheduled, a compression ratio of 2, which means 50% of the resources are wasted.
204 | 
205 | ## Features
206 | - [x] Cluster compression
207 | - [x] Capacity estimation
208 | - [x] Scheduler simulation
209 | - [ ] Snapshot-based simulation
210 | - [ ] Resource fragmentation analysis
211 | 
212 | Feel free to try it out and share your valuable feedback. Thanks!
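213 | 
214 | A quick sanity check of the numbers in the two demos above: each node offers 2000m CPU and roughly 4096Mi of memory, while each pod asks for 100m CPU and 200Mi of memory, so a single node fits min(2000/100, 4096/200) = 20 pods. The 40 pods therefore pack onto exactly 2 of the 4 nodes, which is where the compression ratio of 4/2 = 2 (50% idle resources) comes from.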
-------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/k-cloud-labs/kluster-capacity 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/ghodss/yaml v1.0.0 7 | github.com/jedib0t/go-pretty/v6 v6.4.4 8 | github.com/lithammer/dedent v1.1.0 9 | github.com/satori/go.uuid v1.2.0 10 | github.com/spf13/cobra v1.6.1 11 | github.com/spf13/pflag v1.0.5 12 | github.com/spf13/viper v1.14.0 13 | golang.org/x/sync v0.1.0 14 | golang.org/x/term v0.3.0 15 | k8s.io/api v0.26.1 16 | k8s.io/apimachinery v0.26.1 17 | k8s.io/apiserver v0.26.0 18 | k8s.io/client-go v0.26.1 19 | k8s.io/component-base v0.26.1 20 | k8s.io/klog/v2 v2.80.1 21 | k8s.io/kube-scheduler v0.0.0 22 | k8s.io/kubernetes v1.26.0 23 | sigs.k8s.io/controller-runtime v0.14.2 24 | ) 25 | 26 | require ( 27 | github.com/NYTimes/gziphandler v1.1.1 // indirect 28 | github.com/antlr/antlr4/runtime/Go/antlr v1.4.10 // indirect 29 | github.com/beorn7/perks v1.0.1 // indirect 30 | github.com/blang/semver/v4 v4.0.0 // indirect 31 | github.com/cenkalti/backoff/v4 v4.1.3 // indirect 32 | github.com/cespare/xxhash/v2 v2.1.2 // indirect 33 | github.com/coreos/go-semver v0.3.0 // indirect 34 | github.com/coreos/go-systemd/v22 v22.3.2 // indirect 35 | github.com/davecgh/go-spew v1.1.1 // indirect 36 | github.com/docker/distribution v2.8.1+incompatible // indirect 37 | github.com/emicklei/go-restful/v3 v3.9.0 // indirect 38 | github.com/evanphx/json-patch v4.12.0+incompatible // indirect 39 | github.com/felixge/httpsnoop v1.0.3 // indirect 40 | github.com/fsnotify/fsnotify v1.6.0 // indirect 41 | github.com/go-logr/logr v1.2.3 // indirect 42 | github.com/go-logr/stdr v1.2.2 // indirect 43 | github.com/go-openapi/jsonpointer v0.19.5 // indirect 44 | github.com/go-openapi/jsonreference v0.20.0 // indirect 45 | github.com/go-openapi/swag v0.19.14 // indirect 46 | github.com/gogo/protobuf v1.3.2 // indirect 47 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect 48 | github.com/golang/protobuf v1.5.2 // indirect 49 | github.com/google/cel-go v0.12.5 // indirect 50 | github.com/google/gnostic v0.5.7-v3refs // indirect 51 | github.com/google/go-cmp v0.5.9 // indirect 52 | github.com/google/gofuzz v1.1.0 // indirect 53 | github.com/google/uuid v1.1.2 // indirect 54 | github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect 55 | github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect 56 | github.com/hashicorp/hcl v1.0.0 // indirect 57 | github.com/imdario/mergo v0.3.6 // indirect 58 | github.com/inconshreveable/mousetrap v1.0.1 // indirect 59 | github.com/josharian/intern v1.0.0 // indirect 60 | github.com/json-iterator/go v1.1.12 // indirect 61 | github.com/magiconair/properties v1.8.6 // indirect 62 | github.com/mailru/easyjson v0.7.6 // indirect 63 | github.com/mattn/go-runewidth v0.0.13 // indirect 64 | github.com/matttproud/golang_protobuf_extensions v1.0.2 // indirect 65 | github.com/mitchellh/mapstructure v1.5.0 // indirect 66 | github.com/moby/sys/mountinfo v0.6.2 // indirect 67 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 68 | github.com/modern-go/reflect2 v1.0.2 // indirect 69 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 70 | github.com/opencontainers/go-digest v1.0.0 // indirect 71 | github.com/opencontainers/selinux v1.10.0 // indirect 72 | github.com/pelletier/go-toml v1.9.5 // indirect 73 | 
github.com/pelletier/go-toml/v2 v2.0.5 // indirect 74 | github.com/pkg/errors v0.9.1 // indirect 75 | github.com/prometheus/client_golang v1.14.0 // indirect 76 | github.com/prometheus/client_model v0.3.0 // indirect 77 | github.com/prometheus/common v0.37.0 // indirect 78 | github.com/prometheus/procfs v0.8.0 // indirect 79 | github.com/rivo/uniseg v0.2.0 // indirect 80 | github.com/spf13/afero v1.9.2 // indirect 81 | github.com/spf13/cast v1.5.0 // indirect 82 | github.com/spf13/jwalterweatherman v1.1.0 // indirect 83 | github.com/stoewer/go-strcase v1.2.0 // indirect 84 | github.com/subosito/gotenv v1.4.1 // indirect 85 | go.etcd.io/etcd/api/v3 v3.5.5 // indirect 86 | go.etcd.io/etcd/client/pkg/v3 v3.5.5 // indirect 87 | go.etcd.io/etcd/client/v3 v3.5.5 // indirect 88 | go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.35.0 // indirect 89 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.35.0 // indirect 90 | go.opentelemetry.io/otel v1.10.0 // indirect 91 | go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.10.0 // indirect 92 | go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.10.0 // indirect 93 | go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.10.0 // indirect 94 | go.opentelemetry.io/otel/metric v0.31.0 // indirect 95 | go.opentelemetry.io/otel/sdk v1.10.0 // indirect 96 | go.opentelemetry.io/otel/trace v1.10.0 // indirect 97 | go.opentelemetry.io/proto/otlp v0.19.0 // indirect 98 | go.uber.org/atomic v1.9.0 // indirect 99 | go.uber.org/multierr v1.8.0 // indirect 100 | go.uber.org/zap v1.24.0 // indirect 101 | golang.org/x/crypto v0.1.0 // indirect 102 | golang.org/x/net v0.3.1-0.20221206200815-1e63c2f08a10 // indirect 103 | golang.org/x/oauth2 v0.0.0-20221014153046-6fdb5e3db783 // indirect 104 | golang.org/x/sys v0.3.0 // indirect 105 | golang.org/x/text v0.5.0 // indirect 106 | golang.org/x/time v0.3.0 // indirect 107 | google.golang.org/appengine v1.6.7 // indirect 108 | google.golang.org/genproto v0.0.0-20221024183307-1bc688fe9f3e // indirect 109 | google.golang.org/grpc v1.50.1 // indirect 110 | google.golang.org/protobuf v1.28.1 // indirect 111 | gopkg.in/inf.v0 v0.9.1 // indirect 112 | gopkg.in/ini.v1 v1.67.0 // indirect 113 | gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect 114 | gopkg.in/yaml.v2 v2.4.0 // indirect 115 | gopkg.in/yaml.v3 v3.0.1 // indirect 116 | k8s.io/cloud-provider v0.0.0 // indirect 117 | k8s.io/component-helpers v0.26.0 // indirect 118 | k8s.io/csi-translation-lib v0.0.0 // indirect 119 | k8s.io/dynamic-resource-allocation v0.0.0 // indirect 120 | k8s.io/kms v0.26.0 // indirect 121 | k8s.io/kube-openapi v0.0.0-20221012153701-172d655c2280 // indirect 122 | k8s.io/mount-utils v0.0.0 // indirect 123 | k8s.io/utils v0.0.0-20221128185143-99ec85e7a448 // indirect 124 | sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.33 // indirect 125 | sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 // indirect 126 | sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect 127 | sigs.k8s.io/yaml v1.3.0 // indirect 128 | ) 129 | 130 | replace ( 131 | k8s.io/cloud-provider => k8s.io/cloud-provider v0.26.0 132 | k8s.io/csi-translation-lib => k8s.io/csi-translation-lib v0.26.0 133 | k8s.io/dynamic-resource-allocation => k8s.io/dynamic-resource-allocation v0.26.0 134 | k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.26.0 135 | k8s.io/mount-utils => k8s.io/mount-utils v0.26.0 136 | ) 137 | -------------------------------------------------------------------------------- 
/pkg/utils/utils.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "os" 7 | 8 | "github.com/ghodss/yaml" 9 | corev1 "k8s.io/api/core/v1" 10 | "k8s.io/client-go/informers" 11 | fakeclientset "k8s.io/client-go/kubernetes/fake" 12 | restclient "k8s.io/client-go/rest" 13 | "k8s.io/client-go/tools/clientcmd" 14 | "k8s.io/client-go/tools/events" 15 | configv1alpha1 "k8s.io/component-base/config/v1alpha1" 16 | "k8s.io/component-base/logs" 17 | kubeschedulerconfigv1 "k8s.io/kube-scheduler/config/v1" 18 | schedconfig "k8s.io/kubernetes/cmd/kube-scheduler/app/config" 19 | kubescheduleroptions "k8s.io/kubernetes/cmd/kube-scheduler/app/options" 20 | kubeschedulerconfig "k8s.io/kubernetes/pkg/scheduler/apis/config" 21 | "k8s.io/kubernetes/pkg/scheduler/apis/config/latest" 22 | kubeschedulerscheme "k8s.io/kubernetes/pkg/scheduler/apis/config/scheme" 23 | "k8s.io/kubernetes/pkg/scheduler/apis/config/validation" 24 | "k8s.io/kubernetes/pkg/scheduler/framework" 25 | 26 | "github.com/k-cloud-labs/kluster-capacity/pkg" 27 | ) 28 | 29 | const ( 30 | DefaultQPS = 10000 31 | DefaultBurst = 20000 32 | ) 33 | 34 | func BuildRestConfig(config string) (*restclient.Config, error) { 35 | if len(config) != 0 { 36 | master, err := getMasterFromKubeConfig(config) 37 | if err != nil { 38 | return nil, fmt.Errorf("failed to parse kubeconfig file: %v ", err) 39 | } 40 | 41 | cfg, err := clientcmd.BuildConfigFromFlags(master, config) 42 | if err != nil { 43 | return nil, fmt.Errorf("unable to build config: %v", err) 44 | } 45 | 46 | cfg.QPS = DefaultQPS 47 | cfg.Burst = DefaultBurst 48 | 49 | return cfg, nil 50 | } else { 51 | cfg, err := restclient.InClusterConfig() 52 | if err != nil { 53 | return nil, fmt.Errorf("unable to build in cluster config: %v", err) 54 | } 55 | 56 | cfg.QPS = DefaultQPS 57 | cfg.Burst = DefaultBurst 58 | 59 | return cfg, nil 60 | } 61 | } 62 | 63 | func BuildKubeSchedulerCompletedConfig(config, kubeconfig string) (*schedconfig.CompletedConfig, error) { 64 | var kcfg *kubeschedulerconfig.KubeSchedulerConfiguration 65 | if len(config) > 0 { 66 | cfg, err := loadConfigFromFile(config) 67 | if err != nil { 68 | return nil, err 69 | } 70 | if err := validation.ValidateKubeSchedulerConfiguration(cfg); err != nil { 71 | return nil, err 72 | } 73 | kcfg = cfg 74 | } else { 75 | cfg, err := latest.Default() 76 | if err != nil { 77 | return nil, err 78 | } 79 | kcfg = cfg 80 | } 81 | 82 | if len(kcfg.ClientConnection.Kubeconfig) == 0 && len(kubeconfig) > 0 { 83 | kcfg.ClientConnection.Kubeconfig = kubeconfig 84 | } 85 | 86 | cc, err := buildKubeSchedulerCompletedConfig(kcfg) 87 | if err != nil { 88 | return nil, fmt.Errorf("failed to init kube scheduler configuration: %v ", err) 89 | } 90 | 91 | return cc, nil 92 | } 93 | 94 | func PrintJson(r pkg.Printer) error { 95 | jsonBytes, err := json.Marshal(r) 96 | if err != nil { 97 | return fmt.Errorf("failed to create json: %v", err) 98 | } 99 | fmt.Println(string(jsonBytes)) 100 | return nil 101 | } 102 | 103 | func PrintYaml(r pkg.Printer) error { 104 | yamlBytes, err := yaml.Marshal(r) 105 | if err != nil { 106 | return fmt.Errorf("failed to create yaml: %v", err) 107 | } 108 | fmt.Print(string(yamlBytes)) 109 | return nil 110 | } 111 | 112 | func ComputePodResourceRequest(pod *corev1.Pod) *framework.Resource { 113 | result := &framework.Resource{} 114 | 115 | for _, container := range pod.Spec.Containers { 116 | result.Add(container.Resources.Requests) 
117 | 	}
118 | 
119 | 	// take max_resource(sum_pod, any_init_container)
120 | 	for _, container := range pod.Spec.InitContainers {
121 | 		result.SetMaxResource(container.Resources.Requests)
122 | 	}
123 | 
124 | 	// If Overhead is being utilized, add to the total requests for the pod
125 | 	if pod.Spec.Overhead != nil {
126 | 		result.Add(pod.Spec.Overhead)
127 | 	}
128 | 	return result
129 | }
130 | 
131 | func buildKubeSchedulerCompletedConfig(kcfg *kubeschedulerconfig.KubeSchedulerConfiguration) (*schedconfig.CompletedConfig, error) {
132 | 	if kcfg == nil {
133 | 		kcfg = &kubeschedulerconfig.KubeSchedulerConfiguration{}
134 | 		versionedCfg := kubeschedulerconfigv1.KubeSchedulerConfiguration{}
135 | 		versionedCfg.DebuggingConfiguration = *configv1alpha1.NewRecommendedDebuggingConfiguration()
136 | 
137 | 		kubeschedulerscheme.Scheme.Default(&versionedCfg)
138 | 		if err := kubeschedulerscheme.Scheme.Convert(&versionedCfg, kcfg, nil); err != nil {
139 | 			return nil, err
140 | 		}
141 | 	}
142 | 
143 | 	// inject scheduler config
144 | 	if len(kcfg.Profiles) == 0 {
145 | 		kcfg.Profiles = []kubeschedulerconfig.KubeSchedulerProfile{
146 | 			{},
147 | 		}
148 | 	}
149 | 
150 | 	kcfg.Profiles[0].SchedulerName = pkg.SchedulerName
151 | 	if kcfg.Profiles[0].Plugins == nil {
152 | 		kcfg.Profiles[0].Plugins = &kubeschedulerconfig.Plugins{}
153 | 	}
154 | 
155 | 	opts := &kubescheduleroptions.Options{
156 | 		ComponentConfig: kcfg,
157 | 		Logs:            logs.NewOptions(),
158 | 	}
159 | 
160 | 	c := &schedconfig.Config{}
161 | 	// clear out all unnecessary options so no port is bound
162 | 	// to allow running multiple instances in a row
163 | 	opts.Deprecated = nil
164 | 	opts.SecureServing = nil
165 | 	if err := opts.ApplyTo(c); err != nil {
166 | 		return nil, fmt.Errorf("unable to apply options to scheduler config: %v", err)
167 | 	}
168 | 
169 | 	// Get the completed config
170 | 	cc := c.Complete()
171 | 
172 | 	// completely ignore the events
173 | 	cc.EventBroadcaster = events.NewEventBroadcasterAdapter(fakeclientset.NewSimpleClientset())
174 | 
175 | 	// back the scheduler with a fake client and informer factory so the simulation never touches the real cluster
176 | 	cc.Client = fakeclientset.NewSimpleClientset()
177 | 	cc.InformerFactory = informers.NewSharedInformerFactory(cc.Client, 0)
178 | 
179 | 	return &cc, nil
180 | }
181 | 
182 | func loadConfigFromFile(file string) (*kubeschedulerconfig.KubeSchedulerConfiguration, error) {
183 | 	data, err := os.ReadFile(file)
184 | 	if err != nil {
185 | 		return nil, err
186 | 	}
187 | 	return loadConfig(data)
188 | }
189 | 
190 | func loadConfig(data []byte) (*kubeschedulerconfig.KubeSchedulerConfiguration, error) {
191 | 	// The UniversalDecoder runs defaulting and returns the internal type by default.
192 | 	obj, gvk, err := kubeschedulerscheme.Codecs.UniversalDecoder().Decode(data, nil, nil)
193 | 	if err != nil {
194 | 		return nil, err
195 | 	}
196 | 	if cfgObj, ok := obj.(*kubeschedulerconfig.KubeSchedulerConfiguration); ok {
197 | 		// We don't set this field in pkg/scheduler/apis/config/{version}/conversion.go
198 | 		// because the field will be cleared later by API machinery during
199 | 		// conversion. See KubeSchedulerConfiguration internal type definition for
200 | 		// more details.
201 | 		cfgObj.TypeMeta.APIVersion = gvk.GroupVersion().String()
202 | 		return cfgObj, nil
203 | 	}
204 | 	return nil, fmt.Errorf("couldn't decode as KubeSchedulerConfiguration, got %s", gvk)
205 | }
206 | 
207 | func getMasterFromKubeConfig(filename string) (string, error) {
208 | 	config, err := clientcmd.LoadFromFile(filename)
209 | 	if err != nil {
210 | 		return "", fmt.Errorf("cannot load kubeconfig file: %v", err)
211 | 	}
212 | 
213 | 	context, ok := config.Contexts[config.CurrentContext]
214 | 	if !ok {
215 | 		return "", fmt.Errorf("failed to get master address from kubeconfig")
216 | 	}
217 | 
218 | 	if val, ok := config.Clusters[context.Cluster]; ok {
219 | 		return val.Server, nil
220 | 	}
221 | 	return "", fmt.Errorf("failed to get master address from kubeconfig")
222 | }
223 | 
--------------------------------------------------------------------------------
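ComputePodResourceRequest above follows the usual kube-scheduler accounting rule: effective request = max(sum of app containers, largest init container) + pod overhead, because init containers run one at a time before the app containers start. A test-style worked example (values illustrative, not part of the repository):

```go
package utils

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// Example_computePodResourceRequest pins down the
// max(sum(containers), any init container) + overhead rule.
func Example_computePodResourceRequest() {
	req := func(cpu string) corev1.ResourceList {
		return corev1.ResourceList{corev1.ResourceCPU: resource.MustParse(cpu)}
	}

	pod := &corev1.Pod{
		Spec: corev1.PodSpec{
			// Two app containers: 150m + 100m = 250m in total.
			Containers: []corev1.Container{
				{Resources: corev1.ResourceRequirements{Requests: req("150m")}},
				{Resources: corev1.ResourceRequirements{Requests: req("100m")}},
			},
			// One init container asking for 300m: it runs alone, so the
			// effective request becomes max(250m, 300m) = 300m.
			InitContainers: []corev1.Container{
				{Resources: corev1.ResourceRequirements{Requests: req("300m")}},
			},
			// Runtime overhead is always added on top: 300m + 10m.
			Overhead: req("10m"),
		},
	}

	fmt.Println(ComputePodResourceRequest(pod).MilliCPU)
	// Output: 310
}
```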
80 | fs, err := parser.ParseDir(a.fset, dir, nil, parser.AllErrors|parser.ParseComments) 81 | 82 | if err != nil { 83 | fmt.Fprintln(os.Stderr, "ERROR(syntax)", logPrefix, err) 84 | a.failed = true 85 | return 86 | } 87 | 88 | for _, p := range fs { 89 | // returns first error, but a.handleError deals with it 90 | files := a.filterFiles(p.Files) 91 | for _, file := range files { 92 | replacements := make(map[string]string) 93 | pathToFile := a.fset.File(file.Pos()).Name() 94 | for _, imp := range file.Imports { 95 | importPath := strings.Replace(imp.Path.Value, "\"", "", -1) 96 | pathSegments := strings.Split(importPath, "/") 97 | importName := pathSegments[len(pathSegments)-1] 98 | if imp.Name != nil { 99 | importName = imp.Name.Name 100 | } 101 | if alias, ok := aliases[importPath]; ok { 102 | if alias != importName { 103 | if !*confirm { 104 | fmt.Fprintf(os.Stderr, "%sERROR wrong alias for import \"%s\" should be %s in file %s\n", logPrefix, importPath, alias, pathToFile) 105 | a.failed = true 106 | } 107 | replacements[importName] = alias 108 | if imp.Name != nil { 109 | imp.Name.Name = alias 110 | } else { 111 | imp.Name = ast.NewIdent(alias) 112 | } 113 | } 114 | } 115 | } 116 | 117 | if len(replacements) > 0 { 118 | if *confirm { 119 | fmt.Printf("%sReplacing imports with aliases in file %s\n", logPrefix, pathToFile) 120 | for key, value := range replacements { 121 | renameImportUsages(file, key, value) 122 | } 123 | ast.SortImports(a.fset, file) 124 | var buffer bytes.Buffer 125 | if err = format.Node(&buffer, a.fset, file); err != nil { 126 | panic(fmt.Sprintf("Error formatting ast node after rewriting import.\n%s\n", err.Error())) 127 | } 128 | 129 | fileInfo, err := os.Stat(pathToFile) 130 | if err != nil { 131 | panic(fmt.Sprintf("Error stat'ing file: %s\n%s\n", pathToFile, err.Error())) 132 | } 133 | 134 | err = os.WriteFile(pathToFile, buffer.Bytes(), fileInfo.Mode()) 135 | if err != nil { 136 | panic(fmt.Sprintf("Error writing file: %s\n%s\n", pathToFile, err.Error())) 137 | } 138 | } 139 | } 140 | } 141 | } 142 | } 143 | 144 | func renameImportUsages(f *ast.File, old, new string) { 145 | // use this to avoid renaming the package declaration, eg: 146 | // given: package foo; import foo "bar"; foo.Baz, rename foo->qux 147 | // yield: package foo; import qux "bar"; qux.Baz 148 | var pkg *ast.Ident 149 | 150 | // Rename top-level old to new, both unresolved names 151 | // (probably defined in another file) and names that resolve 152 | // to a declaration we renamed. 153 | ast.Inspect(f, func(node ast.Node) bool { 154 | if node == nil { 155 | return false 156 | } 157 | switch id := node.(type) { 158 | case *ast.File: 159 | pkg = id.Name 160 | case *ast.Ident: 161 | if pkg != nil && id == pkg { 162 | return false 163 | } 164 | if id.Name == old { 165 | id.Name = new 166 | } 167 | } 168 | return true 169 | }) 170 | } 171 | 172 | func (a *analyzer) filterFiles(fs map[string]*ast.File) []*ast.File { 173 | var files []*ast.File 174 | for _, f := range fs { 175 | files = append(files, f) 176 | } 177 | return files 178 | } 179 | 180 | type collector struct { 181 | dirs []string 182 | regex *regexp.Regexp 183 | } 184 | 185 | // handlePath walks the filesystem recursively, collecting directories, 186 | // ignoring some unneeded directories (hidden/vendored) that are handled 187 | // specially later. 
188 | func (c *collector) handlePath(path string, info os.FileInfo, err error) error { 189 | if err != nil { 190 | return err 191 | } 192 | if info.IsDir() { 193 | // Ignore hidden directories (.git, .cache, etc) 194 | if len(path) > 1 && path[0] == '.' || 195 | // Staging code is symlinked from vendor/k8s.io, and uses import 196 | // paths as if it were inside of vendor/. It fails typechecking 197 | // inside of staging/, but works when typechecked as part of vendor/. 198 | path == "staging" || 199 | // OS-specific vendor code tends to be imported by OS-specific 200 | // packages. We recursively typecheck imported vendored packages for 201 | // each OS, but don't typecheck everything for every OS. 202 | path == "vendor" || 203 | path == "_output" || 204 | // This is a weird one. /testdata/ is *mostly* ignored by Go, 205 | // and this translates to kubernetes/vendor not working. 206 | // edit/record.go doesn't compile without gopkg.in/yaml.v2 207 | // in $GOSRC/$GOROOT (both typecheck and the shell script). 208 | path == "pkg/kubectl/cmd/testdata/edit" { 209 | return filepath.SkipDir 210 | } 211 | if c.regex.MatchString(path) { 212 | c.dirs = append(c.dirs, path) 213 | } 214 | } 215 | return nil 216 | } 217 | 218 | func main() { 219 | flag.Parse() 220 | args := flag.Args() 221 | 222 | if len(args) == 0 { 223 | args = append(args, ".") 224 | } 225 | 226 | regex, err := regexp.Compile(*regex) 227 | if err != nil { 228 | log.Fatalf("Error compiling regex: %v", err) 229 | } 230 | c := collector{regex: regex} 231 | for _, arg := range args { 232 | err := filepath.Walk(arg, c.handlePath) 233 | if err != nil { 234 | log.Fatalf("Error walking: %v", err) 235 | } 236 | } 237 | sort.Strings(c.dirs) 238 | 239 | if len(*importAliases) > 0 { 240 | bytes, err := os.ReadFile(*importAliases) 241 | if err != nil { 242 | log.Fatalf("Error reading import aliases: %v", err) 243 | } 244 | err = json.Unmarshal(bytes, &aliases) 245 | if err != nil { 246 | log.Fatalf("Error loading aliases: %v", err) 247 | } 248 | } 249 | if isTerminal { 250 | logPrefix = "\r" // clear status bar when printing 251 | } 252 | fmt.Println("checking-imports: ") 253 | 254 | a := newAnalyzer() 255 | for _, dir := range c.dirs { 256 | if isTerminal { 257 | fmt.Printf("\r\033[0m %-80s\n", dir) 258 | } 259 | a.collect(dir) 260 | } 261 | fmt.Println() 262 | if a.failed { 263 | os.Exit(1) 264 | } 265 | } 266 | -------------------------------------------------------------------------------- /pkg/simulator/capacityestimation/report.go: -------------------------------------------------------------------------------- 1 | package capacityestimation 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "strings" 7 | "time" 8 | 9 | "github.com/jedib0t/go-pretty/v6/table" 10 | corev1 "k8s.io/api/core/v1" 11 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 12 | "k8s.io/apimachinery/pkg/labels" 13 | "k8s.io/kubernetes/pkg/scheduler/framework" 14 | 15 | "github.com/k-cloud-labs/kluster-capacity/pkg" 16 | "github.com/k-cloud-labs/kluster-capacity/pkg/utils" 17 | ) 18 | 19 | type CapacityEstimationReview struct { 20 | metav1.TypeMeta 21 | Spec CapacityEstimationReviewSpec `json:"spec"` 22 | Status CapacityEstimationReviewStatus `json:"status"` 23 | } 24 | 25 | type CapacityEstimationReviews []*CapacityEstimationReview 26 | 27 | type CapacityEstimationReviewSpec struct { 28 | // the pod desired for scheduling 29 | Templates []corev1.Pod `json:"templates"` 30 | PodRequirements []*Requirements `json:"podRequirements"` 31 | } 32 | 33 | type 
CapacityEstimationReviewStatus struct { 34 | CreationTimestamp time.Time `json:"creationTimestamp"` 35 | // actual number of replicas that could schedule 36 | Replicas int32 `json:"replicas"` 37 | StopReason *CapacityEstimationReviewScheduleStopReason `json:"stopReason"` 38 | // per node information about the scheduling simulation 39 | Pods []*CapacityEstimationReviewResult `json:"pods"` 40 | } 41 | 42 | type CapacityEstimationReviewResult struct { 43 | PodName string `json:"podName"` 44 | // numbers of replicas on nodes 45 | ReplicasOnNodes []*ReplicasOnNode `json:"replicasOnNodes"` 46 | // reason why no more pods could schedule (if any on this node) 47 | Summary []StopReasonSummary `json:"summary"` 48 | } 49 | 50 | type ReplicasOnNode struct { 51 | NodeName string `json:"nodeName"` 52 | Replicas int `json:"replicas"` 53 | } 54 | 55 | type StopReasonSummary struct { 56 | Reason string `json:"reason"` 57 | Count int `json:"count"` 58 | } 59 | 60 | type Resources struct { 61 | PrimaryResources corev1.ResourceList `json:"primaryResources"` 62 | ScalarResources map[corev1.ResourceName]int64 `json:"scalarResources"` 63 | } 64 | 65 | type Requirements struct { 66 | PodName string `json:"podName"` 67 | Resources *framework.Resource `json:"resources"` 68 | NodeSelectors map[string]string `json:"nodeSelectors"` 69 | } 70 | 71 | type CapacityEstimationReviewScheduleStopReason struct { 72 | StopType string `json:"stopType"` 73 | StopMessage string `json:"stopMessage"` 74 | } 75 | 76 | func (r *CapacityEstimationReview) Print(verbose bool, format string) error { 77 | switch format { 78 | case "json": 79 | return utils.PrintJson(r) 80 | case "yaml": 81 | return utils.PrintYaml(r) 82 | case "": 83 | capacityEstimationReviewPrettyPrint(r, verbose) 84 | return nil 85 | default: 86 | return fmt.Errorf("output format %q not recognized", format) 87 | } 88 | } 89 | 90 | func (r CapacityEstimationReviews) Print(verbose bool, format string) error { 91 | t := table.NewWriter() 92 | t.AppendHeader(table.Row{"spec", "replicas"}) 93 | for i, review := range r { 94 | if i > 0 && (format != "" || verbose) { 95 | fmt.Println("---------------------------------------------------------------") 96 | } 97 | switch format { 98 | case "json": 99 | err := utils.PrintJson(review) 100 | if err != nil { 101 | return err 102 | } 103 | case "yaml": 104 | err := utils.PrintYaml(review) 105 | if err != nil { 106 | return err 107 | } 108 | case "": 109 | if verbose { 110 | capacityEstimationReviewPrettyPrint(review, verbose) 111 | } else { 112 | output, err := json.Marshal(review.Spec.PodRequirements[0]) 113 | if err != nil { 114 | return err 115 | } 116 | t.AppendRow(table.Row{string(output), review.Status.Replicas}) 117 | } 118 | default: 119 | return fmt.Errorf("output format %q not recognized", format) 120 | } 121 | } 122 | 123 | if format == "" && !verbose { 124 | fmt.Println(t.Render()) 125 | } 126 | 127 | return nil 128 | } 129 | 130 | func generateReport(pods []*corev1.Pod, status *pkg.Status) *CapacityEstimationReview { 131 | return &CapacityEstimationReview{ 132 | Spec: getReviewSpec(pods), 133 | Status: getReviewStatus(pods, status), 134 | } 135 | } 136 | 137 | func getMainStopReason(message string) *CapacityEstimationReviewScheduleStopReason { 138 | slicedMessage := strings.Split(message, "\n") 139 | colon := strings.Index(slicedMessage[0], ":") 140 | 141 | reason := &CapacityEstimationReviewScheduleStopReason{ 142 | StopType: slicedMessage[0][:colon], 143 | StopMessage: strings.Trim(slicedMessage[0][colon+1:], " "), 144 
| } 145 | return reason 146 | } 147 | 148 | func parsePodsReview(templatePods []*corev1.Pod, status *pkg.Status) []*CapacityEstimationReviewResult { 149 | templatesCount := len(templatePods) 150 | result := make([]*CapacityEstimationReviewResult, 0) 151 | 152 | for i := 0; i < templatesCount; i++ { 153 | result = append(result, &CapacityEstimationReviewResult{ 154 | ReplicasOnNodes: make([]*ReplicasOnNode, 0), 155 | PodName: templatePods[i].Name, 156 | }) 157 | } 158 | 159 | for i, pod := range status.PodsForEstimation { 160 | nodeName := pod.Spec.NodeName 161 | first := true 162 | for _, sum := range result[i%templatesCount].ReplicasOnNodes { 163 | if sum.NodeName == nodeName { 164 | sum.Replicas++ 165 | first = false 166 | } 167 | } 168 | if first { 169 | result[i%templatesCount].ReplicasOnNodes = append(result[i%templatesCount].ReplicasOnNodes, &ReplicasOnNode{ 170 | NodeName: nodeName, 171 | Replicas: 1, 172 | }) 173 | } 174 | } 175 | 176 | slicedMessage := strings.Split(status.StopReason, "\n") 177 | if len(slicedMessage) == 1 { 178 | return result 179 | } 180 | 181 | return result 182 | } 183 | 184 | func getReviewSpec(podTemplates []*corev1.Pod) CapacityEstimationReviewSpec { 185 | podCopies := make([]corev1.Pod, len(podTemplates)) 186 | deepCopyPods(podTemplates, podCopies) 187 | return CapacityEstimationReviewSpec{ 188 | Templates: podCopies, 189 | PodRequirements: getPodsRequirements(podTemplates), 190 | } 191 | } 192 | 193 | func getReviewStatus(pods []*corev1.Pod, status *pkg.Status) CapacityEstimationReviewStatus { 194 | return CapacityEstimationReviewStatus{ 195 | CreationTimestamp: time.Now(), 196 | Replicas: int32(len(status.PodsForEstimation)), 197 | StopReason: getMainStopReason(status.StopReason), 198 | Pods: parsePodsReview(pods, status), 199 | } 200 | } 201 | 202 | func deepCopyPods(in []*corev1.Pod, out []corev1.Pod) { 203 | for i, pod := range in { 204 | out[i] = *pod.DeepCopy() 205 | } 206 | } 207 | 208 | func getPodsRequirements(pods []*corev1.Pod) []*Requirements { 209 | result := make([]*Requirements, 0) 210 | for _, pod := range pods { 211 | podRequirements := &Requirements{ 212 | PodName: pod.Name, 213 | Resources: utils.ComputePodResourceRequest(pod), 214 | NodeSelectors: pod.Spec.NodeSelector, 215 | } 216 | result = append(result, podRequirements) 217 | } 218 | return result 219 | } 220 | 221 | func instancesSum(replicasOnNodes []*ReplicasOnNode) int { 222 | result := 0 223 | for _, v := range replicasOnNodes { 224 | result += v.Replicas 225 | } 226 | return result 227 | } 228 | 229 | func capacityEstimationReviewPrettyPrint(r *CapacityEstimationReview, verbose bool) { 230 | if verbose { 231 | for _, req := range r.Spec.PodRequirements { 232 | fmt.Printf("%v pod requirements:\n", req.PodName) 233 | fmt.Printf("\t- CPU(m): %v\n", req.Resources.MilliCPU) 234 | fmt.Printf("\t- Memory(B): %v\n", req.Resources.Memory) 235 | if req.Resources.ScalarResources != nil { 236 | fmt.Printf("\t- ScalarResources: %v\n", req.Resources.ScalarResources) 237 | } 238 | 239 | if req.NodeSelectors != nil { 240 | fmt.Printf("\t- NodeSelector: %v\n", labels.SelectorFromSet(req.NodeSelectors).String()) 241 | } 242 | fmt.Printf("\n") 243 | } 244 | } 245 | 246 | for _, pod := range r.Status.Pods { 247 | if verbose { 248 | fmt.Printf("The cluster can schedule %v instance(s) of the pod %v.\n", instancesSum(pod.ReplicasOnNodes), pod.PodName) 249 | } else { 250 | fmt.Printf("%v\n", instancesSum(pod.ReplicasOnNodes)) 251 | } 252 | } 253 | 254 | if verbose { 255 | fmt.Printf("\nTermination 
reason: %v: %v\n", r.Status.StopReason.StopType, r.Status.StopReason.StopMessage)
256 | }
257 | 
258 | if verbose && r.Status.Replicas > 0 {
259 | for _, pod := range r.Status.Pods {
260 | if pod.Summary != nil {
261 | fmt.Printf("fit failure summary on nodes: ")
262 | for _, fs := range pod.Summary {
263 | fmt.Printf("%v (%v), ", fs.Reason, fs.Count)
264 | }
265 | fmt.Printf("\n")
266 | }
267 | }
268 | fmt.Printf("\nPod distribution among nodes:\n")
269 | for _, pod := range r.Status.Pods {
270 | fmt.Printf("%v\n", pod.PodName)
271 | for _, ron := range pod.ReplicasOnNodes {
272 | fmt.Printf("\t- %v: %v instance(s)\n", ron.NodeName, ron.Replicas)
273 | }
274 | }
275 | }
276 | }
277 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # kluster-capacity
2 | [[中文](./README-ZH.md)]
3 | 
4 | ![kluster-capacity-logo](docs/images/capacity-management-capacity-icon.jpeg)
5 | 
6 | [![Build Status](https://github.com/k-cloud-labs/kluster-capacity/actions/workflows/ci.yml/badge.svg)](https://github.com/k-cloud-labs/kluster-capacity/actions?query=workflow%3Abuild)
7 | [![Go Report Card](https://goreportcard.com/badge/github.com/k-cloud-labs/kluster-capacity)](https://goreportcard.com/report/github.com/k-cloud-labs/kluster-capacity)
8 | [![Go doc](https://img.shields.io/badge/go.dev-reference-brightgreen?logo=go&logoColor=white&style=flat)](https://pkg.go.dev/github.com/k-cloud-labs/kluster-capacity)
9 | 
10 | 
11 | Cluster capacity tool supports capacity estimation, scheduler simulation, and cluster compression.
12 | This repository was inspired by https://github.com/kubernetes-sigs/cluster-capacity.
13 | 
14 | ## Install
15 | Go binaries are automatically built with each release by [GoReleaser](https://github.com/goreleaser/goreleaser). They can be downloaded from this project's GitHub [releases page](https://github.com/k-cloud-labs/kluster-capacity/releases).
16 | 
17 | ### Homebrew
18 | This project can be installed with [Homebrew](https://brew.sh/):
19 | ```
20 | brew tap k-cloud-labs/tap
21 | brew install k-cloud-labs/tap/kluster-capacity
22 | ```
23 | 
24 | ### Krew
25 | This project can be installed with [Krew](https://github.com/GoogleContainerTools/krew):
26 | ```
27 | kubectl krew install kluster-capacity
28 | ```
29 | 
30 | ### From Source Code
31 | Build the framework:
32 | 
33 | ```sh
34 | $ cd $GOPATH/src/github.com/k-cloud-labs/
35 | $ git clone https://github.com/k-cloud-labs/kluster-capacity
36 | $ cd kluster-capacity
37 | $ make build
38 | ```
39 | 
40 | There are three available sub-commands: ce, cc, and ss, which represent capacity estimation, cluster compression, and scheduler simulation, respectively.
41 | 
42 | ## Capacity Estimation
43 | ### Intro
44 | As new pods get scheduled on nodes in a cluster, more resources get consumed. Monitoring the available resources in the cluster is very important, as operators can then increase the current resources in time before all of them are exhausted, or take other steps that increase the available resources.
45 | 
46 | Cluster capacity consists of the capacities of the individual cluster nodes. Capacity covers CPU, memory, disk space, and other resources.
47 | 
48 | Overall remaining allocatable capacity is an estimate. The goal is to analyze the remaining allocatable resources and estimate the available capacity that can still be consumed, in terms of the number of pod instances with given requirements that can still be scheduled in the cluster.
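For intuition, here is a deliberately naive sketch of the arithmetic behind such an estimate. It is illustrative only and is not how kluster-capacity computes the number: the tool runs a real kube-scheduler against a fake client, so taints, affinity, ports, volumes, and other predicates are all honored. The node and pod figures below are made up.

```go
package main

import "fmt"

// resources tracks only CPU (milli-cores) and memory (bytes) for brevity.
type resources struct {
	milliCPU int64
	memory   int64
}

// replicasThatFit greedily counts how many identical pod replicas fit into
// the free allocatable space of each node, limited by the scarcest resource.
func replicasThatFit(freePerNode []resources, req resources) int {
	total := 0
	for _, free := range freePerNode {
		byCPU := free.milliCPU / req.milliCPU
		byMem := free.memory / req.memory
		if byCPU < byMem {
			total += int(byCPU)
		} else {
			total += int(byMem)
		}
	}
	return total
}

func main() {
	// Four nodes, each with 1500m CPU and 3GiB of memory still free.
	free := make([]resources, 4)
	for i := range free {
		free[i] = resources{milliCPU: 1500, memory: 3 << 30}
	}
	// Pod template requesting 150m of CPU and 100Mi of memory.
	req := resources{milliCPU: 150, memory: 100 << 20}
	fmt.Println(replicasThatFit(free, req)) // prints 40: CPU is the scarce resource here
}
```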
49 | 
50 | ### Enhancement
51 | Here are some enhancements to the cluster capacity mentioned above.
52 | - Support using an existing pod from the cluster directly as the pod template.
53 | - Support batch simulation for different pod templates.
54 | 
55 | ### Run
56 | run the analysis:
57 | 
58 | ```sh
59 | # use a specified pod yaml file as the pod template
60 | $ ./kluster-capacity ce --pods-from-template
61 | # use an existing pod from the cluster as the pod template
62 | $ ./kluster-capacity ce --pods-from-cluster
63 | ```
64 | For more information about available options run:
65 | 
66 | ```sh
67 | $ ./kluster-capacity ce --help
68 | ```
69 | 
70 | ### Demonstration
71 | 
72 | Assume a cluster running with 1 master and 4 nodes, each node with 2 CPUs and 4GB of memory,
73 | and a pod template requesting `150m` of CPU and `100Mi` of memory.
74 | 
75 | ```sh
76 | $ ./kluster-capacity ce --pods-from-template --verbose
77 | Pod requirements:
78 | - cpu: 150m
79 | - memory: 100Mi
80 | 
81 | The cluster can schedule 52 instance(s) of the pod.
82 | Termination reason: FailedScheduling: pod (small-pod-52) failed to fit in any node
83 | fit failure on node (kube-node-1): Insufficient cpu
84 | fit failure on node (kube-node-4): Insufficient cpu
85 | fit failure on node (kube-node-2): Insufficient cpu
86 | fit failure on node (kube-node-3): Insufficient cpu
87 | 
88 | 
89 | Pod distribution among nodes:
90 | - kube-node-1: 13 instance(s)
91 | - kube-node-4: 13 instance(s)
92 | - kube-node-2: 13 instance(s)
93 | - kube-node-3: 13 instance(s)
94 | ```
95 | 
96 | Once the number of running pods in the cluster grows and the analysis is run again,
97 | the number of schedulable pods decreases accordingly:
98 | 
99 | ```sh
100 | $ ./kluster-capacity ce --pods-from-template --verbose
101 | Pod requirements:
102 | - cpu: 150m
103 | - memory: 100Mi
104 | 
105 | The cluster can schedule 46 instance(s) of the pod.
106 | Termination reason: FailedScheduling: pod (small-pod-46) failed to fit in any node
107 | fit failure on node (kube-node-1): Insufficient cpu
108 | fit failure on node (kube-node-4): Insufficient cpu
109 | fit failure on node (kube-node-2): Insufficient cpu
110 | fit failure on node (kube-node-3): Insufficient cpu
111 | 
112 | 
113 | Pod distribution among nodes:
114 | - kube-node-1: 11 instance(s)
115 | - kube-node-4: 12 instance(s)
116 | - kube-node-2: 11 instance(s)
117 | - kube-node-3: 12 instance(s)
118 | ```
119 | 
120 | ### Output format
121 | The `ce` command has a flag `--output` (`-o`) to format its output as json or yaml.
122 | 
123 | ```sh
124 | $ ./kluster-capacity ce --pods-from-template -o json|yaml
125 | ```
126 | 
127 | The json or yaml output is not versioned and is not guaranteed to be stable across releases.
128 | 
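Since the JSON output mirrors the exported `CapacityEstimationReview` type defined in `pkg/simulator/capacityestimation/report.go`, it can be consumed programmatically. Below is a minimal, unofficial sketch: it mirrors only the fields it needs (with the json tags from that file) rather than importing the package, precisely because the output shape is not guaranteed to be stable. The `report.json` filename is an assumption.

```go
package main

import (
	"encoding/json"
	"fmt"
	"os"
)

// review mirrors just the fields this sketch needs from the
// CapacityEstimationReview type in pkg/simulator/capacityestimation.
type review struct {
	Status struct {
		Replicas   int32 `json:"replicas"`
		StopReason struct {
			StopType    string `json:"stopType"`
			StopMessage string `json:"stopMessage"`
		} `json:"stopReason"`
	} `json:"status"`
}

func main() {
	// e.g. produced by: ./kluster-capacity ce --pods-from-template -o json > report.json
	data, err := os.ReadFile("report.json")
	if err != nil {
		panic(err)
	}
	var r review
	if err := json.Unmarshal(data, &r); err != nil {
		panic(err)
	}
	fmt.Printf("schedulable replicas: %d (stopped by %s: %s)\n",
		r.Status.Replicas, r.Status.StopReason.StopType, r.Status.StopReason.StopMessage)
}
```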
129 | ## Scheduler Simulation
130 | ### Intro
131 | The scheduler simulation takes all nodes, pods, and other related resources in the current cluster as input and simulates the process of starting from a cluster with no pods and then creating and scheduling all of them. This can be used to calculate the cluster compression ratio, to evaluate the effectiveness of the scheduling, or to measure the quality of the scheduling algorithm.
132 | 
133 | Compared to cluster compression, its results are more extreme and idealized.
134 | 
135 | ### Run
136 | run the analysis:
137 | 
138 | ```shell
139 | ./kluster-capacity ss
140 | ```
141 | For more information about available options run:
142 | 
143 | ```sh
144 | $ ./kluster-capacity ss --help
145 | ```
146 | It supports two termination conditions: `AllSucceed` and `AllScheduled`. The former means the program ends when all pods are successfully scheduled, while the latter means it exits after all pods have been scheduled at least once. The default is `AllSucceed`. The exit condition can be set using the `--exit-condition` flag.
147 | 
148 | ### Demonstration
149 | 
150 | Assume a cluster running with 1 master and 4 nodes, each node with 2 CPUs and 4GB of memory,
151 | and 40 pods to schedule, each requesting `100m` of CPU and `200Mi` of memory.
152 | 
153 | If the scheduler uses the `LeastAllocated` strategy, the scheduling result may be as follows:
154 | 
155 | ```sh
156 | $ ./kluster-capacity ss --verbose
157 | Termination reason: AllSucceed: 40 pod(s) have been scheduled successfully.
158 | 
159 | Pod distribution among nodes:
160 | - kube-node-1: 10 instance(s)
161 | - kube-node-2: 10 instance(s)
162 | - kube-node-3: 10 instance(s)
163 | - kube-node-4: 10 instance(s)
164 | ```
165 | 
166 | If the scheduler uses the `MostAllocated` strategy instead, the scheduling result may be as follows:
167 | 
168 | ```sh
169 | $ ./kluster-capacity ss --verbose
170 | Termination reason: AllSucceed: 40 pod(s) have been scheduled successfully.
171 | 
172 | Pod distribution among nodes:
173 | - kube-node-1: 20 instance(s)
174 | - kube-node-2: 20 instance(s)
175 | ```
176 | 
177 | The scheduling result above can be analyzed to evaluate the effectiveness of the scheduling strategy and the cluster capacity compression ratio. For example, the above result represents a cluster compression ratio of 2, which means that there is 50% resource waste in an ideal situation.
178 | 
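The arithmetic behind that claim can be made explicit. The following helper is illustrative only and is not part of the tool:

```go
package main

import "fmt"

// compressionRatio compares the nodes a cluster has with the nodes the
// simulation actually needed; the idle fraction is the implied waste.
func compressionRatio(totalNodes, neededNodes int) (ratio, waste float64) {
	ratio = float64(totalNodes) / float64(neededNodes)
	waste = 1 - 1/ratio
	return
}

func main() {
	// MostAllocated packed the 40 pods onto 2 of the 4 nodes.
	ratio, waste := compressionRatio(4, 2)
	fmt.Printf("compression ratio: %.1f, resource waste: %.0f%%\n", ratio, waste*100)
}
```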
179 | 
180 | ## Cluster Compression
181 | ### Intro
182 | Cluster compression takes the current state of the cluster, including all nodes, pods, and other relevant resources, as input, and simulates the process of compressing the cluster by removing nodes. It can be used to calculate the compression ratio of the cluster, which is a measure of how efficiently the resources are being utilized.
183 | 
184 | Compared to scheduler simulation, the results of cluster compression are generally more realistic.
185 | 
186 | ### Run
187 | run the analysis:
188 | 
189 | ```shell
190 | ./kluster-capacity cc --verbose
191 | ```
192 | For more information about available options run:
193 | 
194 | ```sh
195 | $ ./kluster-capacity cc --help
196 | ```
197 | 
198 | ### Demonstration
199 | 
200 | Assume a cluster running with 1 master and 4 nodes, each node with 2 CPUs and 4GB of memory,
201 | and 40 pods bound to the 4 nodes, each requesting `100m` of CPU and `200Mi` of memory.
202 | 
203 | ```shell
204 | ./kluster-capacity cc --verbose
205 | 2 node(s) in the cluster can be scaled down.
206 | 
207 | Termination reason: FailedSelectNode: could not find a node that satisfies the condition, 1 master node(s); 2 node(s) can't be scale down because of insufficient resource in other nodes;
208 | 
209 | nodes selected to be scaled down:
210 | - kube-node-1
211 | - kube-node-3
212 | ```
213 | 
214 | The above result indicates that, given the resource requirements of the 40 pods, the cluster can remove 2 nodes while still guaranteeing that all pods can be scheduled, resulting in a compression ratio of 2, which means there is 50% resource waste.
215 | 
216 | ## Feature
217 | - [x] cluster compression
218 | - [x] capacity estimation
219 | - [x] scheduler simulation
220 | - [ ] snapshot based simulation
221 | - [ ] fragmentation rate analysis
222 | 
223 | Enjoy it, and feel free to share your opinions. Thanks!
-------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
1 | 
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 | 
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 | 
8 | 1. Definitions.
9 | 
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 | 
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 | 
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 | 
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 | 
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 | 
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 | 
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 | 
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | 
-------------------------------------------------------------------------------- /pkg/simulator/clustercompression/simulator.go: --------------------------------------------------------------------------------
1 | package clustercompression
2 | 
3 | import (
4 | "context"
5 | "errors"
6 | "fmt"
7 | 
8 | corev1 "k8s.io/api/core/v1"
9 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10 | "k8s.io/client-go/informers"
11 | clientset "k8s.io/client-go/kubernetes"
12 | "k8s.io/client-go/tools/cache"
13 | "k8s.io/klog/v2"
14 | 
15 | "github.com/k-cloud-labs/kluster-capacity/app/cmds/clustercompression/options"
16 | "github.com/k-cloud-labs/kluster-capacity/pkg"
17 | pkgframework "github.com/k-cloud-labs/kluster-capacity/pkg/framework"
18 | "github.com/k-cloud-labs/kluster-capacity/pkg/utils"
19 | )
20 | 
21 | const FailedSelectNode = "FailedSelectNode: could not find a node that satisfies the condition"
22 | 
23 | // only one scheduler is supported for now; its name is pkg.SchedulerName ("simulator-scheduler")
24 | type simulator struct {
25 | pkg.Framework
26 | 
27 | maxSimulated int
28 | simulated int
29 | fakeClient clientset.Interface
30 | createdPods []*corev1.Pod
31 | createPodIndex int
32 | currentNode string
33 | currentNodeUnschedulable bool
34 | bindSuccessPodCount int
35 | nodeFilter NodeFilter
36 | }
37 | 
38 | // NewCCSimulatorExecutor creates a cc simulator which is completely independent of the apiserver, so there is no need
39 | // for a kubeconfig nor for an apiserver url
40 | func NewCCSimulatorExecutor(conf *options.ClusterCompressionConfig) (pkg.Simulator, error) {
41 | cc, err := utils.BuildKubeSchedulerCompletedConfig(conf.Options.SchedulerConfig, conf.Options.KubeConfig)
42 | if err != nil {
43 | return nil, err
44 | }
45 | 
46 | kubeConfig, err := utils.BuildRestConfig(conf.Options.KubeConfig)
47 | if err != nil {
48 | return nil, err
49 | }
50 | 
51 | s := &simulator{
52 | simulated: 0,
53 | bindSuccessPodCount: 0,
54 | createPodIndex: 0,
55 | maxSimulated: conf.Options.MaxLimit,
56 | }
57 | 
58 | // add your custom event handlers
59 | err = s.addEventHandlers(cc.InformerFactory)
60 | if err != nil {
61 | return nil, err
62 | }
63 | 
64 | framework, err := pkgframework.NewKubeSchedulerFramework(cc, kubeConfig,
65 | pkgframework.WithExcludeNodes(conf.Options.ExcludeNodes),
66 | pkgframework.WithPostBindHook(s.postBindHook),
67 | )
68 | if err != nil {
69 | return nil, err
70 | }
71 | 
72 | s.Framework = framework
73 | s.fakeClient = cc.Client
74 | nodeFilter, err := NewNodeFilter(s.fakeClient, s.GetPodsByNode, conf.Options.ExcludeNodes, conf.Options.FilterNodeOptions)
75 | if err != nil {
76 | return nil, err
77 | }
78 | 
79 | s.nodeFilter = nodeFilter
80 | return s, nil
81 | }
82 | 
83 | func (s *simulator) Run() error {
84 | return s.Framework.Run(s.selectNextNode)
85 | }
86 | 
87 | func (s *simulator) Report() pkg.Printer {
88 | klog.V(2).Infof("the following nodes can be offline to save resources: %v", s.Status().NodesToScaleDown)
89 | klog.V(2).Infof("the clusterCompression StopReason: %s", s.Status().StopReason)
90 | return generateReport(s.Status())
91 | }
92 | 
93 | func (s *simulator) postBindHook(bindPod *corev1.Pod) error {
94 | 
95 | if s.maxSimulated > 0 && s.simulated >= s.maxSimulated {
96 | return s.Stop(fmt.Sprintf("LimitReached: maximum number of nodes simulated: %v", s.maxSimulated))
97 | }
98 | 
99 | s.bindSuccessPodCount++
100 | if len(s.createdPods) > 0 && s.createPodIndex < len(s.createdPods) {
101 | klog.V(2).Infof("create %d pod: %s", s.createPodIndex,
s.createdPods[s.createPodIndex].Namespace+"/"+s.createdPods[s.createPodIndex].Name) 102 | _, err := s.fakeClient.CoreV1().Pods(s.createdPods[s.createPodIndex].Namespace).Create(context.TODO(), utils.InitPod(s.createdPods[s.createPodIndex]), metav1.CreateOptions{}) 103 | if err != nil { 104 | return err 105 | } 106 | s.createPodIndex++ 107 | } else if s.bindSuccessPodCount == len(s.createdPods) { 108 | klog.V(2).Infof("add node %s to simulator status", s.currentNode) 109 | s.UpdateNodesToScaleDown(s.currentNode) 110 | 111 | err := s.addLabelToNode(s.currentNode, NodeScaledDownSuccessLabel, "true") 112 | if err != nil { 113 | _ = s.Stop("FailedAddLabelToNode: " + err.Error()) 114 | } 115 | 116 | s.simulated++ 117 | s.nodeFilter.Done() 118 | 119 | err = s.selectNextNode() 120 | if err != nil { 121 | return s.Stop(fmt.Sprintf("%s, %s", FailedSelectNode, err.Error())) 122 | } 123 | } 124 | 125 | return nil 126 | } 127 | 128 | func (s *simulator) selectNextNode() error { 129 | s.Status().SelectNodeCountInc() 130 | status := s.nodeFilter.SelectNode() 131 | if status != nil && status.Node == nil { 132 | return errors.New(status.ErrReason) 133 | } 134 | node := status.Node 135 | klog.V(2).Infof("select node %s to simulate\n", node.Name) 136 | 137 | s.createdPods = nil 138 | s.bindSuccessPodCount = 0 139 | s.createPodIndex = 0 140 | s.currentNode = node.Name 141 | s.currentNodeUnschedulable = node.Spec.Unschedulable 142 | 143 | err := s.cordon(node) 144 | if err != nil { 145 | return err 146 | } 147 | 148 | err = s.deletePodsByNode(node) 149 | if err != nil { 150 | return err 151 | } 152 | klog.V(2).Infof("node %s needs to create %d pods\n", node.Name, len(s.createdPods)) 153 | 154 | if len(s.createdPods) > 0 { 155 | _, err = s.fakeClient.CoreV1().Pods(s.createdPods[s.createPodIndex].Namespace).Create(context.TODO(), utils.InitPod(s.createdPods[s.createPodIndex]), metav1.CreateOptions{}) 156 | klog.V(2).Infof("create %d pod: %s", s.createPodIndex, s.createdPods[s.createPodIndex].Namespace+"/"+s.createdPods[s.createPodIndex].Name) 157 | if err != nil { 158 | return err 159 | } 160 | s.createPodIndex++ 161 | } else { 162 | klog.V(2).Infof("add node %s to simulator status", s.currentNode) 163 | s.UpdateNodesToScaleDown(s.currentNode) 164 | 165 | err := s.addLabelToNode(s.currentNode, NodeScaledDownSuccessLabel, "true") 166 | if err != nil { 167 | _ = s.Stop("FailedAddLabelToNode: " + err.Error()) 168 | } 169 | 170 | s.simulated++ 171 | s.nodeFilter.Done() 172 | return s.selectNextNode() 173 | } 174 | 175 | return nil 176 | } 177 | 178 | func (s *simulator) cordon(node *corev1.Node) error { 179 | node, err := s.fakeClient.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) 180 | if err != nil { 181 | return err 182 | } 183 | 184 | copy := node.DeepCopy() 185 | 186 | taints := []corev1.Taint{} 187 | unScheduleTaint := corev1.Taint{ 188 | Key: corev1.TaintNodeUnschedulable, 189 | Effect: corev1.TaintEffectNoSchedule, 190 | } 191 | taints = append(taints, unScheduleTaint) 192 | 193 | for i := range copy.Spec.Taints { 194 | if copy.Spec.Taints[i].Key != corev1.TaintNodeUnschedulable { 195 | taints = append(taints, copy.Spec.Taints[i]) 196 | } 197 | } 198 | copy.Spec.Taints = taints 199 | 200 | _, err = s.fakeClient.CoreV1().Nodes().Update(context.TODO(), copy, metav1.UpdateOptions{}) 201 | if err != nil { 202 | return err 203 | } 204 | klog.V(2).Infof("cordon node %s successfully\n", node.Name) 205 | return nil 206 | } 207 | 208 | func (s *simulator) unCordon(nodeName string) error { 209 | 
node, err := s.fakeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) 210 | if err != nil { 211 | return err 212 | } 213 | 214 | copy := node.DeepCopy() 215 | 216 | taints := []corev1.Taint{} 217 | for i := range copy.Spec.Taints { 218 | if copy.Spec.Taints[i].Key != corev1.TaintNodeUnschedulable { 219 | taints = append(taints, copy.Spec.Taints[i]) 220 | } 221 | } 222 | copy.Spec.Taints = taints 223 | 224 | _, err = s.fakeClient.CoreV1().Nodes().Update(context.TODO(), copy, metav1.UpdateOptions{}) 225 | if err != nil { 226 | return err 227 | } 228 | klog.V(2).Infof("unCordon node %s successfully\n", nodeName) 229 | return nil 230 | 231 | } 232 | 233 | func (s *simulator) addLabelToNode(nodeName string, labelKey string, labelValue string) error { 234 | node, err := s.fakeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) 235 | if err != nil { 236 | return err 237 | } 238 | 239 | copy := node.DeepCopy() 240 | 241 | copy.Labels[labelKey] = labelValue 242 | _, err = s.fakeClient.CoreV1().Nodes().Update(context.TODO(), copy, metav1.UpdateOptions{}) 243 | if err != nil { 244 | return err 245 | } 246 | klog.V(2).Infof("add node %s failed scale down label successfully\n", node.Name) 247 | return nil 248 | } 249 | 250 | func (s *simulator) updatePodsFromCreatedPods() error { 251 | podList := s.createdPods 252 | for index := s.createPodIndex - 1; index >= 0; index-- { 253 | err := s.fakeClient.CoreV1().Pods(podList[index].Namespace).Delete(context.TODO(), podList[index].Name, metav1.DeleteOptions{}) 254 | if err != nil { 255 | return err 256 | } 257 | klog.V(2).Infof("delete %d pod: %s", index, s.createdPods[index].Namespace+"/"+s.createdPods[index].Name) 258 | } 259 | 260 | for index := range s.createdPods { 261 | _, err := s.fakeClient.CoreV1().Pods(s.createdPods[index].Namespace).Create(context.TODO(), s.createdPods[index], metav1.CreateOptions{}) 262 | if err != nil { 263 | return err 264 | } 265 | klog.V(2).Infof("create %d pod: %s", index, s.createdPods[index].Namespace+"/"+s.createdPods[index].Name) 266 | } 267 | 268 | return nil 269 | } 270 | 271 | func (s *simulator) deletePodsByNode(node *corev1.Node) error { 272 | podList, err := s.getPodsByNode(node) 273 | if err != nil { 274 | return err 275 | } 276 | 277 | var createdPods []*corev1.Pod 278 | for i := range podList { 279 | if !utils.IsDaemonsetPod(podList[i].OwnerReferences) && podList[i].DeletionTimestamp == nil { 280 | createdPods = append(createdPods, podList[i]) 281 | err := s.fakeClient.CoreV1().Pods(podList[i].Namespace).Delete(context.TODO(), podList[i].Name, metav1.DeleteOptions{}) 282 | if err != nil { 283 | return err 284 | } 285 | } 286 | } 287 | 288 | s.createdPods = createdPods 289 | return nil 290 | } 291 | 292 | func (s *simulator) addEventHandlers(informerFactory informers.SharedInformerFactory) (err error) { 293 | 294 | _, _ = informerFactory.Core().V1().Pods().Informer().AddEventHandler( 295 | cache.FilteringResourceEventHandler{ 296 | FilterFunc: func(obj interface{}) bool { 297 | if pod, ok := obj.(*corev1.Pod); ok && pod.Spec.SchedulerName == pkg.SchedulerName && 298 | metav1.HasAnnotation(pod.ObjectMeta, pkg.PodProvisioner) { 299 | return true 300 | } 301 | return false 302 | }, 303 | Handler: cache.ResourceEventHandlerFuncs{ 304 | UpdateFunc: func(oldObj, newObj interface{}) { 305 | if pod, ok := newObj.(*corev1.Pod); ok { 306 | for _, podCondition := range pod.Status.Conditions { 307 | // Only for pending pods provisioned by cc 308 | if podCondition.Type == 
corev1.PodScheduled && podCondition.Status == corev1.ConditionFalse && 309 | podCondition.Reason == corev1.PodReasonUnschedulable { 310 | s.Status().FailedSchedulerCountInc() 311 | // 1. Empty all Pods created by fake before 312 | // 2. Uncordon this node if needed 313 | // 3. Type the flags that cannot be filtered, clear the flags that prohibit scheduling, add failed scale down label, then selectNextNode 314 | klog.V(2).Infof("Failed scheduling pod %s, reason: %s, message: %s\n", pod.Namespace+"/"+pod.Name, podCondition.Reason, podCondition.Message) 315 | err = s.updatePodsFromCreatedPods() 316 | if err != nil { 317 | err = s.Stop("FailedDeletePodsFromCreatedPods: " + err.Error()) 318 | } 319 | 320 | if !s.currentNodeUnschedulable { 321 | err = s.unCordon(s.currentNode) 322 | if err != nil { 323 | err = s.Stop("FailedUnCordon: " + err.Error()) 324 | } 325 | } 326 | 327 | err = s.addLabelToNode(s.currentNode, NodeScaledDownFailedLabel, "true") 328 | if err != nil { 329 | err = s.Stop("FailedAddLabelToNode: " + err.Error()) 330 | } 331 | 332 | err = s.selectNextNode() 333 | if err != nil { 334 | _ = s.Stop(fmt.Sprintf("%s, %s", FailedSelectNode, err.Error())) 335 | } 336 | } 337 | } 338 | } 339 | }, 340 | }, 341 | }, 342 | ) 343 | 344 | return 345 | } 346 | 347 | func (s *simulator) getPodsByNode(node *corev1.Node) ([]*corev1.Pod, error) { 348 | podList, err := s.GetPodsByNode(node.Name) 349 | if err != nil { 350 | return nil, err 351 | } 352 | 353 | klog.V(2).Infof("node %s has %d pods\n", node.Name, len(podList)) 354 | return podList, nil 355 | } 356 | -------------------------------------------------------------------------------- /pkg/framework/kubescheduler.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 k-cloud-labs org 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package framework 18 | 19 | import ( 20 | "context" 21 | "encoding/json" 22 | "errors" 23 | "fmt" 24 | "os" 25 | "sync" 26 | "time" 27 | 28 | appsv1 "k8s.io/api/apps/v1" 29 | corev1 "k8s.io/api/core/v1" 30 | resourcev1alpha1 "k8s.io/api/resource/v1alpha1" 31 | storagev1 "k8s.io/api/storage/v1" 32 | apierrors "k8s.io/apimachinery/pkg/api/errors" 33 | "k8s.io/apimachinery/pkg/api/meta" 34 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 35 | "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" 36 | "k8s.io/apimachinery/pkg/runtime" 37 | "k8s.io/apimachinery/pkg/runtime/schema" 38 | "k8s.io/apimachinery/pkg/util/sets" 39 | utilfeature "k8s.io/apiserver/pkg/util/feature" 40 | "k8s.io/client-go/dynamic" 41 | "k8s.io/client-go/dynamic/dynamicinformer" 42 | "k8s.io/client-go/informers" 43 | coreinformers "k8s.io/client-go/informers/core/v1" 44 | clientset "k8s.io/client-go/kubernetes" 45 | restclient "k8s.io/client-go/rest" 46 | "k8s.io/client-go/testing" 47 | "k8s.io/client-go/tools/cache" 48 | "k8s.io/client-go/tools/events" 49 | "k8s.io/klog/v2" 50 | schedconfig "k8s.io/kubernetes/cmd/kube-scheduler/app/config" 51 | "k8s.io/kubernetes/pkg/api/legacyscheme" 52 | "k8s.io/kubernetes/pkg/features" 53 | "k8s.io/kubernetes/pkg/scheduler" 54 | kubeschedulerconfig "k8s.io/kubernetes/pkg/scheduler/apis/config" 55 | "k8s.io/kubernetes/pkg/scheduler/framework" 56 | "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder" 57 | "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption" 58 | "k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumebinding" 59 | frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime" 60 | "k8s.io/kubernetes/pkg/scheduler/profile" 61 | "sigs.k8s.io/controller-runtime/pkg/client/apiutil" 62 | 63 | "github.com/k-cloud-labs/kluster-capacity/pkg" 64 | "github.com/k-cloud-labs/kluster-capacity/pkg/plugins/generic" 65 | "github.com/k-cloud-labs/kluster-capacity/pkg/utils" 66 | ) 67 | 68 | func init() { 69 | if err := corev1.AddToScheme(legacyscheme.Scheme); err != nil { 70 | fmt.Printf("err: %v\n", err) 71 | } 72 | // add your own scheme here to use dynamic informer factory when you have some custom filter plugins 73 | // which uses other resources than defined in scheduler. 
74 | // for details, refer to k8s.io/kubernetes/pkg/scheduler/eventhandlers.go 75 | } 76 | 77 | var ( 78 | initResources = map[schema.GroupVersionKind]func() runtime.Object{ 79 | corev1.SchemeGroupVersion.WithKind("Namespace"): func() runtime.Object { return &corev1.Namespace{} }, 80 | corev1.SchemeGroupVersion.WithKind("Pod"): func() runtime.Object { return &corev1.Pod{} }, 81 | corev1.SchemeGroupVersion.WithKind("Node"): func() runtime.Object { return &corev1.Node{} }, 82 | corev1.SchemeGroupVersion.WithKind("PersistentVolume"): func() runtime.Object { return &corev1.PersistentVolume{} }, 83 | corev1.SchemeGroupVersion.WithKind("PersistentVolumeClaim"): func() runtime.Object { return &corev1.PersistentVolumeClaim{} }, 84 | corev1.SchemeGroupVersion.WithKind("Service"): func() runtime.Object { return &corev1.Service{} }, 85 | corev1.SchemeGroupVersion.WithKind("ReplicationController"): func() runtime.Object { return &corev1.ReplicationController{} }, 86 | appsv1.SchemeGroupVersion.WithKind("StatefulSet"): func() runtime.Object { return &appsv1.StatefulSet{} }, 87 | appsv1.SchemeGroupVersion.WithKind("ReplicaSet"): func() runtime.Object { return &appsv1.ReplicaSet{} }, 88 | storagev1.SchemeGroupVersion.WithKind("StorageClass"): func() runtime.Object { return &storagev1.StorageClass{} }, 89 | storagev1.SchemeGroupVersion.WithKind("CSINode"): func() runtime.Object { return &storagev1.CSINode{} }, 90 | storagev1.SchemeGroupVersion.WithKind("CSIDriver"): func() runtime.Object { return &storagev1.CSIDriver{} }, 91 | storagev1.SchemeGroupVersion.WithKind("CSIStorageCapacity"): func() runtime.Object { return &storagev1.CSIStorageCapacity{} }, 92 | resourcev1alpha1.SchemeGroupVersion.WithKind("PodScheduling"): func() runtime.Object { 93 | if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) { 94 | return &resourcev1alpha1.PodScheduling{} 95 | } 96 | 97 | return nil 98 | }, 99 | resourcev1alpha1.SchemeGroupVersion.WithKind("ResourceClaim"): func() runtime.Object { 100 | if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) { 101 | return &resourcev1alpha1.ResourceClaim{} 102 | } 103 | 104 | return nil 105 | }, 106 | } 107 | once sync.Once 108 | initObjects []runtime.Object 109 | ) 110 | 111 | type kubeschedulerFramework struct { 112 | // fake clientset used by scheduler 113 | fakeClient clientset.Interface 114 | // fake informer factory used by scheduler 115 | fakeInformerFactory informers.SharedInformerFactory 116 | // TODO: follow kubernetes master branch code 117 | dynInformerFactory dynamicinformer.DynamicSharedInformerFactory 118 | restMapper meta.RESTMapper 119 | // real dynamic client to init the world 120 | dynamicClient *dynamic.DynamicClient 121 | 122 | // scheduler 123 | scheduler *scheduler.Scheduler 124 | excludeNodes sets.Set[string] 125 | withScheduledPods bool 126 | withNodeImages bool 127 | ignorePodsOnExcludesNode bool 128 | // deletionTimestamp is not nil and phase is not succeed or failed 129 | withTerminatingPods bool 130 | outOfTreeRegistry frameworkruntime.Registry 131 | customBind kubeschedulerconfig.PluginSet 132 | customPreBind kubeschedulerconfig.PluginSet 133 | customPostBind kubeschedulerconfig.PluginSet 134 | customEventHandlers []func() 135 | postBindHook func(*corev1.Pod) error 136 | 137 | // for scheduler and informer 138 | informerCh chan struct{} 139 | schedulerCh chan struct{} 140 | 141 | // for simulator 142 | stopCh chan struct{} 143 | stopMux sync.Mutex 144 | stopped bool 145 | 146 | // final status 147 | status 
*pkg.Status 148 | // save status to this file if specified 149 | saveTo string 150 | } 151 | 152 | type Option func(*kubeschedulerFramework) 153 | 154 | func WithExcludeNodes(excludeNodes []string) Option { 155 | return func(s *kubeschedulerFramework) { 156 | s.excludeNodes = sets.New[string](excludeNodes...) 157 | } 158 | } 159 | 160 | func WithOutOfTreeRegistry(registry frameworkruntime.Registry) Option { 161 | return func(s *kubeschedulerFramework) { 162 | s.outOfTreeRegistry = registry 163 | } 164 | } 165 | 166 | func WithCustomBind(plugins kubeschedulerconfig.PluginSet) Option { 167 | return func(s *kubeschedulerFramework) { 168 | s.customBind = plugins 169 | } 170 | } 171 | 172 | func WithCustomPreBind(plugins kubeschedulerconfig.PluginSet) Option { 173 | return func(s *kubeschedulerFramework) { 174 | s.customPreBind = plugins 175 | } 176 | } 177 | 178 | func WithCustomPostBind(plugins kubeschedulerconfig.PluginSet) Option { 179 | return func(s *kubeschedulerFramework) { 180 | s.customPostBind = plugins 181 | } 182 | } 183 | 184 | func WithCustomEventHandlers(handlers []func()) Option { 185 | return func(s *kubeschedulerFramework) { 186 | s.customEventHandlers = handlers 187 | } 188 | } 189 | 190 | func WithNodeImages(with bool) Option { 191 | return func(s *kubeschedulerFramework) { 192 | s.withNodeImages = with 193 | } 194 | } 195 | 196 | func WithScheduledPods(with bool) Option { 197 | return func(s *kubeschedulerFramework) { 198 | s.withScheduledPods = with 199 | } 200 | } 201 | 202 | func WithIgnorePodsOnExcludesNode(with bool) Option { 203 | return func(s *kubeschedulerFramework) { 204 | s.ignorePodsOnExcludesNode = with 205 | } 206 | } 207 | 208 | func WithPostBindHook(postBindHook func(*corev1.Pod) error) Option { 209 | return func(s *kubeschedulerFramework) { 210 | s.postBindHook = postBindHook 211 | } 212 | } 213 | 214 | func WithSaveTo(to string) Option { 215 | return func(s *kubeschedulerFramework) { 216 | s.saveTo = to 217 | } 218 | } 219 | 220 | func WithTerminatingPods(with bool) Option { 221 | return func(s *kubeschedulerFramework) { 222 | s.withTerminatingPods = with 223 | } 224 | } 225 | 226 | // NewKubeSchedulerFramework create a generic simulator for ce, cc, ss simulator which is completely independent of apiserver so no need 227 | // for kubeconfig nor for apiserver url 228 | func NewKubeSchedulerFramework(kubeSchedulerConfig *schedconfig.CompletedConfig, restConfig *restclient.Config, options ...Option) (pkg.Framework, error) { 229 | kubeSchedulerConfig.InformerFactory.InformerFor(&corev1.Pod{}, newPodInformer) 230 | 231 | dynamicClient := dynamic.NewForConfigOrDie(restConfig) 232 | restMapper, err := apiutil.NewDynamicRESTMapper(restConfig) 233 | if err != nil { 234 | return nil, err 235 | } 236 | 237 | s := &kubeschedulerFramework{ 238 | fakeClient: kubeSchedulerConfig.Client, 239 | dynamicClient: dynamicClient, 240 | restMapper: restMapper, 241 | stopCh: make(chan struct{}), 242 | fakeInformerFactory: kubeSchedulerConfig.InformerFactory, 243 | informerCh: make(chan struct{}), 244 | schedulerCh: make(chan struct{}), 245 | withScheduledPods: true, 246 | ignorePodsOnExcludesNode: false, 247 | withNodeImages: true, 248 | withTerminatingPods: true, 249 | status: &pkg.Status{}, 250 | } 251 | for _, option := range options { 252 | option(s) 253 | } 254 | 255 | // only for latest k8s version 256 | if restConfig != nil { 257 | dynClient := dynamic.NewForConfigOrDie(restConfig) 258 | s.dynInformerFactory = 
// NewKubeSchedulerFramework creates a generic framework for the ce, cc and ss simulators.
// Scheduling itself runs entirely against a fake client and fake informers, independent of
// the apiserver; the rest config is only used to snapshot the initial state of a running cluster.
func NewKubeSchedulerFramework(kubeSchedulerConfig *schedconfig.CompletedConfig, restConfig *restclient.Config, options ...Option) (pkg.Framework, error) {
	kubeSchedulerConfig.InformerFactory.InformerFor(&corev1.Pod{}, newPodInformer)

	dynamicClient := dynamic.NewForConfigOrDie(restConfig)
	restMapper, err := apiutil.NewDynamicRESTMapper(restConfig)
	if err != nil {
		return nil, err
	}

	s := &kubeschedulerFramework{
		fakeClient:               kubeSchedulerConfig.Client,
		dynamicClient:            dynamicClient,
		restMapper:               restMapper,
		stopCh:                   make(chan struct{}),
		fakeInformerFactory:      kubeSchedulerConfig.InformerFactory,
		informerCh:               make(chan struct{}),
		schedulerCh:              make(chan struct{}),
		withScheduledPods:        true,
		ignorePodsOnExcludesNode: false,
		withNodeImages:           true,
		withTerminatingPods:      true,
		status:                   &pkg.Status{},
	}
	for _, option := range options {
		option(s)
	}

	// only for the latest k8s versions; reuse the dynamic client created above
	if restConfig != nil {
		s.dynInformerFactory = dynamicinformer.NewFilteredDynamicSharedInformerFactory(dynamicClient, 0, corev1.NamespaceAll, nil)
	}

	sched, err := s.createScheduler(kubeSchedulerConfig)
	if err != nil {
		return nil, err
	}

	s.scheduler = sched

	return s, nil
}

func (s *kubeschedulerFramework) GetPodsByNode(nodeName string) ([]*corev1.Pod, error) {
	dump := s.scheduler.Cache.Dump()
	if dump == nil || dump.Nodes[nodeName] == nil {
		return nil, fmt.Errorf("cannot get pods on node %q because it is missing from the cache dump", nodeName)
	}

	var res []*corev1.Pod
	for _, podInfo := range dump.Nodes[nodeName].Pods {
		if podInfo.Pod != nil {
			res = append(res, podInfo.Pod)
		}
	}

	return res, nil
}

// Initialize uses the given objects, or the default init resources read from the
// running cluster, to initialize the scheduler. The objects passed in must be typed;
// unstructured objects are rejected.
func (s *kubeschedulerFramework) Initialize(objs ...runtime.Object) error {
	if len(objs) == 0 {
		// no objects given: snapshot the running cluster via the dynamic client
		klog.V(2).InfoS("Init the world from running cluster")
		initObjects := getInitObjects(s.restMapper, s.dynamicClient)
		for _, unstructuredObj := range initObjects {
			obj := initResources[unstructuredObj.GetObjectKind().GroupVersionKind()]()
			if err := runtime.DefaultUnstructuredConverter.FromUnstructured(unstructuredObj.(*unstructured.Unstructured).UnstructuredContent(), obj); err != nil {
				return err
			}
			if needAdd, obj := s.preAdd(obj); needAdd {
				if err := s.fakeClient.(testing.FakeClient).Tracker().Add(obj); err != nil {
					return err
				}
			}
		}
	} else {
		klog.V(2).InfoS("Init the world from snapshot")
		for _, obj := range objs {
			if _, ok := obj.(runtime.Unstructured); ok {
				return errors.New("type of objs used to init the world must not be unstructured")
			}
			if needAdd, obj := s.preAdd(obj); needAdd {
				if err := s.fakeClient.(testing.FakeClient).Tracker().Add(obj); err != nil {
					return err
				}
			}
		}
	}

	return nil
}
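// A sketch of initializing the world from typed objects (e.g. a previously saved
// snapshot) instead of the running cluster; the objects below are illustrative,
// and they must be typed since Initialize rejects unstructured input:
//
//	objs := []runtime.Object{
//		&corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}},
//		&corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod-1", Namespace: "default"}},
//	}
//	if err := fw.Initialize(objs...); err != nil {
//		return err
//	}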
func (s *kubeschedulerFramework) UpdateEstimationPods(pod ...*corev1.Pod) {
	s.status.PodsForEstimation = append(s.status.PodsForEstimation, pod...)
}

func (s *kubeschedulerFramework) UpdateNodesToScaleDown(nodeName string) {
	s.status.NodesToScaleDown = append(s.status.NodesToScaleDown, nodeName)
}

func (s *kubeschedulerFramework) Status() *pkg.Status {
	return s.status
}

func (s *kubeschedulerFramework) Stop(reason string) error {
	s.stopMux.Lock()
	defer s.stopMux.Unlock()

	if s.stopped {
		return nil
	}

	nodeMap := make(map[string]corev1.Node)
	nodeList, _ := s.fakeClient.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{ResourceVersion: "0"})
	for _, node := range nodeList.Items {
		nodeMap[node.Name] = node
	}
	s.status.Nodes = nodeMap

	podList, _ := s.fakeClient.CoreV1().Pods(metav1.NamespaceAll).List(context.TODO(), metav1.ListOptions{ResourceVersion: "0"})
	s.status.Pods = podList.Items

	s.status.StopReason = reason

	if len(s.saveTo) > 0 {
		// truncate any existing file so stale bytes from a longer previous status
		// cannot survive past the end of the new content
		file, err := os.OpenFile(s.saveTo, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
		if err != nil {
			return err
		}
		defer file.Close()

		bytes, err := json.Marshal(s.status)
		if err != nil {
			return err
		}

		_, err = file.Write(bytes)
		if err != nil {
			return err
		}
	}

	defer func() {
		close(s.stopCh)
		close(s.informerCh)
		close(s.schedulerCh)
	}()

	s.stopped = true

	return nil
}

func (s *kubeschedulerFramework) CreatePod(pod *corev1.Pod) error {
	_, err := s.fakeClient.CoreV1().Pods(pod.Namespace).Create(context.TODO(), pod, metav1.CreateOptions{})
	return err
}

func (s *kubeschedulerFramework) Run(init func() error) error {
	// start the informers and wait for all caches to sync
	s.fakeInformerFactory.Start(s.informerCh)
	if s.dynInformerFactory != nil {
		s.dynInformerFactory.Start(s.informerCh)
	}
	start := time.Now()

	s.fakeInformerFactory.WaitForCacheSync(s.informerCh)
	if s.dynInformerFactory != nil {
		s.dynInformerFactory.WaitForCacheSync(s.informerCh)
	}

	klog.V(4).InfoS("wait sync", "cost", time.Since(start).Milliseconds())

	if init != nil {
		if err := init(); err != nil {
			return s.Stop(fmt.Sprintf("FailedRunInit: %s", err.Error()))
		}
	}

	go s.scheduler.Run(context.TODO())

	<-s.stopCh

	return nil
}
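// A sketch of driving a simulation: the init callback runs after all caches have
// synced and typically seeds the pods to schedule, and Stop is expected to be
// called later (e.g. from a post-bind hook) once the simulation reaches its goal.
// The pod variable here is illustrative:
//
//	if err := fw.Run(func() error {
//		return fw.CreatePod(podToEstimate)
//	}); err != nil {
//		return err
//	}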
func (s *kubeschedulerFramework) createScheduler(cc *schedconfig.CompletedConfig) (*scheduler.Scheduler, error) {
	// run custom event handlers
	for _, handler := range s.customEventHandlers {
		handler()
	}

	// register the default generic plugin
	if s.outOfTreeRegistry == nil {
		s.outOfTreeRegistry = make(frameworkruntime.Registry)
	}
	err := s.outOfTreeRegistry.Register(generic.Name, func(configuration runtime.Object, f framework.Handle) (framework.Plugin, error) {
		return generic.New(s.postBindHook, s.fakeClient, s.status)
	})
	if err != nil {
		return nil, err
	}

	// wire the generic plugin into the first profile and disable the plugins it
	// replaces: real volume binding, the default binder and default preemption
	cc.ComponentConfig.Profiles[0].Plugins.PreBind.Enabled = append(cc.ComponentConfig.Profiles[0].Plugins.PreBind.Enabled, kubeschedulerconfig.Plugin{Name: generic.Name})
	cc.ComponentConfig.Profiles[0].Plugins.PreBind.Disabled = append(cc.ComponentConfig.Profiles[0].Plugins.PreBind.Disabled, kubeschedulerconfig.Plugin{Name: volumebinding.Name})
	cc.ComponentConfig.Profiles[0].Plugins.Bind.Enabled = append(cc.ComponentConfig.Profiles[0].Plugins.Bind.Enabled, kubeschedulerconfig.Plugin{Name: generic.Name})
	cc.ComponentConfig.Profiles[0].Plugins.Bind.Disabled = append(cc.ComponentConfig.Profiles[0].Plugins.Bind.Disabled, kubeschedulerconfig.Plugin{Name: defaultbinder.Name})
	cc.ComponentConfig.Profiles[0].Plugins.PostBind.Enabled = append(cc.ComponentConfig.Profiles[0].Plugins.PostBind.Enabled, kubeschedulerconfig.Plugin{Name: generic.Name})
	cc.ComponentConfig.Profiles[0].Plugins.PostFilter.Disabled = append(cc.ComponentConfig.Profiles[0].Plugins.PostFilter.Disabled, kubeschedulerconfig.Plugin{Name: defaultpreemption.Name})

	// custom bind plugins
	cc.ComponentConfig.Profiles[0].Plugins.PreBind.Enabled = append(cc.ComponentConfig.Profiles[0].Plugins.PreBind.Enabled, s.customPreBind.Enabled...)
	cc.ComponentConfig.Profiles[0].Plugins.PreBind.Disabled = append(cc.ComponentConfig.Profiles[0].Plugins.PreBind.Disabled, s.customPreBind.Disabled...)
	cc.ComponentConfig.Profiles[0].Plugins.Bind.Enabled = append(cc.ComponentConfig.Profiles[0].Plugins.Bind.Enabled, s.customBind.Enabled...)
	cc.ComponentConfig.Profiles[0].Plugins.Bind.Disabled = append(cc.ComponentConfig.Profiles[0].Plugins.Bind.Disabled, s.customBind.Disabled...)
	cc.ComponentConfig.Profiles[0].Plugins.PostBind.Enabled = append(cc.ComponentConfig.Profiles[0].Plugins.PostBind.Enabled, s.customPostBind.Enabled...)
	cc.ComponentConfig.Profiles[0].Plugins.PostBind.Disabled = append(cc.ComponentConfig.Profiles[0].Plugins.PostBind.Disabled, s.customPostBind.Disabled...)

	// create the scheduler
	return scheduler.New(
		s.fakeClient,
		s.fakeInformerFactory,
		s.dynInformerFactory,
		getRecorderFactory(cc),
		s.schedulerCh,
		scheduler.WithComponentConfigVersion(cc.ComponentConfig.TypeMeta.APIVersion),
		scheduler.WithKubeConfig(cc.KubeConfig),
		scheduler.WithProfiles(cc.ComponentConfig.Profiles...),
		scheduler.WithPercentageOfNodesToScore(cc.ComponentConfig.PercentageOfNodesToScore),
		scheduler.WithFrameworkOutOfTreeRegistry(s.outOfTreeRegistry),
		scheduler.WithPodMaxBackoffSeconds(cc.ComponentConfig.PodMaxBackoffSeconds),
		scheduler.WithPodInitialBackoffSeconds(cc.ComponentConfig.PodInitialBackoffSeconds),
		scheduler.WithExtenders(cc.ComponentConfig.Extenders...),
		scheduler.WithParallelism(cc.ComponentConfig.Parallelism),
	)
}
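// A sketch of a post-bind hook as consumed by the generic plugin registered in
// createScheduler; the counter and target below are hypothetical, but stopping
// the framework from the hook is the intended use of Stop:
//
//	scheduled := 0
//	hook := func(pod *corev1.Pod) error {
//		scheduled++
//		if scheduled >= target {
//			return fw.Stop("LimitReached")
//		}
//		return nil
//	}
//	// pass via WithPostBindHook(hook) when constructing the framework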
func (s *kubeschedulerFramework) preAdd(obj runtime.Object) (bool, runtime.Object) {
	// filter out excluded nodes and their pods, and normalize pod/node spec and status
	if pod, ok := obj.(*corev1.Pod); ok {
		// on excluded nodes, drop daemonset pods always, and drop all pods when
		// ignorePodsOnExcludesNode is set
		if s.excludeNodes != nil {
			if _, ok := s.excludeNodes[pod.Spec.NodeName]; ok {
				if s.ignorePodsOnExcludesNode || (pod.OwnerReferences != nil && utils.IsDaemonsetPod(pod.OwnerReferences)) {
					return false, nil
				}
			}
		}

		if pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed {
			return false, nil
		}

		if pod.DeletionTimestamp != nil && !s.withTerminatingPods {
			return false, nil
		}

		if !s.withScheduledPods && !utils.IsDaemonsetPod(pod.OwnerReferences) {
			pod := utils.InitPod(pod)
			pod.Status.Phase = corev1.PodPending

			return true, pod
		}
	} else if node, ok := obj.(*corev1.Node); ok {
		if s.excludeNodes != nil {
			if _, ok := s.excludeNodes[node.Name]; ok {
				return false, nil
			}
		}
		// strip images regardless of whether any nodes are excluded
		if !s.withNodeImages {
			node.Status.Images = nil

			return true, node
		}
	}

	return true, obj
}

func newPodInformer(cs clientset.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer {
	// pods that have already finished are irrelevant to scheduling
	selector := fmt.Sprintf("status.phase!=%v,status.phase!=%v", corev1.PodSucceeded, corev1.PodFailed)
	tweakListOptions := func(options *metav1.ListOptions) {
		options.FieldSelector = selector
	}
	return coreinformers.NewFilteredPodInformer(cs, metav1.NamespaceAll, resyncPeriod, nil, tweakListOptions)
}

func getRecorderFactory(cc *schedconfig.CompletedConfig) profile.RecorderFactory {
	return func(name string) events.EventRecorder {
		return cc.EventBroadcaster.NewRecorder(name)
	}
}

// getInitObjects returns all objects that need to be added to the scheduler.
// The result is cached at package scope so that multiple framework instances
// do not hit the real kube-apiserver more than once.
func getInitObjects(restMapper meta.RESTMapper, dynClient dynamic.Interface) []runtime.Object {
	once.Do(func() {
		// each item is an UnstructuredList
		for gvk := range initResources {
			restMapping, err := restMapper.RESTMapping(gvk.GroupKind(), gvk.Version)
			if err != nil && !meta.IsNoMatchError(err) {
				fmt.Printf("unable to get rest mapping for %s, error: %s\n", gvk.String(), err.Error())
				os.Exit(1)
			}

			if restMapping != nil {
				var (
					list *unstructured.UnstructuredList
					err  error
				)
				if restMapping.Scope.Name() == meta.RESTScopeNameRoot {
					list, err = dynClient.Resource(restMapping.Resource).List(context.TODO(), metav1.ListOptions{ResourceVersion: "0"})
					if err != nil && !apierrors.IsNotFound(err) {
						fmt.Printf("unable to list %s, error: %s\n", gvk.String(), err.Error())
						os.Exit(1)
					}
				} else {
					if restMapping.Resource.Resource == "pods" {
						// filter out finished pods server side, matching newPodInformer
						list, err = dynClient.Resource(restMapping.Resource).Namespace(metav1.NamespaceAll).List(context.TODO(), metav1.ListOptions{
							ResourceVersion: "0",
							FieldSelector:   fmt.Sprintf("status.phase!=%v,status.phase!=%v", corev1.PodSucceeded, corev1.PodFailed),
						})
					} else {
						list, err = dynClient.Resource(restMapping.Resource).Namespace(metav1.NamespaceAll).List(context.TODO(), metav1.ListOptions{ResourceVersion: "0"})
					}
					if err != nil && !apierrors.IsNotFound(err) {
						fmt.Printf("unable to list %s, error: %s\n", gvk.String(), err.Error())
						os.Exit(1)
					}
				}

				// list may be nil when the resource was not found
				if list != nil {
					_ = list.EachListItem(func(object runtime.Object) error {
						initObjects = append(initObjects, object)
						return nil
					})
				}
			}
		}
	})

	return initObjects
}
--------------------------------------------------------------------------------