├── .github └── workflows │ ├── ci.yml │ └── release.yml ├── .gitignore ├── .golangci.yml ├── CODE_OF_CONDUCT.md ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── api └── types │ ├── http.go │ ├── load_traffic.go │ ├── load_traffic_test.go │ ├── metric.go │ └── runner_group.go ├── cmd └── kperf │ ├── commands │ ├── root.go │ ├── runner │ │ └── runner.go │ ├── runnergroup │ │ ├── delete.go │ │ ├── result.go │ │ ├── root.go │ │ ├── run.go │ │ ├── server.go │ │ └── status.go │ ├── utils │ │ └── helper.go │ └── virtualcluster │ │ ├── nodepool.go │ │ └── vc.go │ └── main.go ├── contrib ├── cmd │ └── runkperf │ │ ├── commands │ │ ├── bench │ │ │ ├── cilium_cr_list.go │ │ │ ├── list_configmaps.go │ │ │ ├── node100_job1_pod3k.go │ │ │ ├── node100_pod10k.go │ │ │ ├── node10_job1_pod100.go │ │ │ ├── root.go │ │ │ └── utils.go │ │ ├── data │ │ │ ├── configmaps │ │ │ │ └── configmap.go │ │ │ └── root.go │ │ ├── root.go │ │ └── warmup │ │ │ └── command.go │ │ └── main.go ├── internal │ ├── manifests │ │ ├── helm.go │ │ ├── loadprofile │ │ │ ├── cilium_cr_list.yaml │ │ │ ├── list_configmaps.yaml │ │ │ ├── node100_job1_pod3k.yaml │ │ │ ├── node100_pod10k.yaml │ │ │ ├── node10_job1_pod100.yaml │ │ │ └── warmup.yaml │ │ ├── manifest.go │ │ └── workload │ │ │ ├── 100pod.job.yaml │ │ │ ├── 3kpod.job.yaml │ │ │ └── deployments │ │ │ ├── Chart.yaml │ │ │ ├── templates │ │ │ └── deployments.tpl │ │ │ └── values.yaml │ ├── mountns │ │ ├── ns_linux.go │ │ └── ns_other.go │ └── types │ │ └── report.go ├── log │ ├── klogger.go │ └── logger.go └── utils │ ├── kperf_cmd.go │ ├── kubectl_cmd.go │ ├── kubectl_cmd_linux.go │ ├── kubectl_cmd_other.go │ ├── utils.go │ ├── utils_linux.go │ └── utils_other.go ├── docs ├── getting-started.md └── runkperf.md ├── go.mod ├── go.sum ├── helmcli ├── delete.go ├── get.go ├── list.go ├── release.go └── release_test.go ├── manifests ├── helm.go ├── mainfest.go ├── runnergroup │ └── server │ │ ├── Chart.yaml │ │ ├── 
templates │ │ ├── clusterrole.yaml │ │ ├── clusterrolebinding.yaml │ │ ├── flowcontrol.yaml │ │ ├── pod.yaml │ │ ├── serviceaccount.yaml │ │ └── spec.yaml │ │ └── values.yaml └── virtualcluster │ ├── nodecontrollers │ ├── Chart.yaml │ ├── templates │ │ ├── clusterrole.yaml │ │ ├── clusterrolebinding.yaml │ │ ├── config.yaml │ │ ├── flowcontrol.yaml │ │ ├── kwok.x-k8s.io_stages.yaml │ │ ├── serviceaccount.yaml │ │ └── statefulsets.tpl │ └── values.yaml │ └── nodes │ ├── Chart.yaml │ ├── templates │ ├── node-heartbeat-with-lease.yaml │ ├── node-initialize.yaml │ ├── nodes.tpl │ ├── pod-complete.yaml │ ├── pod-create.yaml │ ├── pod-delete.yaml │ ├── pod-init-container-completed.yaml │ ├── pod-init-container-running.yaml │ └── pod-ready.yaml │ └── values.yaml ├── metrics ├── request.go ├── request_test.go ├── utils.go └── utils_test.go ├── portforward └── portforward.go ├── request ├── client.go ├── client_test.go ├── random.go ├── requester.go ├── schedule.go ├── testdata │ └── dummy_nonexistent_kubeconfig.yaml └── unstructuredscheme │ └── serializer.go ├── runner ├── group │ ├── handler.go │ └── parse.go ├── localstore │ ├── reader.go │ ├── store.go │ └── writer.go ├── runnergroup_common.go ├── runnergroup_delete.go ├── runnergroup_list.go ├── runnergroup_result.go ├── runnergroup_run.go ├── server.go ├── server_runnergroup.go └── utils.go ├── scripts └── run_runner.sh └── virtualcluster ├── nodes_common.go ├── nodes_create.go ├── nodes_delete.go └── nodes_list.go /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | pull_request: 4 | branches: ['main'] 5 | jobs: 6 | linter: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v3 10 | - uses: actions/setup-go@v4 11 | with: 12 | go-version: "1.21" 13 | - name: golangci-lint 14 | uses: golangci/golangci-lint-action@v3 15 | with: 16 | args: --config .golangci.yml --timeout=8m 17 | 18 | test: 19 | runs-on: ubuntu-latest 20 | 
steps: 21 | - uses: actions/checkout@v3 22 | - uses: actions/setup-go@v4 23 | with: 24 | go-version: "1.21" 25 | - name: test 26 | run: make test 27 | 28 | build: 29 | runs-on: ubuntu-latest 30 | steps: 31 | - uses: actions/checkout@v3 32 | - uses: actions/setup-go@v4 33 | with: 34 | go-version: "1.21" 35 | - name: build 36 | run: make build 37 | - name: image-build 38 | run: make image-build 39 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | push: 4 | tags: 5 | - v* 6 | env: 7 | IMAGE_NAME: kperf 8 | 9 | jobs: 10 | push: 11 | runs-on: ubuntu-latest 12 | permissions: 13 | packages: write 14 | contents: read 15 | 16 | steps: 17 | - uses: actions/checkout@v4 18 | - name: Build image 19 | run: docker build . --file Dockerfile --tag $IMAGE_NAME --label "runnumber=${GITHUB_RUN_ID}" 20 | - name: Log in to registry 21 | run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin 22 | - name: Push image 23 | run: | 24 | IMAGE_ID=ghcr.io/${{ github.repository_owner }}/$IMAGE_NAME 25 | IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]') 26 | 27 | VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,') 28 | 29 | [[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//') 30 | 31 | [ "$VERSION" == "main" ] && VERSION=latest 32 | echo IMAGE_ID=$IMAGE_ID 33 | echo VERSION=$VERSION 34 | docker tag $IMAGE_NAME $IMAGE_ID:$VERSION 35 | docker push $IMAGE_ID:$VERSION 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | 
# Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | bin/ 17 | 18 | # Dependency directories (remove the comment below to include it) 19 | # vendor/ 20 | 21 | # Go workspace file 22 | go.work 23 | 24 | #output file directory 25 | result/ 26 | 27 | #tmp folder which contains .yaml files 28 | tmp/ 29 | 30 | #.txt files which contain response stats 31 | *.txt 32 | 33 | # VSCode settings 34 | .vscode/ 35 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | linters: 2 | enable: 3 | - gofmt 4 | - goimports 5 | - gosec 6 | - ineffassign 7 | - misspell 8 | - nolintlint 9 | - revive 10 | - staticcheck 11 | - unconvert 12 | - unused 13 | - vet 14 | - errcheck 15 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 
4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.22 AS build-stage 2 | 3 | WORKDIR /gomod 4 | COPY go.mod go.sum ./ 5 | RUN go mod download 6 | 7 | RUN mkdir -p /output 8 | 9 | WORKDIR /kperf-build 10 | RUN --mount=source=./,target=/kperf-build,rw make build && PREFIX=/output make install 11 | 12 | # TODO: We should consider to implement our own curl to upload data 13 | FROM ubuntu:22.04 AS release-stage 14 | 15 | RUN apt update -y && apt install curl -y 16 | 17 | WORKDIR / 18 | 19 | COPY --from=build-stage /output/bin/kperf /kperf 20 | COPY --from=build-stage /output/bin/runkperf /runkperf 21 | COPY scripts/run_runner.sh /run_runner.sh 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | COMMANDS=kperf 2 | CONTRIB_COMMANDS=runkperf 3 | 4 | # PREFIX is base path to install. 5 | PREFIX ?= /usr/local 6 | 7 | GO_BUILDTAGS = -tags "osusergo netgo static_build" 8 | 9 | # IMAGE_REPO is default repo for image-build recipe. 10 | IMAGE_REPO ?= localhost:5000 11 | IMAGE_TAG ?= latest 12 | IMAGE_NAME = $(IMAGE_REPO)/kperf:$(IMAGE_TAG) 13 | 14 | BINARIES=$(addprefix bin/,$(COMMANDS)) 15 | CONTRIB_BINARIES=$(addprefix bin/contrib/,$(CONTRIB_COMMANDS)) 16 | 17 | # default recipe is build 18 | .DEFAULT_GOAL := build 19 | 20 | # Always build 21 | ALWAYS: 22 | 23 | bin/%: cmd/% ALWAYS 24 | @echo $@ 25 | @CGO_ENABLED=0 go build -o $@ ${GO_BUILDTAGS} ./$< 26 | 27 | bin/contrib/%: contrib/cmd/% ALWAYS 28 | @echo $@ 29 | @CGO_ENABLED=0 go build -o $@ ${GO_BUILDTAGS} ./$< 30 | 31 | build: $(BINARIES) $(CONTRIB_BINARIES) ## build binaries 32 | @echo "$@" 33 | 34 | install: ## install binaries 35 | @install -d $(PREFIX)/bin 36 | @install $(BINARIES) $(PREFIX)/bin 37 | @install $(CONTRIB_BINARIES) $(PREFIX)/bin 38 | 39 | image-build: ## build image 40 | @echo building ${IMAGE_NAME} 41 | @docker build . 
-t ${IMAGE_NAME} 42 | 43 | image-push: image-build ## push image 44 | @echo pushing ${IMAGE_NAME} 45 | @docker push ${IMAGE_NAME} 46 | 47 | image-clean: ## clean image 48 | @echo cleaning ${IMAGE_NAME} 49 | @docker rmi ${IMAGE_NAME} 50 | 51 | test: ## run test 52 | @go test -v ./... 53 | 54 | lint: ## run lint 55 | @golangci-lint run --config .golangci.yml 56 | 57 | .PHONY: clean 58 | clean: ## clean up binaries 59 | @rm -f $(BINARIES) 60 | @rm -f $(CONTRIB_BINARIES) 61 | 62 | .PHONY: help 63 | help: ## this help 64 | @awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-36s\033[0m%s\n", $$1, $$2}' $(MAKEFILE_LIST) 65 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # kperf - a kube-apiserver benchmark tool 2 | 3 | kperf is a benchmarking tool for the Kubernetes API server that allows users to 4 | conduct high-load testing on simulated clusters. Its primary purpose is to emulate 5 | clusters larger than the actual environment, helping to uncover potential control 6 | plane issues based on the user's workload scale. This tool provides an efficient, 7 | cost-effective way for users to validate the performance and stability of their 8 | Kubernetes API server. 9 | 10 | # Why kperf? 11 | 12 | kperf offers unique advantages over tools like kubemark by simulating a broader 13 | range of traffic patterns found in real Kubernetes workloads. While kubemark 14 | primarily emulates kubelet traffic, kperf can replicate complex interactions 15 | typically associated with controllers, operators, and daemonsets. This includes 16 | scenarios like stale list requests from the API server cache, quorum-based list 17 | operations that directly impact etcd, and informer cache lists and watch behaviors. 
18 | By covering these additional traffic types, kperf provides a more comprehensive 19 | view of control plane performance and stability, making it an essential tool for 20 | understanding how a cluster will handle high-load scenarios across diverse workload patterns. 21 | 22 | ## Getting Started 23 | 24 | See documentation on [Getting-Started](/docs/getting-started.md) 25 | 26 | ## Running in Cluster 27 | 28 | The `kperf` commands offer low-level functions to measure that target kube-apiserver. 29 | You may need example to combine these functions to run example benchmark test. 30 | 31 | See documentation on [runkperf](/docs/runkperf.md) for more detail. 32 | 33 | ## Contributing 34 | 35 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 36 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 37 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 38 | 39 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 40 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 41 | provided by the bot. You will only need to do this once across all repos using our CLA. 42 | 43 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 44 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 45 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 46 | 47 | ## Trademarks 48 | 49 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 50 | trademarks or logos is subject to and must follow 51 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 
52 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 53 | Any use of third-party trademarks or logos are subject to those third-party's policies. 54 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 
18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps. 7 | - **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide. 
8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 26 | -------------------------------------------------------------------------------- /api/types/http.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package types 5 | 6 | // HTTPError is used to render response for error. 7 | type HTTPError struct { 8 | ErrorMessage string `json:"error"` 9 | } 10 | 11 | // Error implements error interface. 12 | func (herr HTTPError) Error() string { 13 | return herr.ErrorMessage 14 | } 15 | -------------------------------------------------------------------------------- /api/types/metric.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package types 5 | 6 | import "time" 7 | 8 | // ResponseErrorType is error type of response. 9 | type ResponseErrorType string 10 | 11 | const ( 12 | // ResponseErrorTypeUnknown indicates we don't have correct category for errors. 13 | ResponseErrorTypeUnknown ResponseErrorType = "unknown" 14 | // ResponseErrorTypeHTTP indicates that the response returns http code >= 400. 
	ResponseErrorTypeHTTP ResponseErrorType = "http"
	// ResponseErrorTypeHTTP2Protocol indicates that error comes from http2 layer.
	ResponseErrorTypeHTTP2Protocol ResponseErrorType = "http2-protocol"
	// ResponseErrorTypeConnection indicates that error is related to connection.
	// For instance, connection refused caused by server down.
	ResponseErrorTypeConnection ResponseErrorType = "connection"
)

// ResponseError is the record about one observed error response.
type ResponseError struct {
	// URL indicates target resource.
	URL string `json:"url"`
	// Timestamp indicates when this error was received.
	Timestamp time.Time `json:"timestamp"`
	// Duration records timespan in seconds.
	Duration float64 `json:"duration"`
	// Type indicates that category to which the error belongs.
	Type ResponseErrorType `json:"type"`
	// Code only works when Type is http.
	Code int `json:"code"`
	// Message shows error message for this error.
	//
	// NOTE: When Type is http, this field will be empty.
	Message string `json:"message"`
}

// ResponseStats is the report about benchmark result.
type ResponseStats struct {
	// Errors stores all the observed errors.
	Errors []ResponseError
	// LatenciesByURL stores all the observed latencies for each request.
	LatenciesByURL map[string][]float64
	// TotalReceivedBytes is total bytes read from apiserver.
	TotalReceivedBytes int64
}

// RunnerMetricReport is the serializable benchmark report produced by a
// single runner.
type RunnerMetricReport struct {
	// Total represents total number of requests.
	Total int `json:"total"`
	// Duration means the time of benchmark.
	Duration string `json:"duration"`
	// Errors stores all the observed errors.
	Errors []ResponseError `json:"errors,omitempty"`
	// ErrorStats means summary of errors grouped by type.
	ErrorStats map[string]int32 `json:"errorStats,omitempty"`
	// TotalReceivedBytes is total bytes read from apiserver.
	TotalReceivedBytes int64 `json:"totalReceivedBytes"`
	// LatenciesByURL stores all the observed latencies.
	LatenciesByURL map[string][]float64 `json:"latenciesByURL,omitempty"`
	// PercentileLatencies represents the latency distribution in seconds.
	PercentileLatencies [][2]float64 `json:"percentileLatencies,omitempty"`
	// PercentileLatenciesByURL represents the latency distribution in seconds per request.
	PercentileLatenciesByURL map[string][][2]float64 `json:"percentileLatenciesByURL,omitempty"`
}

// TODO(weifu): build brand new struct for RunnerGroupsReport to include more
// information, like how many runner groups, service account and flow control.
type RunnerGroupsReport = RunnerMetricReport
--------------------------------------------------------------------------------
/api/types/runner_group.go:
--------------------------------------------------------------------------------
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

package types

import metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

// RunnerGroup defines a set of runners with same load profile.
type RunnerGroup struct {
	// Name is the name of runner group.
	Name string `json:"name" yaml:"name"`
	// Spec is specification of the desired behavior of the runner group.
	Spec *RunnerGroupSpec `json:"spec" yaml:"spec"`
	// Status is current state.
	Status *RunnerGroupStatus `json:"status,omitempty" yaml:"status,omitempty"`
}

// RunnerGroupSpec is to describe how the runner group works.
type RunnerGroupSpec struct {
	// Count is the number of runners.
	Count int32 `json:"count" yaml:"count"`
	// Profile defines what the load traffic looks like.
	Profile *LoadProfile `json:"loadProfile,omitempty" yaml:"loadProfile"`
	// NodeAffinity defines how to deploy runners into dedicated nodes
	// which have specific labels.
	NodeAffinity map[string][]string `json:"nodeAffinity,omitempty" yaml:"nodeAffinity,omitempty"`
	// ServiceAccount is the name of the ServiceAccount to use to run runners.
	ServiceAccount *string `json:"serviceAccount,omitempty" yaml:"serviceAccount,omitempty"`
	// OwnerReference is to mark the runner group depending on this object.
	//
	// FORMAT: APIVersion:Kind:Name:UID
	OwnerReference *string `json:"ownerReference,omitempty" yaml:"ownerReference,omitempty"`
}

// RunnerGroupStatus represents current state of RunnerGroup.
type RunnerGroupStatus struct {
	// State is the current state of RunnerGroup.
	State string `json:"state" yaml:"state"`
	// StartTime represents time when RunnerGroup has been started.
	StartTime *metav1.Time `json:"startTime,omitempty" yaml:"startTime,omitempty"`
	// The number of runners which reached phase Succeeded.
	Succeeded int32 `json:"succeeded" yaml:"succeeded"`
	// The number of runners which reached phase Failed.
	Failed int32 `json:"failed" yaml:"failed"`
}

// RunnerGroupStatusState is current state of RunnerGroup.
type RunnerGroupStatusState string

const (
	// RunnerGroupStatusStateUnknown represents unknown state.
	RunnerGroupStatusStateUnknown = "unknown"
	// RunnerGroupStatusStateRunning represents runner group is still running.
	RunnerGroupStatusStateRunning = "running"
	// RunnerGroupStatusStateFinished represents all runners finished.
	RunnerGroupStatusStateFinished = "finished"
)
--------------------------------------------------------------------------------
/cmd/kperf/commands/root.go:
--------------------------------------------------------------------------------
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

package commands

import (
	"flag"
	"fmt"
	"os"
	"strconv"

	"github.com/Azure/kperf/cmd/kperf/commands/runner"
	"github.com/Azure/kperf/cmd/kperf/commands/runnergroup"
	"github.com/Azure/kperf/cmd/kperf/commands/virtualcluster"

	"github.com/urfave/cli"
	"k8s.io/klog/v2"
)

// App returns kperf application.
func App() *cli.App {
	return &cli.App{
		Name: "kperf",
		// TODO: add more fields
		Commands: []cli.Command{
			runner.Command,
			runnergroup.Command,
			virtualcluster.Command,
		},
		Flags: []cli.Flag{
			cli.StringFlag{
				Name:  "v",
				Usage: "log level for V logs",
				Value: "0",
			},
		},
		// Before runs ahead of every sub-command to wire the global -v
		// flag into klog's verbosity.
		Before: func(cliCtx *cli.Context) error {
			return initKlog(cliCtx)
		},
	}
}

// initKlog initializes klog verbosity from the global -v flag.
// It rejects values that are not non-negative integers before handing
// the value to klog's own flagset.
func initKlog(cliCtx *cli.Context) error {
	klogFlagset := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
	klog.InitFlags(klogFlagset)

	vInStr := cliCtx.GlobalString("v")
	if vFlag, err := strconv.Atoi(vInStr); err != nil || vFlag < 0 {
		return fmt.Errorf("invalid value \"%v\" for flag -v: value must be a non-negative integer", vInStr)
	}

	if err := klogFlagset.Set("v", vInStr); err != nil {
		return fmt.Errorf("failed to set log level: %w", err)
	}
	return nil
}
--------------------------------------------------------------------------------
/cmd/kperf/commands/runnergroup/delete.go:
--------------------------------------------------------------------------------
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

package runnergroup

import (
	"context"

	"github.com/Azure/kperf/runner"

	"github.com/urfave/cli"
)

// deleteCommand tears down the runner group server deployed in the
// target cluster, using the kubeconfig from the parent command's
// global flag.
var deleteCommand = cli.Command{
	Name:      "delete",
	ShortName: "del",
	Usage:     "delete runner groups",
	Action: func(cliCtx *cli.Context) error {
		kubeCfgPath := cliCtx.GlobalString("kubeconfig")

		return runner.DeleteRunnerGroupServer(context.Background(), kubeCfgPath)
	},
}
--------------------------------------------------------------------------------
/cmd/kperf/commands/runnergroup/result.go:
--------------------------------------------------------------------------------
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

package runnergroup

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"time"

	"github.com/Azure/kperf/api/types"
	"github.com/Azure/kperf/runner"

	"github.com/urfave/cli"
)

// resultCommand fetches the runner groups' benchmark report and prints
// it as JSON. With --wait (default true) it blocks until the result is
// ready, bounded by --timeout.
var resultCommand = cli.Command{
	Name:  "result",
	Usage: "show the runner groups' result",
	Flags: []cli.Flag{
		cli.DurationFlag{
			Name:  "timeout",
			Usage: "Timeout for waiting result. Only valid when --wait",
			Value: time.Hour,
		},
		cli.BoolTFlag{
			Name:  "wait",
			Usage: "Wait until result is ready",
		},
	},
	Action: func(cliCtx *cli.Context) error {
		kubeCfgPath := cliCtx.GlobalString("kubeconfig")
		wait := cliCtx.Bool("wait")

		ctx := context.Background()
		to := cliCtx.Duration("timeout")
		// Only bound the wait when a positive timeout was given and the
		// caller actually asked to wait.
		if to > 0 && wait {
			tctx, tcancel := context.WithTimeout(ctx, to)
			defer tcancel()
			ctx = tctx
		}

		res, err := runner.GetRunnerGroupResult(ctx, kubeCfgPath, wait)
		if err != nil {
			return err
		}

		return renderRunnerGroupsReport(res)
	},
}

// renderRunnerGroupsReport renders runner groups' report into stdio.
// It emits indented JSON to stdout.
func renderRunnerGroupsReport(res *types.RunnerGroupsReport) error {
	encoder := json.NewEncoder(os.Stdout)

	encoder.SetIndent("", " ")
	err := encoder.Encode(res)
	if err != nil {
		return fmt.Errorf("failed to encode json: %w", err)
	}
	return nil
}
--------------------------------------------------------------------------------
/cmd/kperf/commands/runnergroup/root.go:
--------------------------------------------------------------------------------
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

package runnergroup

import (
	"github.com/Azure/kperf/cmd/kperf/commands/utils"

	"github.com/urfave/cli"
)

// Command represents runnergroup sub-command.
var Command = cli.Command{
	Name:      "runnergroup",
	ShortName: "rg",
	Usage:     "deploy multiple runner groups into kubernetes",
	Flags: []cli.Flag{
		cli.StringFlag{
			Name:  "kubeconfig",
			Usage: "Path to the kubeconfig file",
			Value: utils.DefaultKubeConfigPath,
		},
	},
	Subcommands: []cli.Command{
		runCommand,
		deleteCommand,
		resultCommand,
		serverCommand,
		statusCommand,
	},
}
--------------------------------------------------------------------------------
/cmd/kperf/commands/runnergroup/run.go:
--------------------------------------------------------------------------------
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
3 | 4 | package runnergroup 5 | 6 | import ( 7 | "context" 8 | "fmt" 9 | "strconv" 10 | "strings" 11 | 12 | "github.com/Azure/kperf/api/types" 13 | "github.com/Azure/kperf/cmd/kperf/commands/utils" 14 | "github.com/Azure/kperf/runner" 15 | runnergroup "github.com/Azure/kperf/runner/group" 16 | 17 | "github.com/urfave/cli" 18 | ) 19 | 20 | var runCommand = cli.Command{ 21 | Name: "run", 22 | Usage: "run runner groups", 23 | Flags: []cli.Flag{ 24 | // TODO(weifu): need https://github.com/Azure/kperf/issues/25 to support list 25 | cli.StringSliceFlag{ 26 | Name: "runnergroup", 27 | Usage: "The runner group spec's URI", 28 | Required: true, 29 | }, 30 | cli.StringFlag{ 31 | Name: "runner-image", 32 | Usage: "The runner's conainer image", 33 | // TODO(weifu): 34 | // 35 | // We should build release pipeline so that we can 36 | // build with fixed public release image as default value. 37 | // Right now, we need to set image manually. 38 | Required: true, 39 | }, 40 | cli.StringFlag{ 41 | Name: "runner-flowcontrol", 42 | Usage: "Apply flowcontrol to runner group. 
(FORMAT: PriorityLevel:MatchingPrecedence)", 43 | Value: "workload-low:1000", 44 | }, 45 | cli.StringSliceFlag{ 46 | Name: "affinity", 47 | Usage: "Deploy server to the node with a specific labels (FORMAT: KEY=VALUE[,VALUE])", 48 | }, 49 | cli.IntFlag{ 50 | Name: "runner-verbosity", 51 | Usage: "The verbosity level of runners", 52 | Value: 2, 53 | }, 54 | }, 55 | Action: func(cliCtx *cli.Context) error { 56 | imgRef := cliCtx.String("runner-image") 57 | if len(imgRef) == 0 { 58 | return fmt.Errorf("required valid runner image") 59 | } 60 | 61 | affinityLabels, err := utils.KeyValuesMap(cliCtx.StringSlice("affinity")) 62 | if err != nil { 63 | return fmt.Errorf("failed to parse affinity: %w", err) 64 | } 65 | 66 | priorityLevel, matchingPrecedence, err := parseFlowControl(cliCtx.String("runner-flowcontrol")) 67 | if err != nil { 68 | return fmt.Errorf("failed to parse runner-flowcontrol: %w", err) 69 | } 70 | 71 | specs, err := loadRunnerGroupSpec(cliCtx) 72 | if err != nil { 73 | return fmt.Errorf("failed to load runner group spec: %w", err) 74 | } 75 | if len(specs) != 1 { 76 | return fmt.Errorf("only support one runner group right now. will support it after https://github.com/Azure/kperf/issues/25") 77 | } 78 | 79 | kubeCfgPath := cliCtx.GlobalString("kubeconfig") 80 | return runner.CreateRunnerGroupServer(context.Background(), 81 | kubeCfgPath, 82 | imgRef, 83 | specs[0], 84 | cliCtx.Int("runner-verbosity"), 85 | runner.WithRunCmdServerNodeSelectorsOpt(affinityLabels), 86 | runner.WithRunCmdRunnerGroupFlowControl(priorityLevel, matchingPrecedence), 87 | ) 88 | }, 89 | } 90 | 91 | // loadRunnerGroupSpec loads runner group spec from URIs. 
92 | func loadRunnerGroupSpec(cliCtx *cli.Context) ([]*types.RunnerGroupSpec, error) { 93 | clientset, err := buildKubernetesClientset(cliCtx) 94 | if err != nil { 95 | return nil, fmt.Errorf("failed to build kubernetes clientset: %w", err) 96 | } 97 | 98 | specURIs := cliCtx.StringSlice("runnergroup") 99 | 100 | specs := make([]*types.RunnerGroupSpec, 0, len(specURIs)) 101 | for _, specURI := range specURIs { 102 | spec, err := runnergroup.NewRunnerGroupSpecFromURI(clientset, specURI) 103 | if err != nil { 104 | return nil, err 105 | } 106 | 107 | specs = append(specs, spec) 108 | } 109 | return specs, nil 110 | } 111 | 112 | // parseFlowControl parses PriorityLevel:MatchingPrecedence into string and int. 113 | func parseFlowControl(value string) (priorityLevel string, matchingPrecedence int, err error) { 114 | l, r, ok := strings.Cut(value, ":") 115 | if !ok || len(l) == 0 || len(r) == 0 { 116 | err = fmt.Errorf("expected PriorityLevel:MatchingPrecedence format, but got %s", value) 117 | return 118 | } 119 | 120 | priorityLevel = l 121 | matchingPrecedence, err = strconv.Atoi(r) 122 | if err != nil { 123 | err = fmt.Errorf("failed to parse matchingPrecedence into int: %w", err) 124 | } 125 | return 126 | } 127 | -------------------------------------------------------------------------------- /cmd/kperf/commands/runnergroup/server.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | package runnergroup 5 | 6 | import ( 7 | "fmt" 8 | "strings" 9 | 10 | "github.com/Azure/kperf/runner" 11 | runnergroup "github.com/Azure/kperf/runner/group" 12 | 13 | "github.com/urfave/cli" 14 | "k8s.io/client-go/kubernetes" 15 | "k8s.io/client-go/tools/clientcmd" 16 | ) 17 | 18 | var serverCommand = cli.Command{ 19 | Name: "server", 20 | ArgsUsage: "NAME", 21 | Flags: []cli.Flag{ 22 | cli.StringFlag{ 23 | Name: "namespace", 24 | Usage: "The namespace scope for runners", 25 | Value: "default", 26 | }, 27 | cli.StringSliceFlag{ 28 | Name: "runnergroup", 29 | Usage: "The runner group spec's URI", 30 | Required: true, 31 | }, 32 | cli.StringFlag{ 33 | Name: "runner-image", 34 | Usage: "The runner's conainer image", 35 | Required: true, 36 | }, 37 | cli.StringFlag{ 38 | Name: "runner-owner", 39 | Usage: "The runners depend on this object (FORMAT: APIVersion:Kind:Name:UID)", 40 | }, 41 | cli.StringFlag{ 42 | Name: "runner-sa", 43 | Usage: "Override runner group's service account", 44 | }, 45 | cli.StringSliceFlag{ 46 | Name: "address", 47 | Usage: "Address for the server", 48 | Required: true, 49 | }, 50 | cli.StringFlag{ 51 | Name: "data", 52 | Usage: "The runner result should be stored in that path", 53 | Required: true, 54 | }, 55 | cli.IntFlag{ 56 | Name: "runner-verbosity", 57 | Usage: "The verbosity level of runners", 58 | Value: 2, 59 | }, 60 | }, 61 | Hidden: true, 62 | Action: func(cliCtx *cli.Context) error { 63 | if cliCtx.NArg() != 1 { 64 | return fmt.Errorf("required only one argument as server name") 65 | } 66 | 67 | name := strings.TrimSpace(cliCtx.Args().Get(0)) 68 | if len(name) == 0 { 69 | return fmt.Errorf("required non-empty server name") 70 | } 71 | 72 | groupHandlers, err := buildRunnerGroupHandlers(cliCtx, name) 73 | if err != nil { 74 | return fmt.Errorf("failed to create runner group handlers: %w", err) 75 | } 76 | 77 | dataDir := cliCtx.String("data") 78 | addrs := cliCtx.StringSlice("address") 79 | 80 | srv, err := 
runner.NewServer(dataDir, addrs, groupHandlers...) 81 | if err != nil { 82 | return err 83 | } 84 | return srv.Run() 85 | }, 86 | } 87 | 88 | // buildRunnerGroupHandlers creates a slice of runner group handlers. 89 | func buildRunnerGroupHandlers(cliCtx *cli.Context, serverName string) ([]*runnergroup.Handler, error) { 90 | clientset, err := buildKubernetesClientset(cliCtx) 91 | if err != nil { 92 | return nil, fmt.Errorf("failed to build kubernetes clientset: %w", err) 93 | } 94 | 95 | specURIs := cliCtx.StringSlice("runnergroup") 96 | imgRef := cliCtx.String("runner-image") 97 | namespace := cliCtx.String("namespace") 98 | runnerVerbosity := cliCtx.Int("runner-verbosity") 99 | 100 | ownerRef := "" 101 | if cliCtx.IsSet("runner-owner") { 102 | ownerRef = cliCtx.String("runner-owner") 103 | } 104 | 105 | sa := "" 106 | if cliCtx.IsSet("runner-sa") { 107 | sa = cliCtx.String("runner-sa") 108 | } 109 | 110 | groups := make([]*runnergroup.Handler, 0, len(specURIs)) 111 | for idx, specURI := range specURIs { 112 | spec, err := runnergroup.NewRunnerGroupSpecFromURI(clientset, specURI) 113 | if err != nil { 114 | return nil, err 115 | } 116 | 117 | if ownerRef != "" { 118 | spec.OwnerReference = &ownerRef 119 | } 120 | 121 | if sa != "" { 122 | spec.ServiceAccount = &sa 123 | } 124 | 125 | groupName := fmt.Sprintf("%s-%d", serverName, idx) 126 | g, err := runnergroup.NewHandler(clientset, namespace, groupName, spec, imgRef, runnerVerbosity) 127 | if err != nil { 128 | return nil, err 129 | } 130 | groups = append(groups, g) 131 | } 132 | 133 | return groups, nil 134 | } 135 | 136 | // buildKubernetesClientset builds kubernetes clientset from global flag. 
137 | func buildKubernetesClientset(cliCtx *cli.Context) (kubernetes.Interface, error) { 138 | kubeCfgPath := cliCtx.GlobalString("kubeconfig") 139 | config, err := clientcmd.BuildConfigFromFlags("", kubeCfgPath) 140 | if err != nil { 141 | return nil, err 142 | } 143 | 144 | return kubernetes.NewForConfig(config) 145 | } 146 | -------------------------------------------------------------------------------- /cmd/kperf/commands/runnergroup/status.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package runnergroup 5 | 6 | import ( 7 | "context" 8 | "fmt" 9 | "os" 10 | "text/tabwriter" 11 | "time" 12 | 13 | "github.com/Azure/kperf/api/types" 14 | "github.com/Azure/kperf/runner" 15 | 16 | "github.com/urfave/cli" 17 | ) 18 | 19 | var statusCommand = cli.Command{ 20 | Name: "status", 21 | Usage: "show runner groups' current status", 22 | Action: func(cliCtx *cli.Context) error { 23 | kubeCfgPath := cliCtx.GlobalString("kubeconfig") 24 | ctx := context.Background() 25 | 26 | rgs, err := runner.ListRunnerGroups(ctx, kubeCfgPath) 27 | if err != nil { 28 | return err 29 | } 30 | 31 | return renderRunnerGroups(rgs) 32 | }, 33 | } 34 | 35 | // renderRunnerGroups renders RunnerGroups into table format. 
36 | func renderRunnerGroups(rgs []*types.RunnerGroup) error { 37 | tw := tabwriter.NewWriter(os.Stdout, 1, 12, 3, ' ', 0) 38 | 39 | fmt.Fprintln(tw, "NAME\tCOUNT\tSUCCEEDED\tFAILED\tSTATE\tSTART\t") 40 | for _, rg := range rgs { 41 | startAt := "unknown" 42 | if st := rg.Status.StartTime; st != nil { 43 | startAt = st.Format(time.RFC3339) 44 | } 45 | fmt.Fprintf(tw, "%s\t%d\t%d\t%d\t%s\t%s\t\n", 46 | rg.Name, 47 | rg.Spec.Count, 48 | rg.Status.Succeeded, 49 | rg.Status.Failed, 50 | rg.Status.State, 51 | startAt, 52 | ) 53 | } 54 | return tw.Flush() 55 | } 56 | -------------------------------------------------------------------------------- /cmd/kperf/commands/utils/helper.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package utils 5 | 6 | import ( 7 | "context" 8 | "fmt" 9 | "os" 10 | "path/filepath" 11 | "strings" 12 | 13 | flowcontrolv1 "k8s.io/api/flowcontrol/v1" 14 | apierrors "k8s.io/apimachinery/pkg/api/errors" 15 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 16 | "k8s.io/client-go/kubernetes" 17 | "k8s.io/client-go/tools/clientcmd" 18 | "k8s.io/client-go/util/homedir" 19 | ) 20 | 21 | // DefaultKubeConfigPath is default kubeconfig path if there is home dir. 22 | var DefaultKubeConfigPath string 23 | 24 | func init() { 25 | if !inCluster() { 26 | if home := homedir.HomeDir(); home != "" { 27 | DefaultKubeConfigPath = filepath.Join(home, ".kube", "config") 28 | } 29 | } 30 | } 31 | 32 | // KeyValuesMap converts key=value[,value] into map[string][]string. 
func KeyValuesMap(strs []string) (map[string][]string, error) {
	res := make(map[string][]string, len(strs))
	for _, str := range strs {
		key, valuesInStr, ok := strings.Cut(str, "=")
		if !ok {
			return nil, fmt.Errorf("expected key=value[,value] format, but got %s", str)
		}
		// Values are comma-separated; only the first '=' splits key/value.
		values := strings.Split(valuesInStr, ",")
		res[key] = values
	}
	return res, nil
}

// KeyValueMap converts key=value into map[string]string.
func KeyValueMap(strs []string) (map[string]string, error) {
	res := make(map[string]string, len(strs))
	for _, str := range strs {
		key, value, ok := strings.Cut(str, "=")
		if !ok {
			return nil, fmt.Errorf("expected key=value format, but got %s", str)
		}
		res[key] = value
	}
	return res, nil
}

// inCluster is to check if current process is in pod.
func inCluster() bool {
	f, err := os.Stat("/var/run/secrets/kubernetes.io/serviceaccount/token")
	if err != nil || f.IsDir() {
		return false
	}

	return os.Getenv("KUBERNETES_SERVICE_HOST") != "" &&
		os.Getenv("KUBERNETES_SERVICE_PORT") != ""
}

// ApplyPriorityLevelConfiguration applies the custom-system
// PriorityLevelConfiguration through the Kubernetes API (client-go).
71 | func ApplyPriorityLevelConfiguration(kubeconfigPath string) error { 72 | // Load the kubeconfig file 73 | config, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath) 74 | if err != nil { 75 | return fmt.Errorf("failed to load kubeconfig: %v", err) 76 | } 77 | 78 | // Create a Kubernetes client 79 | clientset, err := kubernetes.NewForConfig(config) 80 | if err != nil { 81 | return fmt.Errorf("failed to create Kubernetes client: %v", err) 82 | } 83 | 84 | // Define the PriorityLevelConfiguration 85 | lendablePercent := int32(30) 86 | plc := &flowcontrolv1.PriorityLevelConfiguration{ 87 | TypeMeta: metav1.TypeMeta{ 88 | APIVersion: "flowcontrol.apiserver.k8s.io/v1", 89 | Kind: "PriorityLevelConfiguration", 90 | }, 91 | ObjectMeta: metav1.ObjectMeta{ 92 | Name: "custom-system", 93 | }, 94 | Spec: flowcontrolv1.PriorityLevelConfigurationSpec{ 95 | Type: flowcontrolv1.PriorityLevelEnablementLimited, 96 | Limited: &flowcontrolv1.LimitedPriorityLevelConfiguration{ 97 | LendablePercent: &lendablePercent, 98 | LimitResponse: flowcontrolv1.LimitResponse{ 99 | Type: flowcontrolv1.LimitResponseTypeQueue, 100 | Queuing: &flowcontrolv1.QueuingConfiguration{ 101 | Queues: 64, 102 | HandSize: 6, 103 | QueueLengthLimit: 50, 104 | }, 105 | }, 106 | }, 107 | }, 108 | } 109 | 110 | plcCli := clientset.FlowcontrolV1().PriorityLevelConfigurations() 111 | 112 | // Apply the PriorityLevelConfiguration 113 | _, err = plcCli.Create(context.TODO(), plc, metav1.CreateOptions{}) 114 | if err != nil { 115 | if apierrors.IsAlreadyExists(err) { 116 | _, err = plcCli.Update(context.TODO(), plc, metav1.UpdateOptions{}) 117 | } 118 | } 119 | if err != nil { 120 | return fmt.Errorf("failed to apply PriorityLevelConfiguration: %v", err) 121 | } 122 | 123 | fmt.Printf("Successfully applied PriorityLevelConfiguration: %s\n", plc.Name) 124 | return nil 125 | } 126 | -------------------------------------------------------------------------------- /cmd/kperf/commands/virtualcluster/nodepool.go: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package virtualcluster 5 | 6 | import ( 7 | "context" 8 | "fmt" 9 | "os" 10 | "strings" 11 | "text/tabwriter" 12 | 13 | "github.com/Azure/kperf/cmd/kperf/commands/utils" 14 | "github.com/Azure/kperf/virtualcluster" 15 | "helm.sh/helm/v3/pkg/release" 16 | 17 | "github.com/urfave/cli" 18 | ) 19 | 20 | var nodepoolCommand = cli.Command{ 21 | Name: "nodepool", 22 | Usage: "Manage virtual node pools", 23 | Flags: []cli.Flag{ 24 | cli.StringFlag{ 25 | Name: "kubeconfig", 26 | Usage: "Path to the kubeconfig file", 27 | Value: utils.DefaultKubeConfigPath, 28 | }, 29 | }, 30 | Subcommands: []cli.Command{ 31 | nodepoolAddCommand, 32 | nodepoolDelCommand, 33 | nodepoolListCommand, 34 | }, 35 | } 36 | 37 | var nodepoolAddCommand = cli.Command{ 38 | Name: "add", 39 | Usage: "Add a virtual node pool", 40 | ArgsUsage: "NAME", 41 | Flags: []cli.Flag{ 42 | cli.IntFlag{ 43 | Name: "nodes", 44 | Usage: "The number of virtual nodes", 45 | Value: 10, 46 | }, 47 | cli.IntFlag{ 48 | Name: "cpu", 49 | Usage: "The allocatable CPU resource per node", 50 | Value: 8, 51 | }, 52 | cli.IntFlag{ 53 | Name: "memory", 54 | Usage: "The allocatable Memory resource per node (GiB)", 55 | Value: 16, 56 | }, 57 | cli.IntFlag{ 58 | Name: "max-pods", 59 | Usage: "The maximum Pods per node", 60 | Value: 110, 61 | }, 62 | cli.StringSliceFlag{ 63 | Name: "affinity", 64 | Usage: "Deploy controllers to the nodes with a specific labels (FORMAT: KEY=VALUE[,VALUE])", 65 | }, 66 | cli.StringSliceFlag{ 67 | Name: "node-labels", 68 | Usage: "Additional labels to node (FORMAT: KEY=VALUE)", 69 | }, 70 | cli.StringFlag{ 71 | Name: "shared-provider-id", 72 | Usage: "Force all the virtual nodes using one provider ID", 73 | Hidden: true, 74 | }, 75 | }, 76 | Action: func(cliCtx *cli.Context) error { 77 | if cliCtx.NArg() != 1 { 78 | return 
fmt.Errorf("required only one argument as nodepool name: %v", cliCtx.Args()) 79 | } 80 | nodepoolName := strings.TrimSpace(cliCtx.Args().Get(0)) 81 | if len(nodepoolName) == 0 { 82 | return fmt.Errorf("required non-empty nodepool name") 83 | } 84 | 85 | kubeCfgPath := cliCtx.GlobalString("kubeconfig") 86 | 87 | err := utils.ApplyPriorityLevelConfiguration(kubeCfgPath) 88 | if err != nil { 89 | return fmt.Errorf("failed to apply priority level configuration: %w", err) 90 | } 91 | 92 | affinityLabels, err := utils.KeyValuesMap(cliCtx.StringSlice("affinity")) 93 | if err != nil { 94 | return fmt.Errorf("failed to parse affinity: %w", err) 95 | } 96 | 97 | nodeLabels, err := utils.KeyValueMap(cliCtx.StringSlice("node-labels")) 98 | if err != nil { 99 | return fmt.Errorf("failed to parse node-labels: %w", err) 100 | } 101 | 102 | return virtualcluster.CreateNodepool(context.Background(), 103 | kubeCfgPath, 104 | nodepoolName, 105 | virtualcluster.WithNodepoolCPUOpt(cliCtx.Int("cpu")), 106 | virtualcluster.WithNodepoolMemoryOpt(cliCtx.Int("memory")), 107 | virtualcluster.WithNodepoolCountOpt(cliCtx.Int("nodes")), 108 | virtualcluster.WithNodepoolMaxPodsOpt(cliCtx.Int("max-pods")), 109 | virtualcluster.WithNodepoolNodeControllerAffinity(affinityLabels), 110 | virtualcluster.WithNodepoolLabelsOpt(nodeLabels), 111 | virtualcluster.WithNodepoolSharedProviderID(cliCtx.String("shared-provider-id")), 112 | ) 113 | }, 114 | } 115 | 116 | var nodepoolDelCommand = cli.Command{ 117 | Name: "delete", 118 | ShortName: "del", 119 | ArgsUsage: "NAME", 120 | Usage: "Delete a virtual node pool", 121 | Action: func(cliCtx *cli.Context) error { 122 | if cliCtx.NArg() != 1 { 123 | return fmt.Errorf("required only one argument as nodepool name") 124 | } 125 | nodepoolName := strings.TrimSpace(cliCtx.Args().Get(0)) 126 | if len(nodepoolName) == 0 { 127 | return fmt.Errorf("required non-empty nodepool name") 128 | } 129 | 130 | kubeCfgPath := cliCtx.GlobalString("kubeconfig") 131 | 132 | 
return virtualcluster.DeleteNodepool(context.Background(), kubeCfgPath, nodepoolName) 133 | }, 134 | } 135 | 136 | var nodepoolListCommand = cli.Command{ 137 | Name: "list", 138 | Usage: "List virtual node pools", 139 | Action: func(cliCtx *cli.Context) error { 140 | kubeCfgPath := cliCtx.GlobalString("kubeconfig") 141 | nodepools, err := virtualcluster.ListNodepools(context.Background(), kubeCfgPath) 142 | if err != nil { 143 | return err 144 | } 145 | return renderNodepoolList(nodepools) 146 | 147 | }, 148 | } 149 | 150 | func renderNodepoolList(nodepools []*release.Release) error { 151 | tw := tabwriter.NewWriter(os.Stdout, 1, 12, 3, ' ', 0) 152 | 153 | fmt.Fprintln(tw, "NAME\tNODES\tCPU\tMEMORY (GiB)\tMAX PODS\tSTATUS\t") 154 | for _, nodepool := range nodepools { 155 | fmt.Fprintf(tw, "%s\t%v\t%v\t%v\t%v\t%s\t\n", 156 | nodepool.Name, 157 | // TODO(weifu): show the number of read nodes 158 | fmt.Sprintf("? / %v", nodepool.Config["replicas"]), 159 | nodepool.Config["cpu"], 160 | nodepool.Config["memory"], 161 | nodepool.Config["maxPods"], 162 | nodepool.Info.Status, 163 | ) 164 | } 165 | return tw.Flush() 166 | } 167 | -------------------------------------------------------------------------------- /cmd/kperf/commands/virtualcluster/vc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package virtualcluster 5 | 6 | import "github.com/urfave/cli" 7 | 8 | // const namespace = "kperf-virtualcluster" 9 | 10 | // Command represents virtualcluster subcommand. 
11 | var Command = cli.Command{ 12 | Name: "virtualcluster", 13 | ShortName: "vc", 14 | Usage: "Setup virtual cluster and run workload on that", 15 | Subcommands: []cli.Command{ 16 | nodepoolCommand, 17 | }, 18 | } 19 | -------------------------------------------------------------------------------- /cmd/kperf/main.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package main 5 | 6 | import ( 7 | "fmt" 8 | "os" 9 | 10 | "github.com/Azure/kperf/cmd/kperf/commands" 11 | ) 12 | 13 | func main() { 14 | app := commands.App() 15 | if err := app.Run(os.Args); err != nil { 16 | fmt.Fprintf(os.Stderr, "%s: %v\n", app.Name, err) 17 | os.Exit(1) 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /contrib/cmd/runkperf/commands/bench/list_configmaps.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package bench 5 | 6 | import ( 7 | "context" 8 | "fmt" 9 | 10 | internaltypes "github.com/Azure/kperf/contrib/internal/types" 11 | "github.com/Azure/kperf/contrib/log" 12 | "github.com/Azure/kperf/contrib/utils" 13 | 14 | "github.com/urfave/cli" 15 | ) 16 | 17 | var benchListConfigmapsCase = cli.Command{ 18 | Name: "list_configmaps", 19 | Usage: ` 20 | 21 | The test suite is to generate configmaps in a namespace and list them. The load profile is fixed. 
22 | `, 23 | Flags: []cli.Flag{ 24 | cli.IntFlag{ 25 | Name: "size", 26 | Usage: "The size of each configmap (Unit: KiB)", 27 | Value: 100, 28 | }, 29 | cli.IntFlag{ 30 | Name: "group-size", 31 | Usage: "The size of each configmap group", 32 | Value: 100, 33 | }, 34 | cli.IntFlag{ 35 | Name: "configmap-amount", 36 | Usage: "Total amount of configmaps", 37 | Value: 1024, 38 | }, 39 | cli.IntFlag{ 40 | Name: "total", 41 | Usage: "Total requests per runner (There are 10 runners totally and runner's rate is 10)", 42 | Value: 1000, 43 | }, 44 | cli.IntFlag{ 45 | Name: "duration", 46 | Usage: "Duration of the benchmark in seconds. It will be ignored if --total is set.", 47 | Value: 0, 48 | }, 49 | }, 50 | Action: func(cliCtx *cli.Context) error { 51 | _, err := renderBenchmarkReportInterceptor( 52 | addAPIServerCoresInfoInterceptor(benchListConfigmapsRun), 53 | )(cliCtx) 54 | return err 55 | }, 56 | } 57 | 58 | var benchConfigmapNamespace = "kperf-configmaps-bench" 59 | 60 | // benchfigmapsCase is for subcommand benchConfigmapsCase. 
61 | func benchListConfigmapsRun(cliCtx *cli.Context) (*internaltypes.BenchmarkReport, error) { 62 | ctx := context.Background() 63 | kubeCfgPath := cliCtx.GlobalString("kubeconfig") 64 | 65 | rgCfgFile, rgSpec, rgCfgFileDone, err := newLoadProfileFromEmbed(cliCtx, 66 | "loadprofile/list_configmaps.yaml") 67 | if err != nil { 68 | return nil, err 69 | } 70 | defer func() { _ = rgCfgFileDone() }() 71 | 72 | // Create a namespace for the benchmark 73 | cmAmount := cliCtx.Int("configmap-amount") 74 | cmSize := cliCtx.Int("size") 75 | cmGroupSize := cliCtx.Int("group-size") 76 | 77 | err = utils.CreateConfigmaps(ctx, kubeCfgPath, cmAmount, cmSize, cmGroupSize, benchConfigmapNamespace, 0) 78 | if err != nil { 79 | return nil, err 80 | } 81 | 82 | defer func() { 83 | // Delete the configmaps after the benchmark 84 | err = utils.DeleteConfigmaps(ctx, kubeCfgPath, benchConfigmapNamespace, 0) 85 | if err != nil { 86 | log.GetLogger(ctx).WithKeyValues("level", "error"). 87 | LogKV("msg", fmt.Sprintf("Failed to delete configmaps: %v", err)) 88 | } 89 | 90 | // Delete the namespace after the benchmark 91 | kr := utils.NewKubectlRunner(kubeCfgPath, benchConfigmapNamespace) 92 | err := kr.DeleteNamespace(ctx, 0, benchConfigmapNamespace) 93 | if err != nil { 94 | log.GetLogger(ctx).WithKeyValues("level", "error"). 95 | LogKV("msg", fmt.Sprintf("Failed to delete namespace: %v", err)) 96 | } 97 | }() 98 | 99 | dpCtx, dpCancel := context.WithCancel(ctx) 100 | defer dpCancel() 101 | 102 | duration := cliCtx.Duration("duration") 103 | if duration != 0 { 104 | log.GetLogger(dpCtx). 105 | WithKeyValues("level", "info"). 
106 | LogKV("msg", fmt.Sprintf("Running for %v seconds", duration.Seconds())) 107 | } 108 | 109 | rgResult, derr := utils.DeployRunnerGroup(ctx, 110 | cliCtx.GlobalString("kubeconfig"), 111 | cliCtx.GlobalString("runner-image"), 112 | rgCfgFile, 113 | cliCtx.GlobalString("runner-flowcontrol"), 114 | cliCtx.GlobalString("rg-affinity"), 115 | ) 116 | 117 | if derr != nil { 118 | return nil, derr 119 | } 120 | 121 | return &internaltypes.BenchmarkReport{ 122 | Description: fmt.Sprintf(` 123 | Environment: Generate %v configmaps with %v bytes each in a namespace. 124 | Workload: List all configmaps in the namespace and get the percentile latency.`, 125 | cmAmount, cmSize), 126 | 127 | LoadSpec: *rgSpec, 128 | Result: *rgResult, 129 | Info: map[string]interface{}{ 130 | "configmapSizeInBytes": cmSize, 131 | "runningTime": duration.String(), 132 | }, 133 | }, nil 134 | } 135 | -------------------------------------------------------------------------------- /contrib/cmd/runkperf/commands/bench/node100_job1_pod3k.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package bench 5 | 6 | import ( 7 | "context" 8 | "fmt" 9 | "sync" 10 | "time" 11 | 12 | internaltypes "github.com/Azure/kperf/contrib/internal/types" 13 | "github.com/Azure/kperf/contrib/utils" 14 | 15 | "github.com/urfave/cli" 16 | ) 17 | 18 | var benchNode100Job1Pod3KCase = cli.Command{ 19 | Name: "node100_job1_pod3k", 20 | Usage: ` 21 | 22 | The test suite is to setup 100 virtual nodes and deploy one job with 3k pods on 23 | that nodes. It repeats to create and delete job. The load profile is fixed. 
24 | `, 25 | Flags: append( 26 | []cli.Flag{ 27 | cli.IntFlag{ 28 | Name: "total", 29 | Usage: "Total requests per runner (There are 10 runners totally and runner's rate is 10)", 30 | Value: 36000, 31 | }, 32 | }, 33 | commonFlags..., 34 | ), 35 | Action: func(cliCtx *cli.Context) error { 36 | _, err := renderBenchmarkReportInterceptor( 37 | addAPIServerCoresInfoInterceptor(benchNode100Job1Pod3KCaseRun), 38 | )(cliCtx) 39 | return err 40 | }, 41 | } 42 | 43 | // benchNode100Job1Pod3KCaseRun is for benchNode100Job1Pod3KCase subcommand. 44 | func benchNode100Job1Pod3KCaseRun(cliCtx *cli.Context) (*internaltypes.BenchmarkReport, error) { 45 | ctx := context.Background() 46 | kubeCfgPath := cliCtx.GlobalString("kubeconfig") 47 | 48 | rgCfgFile, rgSpec, rgCfgFileDone, err := newLoadProfileFromEmbed(cliCtx, 49 | "loadprofile/node100_job1_pod3k.yaml") 50 | if err != nil { 51 | return nil, err 52 | } 53 | defer func() { _ = rgCfgFileDone() }() 54 | 55 | vcDone, err := deployVirtualNodepool(ctx, cliCtx, "node100job1pod3k", 56 | 100, 57 | cliCtx.Int("cpu"), 58 | cliCtx.Int("memory"), 59 | cliCtx.Int("max-pods"), 60 | ) 61 | if err != nil { 62 | return nil, fmt.Errorf("failed to deploy virtual node: %w", err) 63 | } 64 | defer func() { _ = vcDone() }() 65 | 66 | var wg sync.WaitGroup 67 | wg.Add(1) 68 | 69 | jobInterval := 5 * time.Second 70 | jobCtx, jobCancel := context.WithCancel(ctx) 71 | go func() { 72 | defer wg.Done() 73 | 74 | utils.RepeatJobWithPod(jobCtx, kubeCfgPath, "job1pod3k", "workload/3kpod.job.yaml", jobInterval) 75 | }() 76 | 77 | rgResult, derr := utils.DeployRunnerGroup(ctx, 78 | cliCtx.GlobalString("kubeconfig"), 79 | cliCtx.GlobalString("runner-image"), 80 | rgCfgFile, 81 | cliCtx.GlobalString("runner-flowcontrol"), 82 | cliCtx.GlobalString("rg-affinity"), 83 | ) 84 | jobCancel() 85 | wg.Wait() 86 | 87 | if derr != nil { 88 | return nil, derr 89 | } 90 | 91 | return &internaltypes.BenchmarkReport{ 92 | Description: fmt.Sprintf(` 93 | Environment: 100 
virtual nodes managed by kwok-controller, 94 | Workload: Deploy 1 job with 3,000 pods repeatedly. The parallelism is 100. The interval is %v`, jobInterval), 95 | LoadSpec: *rgSpec, 96 | Result: *rgResult, 97 | Info: make(map[string]interface{}), 98 | }, nil 99 | } 100 | -------------------------------------------------------------------------------- /contrib/cmd/runkperf/commands/bench/node10_job1_pod100.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package bench 5 | 6 | import ( 7 | "context" 8 | "fmt" 9 | "sync" 10 | "time" 11 | 12 | internaltypes "github.com/Azure/kperf/contrib/internal/types" 13 | "github.com/Azure/kperf/contrib/utils" 14 | 15 | "github.com/urfave/cli" 16 | ) 17 | 18 | var benchNode10Job1Pod100Case = cli.Command{ 19 | Name: "node10_job1_pod100", 20 | Usage: ` 21 | 22 | The test suite is to setup 10 virtual nodes and deploy one job with 100 pods on 23 | that nodes. It repeats to create and delete job. The load profile is fixed. 24 | `, 25 | Flags: append( 26 | []cli.Flag{ 27 | cli.IntFlag{ 28 | Name: "total", 29 | Usage: "Total requests per runner (There are 10 runners totally and runner's rate is 1)", 30 | Value: 1000, 31 | }, 32 | }, 33 | commonFlags..., 34 | ), 35 | Action: func(cliCtx *cli.Context) error { 36 | _, err := renderBenchmarkReportInterceptor( 37 | addAPIServerCoresInfoInterceptor(benchNode10Job1Pod100CaseRun), 38 | )(cliCtx) 39 | return err 40 | }, 41 | } 42 | 43 | // benchNode10Job1Pod100CaseRun is for benchNode10Job1Pod100Case subcommand. 
44 | func benchNode10Job1Pod100CaseRun(cliCtx *cli.Context) (*internaltypes.BenchmarkReport, error) { 45 | ctx := context.Background() 46 | kubeCfgPath := cliCtx.GlobalString("kubeconfig") 47 | 48 | rgCfgFile, rgSpec, rgCfgFileDone, err := newLoadProfileFromEmbed(cliCtx, 49 | "loadprofile/node10_job1_pod100.yaml") 50 | if err != nil { 51 | return nil, err 52 | } 53 | defer func() { _ = rgCfgFileDone() }() 54 | 55 | vcDone, err := deployVirtualNodepool(ctx, cliCtx, "node10job1pod100", 56 | 100, 57 | cliCtx.Int("cpu"), 58 | cliCtx.Int("memory"), 59 | cliCtx.Int("max-pods"), 60 | ) 61 | if err != nil { 62 | return nil, fmt.Errorf("failed to deploy virtual node: %w", err) 63 | } 64 | defer func() { _ = vcDone() }() 65 | 66 | var wg sync.WaitGroup 67 | wg.Add(1) 68 | 69 | jobInterval := 5 * time.Second 70 | jobCtx, jobCancel := context.WithCancel(ctx) 71 | go func() { 72 | defer wg.Done() 73 | 74 | utils.RepeatJobWithPod(jobCtx, kubeCfgPath, "job1pod100", "workload/100pod.job.yaml", jobInterval) 75 | }() 76 | 77 | rgResult, derr := utils.DeployRunnerGroup(ctx, 78 | cliCtx.GlobalString("kubeconfig"), 79 | cliCtx.GlobalString("runner-image"), 80 | rgCfgFile, 81 | cliCtx.GlobalString("runner-flowcontrol"), 82 | cliCtx.GlobalString("rg-affinity"), 83 | ) 84 | jobCancel() 85 | wg.Wait() 86 | 87 | if derr != nil { 88 | return nil, derr 89 | } 90 | 91 | return &internaltypes.BenchmarkReport{ 92 | Description: fmt.Sprintf(` 93 | Environment: 100 virtual nodes managed by kwok-controller, 94 | Workload: Deploy 1 job with 3,000 pods repeatedly. The parallelism is 100. The interval is %v`, jobInterval), 95 | LoadSpec: *rgSpec, 96 | Result: *rgResult, 97 | Info: make(map[string]interface{}), 98 | }, nil 99 | } 100 | -------------------------------------------------------------------------------- /contrib/cmd/runkperf/commands/bench/root.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 
2 | // Licensed under the MIT License. 3 | 4 | package bench 5 | 6 | import ( 7 | kperfcmdutils "github.com/Azure/kperf/cmd/kperf/commands/utils" 8 | 9 | "github.com/urfave/cli" 10 | ) 11 | 12 | // Command represents bench subcommand. 13 | var Command = cli.Command{ 14 | Name: "bench", 15 | Usage: "Run benchmark test cases", 16 | Flags: []cli.Flag{ 17 | cli.StringFlag{ 18 | Name: "kubeconfig", 19 | Usage: "Path to the kubeconfig file", 20 | Value: kperfcmdutils.DefaultKubeConfigPath, 21 | }, 22 | cli.StringFlag{ 23 | Name: "runner-image", 24 | Usage: "The runner's conainer image", 25 | // TODO(weifu): 26 | // 27 | // We should build release pipeline so that we can 28 | // build with fixed public release image as default value. 29 | // Right now, we need to set image manually. 30 | Required: true, 31 | }, 32 | cli.StringFlag{ 33 | Name: "runner-flowcontrol", 34 | Usage: "Apply flowcontrol to runner group. (FORMAT: PriorityLevel:MatchingPrecedence)", 35 | Value: "workload-low:1000", 36 | }, 37 | cli.StringFlag{ 38 | Name: "vc-affinity", 39 | Usage: "Deploy virtualnode's controller with a specific labels (FORMAT: KEY=VALUE[,VALUE])", 40 | Value: "node.kubernetes.io/instance-type=Standard_D8s_v3,m4.2xlarge,n1-standard-8", 41 | }, 42 | cli.StringFlag{ 43 | Name: "rg-affinity", 44 | Usage: "Deploy runner group with a specific labels (FORMAT: KEY=VALUE[,VALUE])", 45 | Value: "node.kubernetes.io/instance-type=Standard_D16s_v3,m4.4xlarge,n1-standard-16", 46 | }, 47 | cli.BoolFlag{ 48 | Name: "eks", 49 | Usage: "Indicates the target kubernetes cluster is EKS", 50 | Hidden: true, 51 | }, 52 | cli.StringFlag{ 53 | Name: "result", 54 | Usage: "Path to the file which stores results", 55 | }, 56 | }, 57 | Subcommands: []cli.Command{ 58 | benchNode10Job1Pod100Case, 59 | benchNode100Job1Pod3KCase, 60 | benchNode100DeploymentNPod10KCase, 61 | benchCiliumCustomResourceListCase, 62 | benchListConfigmapsCase, 63 | }, 64 | } 65 | 66 | // commonFlags is used as subcommand's option 
// commonFlags is used as subcommand's option instead of global options.
//
// NOTE: The format of global options, like `--option xyz subcommand`, is not
// easy to extend with existing configuration. If the subcommand defines its
// own options, the user can just append them, like `subcommand --options xyz`.
var commonFlags = []cli.Flag{
	// Resources advertised by each virtual (kwok-managed) node.
	cli.IntFlag{
		Name:  "cpu",
		Usage: "the allocatable cpu resource per node",
		Value: 32,
	},
	cli.IntFlag{
		Name:  "memory",
		Usage: "The allocatable Memory resource per node (GiB)",
		Value: 96,
	},
	cli.IntFlag{
		Name:  "max-pods",
		Usage: "The maximum Pods per node",
		Value: 110,
	},
	// Wire format used by runners when talking to the apiserver.
	cli.StringFlag{
		Name:  "content-type",
		Usage: "Content type (json or protobuf)",
		Value: "json",
	},
}
28 | func addAPIServerCoresInfoInterceptor(handler subcmdActionFunc) subcmdActionFunc { 29 | return func(cliCtx *cli.Context) (*internaltypes.BenchmarkReport, error) { 30 | ctx := context.Background() 31 | kubeCfgPath := cliCtx.GlobalString("kubeconfig") 32 | 33 | beforeCores, ferr := utils.FetchAPIServerCores(ctx, kubeCfgPath) 34 | if ferr != nil { 35 | log.GetLogger(ctx). 36 | WithKeyValues("level", "warn"). 37 | LogKV("msg", "failed to fetch apiserver cores", "error", ferr) 38 | } 39 | 40 | report, err := handler(cliCtx) 41 | if err != nil { 42 | return nil, err 43 | } 44 | 45 | afterCores, ferr := utils.FetchAPIServerCores(ctx, kubeCfgPath) 46 | if ferr != nil { 47 | log.GetLogger(ctx). 48 | WithKeyValues("level", "warn"). 49 | LogKV("msg", "failed to fetch apiserver cores", "error", ferr) 50 | } 51 | 52 | report.Info["apiserver"] = map[string]interface{}{ 53 | "cores": map[string]interface{}{ 54 | "before": beforeCores, 55 | "after": afterCores, 56 | }, 57 | } 58 | return report, nil 59 | } 60 | } 61 | 62 | // renderBenchmarkReportInterceptor renders benchmark report into file or stdout. 
63 | func renderBenchmarkReportInterceptor(handler subcmdActionFunc) subcmdActionFunc { 64 | return func(cliCtx *cli.Context) (*internaltypes.BenchmarkReport, error) { 65 | report, err := handler(cliCtx) 66 | if err != nil { 67 | return nil, err 68 | } 69 | 70 | outF := os.Stdout 71 | if targetFile := cliCtx.GlobalString("result"); targetFile != "" { 72 | targetFileDir := filepath.Dir(targetFile) 73 | 74 | _, err = os.Stat(targetFileDir) 75 | if err != nil && os.IsNotExist(err) { 76 | err = os.MkdirAll(targetFileDir, 0750) 77 | } 78 | if err != nil { 79 | return nil, fmt.Errorf("failed to ensure output's dir %s: %w", targetFileDir, err) 80 | } 81 | 82 | outF, err = os.Create(targetFile) 83 | if err != nil { 84 | return nil, err 85 | } 86 | defer outF.Close() 87 | } 88 | 89 | encoder := json.NewEncoder(outF) 90 | encoder.SetIndent("", " ") 91 | 92 | if err := encoder.Encode(report); err != nil { 93 | return nil, fmt.Errorf("failed to encode json: %w", err) 94 | } 95 | return report, nil 96 | } 97 | } 98 | 99 | // deployVirtualNodepool deploys virtual nodepool. 100 | func deployVirtualNodepool(ctx context.Context, cliCtx *cli.Context, target string, nodes, cpu, memory, maxPods int) (func() error, error) { 101 | log.GetLogger(ctx). 102 | WithKeyValues("level", "info"). 103 | LogKV("msg", "deploying virtual nodepool", "name", target) 104 | 105 | kubeCfgPath := cliCtx.GlobalString("kubeconfig") 106 | virtualNodeAffinity := cliCtx.GlobalString("vc-affinity") 107 | 108 | kr := utils.NewKperfRunner(kubeCfgPath, "") 109 | 110 | var sharedProviderID string 111 | var err error 112 | 113 | if cliCtx.GlobalBool("eks") { 114 | sharedProviderID, err = utils.FetchNodeProviderIDByType(ctx, kubeCfgPath, utils.EKSIdleNodepoolInstanceType) 115 | if err != nil { 116 | return nil, fmt.Errorf("failed to get EKS idle node (type: %s) providerID: %w", 117 | utils.EKSIdleNodepoolInstanceType, err) 118 | } 119 | } 120 | 121 | log.GetLogger(ctx). 122 | WithKeyValues("level", "info"). 
123 | LogKV("msg", "trying to delete nodepool if necessary", "name", target) 124 | if err = kr.DeleteNodepool(ctx, 0, target); err != nil { 125 | log.GetLogger(ctx). 126 | WithKeyValues("level", "warn"). 127 | LogKV("msg", "failed to delete nodepool", "name", target, "error", err) 128 | } 129 | 130 | err = kr.NewNodepool(ctx, 0, target, nodes, cpu, memory, maxPods, virtualNodeAffinity, sharedProviderID) 131 | if err != nil { 132 | return nil, fmt.Errorf("failed to create nodepool %s: %w", target, err) 133 | } 134 | 135 | return func() error { 136 | return kr.DeleteNodepool(ctx, 0, target) 137 | }, nil 138 | } 139 | 140 | func NewRunnerGroupSpecFromYamlFile() {} 141 | 142 | // newLoadProfileFromEmbed loads load profile from embed and tweaks that load 143 | // profile. 144 | func newLoadProfileFromEmbed(cliCtx *cli.Context, name string) (_name string, _spec *types.RunnerGroupSpec, _cleanup func() error, _err error) { 145 | var rgSpec types.RunnerGroupSpec 146 | rgCfgFile, rgCfgFileDone, err := utils.NewRunnerGroupSpecFileFromEmbed( 147 | name, 148 | func(spec *types.RunnerGroupSpec) error { 149 | reqs := cliCtx.Int("total") 150 | if reqs < 0 { 151 | return fmt.Errorf("invalid total-requests value: %v", reqs) 152 | } 153 | reqsTime := cliCtx.Int("duration") 154 | if !cliCtx.IsSet("total") && reqsTime > 0 { 155 | reqs = 0 156 | spec.Profile.Spec.Duration = reqsTime 157 | } 158 | 159 | rgAffinity := cliCtx.GlobalString("rg-affinity") 160 | affinityLabels, err := kperfcmdutils.KeyValuesMap([]string{rgAffinity}) 161 | if err != nil { 162 | return fmt.Errorf("failed to parse %s affinity: %w", rgAffinity, err) 163 | } 164 | 165 | if reqs != 0 { 166 | spec.Profile.Spec.Total = reqs 167 | } 168 | spec.NodeAffinity = affinityLabels 169 | spec.Profile.Spec.ContentType = types.ContentType(cliCtx.String("content-type")) 170 | data, _ := yaml.Marshal(spec) 171 | 172 | log.GetLogger(context.TODO()). 173 | WithKeyValues("level", "info"). 
174 | LogKV("msg", "dump load profile", "config", string(data)) 175 | 176 | rgSpec = *spec 177 | return nil 178 | }, 179 | ) 180 | if err != nil { 181 | return "", nil, nil, err 182 | } 183 | return rgCfgFile, &rgSpec, rgCfgFileDone, nil 184 | } 185 | -------------------------------------------------------------------------------- /contrib/cmd/runkperf/commands/data/root.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package data 5 | 6 | import ( 7 | "github.com/Azure/kperf/contrib/cmd/runkperf/commands/data/configmaps" 8 | 9 | "github.com/urfave/cli" 10 | ) 11 | 12 | var Command = cli.Command{ 13 | Name: "data", 14 | Usage: "Create data for runkperf", 15 | Subcommands: []cli.Command{ 16 | configmaps.Command, 17 | }, 18 | } 19 | -------------------------------------------------------------------------------- /contrib/cmd/runkperf/commands/root.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package commands 5 | 6 | import ( 7 | "flag" 8 | "fmt" 9 | "os" 10 | "strconv" 11 | 12 | "github.com/Azure/kperf/contrib/cmd/runkperf/commands/bench" 13 | "github.com/Azure/kperf/contrib/cmd/runkperf/commands/data" 14 | "github.com/Azure/kperf/contrib/cmd/runkperf/commands/warmup" 15 | 16 | "github.com/urfave/cli" 17 | "k8s.io/klog/v2" 18 | ) 19 | 20 | // App returns kperf application. 
21 | func App() *cli.App { 22 | return &cli.App{ 23 | Name: "runkperf", 24 | // TODO: add more fields 25 | Commands: []cli.Command{ 26 | warmup.Command, 27 | bench.Command, 28 | data.Command, 29 | }, 30 | Flags: []cli.Flag{ 31 | cli.StringFlag{ 32 | Name: "v", 33 | Usage: "log level for V logs", 34 | Value: "0", 35 | }, 36 | }, 37 | Before: func(cliCtx *cli.Context) error { 38 | return initKlog(cliCtx) 39 | }, 40 | } 41 | } 42 | 43 | // initKlog initializes klog. 44 | func initKlog(cliCtx *cli.Context) error { 45 | klogFlagset := flag.NewFlagSet(os.Args[0], flag.ExitOnError) 46 | klog.InitFlags(klogFlagset) 47 | 48 | vInStr := cliCtx.GlobalString("v") 49 | if vFlag, err := strconv.Atoi(vInStr); err != nil || vFlag < 0 { 50 | return fmt.Errorf("invalid value \"%v\" for flag -v: value must be a non-negative integer", vInStr) 51 | } 52 | 53 | if err := klogFlagset.Set("v", vInStr); err != nil { 54 | return fmt.Errorf("failed to set log level: %w", err) 55 | } 56 | return nil 57 | } 58 | -------------------------------------------------------------------------------- /contrib/cmd/runkperf/main.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package main 5 | 6 | import ( 7 | "fmt" 8 | "os" 9 | 10 | "github.com/Azure/kperf/contrib/cmd/runkperf/commands" 11 | ) 12 | 13 | func main() { 14 | app := commands.App() 15 | if err := app.Run(os.Args); err != nil { 16 | fmt.Fprintf(os.Stderr, "%s: %v\n", app.Name, err) 17 | os.Exit(1) 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /contrib/internal/manifests/helm.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | package manifests 5 | 6 | import ( 7 | rootmainfests "github.com/Azure/kperf/manifests" 8 | 9 | "helm.sh/helm/v3/pkg/chart" 10 | ) 11 | 12 | // LoadChart returns chart from current package's embed filesystem. 13 | func LoadChart(componentName string) (*chart.Chart, error) { 14 | return rootmainfests.LoadChartFromEmbedFS(FS, componentName) 15 | } 16 | -------------------------------------------------------------------------------- /contrib/internal/manifests/loadprofile/cilium_cr_list.yaml: -------------------------------------------------------------------------------- 1 | # count defines how many runners in the group. 2 | count: 10 3 | 4 | # This simulates worst-case behavior for Cilium: 5 | # every agent hammering apiserver with stale LIST requests for CiliumIdentity and CiliumEndpoint. 6 | # The request rate is much, much higher than what we'd expect to see in production with 7 | # client-go exponential backoff configured, so if apiserver can survive this onslaught it can 8 | # survive anything Cilium throws at it. 9 | loadProfile: 10 | version: 1 11 | description: cilium list profile 12 | spec: 13 | rate: 20 # 20 req/sec * 10 runners = 200 req/sec 14 | total: 12000 # run for ~10 minutes, 600 seconds * 20/sec = 12000 15 | # 5k node cluster, one cilium agent per node 16 | # divided by the number of runners. 17 | conns: 500 18 | client: 500 19 | 20 | contentType: json 21 | disableHTTP2: false 22 | 23 | # 50/50 mix of ciliumidentity and ciliumendpoint queries. 24 | # We're simulating with CilumEndpointSlice disabled here, on the assumption that CES will always 25 | # have lower count than CEP, so if we can survive with CEP only then we're in good shape. 
26 | requests: 27 | - staleList: 28 | group: cilium.io 29 | version: v2 30 | resource: ciliumidentities 31 | shares: 1000 # Has 50% chance = 1000 / (1000 + 1000) 32 | - staleList: 33 | group: cilium.io 34 | version: v2 35 | resource: ciliumendpoints 36 | namespace: "default" 37 | shares: 1000 # Has 50% chance = 1000 / (1000 + 1000) -------------------------------------------------------------------------------- /contrib/internal/manifests/loadprofile/list_configmaps.yaml: -------------------------------------------------------------------------------- 1 | count: 10 2 | loadProfile: 3 | version: 1 4 | description: "list configmaps" 5 | spec: 6 | rate: 10 7 | conns: 10 8 | client: 10 9 | contentType: json 10 | disableHTTP2: false 11 | maxRetries: 0 12 | requests: 13 | - staleList: 14 | version: v1 15 | resource: configmaps 16 | shares: 100 # chance 100 / (100 + 100) 17 | - quorumList: 18 | version: v1 19 | resource: configmaps 20 | shares: 100 # chance 100 / (100 + 100) 21 | -------------------------------------------------------------------------------- /contrib/internal/manifests/loadprofile/node100_job1_pod3k.yaml: -------------------------------------------------------------------------------- 1 | count: 10 2 | loadProfile: 3 | version: 1 4 | description: "node100-job1-pod3k" 5 | spec: 6 | rate: 10 7 | total: 36000 8 | conns: 10 9 | client: 100 10 | contentType: json 11 | disableHTTP2: false 12 | maxRetries: 0 13 | requests: 14 | - staleList: 15 | version: v1 16 | resource: pods 17 | shares: 1000 # chance 1000 / (1000 + 100 + 100) 18 | - quorumList: 19 | version: v1 20 | resource: pods 21 | limit: 1000 22 | shares: 100 # chance 100 / (1000 + 100 + 100) 23 | - quorumList: 24 | version: v1 25 | resource: events 26 | limit: 1000 27 | shares: 100 # chance 100 / (1000 + 100 + 100) 28 | -------------------------------------------------------------------------------- /contrib/internal/manifests/loadprofile/node100_pod10k.yaml: 
count: 10
loadProfile:
  version: 1
  description: "node100-pod10k"
  spec:
    rate: 10
    total: 36000
    conns: 10
    client: 100
    contentType: json
    disableHTTP2: false
    maxRetries: 0
    requests:
      - staleList:
          version: v1
          resource: pods
          fieldSelector: "spec.nodeName=node100pod10k-49"
        shares: 1000 # 1000 / (1000 + 100 + 200) * 10 = 7.7 req/s
      - staleList:
          version: v1
          resource: pods
        shares: 100 # 100 / (1000 + 100 + 200) * 10 = 0.7 req/s
      - quorumList:
          version: v1
          resource: pods
          namespace: benchmark-0
          # NOTE: It's to simulate the request created by daemonset to get pods,
          # including kubelet, when they want to get pods from ETCD. The limit
          # is 100 because it's close to MaxPods value.
          limit: 100
          # FIX: was misspelled "seletor", so the label selector was silently
          # ignored by the YAML decoder and the quorum list was unfiltered.
          selector: "app=benchmark"
        shares: 200 # 200 / (1000 + 100 + 200) * 10 = 1.5 req/s
-------------------------------------------------------------------------------- 1 | count: 10 2 | loadProfile: 3 | version: 1 4 | description: "warmup" 5 | spec: 6 | rate: 20 7 | total: 10000 8 | conns: 10 9 | client: 100 10 | contentType: json 11 | disableHTTP2: false 12 | maxRetries: 0 13 | requests: 14 | - staleList: 15 | version: v1 16 | resource: pods 17 | shares: 1000 # chance 1000 / (1000 + 100 + 100) 18 | - quorumList: 19 | version: v1 20 | resource: pods 21 | limit: 1000 22 | shares: 100 # chance 100 / (1000 + 100 + 100) 23 | - quorumList: 24 | version: v1 25 | resource: events 26 | limit: 1000 27 | shares: 100 # chance 100 / (1000 + 100 + 100) 28 | -------------------------------------------------------------------------------- /contrib/internal/manifests/manifest.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package manifests 5 | 6 | import "embed" 7 | 8 | // FS embeds the manifests 9 | // 10 | //go:embed workload/* 11 | //go:embed loadprofile/* 12 | var FS embed.FS 13 | -------------------------------------------------------------------------------- /contrib/internal/manifests/workload/100pod.job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: batchjobs 5 | spec: 6 | completions: 100 7 | parallelism: 10 8 | template: 9 | metadata: 10 | labels: 11 | app: fake-pod 12 | spec: 13 | restartPolicy: Never 14 | affinity: 15 | nodeAffinity: 16 | requiredDuringSchedulingIgnoredDuringExecution: 17 | nodeSelectorTerms: 18 | - matchExpressions: 19 | - key: type 20 | operator: In 21 | values: 22 | - kperf-virtualnodes 23 | # A taints was added to an automatically created Node. 24 | # You can remove taints of Node or add this tolerations. 
25 | tolerations: 26 | - key: "kperf.io/nodepool" 27 | operator: "Exists" 28 | effect: "NoSchedule" 29 | containers: 30 | - name: fake-container 31 | image: fake-image 32 | -------------------------------------------------------------------------------- /contrib/internal/manifests/workload/3kpod.job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: batchjobs 5 | spec: 6 | completions: 3000 7 | parallelism: 100 8 | template: 9 | metadata: 10 | labels: 11 | app: fake-pod 12 | spec: 13 | restartPolicy: Never 14 | affinity: 15 | nodeAffinity: 16 | requiredDuringSchedulingIgnoredDuringExecution: 17 | nodeSelectorTerms: 18 | - matchExpressions: 19 | - key: type 20 | operator: In 21 | values: 22 | - kperf-virtualnodes 23 | # A taints was added to an automatically created Node. 24 | # You can remove taints of Node or add this tolerations. 25 | tolerations: 26 | - key: "kperf.io/nodepool" 27 | operator: "Exists" 28 | effect: "NoSchedule" 29 | containers: 30 | - name: fake-container 31 | image: fake-image 32 | -------------------------------------------------------------------------------- /contrib/internal/manifests/workload/deployments/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: "deployment" 3 | version: "0.0.1" 4 | -------------------------------------------------------------------------------- /contrib/internal/manifests/workload/deployments/templates/deployments.tpl: -------------------------------------------------------------------------------- 1 | {{- $pattern := .Values.namePattern }} 2 | {{- $replica := int .Values.replica }} 3 | {{- $paddingBytes := int .Values.paddingBytes }} 4 | {{- range $index := (untilStep 0 (int .Values.total) 1) }} 5 | apiVersion: v1 6 | kind: Namespace 7 | metadata: 8 | name: {{ $pattern }}-{{ $index }} 9 | labels: 10 | name: benchmark-testing 11 | --- 12 | 
apiVersion: apps/v1 13 | kind: Deployment 14 | metadata: 15 | name: {{ $pattern }}-{{ $index }} 16 | namespace: {{ $pattern }}-{{ $index }} 17 | labels: 18 | app: {{ $pattern }} 19 | spec: 20 | replicas: {{ $replica }} 21 | strategy: 22 | rollingUpdate: 23 | maxSurge: 100 24 | type: RollingUpdate 25 | selector: 26 | matchLabels: 27 | app: {{ $pattern }} 28 | index: "{{ $index }}" 29 | template: 30 | metadata: 31 | labels: 32 | app: {{ $pattern }} 33 | index: "{{ $index }}" 34 | annotations: 35 | data: "{{ randAlphaNum $paddingBytes | nospace }}" 36 | spec: 37 | affinity: 38 | nodeAffinity: 39 | requiredDuringSchedulingIgnoredDuringExecution: 40 | nodeSelectorTerms: 41 | - matchExpressions: 42 | - key: type 43 | operator: In 44 | values: 45 | - kperf-virtualnodes 46 | tolerations: 47 | - key: "kperf.io/nodepool" 48 | operator: "Exists" 49 | effect: "NoSchedule" 50 | containers: 51 | - name: fake-container 52 | image: fake-image 53 | --- 54 | {{- end}} 55 | -------------------------------------------------------------------------------- /contrib/internal/manifests/workload/deployments/values.yaml: -------------------------------------------------------------------------------- 1 | namePattern: "benchmark" 2 | total: 5 3 | replica: 2000 4 | paddingBytes: 0 5 | -------------------------------------------------------------------------------- /contrib/internal/mountns/ns_linux.go: -------------------------------------------------------------------------------- 1 | //go:build linux 2 | 3 | // Copyright (c) Microsoft Corporation. 4 | // Licensed under the MIT License. 5 | 6 | package mountns 7 | 8 | import ( 9 | "fmt" 10 | "runtime" 11 | "sync" 12 | 13 | "golang.org/x/sys/unix" 14 | ) 15 | 16 | // Executes runs the closure in a new mount namespace. 17 | // 18 | // NOTE: The caller should not call runtime.UnlockOSThread or fork any new 19 | // goroutines, because it's risk. 
// Executes runs the closure in a new mount namespace.
//
// A dedicated goroutine locks itself to its OS thread and unshares the
// mount namespace (CLONE_FS | CLONE_NEWNS) so that any mount changes made
// by the closure stay invisible to the rest of the process.
//
// NOTE: The caller should not call runtime.UnlockOSThread or fork any new
// goroutines, because it's risky. The thread in the new mount namespace is
// deliberately never unlocked: the Go runtime cleans up (destroys rather
// than reuses) a thread whose goroutine exits while still locked, which is
// exactly what we want for a thread left in a foreign namespace.
func Executes(run func() error) error {
	var wg sync.WaitGroup
	wg.Add(1)

	// innerErr carries the closure's result out of the goroutine;
	// wg.Wait below guarantees it is written before being read.
	var innerErr error
	go func() {
		defer wg.Done()

		// Pin this goroutine to its OS thread so the unshared mount
		// namespace stays confined to this thread only.
		runtime.LockOSThread()

		err := unix.Unshare(unix.CLONE_FS | unix.CLONE_NEWNS)
		if err != nil {
			innerErr = fmt.Errorf("failed to create a new mount namespace: %w", err)
			return
		}
		innerErr = run()
	}()
	wg.Wait()

	return innerErr
}
19 | Info map[string]interface{} `json:"info" yaml:"info"` 20 | } 21 | -------------------------------------------------------------------------------- /contrib/log/klogger.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package log 5 | 6 | import ( 7 | "fmt" 8 | 9 | "k8s.io/klog/v2" 10 | ) 11 | 12 | type klogger struct { 13 | level klog.Level 14 | kvs []any 15 | } 16 | 17 | // Logf implements Logger.Logf. 18 | func (kl klogger) Logf(msg string, args ...any) { 19 | if len(kl.kvs) > 0 { 20 | klog.V(kl.level).InfoS(fmt.Sprintf(msg, args...), kl.kvs...) 21 | return 22 | } 23 | klog.V(kl.level).Infof(msg, args...) 24 | } 25 | 26 | // LogKV implements Logger.LogKV. 27 | func (kl klogger) LogKV(kvs ...any) { 28 | klog.V(kl.level).InfoS("", append(copySlice(kl.kvs), kvs...)...) 29 | } 30 | 31 | // WithKeyValues implements Logger.WithKeyValues. 32 | func (kl klogger) WithKeyValues(kvs ...any) Logger { 33 | return klogger{ 34 | level: kl.level, 35 | kvs: append(copySlice(kl.kvs), kvs...), 36 | } 37 | } 38 | 39 | // NewLogger returns builtin Logger implementation. 40 | func NewLogger(level int32) Logger { 41 | return klogger{level: klog.Level(level)} 42 | } 43 | 44 | func copySlice(src []any) []any { 45 | if len(src) == 0 { 46 | return []any{} 47 | } 48 | 49 | dst := make([]any, len(src)) 50 | copy(dst, src) 51 | return dst 52 | } 53 | -------------------------------------------------------------------------------- /contrib/log/logger.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | package log 5 | 6 | import ( 7 | "context" 8 | ) 9 | 10 | type Logger interface { 11 | Logf(msg string, args ...any) 12 | 13 | LogKV(kvs ...any) 14 | 15 | // WithKeyValues returns new logger with default key values 16 | WithKeyValues(kvs ...any) Logger 17 | } 18 | 19 | type loggerKey struct{} 20 | 21 | // WithLogger returns a context with provided logger. 22 | func WithLogger(ctx context.Context, logger Logger) context.Context { 23 | return context.WithValue(ctx, loggerKey{}, logger) 24 | } 25 | 26 | // GetLogger returns logger from context if applicable. Or it will returns 27 | // builtin logger. 28 | func GetLogger(ctx context.Context) Logger { 29 | if logger := ctx.Value(loggerKey{}); logger != nil { 30 | return logger.(Logger) 31 | } 32 | return NewLogger(2) 33 | } 34 | -------------------------------------------------------------------------------- /contrib/utils/kperf_cmd.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package utils 5 | 6 | import ( 7 | "context" 8 | "fmt" 9 | "time" 10 | ) 11 | 12 | // KperfRunner is the wrapper of exec.Command to execute kperf command. 13 | type KperfRunner struct { 14 | kubeCfgPath string 15 | runnerImage string 16 | } 17 | 18 | func NewKperfRunner(kubeCfgPath string, runnerImage string) *KperfRunner { 19 | return &KperfRunner{ 20 | kubeCfgPath: kubeCfgPath, 21 | runnerImage: runnerImage, 22 | } 23 | } 24 | 25 | // NewNodepool creates new virtual nodepool. 
26 | func (kr *KperfRunner) NewNodepool( 27 | ctx context.Context, 28 | timeout time.Duration, 29 | name string, nodes int, cpu, memory, maxPods int, 30 | affinity string, 31 | sharedProviderID string, 32 | ) error { 33 | args := []string{"vc", "nodepool"} 34 | if kr.kubeCfgPath != "" { 35 | args = append(args, fmt.Sprintf("--kubeconfig=%s", kr.kubeCfgPath)) 36 | } 37 | args = append(args, "add", name, 38 | fmt.Sprintf("--nodes=%v", nodes), 39 | fmt.Sprintf("--cpu=%v", cpu), 40 | fmt.Sprintf("--memory=%v", memory), 41 | fmt.Sprintf("--max-pods=%v", maxPods), 42 | ) 43 | if affinity != "" { 44 | args = append(args, fmt.Sprintf("--affinity=%v", affinity)) 45 | } 46 | if sharedProviderID != "" { 47 | args = append(args, fmt.Sprintf("--shared-provider-id=%v", sharedProviderID)) 48 | } 49 | 50 | _, err := runCommand(ctx, timeout, "kperf", args) 51 | return err 52 | } 53 | 54 | // DeleteNodepool deletes a virtual nodepool by a given name. 55 | func (kr *KperfRunner) DeleteNodepool(ctx context.Context, timeout time.Duration, name string) error { 56 | args := []string{"vc", "nodepool"} 57 | if kr.kubeCfgPath != "" { 58 | args = append(args, fmt.Sprintf("--kubeconfig=%s", kr.kubeCfgPath)) 59 | } 60 | args = append(args, "delete", name) 61 | 62 | _, err := runCommand(ctx, timeout, "kperf", args) 63 | return err 64 | } 65 | 66 | // RGRun deploys runner group into kubernetes cluster. 
67 | func (kr *KperfRunner) RGRun(ctx context.Context, timeout time.Duration, rgCfgPath, flowcontrol, affinity string) error { 68 | args := []string{"rg"} 69 | if kr.kubeCfgPath != "" { 70 | args = append(args, fmt.Sprintf("--kubeconfig=%s", kr.kubeCfgPath)) 71 | } 72 | args = append(args, "run", 73 | fmt.Sprintf("--runnergroup=file://%v", rgCfgPath), 74 | fmt.Sprintf("--runner-image=%v", kr.runnerImage), 75 | ) 76 | if affinity != "" { 77 | args = append(args, fmt.Sprintf("--affinity=%v", affinity)) 78 | } 79 | if flowcontrol != "" { 80 | args = append(args, fmt.Sprintf("--runner-flowcontrol=%v", flowcontrol)) 81 | } 82 | 83 | _, err := runCommand(ctx, timeout, "kperf", args) 84 | return err 85 | } 86 | 87 | // RGResult fetches runner group's result. 88 | func (kr *KperfRunner) RGResult(ctx context.Context, timeout time.Duration) (string, error) { 89 | args := []string{"rg"} 90 | if kr.kubeCfgPath != "" { 91 | args = append(args, fmt.Sprintf("--kubeconfig=%s", kr.kubeCfgPath)) 92 | } 93 | args = append(args, "result") 94 | 95 | data, err := runCommand(ctx, timeout, "kperf", args) 96 | return string(data), err 97 | } 98 | 99 | // RGDelete deletes runner group. 100 | func (kr *KperfRunner) RGDelete(ctx context.Context, timeout time.Duration) error { 101 | args := []string{"rg"} 102 | if kr.kubeCfgPath != "" { 103 | args = append(args, fmt.Sprintf("--kubeconfig=%s", kr.kubeCfgPath)) 104 | } 105 | args = append(args, "delete") 106 | 107 | _, err := runCommand(ctx, timeout, "kperf", args) 108 | return err 109 | } 110 | -------------------------------------------------------------------------------- /contrib/utils/kubectl_cmd.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | package utils 5 | 6 | import ( 7 | "context" 8 | "fmt" 9 | "net/url" 10 | "strings" 11 | "time" 12 | ) 13 | 14 | // KubectlRunner is the wrapper of exec.Command to execute kubectl command. 15 | type KubectlRunner struct { 16 | kubeCfgPath string 17 | namespace string 18 | } 19 | 20 | func NewKubectlRunner(kubeCfgPath string, namespace string) *KubectlRunner { 21 | return &KubectlRunner{ 22 | kubeCfgPath: kubeCfgPath, 23 | namespace: namespace, 24 | } 25 | } 26 | 27 | // FQDN returns the FQDN of the cluster. 28 | func (kr *KubectlRunner) FQDN(ctx context.Context, timeout time.Duration) (string, error) { 29 | args := []string{} 30 | if kr.kubeCfgPath != "" { 31 | args = append(args, "--kubeconfig", kr.kubeCfgPath) 32 | } 33 | args = append(args, "cluster-info") 34 | 35 | data, err := runCommand(ctx, timeout, "kubectl", args) 36 | if err != nil { 37 | return "", err 38 | } 39 | 40 | line := strings.Split(string(data), "\n")[0] 41 | items := strings.Fields(line) 42 | 43 | rawFqdn := items[len(items)-1] 44 | rawFqdn = strings.TrimPrefix(rawFqdn, "\x1b[0;33m") 45 | rawFqdn = strings.TrimSuffix(rawFqdn, "\x1b[0m") 46 | 47 | fqdn, err := url.Parse(rawFqdn) 48 | if err != nil { 49 | return "", err 50 | } 51 | host := strings.Split(fqdn.Host, ":")[0] 52 | return strings.ToLower(host), nil 53 | } 54 | 55 | // Wait runs wait subcommand. 
56 | func (kr *KubectlRunner) Wait(ctx context.Context, timeout time.Duration, condition, waitTimeout, target string) error { 57 | if condition == "" { 58 | return fmt.Errorf("condition is required") 59 | } 60 | 61 | if target == "" { 62 | return fmt.Errorf("target is required") 63 | } 64 | 65 | args := []string{} 66 | if kr.kubeCfgPath != "" { 67 | args = append(args, "--kubeconfig", kr.kubeCfgPath) 68 | } 69 | if kr.namespace != "" { 70 | args = append(args, "-n", kr.namespace) 71 | } 72 | 73 | args = append(args, "wait", "--for="+condition) 74 | if waitTimeout != "" { 75 | args = append(args, "--timeout="+waitTimeout) 76 | } 77 | args = append(args, target) 78 | 79 | _, err := runCommand(ctx, timeout, "kubectl", args) 80 | return err 81 | } 82 | 83 | // CreateNamespace creates a new namespace. 84 | func (kr *KubectlRunner) CreateNamespace(ctx context.Context, timeout time.Duration, name string) error { 85 | args := []string{} 86 | if kr.kubeCfgPath != "" { 87 | args = append(args, "--kubeconfig", kr.kubeCfgPath) 88 | } 89 | args = append(args, "create", "namespace", name) 90 | 91 | _, err := runCommand(ctx, timeout, "kubectl", args) 92 | return err 93 | } 94 | 95 | // DeleteNamespace delete a namespace. 96 | func (kr *KubectlRunner) DeleteNamespace(ctx context.Context, timeout time.Duration, name string) error { 97 | args := []string{} 98 | if kr.kubeCfgPath != "" { 99 | args = append(args, "--kubeconfig", kr.kubeCfgPath) 100 | } 101 | args = append(args, "delete", "namespace", name) 102 | 103 | _, err := runCommand(ctx, timeout, "kubectl", args) 104 | return err 105 | } 106 | 107 | // Apply runs apply subcommand. 
108 | func (kr *KubectlRunner) Apply(ctx context.Context, timeout time.Duration, filePath string) error { 109 | args := []string{} 110 | if kr.kubeCfgPath != "" { 111 | args = append(args, "--kubeconfig", kr.kubeCfgPath) 112 | } 113 | if kr.namespace != "" { 114 | args = append(args, "-n", kr.namespace) 115 | } 116 | args = append(args, "apply", "-f", filePath) 117 | 118 | _, err := runCommand(ctx, timeout, "kubectl", args) 119 | return err 120 | } 121 | 122 | // ServerSideApplyWithData runs kubectl apply with --server-side=true, with input data piped through stdin. 123 | func (kr *KubectlRunner) ServerSideApplyWithData(ctx context.Context, timeout time.Duration, data string) error { 124 | args := []string{} 125 | if kr.kubeCfgPath != "" { 126 | args = append(args, "--kubeconfig", kr.kubeCfgPath) 127 | } 128 | if kr.namespace != "" { 129 | args = append(args, "-n", kr.namespace) 130 | } 131 | args = append(args, "apply", "--server-side=true", "--validate=ignore", "-f", "-") 132 | 133 | _, err := runCommandWithInput(ctx, timeout, "kubectl", args, data) 134 | return err 135 | } 136 | 137 | // Delete runs delete subcommand. 138 | func (kr *KubectlRunner) Delete(ctx context.Context, timeout time.Duration, filePath string) error { 139 | args := []string{} 140 | if kr.kubeCfgPath != "" { 141 | args = append(args, "--kubeconfig", kr.kubeCfgPath) 142 | } 143 | if kr.namespace != "" { 144 | args = append(args, "-n", kr.namespace) 145 | } 146 | args = append(args, "delete", "-f", filePath) 147 | 148 | _, err := runCommand(ctx, timeout, "kubectl", args) 149 | return err 150 | } 151 | 152 | // DeploymentRestart restats a deployment. 
153 | func (kr *KubectlRunner) DeploymentRestart(ctx context.Context, timeout time.Duration, name string) error { 154 | args := []string{} 155 | if kr.kubeCfgPath != "" { 156 | args = append(args, "--kubeconfig", kr.kubeCfgPath) 157 | } 158 | if kr.namespace != "" { 159 | args = append(args, "-n", kr.namespace) 160 | } 161 | args = append(args, "rollout", "restart", "deployment", name) 162 | 163 | _, err := runCommand(ctx, timeout, "kubectl", args) 164 | return err 165 | } 166 | 167 | // DeploymentRolloutStatus watches the rollout status of a deployment. 168 | func (kr *KubectlRunner) DeploymentRolloutStatus(ctx context.Context, timeout time.Duration, name string) error { 169 | args := []string{} 170 | if kr.kubeCfgPath != "" { 171 | args = append(args, "--kubeconfig", kr.kubeCfgPath) 172 | } 173 | if kr.namespace != "" { 174 | args = append(args, "-n", kr.namespace) 175 | } 176 | args = append(args, "rollout", "status", fmt.Sprintf("deployment/%s", name)) 177 | 178 | _, err := runCommand(ctx, timeout, "kubectl", args) 179 | return err 180 | } 181 | -------------------------------------------------------------------------------- /contrib/utils/kubectl_cmd_linux.go: -------------------------------------------------------------------------------- 1 | //go:build linux 2 | 3 | // Copyright (c) Microsoft Corporation. 4 | // Licensed under the MIT License. 5 | 6 | package utils 7 | 8 | import ( 9 | "context" 10 | "fmt" 11 | "time" 12 | 13 | "github.com/Azure/kperf/contrib/internal/mountns" 14 | 15 | "golang.org/x/sys/unix" 16 | "k8s.io/klog/v2" 17 | ) 18 | 19 | // Metrics returns the metrics for a specific kube-apiserver. 
20 | func (kr *KubectlRunner) Metrics(ctx context.Context, timeout time.Duration, fqdn, ip string) ([]byte, error) { 21 | args := []string{} 22 | if kr.kubeCfgPath != "" { 23 | args = append(args, "--kubeconfig", kr.kubeCfgPath) 24 | } 25 | args = append(args, "get", "--raw", "/metrics") 26 | 27 | var result []byte 28 | 29 | merr := mountns.Executes(func() error { 30 | newETCHostFile, cleanup, err := CreateTempFileWithContent([]byte(fmt.Sprintf("%s %s\n", ip, fqdn))) 31 | if err != nil { 32 | return err 33 | } 34 | defer func() { _ = cleanup() }() 35 | 36 | target := "/etc/hosts" 37 | 38 | err = unix.Mount(newETCHostFile, target, "none", unix.MS_BIND, "") 39 | if err != nil { 40 | return fmt.Errorf("failed to mount %s on %s: %w", 41 | newETCHostFile, target, err) 42 | } 43 | defer func() { 44 | derr := unix.Unmount(target, 0) 45 | if derr != nil { 46 | klog.Warningf("failed umount %s", target) 47 | } 48 | }() 49 | 50 | result, err = runCommand(ctx, timeout, "kubectl", args) 51 | return err 52 | }) 53 | return result, merr 54 | } 55 | -------------------------------------------------------------------------------- /contrib/utils/kubectl_cmd_other.go: -------------------------------------------------------------------------------- 1 | //go:build !linux 2 | 3 | // Copyright (c) Microsoft Corporation. 4 | // Licensed under the MIT License. 5 | 6 | package utils 7 | 8 | import ( 9 | "context" 10 | "fmt" 11 | "time" 12 | ) 13 | 14 | // Metrics returns the metrics for a specific kube-apiserver. 15 | func (kr *KubectlRunner) Metrics(ctx context.Context, timeout time.Duration, fqdn, ip string) ([]byte, error) { 16 | return nil, fmt.Errorf("not supported") 17 | } 18 | -------------------------------------------------------------------------------- /contrib/utils/utils_linux.go: -------------------------------------------------------------------------------- 1 | //go:build linux 2 | 3 | // Copyright (c) Microsoft Corporation. 4 | // Licensed under the MIT License. 
5 | 6 | package utils 7 | 8 | import ( 9 | "context" 10 | "os/exec" 11 | "syscall" 12 | ) 13 | 14 | func newExecCommand(ctx context.Context, name string, args ...string) *exec.Cmd { 15 | c := exec.CommandContext(ctx, name, args...) 16 | c.SysProcAttr = &syscall.SysProcAttr{Pdeathsig: syscall.SIGKILL} 17 | return c 18 | } 19 | -------------------------------------------------------------------------------- /contrib/utils/utils_other.go: -------------------------------------------------------------------------------- 1 | //go:build !linux 2 | 3 | // Copyright (c) Microsoft Corporation. 4 | // Licensed under the MIT License. 5 | 6 | package utils 7 | 8 | import ( 9 | "context" 10 | "os/exec" 11 | ) 12 | 13 | func newExecCommand(ctx context.Context, name string, args ...string) *exec.Cmd { 14 | return exec.CommandContext(ctx, name, args...) 15 | } 16 | -------------------------------------------------------------------------------- /helmcli/delete.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package helmcli 5 | 6 | import ( 7 | "errors" 8 | "fmt" 9 | 10 | "helm.sh/helm/v3/pkg/action" 11 | "helm.sh/helm/v3/pkg/storage/driver" 12 | "k8s.io/cli-runtime/pkg/genericclioptions" 13 | ) 14 | 15 | // DeleteCli is a client to delete helm release. 16 | type DeleteCli struct { 17 | namespace string 18 | 19 | cfg *action.Configuration 20 | } 21 | 22 | // NewDeleteCli returns new DeleteCli instance. 
23 | func NewDeleteCli(kubeconfigPath string, namespace string) (*DeleteCli, error) { 24 | actionCfg := new(action.Configuration) 25 | if err := actionCfg.Init( 26 | &genericclioptions.ConfigFlags{ 27 | KubeConfig: &kubeconfigPath, 28 | }, 29 | namespace, 30 | "secret", 31 | debugLog, 32 | ); err != nil { 33 | return nil, fmt.Errorf("failed to init action config: %w", err) 34 | } 35 | return &DeleteCli{ 36 | namespace: namespace, 37 | cfg: actionCfg, 38 | }, nil 39 | } 40 | 41 | // Delete deletes existing helm release. 42 | func (cli *DeleteCli) Delete(releaseName string) error { 43 | delCli := action.NewUninstall(cli.cfg) 44 | _, err := delCli.Run(releaseName) 45 | if errors.Is(err, driver.ErrReleaseNotFound) { 46 | err = nil 47 | } 48 | return err 49 | } 50 | -------------------------------------------------------------------------------- /helmcli/get.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package helmcli 5 | 6 | import ( 7 | "fmt" 8 | 9 | "helm.sh/helm/v3/pkg/action" 10 | "helm.sh/helm/v3/pkg/release" 11 | "k8s.io/cli-runtime/pkg/genericclioptions" 12 | ) 13 | 14 | // GetCli is a client to get helm chart from secret storage. 15 | type GetCli struct { 16 | namespace string 17 | 18 | cfg *action.Configuration 19 | } 20 | 21 | // NewGetCli returns new GetCli instance. 22 | func NewGetCli(kubeconfigPath string, namespace string) (*GetCli, error) { 23 | actionCfg := new(action.Configuration) 24 | if err := actionCfg.Init( 25 | &genericclioptions.ConfigFlags{ 26 | KubeConfig: &kubeconfigPath, 27 | }, 28 | namespace, 29 | "secret", 30 | debugLog, 31 | ); err != nil { 32 | return nil, fmt.Errorf("failed to init action config: %w", err) 33 | } 34 | return &GetCli{ 35 | namespace: namespace, 36 | cfg: actionCfg, 37 | }, nil 38 | } 39 | 40 | // Get returns all the information about that given release. 
41 | func (cli *GetCli) Get(releaseName string) (*release.Release, error) { 42 | getCli := action.NewGet(cli.cfg) 43 | return getCli.Run(releaseName) 44 | } 45 | -------------------------------------------------------------------------------- /helmcli/list.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package helmcli 5 | 6 | import ( 7 | "fmt" 8 | 9 | "helm.sh/helm/v3/pkg/action" 10 | "helm.sh/helm/v3/pkg/release" 11 | "k8s.io/cli-runtime/pkg/genericclioptions" 12 | ) 13 | 14 | // ListCli is a client to get helm charts from secret storage. 15 | type ListCli struct { 16 | namespace string 17 | 18 | cfg *action.Configuration 19 | } 20 | 21 | // NewGetCli returns new GetCli instance. 22 | func NewListCli(kubeconfigPath string, namespace string) (*ListCli, error) { 23 | actionCfg := new(action.Configuration) 24 | if err := actionCfg.Init( 25 | &genericclioptions.ConfigFlags{ 26 | KubeConfig: &kubeconfigPath, 27 | }, 28 | namespace, 29 | "secret", 30 | debugLog, 31 | ); err != nil { 32 | return nil, fmt.Errorf("failed to init action config: %w", err) 33 | } 34 | return &ListCli{ 35 | namespace: namespace, 36 | cfg: actionCfg, 37 | }, nil 38 | } 39 | 40 | func (cli *ListCli) List() ([]*release.Release, error) { 41 | listCli := action.NewList(cli.cfg) 42 | return listCli.Run() 43 | } 44 | -------------------------------------------------------------------------------- /helmcli/release.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | package helmcli 5 | 6 | import ( 7 | "context" 8 | "encoding/json" 9 | "fmt" 10 | "time" 11 | 12 | "gopkg.in/yaml.v3" 13 | "helm.sh/helm/v3/pkg/action" 14 | "helm.sh/helm/v3/pkg/chart" 15 | "helm.sh/helm/v3/pkg/storage/driver" 16 | "helm.sh/helm/v3/pkg/strvals" 17 | "k8s.io/cli-runtime/pkg/genericclioptions" 18 | "k8s.io/klog/v2" 19 | ) 20 | 21 | var debugLog action.DebugLog = func(fmt string, args ...interface{}) { 22 | klog.V(2).Infof(fmt, args...) 23 | } 24 | 25 | // ValuesApplier is to apply new key/values to existing chart's values. 26 | type ValuesApplier func(values map[string]interface{}) error 27 | 28 | // StringPathValuesApplier applies key/values by string path. 29 | // 30 | // For instance, x.y.z=1 is the same to that YAML value: 31 | // 32 | // ```yaml 33 | // 34 | // x: 35 | // y: 36 | // z: 1 37 | // 38 | // ``` 39 | func StringPathValuesApplier(values ...string) ValuesApplier { 40 | return func(to map[string]interface{}) error { 41 | for _, v := range values { 42 | if err := strvals.ParseInto(v, to); err != nil { 43 | return fmt.Errorf("failed to parse (%s) into values: %w", v, err) 44 | } 45 | } 46 | return nil 47 | } 48 | } 49 | 50 | // YAMLValuesApplier applies key/values by YAML. 
51 | func YAMLValuesApplier(yamlValues string) (ValuesApplier, error) { 52 | values := make(map[string]interface{}) 53 | err := yaml.Unmarshal([]byte(yamlValues), &values) 54 | if err != nil { 55 | return nil, err 56 | } 57 | 58 | return func(to map[string]interface{}) error { 59 | return applyValues(to, values) 60 | }, nil 61 | } 62 | 63 | func applyValues(to, from map[string]interface{}) error { 64 | for k, v := range from { 65 | // If 'to' doesn't have key 'k' 66 | if _, checkKey := to[k]; !checkKey { 67 | to[k] = v 68 | continue 69 | } 70 | 71 | // If 'to' has key 'k' 72 | switch v := v.(type) { 73 | case map[string]interface{}: 74 | // If 'v' is of type map[string]interface{} 75 | if toMap, checkKey := to[k].(map[string]interface{}); checkKey { 76 | if err := applyValues(toMap, v); err != nil { 77 | return err 78 | } 79 | } else { 80 | to[k] = v 81 | 82 | } 83 | default: 84 | // If 'v' is not of type map[string]interface{} 85 | to[k] = v 86 | } 87 | } 88 | return nil 89 | } 90 | 91 | // ReleaseCli is a client to deploy helm chart with secret storage. 92 | type ReleaseCli struct { 93 | namespace string 94 | name string 95 | 96 | cfg *action.Configuration 97 | ch *chart.Chart 98 | values map[string]interface{} 99 | labels map[string]string 100 | } 101 | 102 | // NewReleaseCli returns new ReleaseCli instance. 103 | // 104 | // TODO: 105 | // 1. 
add flag to disable Wait 106 | func NewReleaseCli( 107 | kubeconfigPath string, 108 | namespace string, 109 | name string, 110 | ch *chart.Chart, 111 | labels map[string]string, 112 | valuesAppliers ...ValuesApplier, 113 | ) (*ReleaseCli, error) { 114 | // build default values 115 | values, err := copyValues(ch.Values) 116 | if err != nil { 117 | return nil, err 118 | } 119 | 120 | for _, applier := range valuesAppliers { 121 | if err := applier(values); err != nil { 122 | return nil, fmt.Errorf("failed to apply: %w", err) 123 | } 124 | } 125 | 126 | actionCfg := new(action.Configuration) 127 | if err := actionCfg.Init( 128 | &genericclioptions.ConfigFlags{ 129 | KubeConfig: &kubeconfigPath, 130 | }, 131 | namespace, 132 | "secret", 133 | debugLog, 134 | ); err != nil { 135 | return nil, fmt.Errorf("failed to init action config: %w", err) 136 | } 137 | 138 | return &ReleaseCli{ 139 | namespace: namespace, 140 | name: name, 141 | cfg: actionCfg, 142 | ch: ch, 143 | values: values, 144 | labels: labels, 145 | }, nil 146 | } 147 | 148 | // Deploy will install or upgrade that release. 149 | func (cli *ReleaseCli) Deploy(ctx context.Context, timeout time.Duration, valuesAppliers ...ValuesApplier) error { 150 | values, err := cli.initValues(valuesAppliers...) 151 | if err != nil { 152 | return err 153 | } 154 | 155 | // NOTE: Maintain only one history record just in case that there are 156 | // too many secret records which causes ETCD OutOfSpace. 
157 | histCli := action.NewHistory(cli.cfg) 158 | histCli.Max = 1 159 | if _, err = histCli.Run(cli.name); err == driver.ErrReleaseNotFound { 160 | installCli := action.NewInstall(cli.cfg) 161 | installCli.CreateNamespace = true 162 | installCli.Atomic = true 163 | installCli.Namespace = cli.namespace 164 | installCli.ReleaseName = cli.name 165 | installCli.IsUpgrade = true 166 | installCli.Timeout = timeout 167 | installCli.Labels = cli.labels 168 | installCli.Wait = true 169 | 170 | release, err := installCli.RunWithContext(ctx, cli.ch, values) 171 | if err != nil { 172 | return fmt.Errorf("failed to install that release %s: %w", cli.name, err) 173 | } 174 | cli.values = release.Config 175 | return nil 176 | } 177 | 178 | upgradeCli := action.NewUpgrade(cli.cfg) 179 | upgradeCli.Namespace = cli.namespace 180 | upgradeCli.Atomic = true 181 | upgradeCli.Timeout = timeout 182 | upgradeCli.MaxHistory = 1 183 | upgradeCli.Wait = true 184 | upgradeCli.Labels = cli.labels 185 | 186 | release, err := upgradeCli.RunWithContext(ctx, cli.name, cli.ch, values) 187 | if err != nil { 188 | return fmt.Errorf("failed to upgrade that release %s: %w", cli.name, err) 189 | } 190 | 191 | cli.values = release.Config 192 | return nil 193 | } 194 | 195 | // Uninstall deletes that release. 196 | func (cli *ReleaseCli) Uninstall() error { 197 | uninstallCli := action.NewUninstall(cli.cfg) 198 | _, err := uninstallCli.Run(cli.name) 199 | return err 200 | } 201 | 202 | // initValues is to apply valuesAppliers into copied values. Just in case that 203 | // we can rollback if valuesApplier returns error. 
204 | func (cli *ReleaseCli) initValues(valuesAppliers ...ValuesApplier) (map[string]interface{}, error) { 205 | values, err := copyValues(cli.values) 206 | if err != nil { 207 | return nil, fmt.Errorf("failed to copy values: %w", err) 208 | } 209 | 210 | for _, applier := range valuesAppliers { 211 | if err := applier(values); err != nil { 212 | return nil, fmt.Errorf("failed to apply: %w", err) 213 | } 214 | } 215 | return values, nil 216 | } 217 | 218 | func copyValues(src map[string]interface{}) (map[string]interface{}, error) { 219 | data, err := json.Marshal(src) 220 | if err != nil { 221 | return nil, fmt.Errorf("failed to json.Marshal original values: %w", err) 222 | } 223 | 224 | newValues := make(map[string]interface{}) 225 | if err := json.Unmarshal(data, &newValues); err != nil { 226 | return nil, fmt.Errorf("failed to use json.Unmarshal to copy values: %w", err) 227 | } 228 | return newValues, nil 229 | } 230 | -------------------------------------------------------------------------------- /helmcli/release_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | package helmcli 5 | 6 | import ( 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func TestApplyValues(t *testing.T) { 13 | for idx, tc := range []struct { 14 | from map[string]interface{} 15 | to map[string]interface{} 16 | expected map[string]interface{} 17 | }{ 18 | { 19 | from: map[string]interface{}{ 20 | "foo": "bar1", 21 | "baz": map[string]interface{}{ 22 | "name": "alice", 23 | }, 24 | }, 25 | to: map[string]interface{}{ 26 | "foo": "bar2", 27 | "baz": map[string]interface{}{ 28 | "name": "bob", 29 | "age": "18", 30 | }, 31 | }, 32 | expected: map[string]interface{}{ 33 | "foo": "bar1", 34 | "baz": map[string]interface{}{ 35 | "name": "alice", 36 | "age": "18", 37 | }, 38 | }, 39 | }, 40 | { 41 | from: map[string]interface{}{ 42 | "foo": "bar1", 43 | "baz": "profile", 44 | }, 45 | to: map[string]interface{}{ 46 | "foo": "bar1", 47 | "baz": map[string]interface{}{ 48 | "name": "alice", 49 | }, 50 | }, 51 | expected: map[string]interface{}{ 52 | "foo": "bar1", 53 | "baz": "profile", 54 | }, 55 | }, 56 | { 57 | from: map[string]interface{}{ 58 | "foo": "bar1", 59 | "baz": map[string]interface{}{ 60 | "name": "alice", 61 | }, 62 | }, 63 | to: map[string]interface{}{ 64 | "version": "alpha", 65 | }, 66 | expected: map[string]interface{}{ 67 | "foo": "bar1", 68 | "baz": map[string]interface{}{ 69 | "name": "alice", 70 | }, 71 | "version": "alpha", 72 | }, 73 | }, 74 | { 75 | from: map[string]interface{}{ 76 | "baz": map[string]interface{}{ 77 | "name": map[string]interface{}{ 78 | "last": "unknown", 79 | "first": "bob", 80 | }, 81 | }, 82 | "version": "beta", 83 | }, 84 | to: map[string]interface{}{ 85 | "foo": "bar2", 86 | "baz": map[string]interface{}{ 87 | "name": "bob", 88 | "age": "18", 89 | }, 90 | }, 91 | expected: map[string]interface{}{ 92 | "foo": "bar2", 93 | "baz": map[string]interface{}{ 94 | "name": map[string]interface{}{ 95 | "last": "unknown", 96 | "first": "bob", 97 | }, 98 | "age": "18", 99 | }, 100 | 
"version": "beta", 101 | }, 102 | }, 103 | } { 104 | err := applyValues(tc.to, tc.from) 105 | assert.NoError(t, err, "#%v", idx) 106 | assert.Equal(t, tc.expected, tc.to, "#%v", idx) 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /manifests/helm.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package manifests 5 | 6 | import ( 7 | "embed" 8 | "fmt" 9 | "io/fs" 10 | "path/filepath" 11 | "strings" 12 | 13 | "helm.sh/helm/v3/pkg/chart" 14 | "helm.sh/helm/v3/pkg/chart/loader" 15 | ) 16 | 17 | // LoadChart returns chart from current package's embed filesystem. 18 | func LoadChart(componentName string) (*chart.Chart, error) { 19 | return loadChart(FS, componentName) 20 | } 21 | 22 | // LoadChartFromEmbedFS returns chart from a given embed filesystem. 23 | func LoadChartFromEmbedFS(targetFS embed.FS, componentName string) (*chart.Chart, error) { 24 | return loadChart(targetFS, componentName) 25 | } 26 | 27 | func loadChart(targetFS embed.FS, componentName string) (*chart.Chart, error) { 28 | files, err := getFilesFromFSRecursively(targetFS, componentName) 29 | if err != nil { 30 | return nil, fmt.Errorf("failed to get chart files: %w", err) 31 | } 32 | 33 | topDir := componentName + string(filepath.Separator) 34 | bufFiles := make([]*loader.BufferedFile, 0, len(files)) 35 | for _, f := range files { 36 | data, err := fs.ReadFile(targetFS, f) 37 | if err != nil { 38 | return nil, fmt.Errorf("failed to read file (%s): %w", f, err) 39 | } 40 | 41 | fname := filepath.ToSlash(strings.TrimPrefix(f, topDir)) 42 | bufFiles = append(bufFiles, 43 | &loader.BufferedFile{ 44 | Name: fname, 45 | Data: data, 46 | }, 47 | ) 48 | } 49 | return loader.LoadFiles(bufFiles) 50 | } 51 | 52 | func getFilesFromFSRecursively(targetFS embed.FS, componentName string) ([]string, error) { 53 | files := make([]string, 
0) 54 | 55 | err := fs.WalkDir(targetFS, componentName, 56 | func(path string, d fs.DirEntry, err error) error { 57 | if err != nil { 58 | return err 59 | } 60 | 61 | if d.IsDir() { 62 | return nil 63 | } 64 | files = append(files, path) 65 | return nil 66 | }, 67 | ) 68 | if err != nil { 69 | return nil, err 70 | } 71 | return files, nil 72 | } 73 | -------------------------------------------------------------------------------- /manifests/mainfest.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package manifests 5 | 6 | import "embed" 7 | 8 | // FS embeds the manifests 9 | // 10 | //go:embed virtualcluster/* 11 | //go:embed runnergroup/* 12 | var FS embed.FS 13 | -------------------------------------------------------------------------------- /manifests/runnergroup/server/Chart.yaml: -------------------------------------------------------------------------------- 1 | { 2 | "apiVersion": "v1", 3 | "name": "runnergroup-server", 4 | "version": "0.0.1" 5 | } 6 | -------------------------------------------------------------------------------- /manifests/runnergroup/server/templates/clusterrole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: {{ .Values.name }} 5 | rules: 6 | - apiGroups: 7 | - '*' 8 | resources: 9 | - '*' 10 | verbs: 11 | - '*' 12 | -------------------------------------------------------------------------------- /manifests/runnergroup/server/templates/clusterrolebinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: {{ .Values.name }} 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: {{ .Values.name }} 9 | subjects: 10 | - kind: 
ServiceAccount 11 | name: {{ .Values.name }} 12 | namespace: {{ .Release.Namespace }} 13 | -------------------------------------------------------------------------------- /manifests/runnergroup/server/templates/flowcontrol.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: flowcontrol.apiserver.k8s.io/v1 2 | kind: FlowSchema 3 | metadata: 4 | name: {{ .Values.name }} 5 | namespace: {{ .Release.Namespace }} 6 | labels: 7 | app.kubernetes.io/managed-by: "Helm" 8 | annotations: 9 | meta.helm.sh/release-name: "{{ .Release.Name }}" 10 | meta.helm.sh/release-namespace: "{{ .Release.Namespace }}" 11 | spec: 12 | distinguisherMethod: 13 | type: ByUser 14 | matchingPrecedence: {{ .Values.flowcontrol.matchingPrecedence }} 15 | priorityLevelConfiguration: 16 | name: {{ .Values.flowcontrol.priorityLevelConfiguration }} 17 | rules: 18 | - resourceRules: 19 | - apiGroups: 20 | - '*' 21 | clusterScope: true 22 | namespaces: 23 | - '*' 24 | resources: 25 | - '*' 26 | verbs: 27 | - '*' 28 | subjects: 29 | - kind: ServiceAccount 30 | serviceAccount: 31 | name: {{ .Values.name }} 32 | namespace: {{ .Release.Namespace }} 33 | -------------------------------------------------------------------------------- /manifests/runnergroup/server/templates/pod.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: {{ .Values.name }} 5 | namespace: {{ .Release.Namespace }} 6 | spec: 7 | {{- if .Values.nodeSelectors }} 8 | affinity: 9 | nodeAffinity: 10 | requiredDuringSchedulingIgnoredDuringExecution: 11 | nodeSelectorTerms: 12 | - matchExpressions: 13 | {{- range $key, $values := .Values.nodeSelectors }} 14 | - key: "{{ $key }}" 15 | operator: In 16 | values: 17 | {{- range $values }} 18 | - {{ . 
}} 19 | {{- end }} 20 | {{- end }} 21 | {{- end }} 22 | containers: 23 | - name: server 24 | command: 25 | - /kperf 26 | - rg 27 | - server 28 | - --namespace 29 | - $(POD_NAMESPACE) 30 | - --runnergroup 31 | - configmap://{{ .Values.name }}-init-spec?namespace={{ .Release.Namespace }} 32 | - --runner-image 33 | - {{ .Values.image }} 34 | - --runner-owner 35 | - v1:Pod:$(POD_NAME):$(POD_UID) 36 | - --runner-sa 37 | - {{ .Values.name }} 38 | - --runner-verbosity 39 | - {{ .Values.runnerVerbosity }} 40 | - --address 41 | - $(POD_IP):8080 42 | - --address 43 | - localhost:8080 44 | - --data 45 | - /data 46 | - $(POD_NAME) 47 | env: 48 | - name: POD_NAME 49 | valueFrom: 50 | fieldRef: 51 | fieldPath: metadata.name 52 | - name: POD_NAMESPACE 53 | valueFrom: 54 | fieldRef: 55 | fieldPath: metadata.namespace 56 | - name: POD_UID 57 | valueFrom: 58 | fieldRef: 59 | fieldPath: metadata.uid 60 | - name: POD_IP 61 | valueFrom: 62 | fieldRef: 63 | fieldPath: status.podIP 64 | image: {{ .Values.image }} 65 | imagePullPolicy: Always 66 | volumeMounts: 67 | - mountPath: /data 68 | name: data 69 | restartPolicy: Always 70 | serviceAccount: {{ .Values.name }} 71 | volumes: 72 | - emptyDir: 73 | name: data 74 | -------------------------------------------------------------------------------- /manifests/runnergroup/server/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: {{ .Values.name }} 5 | namespace: {{ .Release.Namespace }} 6 | -------------------------------------------------------------------------------- /manifests/runnergroup/server/templates/spec.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | labels: 5 | app: {{ .Values.name }} 6 | name: {{ .Values.name }}-init-spec 7 | namespace: {{ .Release.Namespace }} 8 | data: 9 | spec: {{ .Values.runnerGroupSpec 
| toYaml | indent 2 }} 10 | -------------------------------------------------------------------------------- /manifests/runnergroup/server/values.yaml: -------------------------------------------------------------------------------- 1 | name: "" 2 | image: "" 3 | # TODO(weifu): need https://github.com/Azure/kperf/issues/25 to support list 4 | runnerGroupSpec: "" 5 | runnerVerbosity: "2" 6 | nodeSelectors: {} 7 | flowcontrol: 8 | priorityLevelConfiguration: workload-low 9 | matchingPrecedence: 1000 10 | -------------------------------------------------------------------------------- /manifests/virtualcluster/nodecontrollers/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: virtualnode-controllers 3 | version: "0.0.1" 4 | -------------------------------------------------------------------------------- /manifests/virtualcluster/nodecontrollers/templates/clusterrole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: {{ .Values.name }} 5 | rules: 6 | - apiGroups: 7 | - "" 8 | resources: 9 | - events 10 | verbs: 11 | - create 12 | - update 13 | - patch 14 | - watch 15 | - list 16 | - get 17 | - apiGroups: 18 | - "" 19 | resources: 20 | - nodes 21 | verbs: 22 | - watch 23 | - list 24 | - get 25 | - apiGroups: 26 | - "" 27 | resources: 28 | - nodes/status 29 | verbs: 30 | - update 31 | - patch 32 | - apiGroups: 33 | - "" 34 | resources: 35 | - pods 36 | verbs: 37 | - watch 38 | - list 39 | - delete 40 | - update 41 | - patch 42 | - apiGroups: 43 | - "" 44 | resources: 45 | - pods/status 46 | verbs: 47 | - update 48 | - patch 49 | - apiGroups: 50 | - coordination.k8s.io 51 | resources: 52 | - leases 53 | verbs: 54 | - create 55 | - update 56 | - patch 57 | - watch 58 | - list 59 | - get 60 | - apiGroups: 61 | - kwok.x-k8s.io 62 | resources: 63 | - stages 64 | verbs: 65 | - 
create 66 | - delete 67 | - get 68 | - list 69 | - patch 70 | - update 71 | - watch 72 | - apiGroups: 73 | - kwok.x-k8s.io 74 | resources: 75 | - stages/status 76 | verbs: 77 | - patch 78 | - update -------------------------------------------------------------------------------- /manifests/virtualcluster/nodecontrollers/templates/clusterrolebinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: {{ .Values.name }} 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: {{ .Values.name }} 9 | subjects: 10 | - kind: ServiceAccount 11 | name: {{ .Values.name }} 12 | namespace: {{ .Release.Namespace }} 13 | -------------------------------------------------------------------------------- /manifests/virtualcluster/nodecontrollers/templates/config.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | labels: 5 | app: {{ .Values.name }} 6 | name: {{ .Values.name }} 7 | namespace: {{ .Release.Namespace }} 8 | data: 9 | # NOTE: https://github.com/helm/helm/issues/2798#issuecomment-470435015 10 | kwok-config.yaml: |- 11 | apiVersion: config.kwok.x-k8s.io/v1alpha1 12 | kind: KwokConfiguration 13 | options: 14 | enableProfilingHandler: false 15 | enableContentionProfiling: false 16 | enablePodsOnNodeSyncListPager: false 17 | enablePodsOnNodeSyncStreamWatch: true 18 | nodeLeaseParallelism: 4 19 | podPlayStageParallelism: 4 20 | nodePlayStageParallelism: 4 21 | enableCRDs: 22 | - Stage -------------------------------------------------------------------------------- /manifests/virtualcluster/nodecontrollers/templates/flowcontrol.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: flowcontrol.apiserver.k8s.io/v1 2 | kind: FlowSchema 3 | metadata: 4 | name: {{ .Values.name }} 5 | 
namespace: {{ .Release.Namespace }} 6 | labels: 7 | app.kubernetes.io/managed-by: "Helm" 8 | annotations: 9 | meta.helm.sh/release-name: "{{ .Release.Name }}" 10 | meta.helm.sh/release-namespace: "{{ .Release.Namespace }}" 11 | spec: 12 | distinguisherMethod: 13 | type: ByUser 14 | matchingPrecedence: 500 15 | priorityLevelConfiguration: 16 | name: custom-system 17 | rules: 18 | - resourceRules: 19 | - apiGroups: 20 | - '*' 21 | clusterScope: true 22 | namespaces: 23 | - '*' 24 | resources: 25 | - '*' 26 | verbs: 27 | - '*' 28 | subjects: 29 | - kind: ServiceAccount 30 | serviceAccount: 31 | name: {{ .Values.name }} 32 | namespace: {{ .Release.Namespace }} 33 | -------------------------------------------------------------------------------- /manifests/virtualcluster/nodecontrollers/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: {{ .Values.name }} 5 | namespace: {{ .Release.Namespace }} 6 | -------------------------------------------------------------------------------- /manifests/virtualcluster/nodecontrollers/templates/statefulsets.tpl: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: StatefulSet 3 | metadata: 4 | name: {{ .Values.name }} 5 | namespace: {{ .Release.Namespace }} 6 | spec: 7 | selector: 8 | matchLabels: 9 | app: {{ .Values.name }} 10 | replicas: {{ .Values.replicas }} 11 | podManagementPolicy: Parallel 12 | template: 13 | metadata: 14 | labels: 15 | app: {{ .Values.name }} 16 | spec: 17 | {{- if .Values.nodeSelectors }} 18 | affinity: 19 | nodeAffinity: 20 | requiredDuringSchedulingIgnoredDuringExecution: 21 | nodeSelectorTerms: 22 | - matchExpressions: 23 | {{- range $key, $values := .Values.nodeSelectors }} 24 | - key: "{{ $key }}" 25 | operator: In 26 | values: 27 | {{- range $values }} 28 | - {{ . 
}} 29 | {{- end }} 30 | {{- end }} 31 | {{- end }} 32 | terminationGracePeriodSeconds: 1 33 | containers: 34 | - args: 35 | - --config=/data/kwok-config.yaml 36 | - --manage-all-nodes=false 37 | - --manage-single-node=$(POD_NAME) # act as virtualnode 38 | - --disregard-status-with-annotation-selector=kwok.x-k8s.io/status=custom 39 | - --disregard-status-with-label-selector= 40 | - --node-ip=$(POD_IP) 41 | - --node-port=10247 42 | - --cidr=10.0.0.1/24 43 | - --node-lease-duration-seconds=40 44 | env: 45 | - name: POD_IP 46 | valueFrom: 47 | fieldRef: 48 | fieldPath: status.podIP 49 | - name: POD_NAME 50 | valueFrom: 51 | fieldRef: 52 | fieldPath: metadata.name 53 | image: registry.k8s.io/kwok/kwok:v0.7.0 54 | imagePullPolicy: IfNotPresent 55 | name: kwok-controller 56 | volumeMounts: 57 | - name: kwok-config 58 | mountPath: /data/ 59 | resources: 60 | limits: 61 | cpu: "500m" 62 | requests: 63 | cpu: "200m" 64 | restartPolicy: Always 65 | serviceAccount: {{ .Values.name }} 66 | serviceAccountName: {{ .Values.name }} 67 | volumes: 68 | - name: kwok-config 69 | configMap: 70 | name: {{ .Values.name }} 71 | -------------------------------------------------------------------------------- /manifests/virtualcluster/nodecontrollers/values.yaml: -------------------------------------------------------------------------------- 1 | name: "vc-testing" 2 | nodeSelectors: {} 3 | replicas: 0 4 | -------------------------------------------------------------------------------- /manifests/virtualcluster/nodes/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: virtualnodes 3 | version: "0.0.1" 4 | -------------------------------------------------------------------------------- /manifests/virtualcluster/nodes/templates/node-heartbeat-with-lease.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kwok.x-k8s.io/v1alpha1 2 | kind: Stage 3 | metadata: 4 | name: 
node-heartbeat-with-lease 5 | spec: 6 | resourceRef: 7 | apiGroup: v1 8 | kind: Node 9 | selector: 10 | matchExpressions: 11 | - key: '.status.phase' 12 | operator: 'In' 13 | values: 14 | - 'Running' 15 | - key: '.status.conditions.[] | select( .type == "Ready" ) | .status' 16 | operator: 'In' 17 | values: 18 | - 'True' 19 | delay: 20 | durationMilliseconds: 600000 21 | jitterDurationMilliseconds: 610000 22 | next: 23 | statusTemplate: | 24 | {{ `{{ $now := Now }}` }} 25 | {{ `{{ $lastTransitionTime := or .metadata.creationTimestamp $now }}` }} 26 | conditions: 27 | {{ `{{ range NodeConditions }}` }} 28 | - lastHeartbeatTime: {{ `{{ $now | Quote }}` }} 29 | lastTransitionTime: {{ `{{ $lastTransitionTime | Quote }}` }} 30 | message: {{ `{{ .message | Quote }}` }} 31 | reason: {{ `{{ .reason | Quote }}` }} 32 | status: {{ `{{ .status | Quote }}` }} 33 | type: {{ `{{ .type | Quote }}` }} 34 | {{ `{{ end }}` }} 35 | 36 | addresses: 37 | {{ `{{ with .status.addresses }}` }} 38 | {{ `{{ YAML . 1 }}` }} 39 | {{ `{{ else }}` }} 40 | {{ `{{ with NodeIP }}` }} 41 | - address: {{ `{{ . | Quote }}` }} 42 | type: InternalIP 43 | {{ `{{ end }}` }} 44 | {{ `{{ with NodeName }}` }} 45 | - address: {{ `{{ . | Quote }}` }} 46 | type: Hostname 47 | {{ `{{ end }}` }} 48 | {{ `{{ end }}` }} 49 | 50 | {{ `{{ with NodePort }}` }} 51 | daemonEndpoints: 52 | kubeletEndpoint: 53 | Port: {{ `{{ . 
}}` }} 54 | {{ `{{ end }}` }} 55 | -------------------------------------------------------------------------------- /manifests/virtualcluster/nodes/templates/node-initialize.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kwok.x-k8s.io/v1alpha1 2 | kind: Stage 3 | metadata: 4 | name: node-initialize 5 | spec: 6 | resourceRef: 7 | apiGroup: v1 8 | kind: Node 9 | selector: 10 | matchExpressions: 11 | - key: '.status.conditions.[] | select( .type == "Ready" ) | .status' 12 | operator: 'NotIn' 13 | values: 14 | - 'True' 15 | next: 16 | statusTemplate: | 17 | {{ `{{ $now := Now }}` }} 18 | {{ `{{ $lastTransitionTime := or .metadata.creationTimestamp $now }}` }} 19 | conditions: 20 | {{ `{{ range NodeConditions }}` }} 21 | - lastHeartbeatTime: {{ `{{ $now | Quote }}` }} 22 | lastTransitionTime: {{ `{{ $lastTransitionTime | Quote }}` }} 23 | message: {{ `{{ .message | Quote }}` }} 24 | reason: {{ `{{ .reason | Quote }}` }} 25 | status: {{ `{{ .status | Quote }}` }} 26 | type: {{ `{{ .type | Quote}}` }} 27 | {{ `{{ end }}` }} 28 | 29 | addresses: 30 | {{ `{{ with .status.addresses }}` }} 31 | {{ `{{ YAML . 1 }}` }} 32 | {{ `{{ else }}` }} 33 | {{ `{{ with NodeIP }}` }} 34 | - address: {{ `{{ . | Quote }}` }} 35 | type: InternalIP 36 | {{ `{{ end }}` }} 37 | {{ `{{ with NodeName }}` }} 38 | - address: {{ `{{ . | Quote }}` }} 39 | type: Hostname 40 | {{ `{{ end }}` }} 41 | {{ `{{ end }}` }} 42 | 43 | {{ `{{ with NodePort }}` }} 44 | daemonEndpoints: 45 | kubeletEndpoint: 46 | Port: {{ `{{ . }}` }} 47 | {{ `{{ end }}` }} 48 | 49 | allocatable: 50 | {{ `{{ with .status.allocatable }}` }} 51 | {{ `{{ YAML . 1 }}` }} 52 | {{ `{{ else }}` }} 53 | cpu: 1k 54 | memory: 1Ti 55 | pods: 1M 56 | {{ `{{ end }}` }} 57 | capacity: 58 | {{ `{{ with .status.capacity }}` }} 59 | {{ `{{ YAML . 
1 }}` }} 60 | {{ `{{ else }}` }} 61 | cpu: 1k 62 | memory: 1Ti 63 | pods: 1M 64 | {{ `{{ end }}` }} 65 | 66 | {{ `{{ $nodeInfo := .status.nodeInfo }}` }} 67 | {{ `{{ $kwokVersion := printf "kwok-%s" Version }}` }} 68 | nodeInfo: 69 | architecture: {{ `{{ or $nodeInfo.architecture "amd64" }}` }} 70 | bootID: {{ `{{ or $nodeInfo.bootID "" }}` }} 71 | containerRuntimeVersion: {{ `{{ or $nodeInfo.containerRuntimeVersion $kwokVersion }}` }} 72 | kernelVersion: {{ `{{ or $nodeInfo.kernelVersion $kwokVersion }}` }} 73 | kubeProxyVersion: {{ `{{ or $nodeInfo.kubeProxyVersion $kwokVersion }}` }} 74 | kubeletVersion: {{ `{{ or $nodeInfo.kubeletVersion $kwokVersion }}` }} 75 | machineID: {{ `{{ or $nodeInfo.machineID "" }}` }} 76 | operatingSystem: {{ `{{ or $nodeInfo.operatingSystem "linux" }}` }} 77 | osImage: {{ `{{ or $nodeInfo.osImage "" }}` }} 78 | systemUUID: {{ `{{ or $nodeInfo.systemUUID "" }}` }} 79 | phase: Running 80 | -------------------------------------------------------------------------------- /manifests/virtualcluster/nodes/templates/nodes.tpl: -------------------------------------------------------------------------------- 1 | {{- $name := .Values.name }} 2 | {{- $cpu := .Values.cpu }} 3 | {{- $memory := .Values.memory }} 4 | {{- $maxPods := .Values.maxPods }} 5 | {{- $labels := .Values.nodeLabels }} 6 | {{- $sharedProviderID := .Values.sharedProviderID }} 7 | {{- range $index := (untilStep 0 (int .Values.replicas) 1) }} 8 | apiVersion: v1 9 | kind: Node 10 | metadata: 11 | annotations: 12 | node.alpha.kubernetes.io/ttl: "0" 13 | kwok.x-k8s.io/node: fake 14 | kwok.x-k8s.io/manage: {{ $name }}-{{ $index }} 15 | labels: 16 | beta.kubernetes.io/arch: amd64 17 | beta.kubernetes.io/os: linux 18 | kubernetes.io/arch: amd64 19 | kubernetes.io/hostname: {{ $name }}-{{ $index }} 20 | kubernetes.io/os: linux 21 | kubernetes.io/role: agent 22 | node-role.kubernetes.io/agent: "" 23 | node.kubernetes.io/exclude-from-external-load-balancers: "true" 24 | 
kubernetes.azure.com/managed: "false" 25 | type: kperf-virtualnodes 26 | alpha.kperf.io/nodepool: {{ $name }} 27 | {{- range $key, $value := $labels }} 28 | {{ $key }}: {{ $value }} 29 | {{- end }} 30 | name: {{ $name }}-{{ $index }} 31 | spec: 32 | taints: # Avoid scheduling actual running pods to fake Node 33 | - effect: NoSchedule 34 | key: kperf.io/nodepool 35 | value: fake 36 | {{- if $sharedProviderID }} 37 | providerID: {{ $sharedProviderID }} 38 | {{- end}} 39 | status: 40 | allocatable: 41 | cpu: {{ $cpu }} 42 | memory: {{ $memory }}Gi 43 | pods: {{ $maxPods }} 44 | capacity: 45 | cpu: {{ $cpu }} 46 | memory: {{ $memory }}Gi 47 | pods: {{ $maxPods }} 48 | nodeInfo: 49 | architecture: amd64 50 | containerRuntimeVersion: "kwok" 51 | kubeProxyVersion: fake 52 | kubeletVersion: fake 53 | operatingSystem: linux 54 | --- 55 | {{- end}} 56 | -------------------------------------------------------------------------------- /manifests/virtualcluster/nodes/templates/pod-complete.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kwok.x-k8s.io/v1alpha1 2 | kind: Stage 3 | metadata: 4 | name: pod-complete 5 | spec: 6 | resourceRef: 7 | apiGroup: v1 8 | kind: Pod 9 | selector: 10 | matchExpressions: 11 | - key: '.metadata.deletionTimestamp' 12 | operator: 'DoesNotExist' 13 | - key: '.status.phase' 14 | operator: 'In' 15 | values: 16 | - 'Running' 17 | - key: '.status.conditions.[] | select( .type == "Ready" ) | .status' 18 | operator: 'In' 19 | values: 20 | - 'True' 21 | - key: '.metadata.ownerReferences.[].kind' 22 | operator: 'In' 23 | values: 24 | - 'Job' 25 | weight: 1 26 | weightFrom: 27 | expressionFrom: '.metadata.annotations["pod-complete.stage.kwok.x-k8s.io/weight"]' 28 | delay: 29 | durationMilliseconds: 1000 30 | durationFrom: 31 | expressionFrom: '.metadata.annotations["pod-complete.stage.kwok.x-k8s.io/delay"]' 32 | jitterDurationMilliseconds: 5000 33 | jitterDurationFrom: 34 | expressionFrom: 
'.metadata.annotations["pod-complete.stage.kwok.x-k8s.io/jitter-delay"]' 35 | next: 36 | delete: false 37 | statusTemplate: | 38 | {{ `{{ $now := Now }}` }} 39 | {{ `{{ $root := . }}` }} 40 | containerStatuses: 41 | {{ `{{ range $index, $item := .spec.containers }}` }} 42 | {{ `{{ $origin := index $root.status.containerStatuses $index }}` }} 43 | - image: {{ `{{ $item.image | Quote }}` }} 44 | name: {{ `{{ $item.name | Quote }}` }} 45 | ready: true 46 | restartCount: 0 47 | started: false 48 | state: 49 | terminated: 50 | exitCode: 0 51 | finishedAt: {{ `{{ $now | Quote }}` }} 52 | reason: Completed 53 | startedAt: {{ `{{ $now | Quote }}` }} 54 | {{ `{{ end }}` }} 55 | phase: Succeeded 56 | -------------------------------------------------------------------------------- /manifests/virtualcluster/nodes/templates/pod-create.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kwok.x-k8s.io/v1alpha1 2 | kind: Stage 3 | metadata: 4 | name: pod-create 5 | spec: 6 | resourceRef: 7 | apiGroup: v1 8 | kind: Pod 9 | selector: 10 | matchExpressions: 11 | - key: '.metadata.deletionTimestamp' 12 | operator: 'DoesNotExist' 13 | - key: '.status.podIP' 14 | operator: 'DoesNotExist' 15 | weight: 1 16 | weightFrom: 17 | expressionFrom: '.metadata.annotations["pod-create.stage.kwok.x-k8s.io/weight"]' 18 | delay: 19 | durationMilliseconds: 1000 20 | durationFrom: 21 | expressionFrom: '.metadata.annotations["pod-create.stage.kwok.x-k8s.io/delay"]' 22 | jitterDurationMilliseconds: 5000 23 | jitterDurationFrom: 24 | expressionFrom: '.metadata.annotations["pod-create.stage.kwok.x-k8s.io/jitter-delay"]' 25 | next: 26 | event: 27 | type: Normal 28 | reason: Created 29 | message: Created container 30 | statusTemplate: | 31 | {{ `{{ $now := Now }}` }} 32 | 33 | conditions: 34 | {{ `{{ if .spec.initContainers }}` }} 35 | - lastProbeTime: null 36 | lastTransitionTime: {{ `{{ $now | Quote }}` }} 37 | message: 'containers with incomplete status: 
[{{ `{{ range .spec.initContainers }}` }} {{ `{{ .name }}` }} {{ `{{ end }}` }}]' 38 | reason: ContainersNotInitialized 39 | status: "False" 40 | type: Initialized 41 | {{ `{{ else }}` }} 42 | - lastProbeTime: null 43 | lastTransitionTime: {{ `{{ $now | Quote }}` }} 44 | status: "True" 45 | type: Initialized 46 | {{ `{{ end }}` }} 47 | - lastProbeTime: null 48 | lastTransitionTime: {{ `{{ $now | Quote }}` }} 49 | message: 'containers with unready status: [{{ `{{ range .spec.containers }}` }} {{ `{{ .name }}` }} {{ `{{ end }}` }}]' 50 | reason: ContainersNotReady 51 | status: "False" 52 | type: Ready 53 | - lastProbeTime: null 54 | lastTransitionTime: {{ `{{ $now | Quote }}` }} 55 | message: 'containers with unready status: [{{ `{{ range .spec.containers }}` }} {{ `{{ .name }}` }} {{ `{{ end }}` }}]' 56 | reason: ContainersNotReady 57 | status: "False" 58 | type: ContainersReady 59 | {{ `{{ range .spec.readinessGates }}` }} 60 | - lastTransitionTime: {{ `{{ $now | Quote }}` }} 61 | status: "True" 62 | type: {{ `{{ .conditionType | Quote }}` }} 63 | {{ `{{ end }}` }} 64 | 65 | {{ `{{ if .spec.initContainers }}` }} 66 | initContainerStatuses: 67 | {{ `{{ range .spec.initContainers }}` }} 68 | - image: {{ `{{ .image | Quote }}` }} 69 | name: {{ `{{ .name | Quote }}` }} 70 | ready: false 71 | restartCount: 0 72 | started: false 73 | state: 74 | waiting: 75 | reason: PodInitializing 76 | {{ `{{ end }}` }} 77 | containerStatuses: 78 | {{ `{{ range .spec.containers }}` }} 79 | - image: {{ `{{ .image | Quote }}` }} 80 | name: {{ `{{ .name | Quote }}` }} 81 | ready: false 82 | restartCount: 0 83 | started: false 84 | state: 85 | waiting: 86 | reason: PodInitializing 87 | {{ `{{ end }}` }} 88 | {{ `{{ else }}` }} 89 | containerStatuses: 90 | {{ `{{ range .spec.containers }}` }} 91 | - image: {{ `{{ .image | Quote }}` }} 92 | name: {{ `{{ .name | Quote }}` }} 93 | ready: false 94 | restartCount: 0 95 | started: false 96 | state: 97 | waiting: 98 | reason: ContainerCreating 99 
| {{ `{{ end }}` }} 100 | {{ `{{ end }}` }} 101 | 102 | hostIP: {{ `{{ NodeIPWith .spec.nodeName | Quote }}` }} 103 | podIP: {{ `{{ PodIPWith .spec.nodeName ( or .spec.hostNetwork false ) ( or .metadata.uid "" ) ( or .metadata.name "" ) ( or .metadata.namespace "" ) | Quote }}` }} 104 | phase: Pending 105 | -------------------------------------------------------------------------------- /manifests/virtualcluster/nodes/templates/pod-delete.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kwok.x-k8s.io/v1alpha1 2 | kind: Stage 3 | metadata: 4 | name: pod-delete 5 | spec: 6 | resourceRef: 7 | apiGroup: v1 8 | kind: Pod 9 | selector: 10 | matchExpressions: 11 | - key: '.metadata.deletionTimestamp' 12 | operator: 'Exists' 13 | - key: '.metadata.finalizers' 14 | operator: 'DoesNotExist' 15 | weight: 1 16 | weightFrom: 17 | expressionFrom: '.metadata.annotations["pod-delete.stage.kwok.x-k8s.io/weight"]' 18 | delay: 19 | durationMilliseconds: 1000 20 | durationFrom: 21 | expressionFrom: '.metadata.annotations["pod-delete.stage.kwok.x-k8s.io/delay"]' 22 | jitterDurationFrom: 23 | expressionFrom: '.metadata.deletionTimestamp' 24 | next: 25 | delete: true 26 | -------------------------------------------------------------------------------- /manifests/virtualcluster/nodes/templates/pod-init-container-completed.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kwok.x-k8s.io/v1alpha1 2 | kind: Stage 3 | metadata: 4 | name: pod-init-container-completed 5 | spec: 6 | resourceRef: 7 | apiGroup: v1 8 | kind: Pod 9 | selector: 10 | matchExpressions: 11 | - key: '.metadata.deletionTimestamp' 12 | operator: 'DoesNotExist' 13 | - key: '.status.phase' 14 | operator: 'In' 15 | values: 16 | - 'Pending' 17 | - key: '.status.initContainerStatuses.[].state.running.startedAt' 18 | operator: 'Exists' 19 | weight: 1 20 | weightFrom: 21 | expressionFrom: 
'.metadata.annotations["pod-init-container-completed.stage.kwok.x-k8s.io/weight"]' 22 | delay: 23 | durationMilliseconds: 1000 24 | durationFrom: 25 | expressionFrom: '.metadata.annotations["pod-init-container-completed.stage.kwok.x-k8s.io/delay"]' 26 | jitterDurationMilliseconds: 5000 27 | jitterDurationFrom: 28 | expressionFrom: '.metadata.annotations["pod-init-container-completed.stage.kwok.x-k8s.io/jitter-delay"]' 29 | next: 30 | statusTemplate: | 31 | {{ `{{ $now := Now }}` }} 32 | {{ `{{ $root := . }}` }} 33 | conditions: 34 | - lastProbeTime: null 35 | lastTransitionTime: {{ `{{ $now | Quote }}` }} 36 | status: "True" 37 | reason: "" 38 | type: Initialized 39 | initContainerStatuses: 40 | {{ `{{ range $index, $item := .spec.initContainers }}` }} 41 | {{ `{{ $origin := index $root.status.initContainerStatuses $index }}` }} 42 | - image: {{ `{{ $item.image | Quote }}` }} 43 | name: {{ `{{ $item.name | Quote }}` }} 44 | ready: true 45 | restartCount: 0 46 | started: false 47 | state: 48 | terminated: 49 | exitCode: 0 50 | finishedAt: {{ `{{ $now | Quote }}` }} 51 | reason: Completed 52 | startedAt: {{ `{{ $now | Quote }}` }} 53 | {{ `{{ end }}` }} 54 | containerStatuses: 55 | {{ `{{ range .spec.containers }}` }} 56 | - image: {{ `{{ .image | Quote }}` }} 57 | name: {{ `{{ .name | Quote }}` }} 58 | ready: false 59 | restartCount: 0 60 | started: false 61 | state: 62 | waiting: 63 | reason: ContainerCreating 64 | {{ `{{ end }}` }} 65 | -------------------------------------------------------------------------------- /manifests/virtualcluster/nodes/templates/pod-init-container-running.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kwok.x-k8s.io/v1alpha1 2 | kind: Stage 3 | metadata: 4 | name: pod-init-container-running 5 | spec: 6 | resourceRef: 7 | apiGroup: v1 8 | kind: Pod 9 | selector: 10 | matchExpressions: 11 | - key: '.metadata.deletionTimestamp' 12 | operator: 'DoesNotExist' 13 | - key: '.status.phase' 
14 | operator: 'In' 15 | values: 16 | - 'Pending' 17 | - key: '.status.conditions.[] | select( .type == "Initialized" ) | .status' 18 | operator: 'NotIn' 19 | values: 20 | - 'True' 21 | - key: '.status.initContainerStatuses.[].state.waiting.reason' 22 | operator: 'Exists' 23 | weight: 1 24 | weightFrom: 25 | expressionFrom: '.metadata.annotations["pod-init-container-running.stage.kwok.x-k8s.io/weight"]' 26 | delay: 27 | durationMilliseconds: 1000 28 | durationFrom: 29 | expressionFrom: '.metadata.annotations["pod-init-container-running.stage.kwok.x-k8s.io/delay"]' 30 | jitterDurationMilliseconds: 5000 31 | jitterDurationFrom: 32 | expressionFrom: '.metadata.annotations["pod-init-container-running.stage.kwok.x-k8s.io/jitter-delay"]' 33 | next: 34 | statusTemplate: | 35 | {{ `{{ $now := Now }}` }} 36 | {{ `{{ $root := . }}` }} 37 | initContainerStatuses: 38 | {{ `{{ range $index, $item := .spec.initContainers }}` }} 39 | {{ `{{ $origin := index $root.status.initContainerStatuses $index }}` }} 40 | - image: {{ `{{ $item.image | Quote }}` }} 41 | name: {{ `{{ $item.name | Quote }}` }} 42 | ready: true 43 | restartCount: 0 44 | started: true 45 | state: 46 | running: 47 | startedAt: {{ `{{ $now | Quote }}` }} 48 | {{ `{{ end }}` }} 49 | -------------------------------------------------------------------------------- /manifests/virtualcluster/nodes/templates/pod-ready.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kwok.x-k8s.io/v1alpha1 2 | kind: Stage 3 | metadata: 4 | name: pod-ready 5 | spec: 6 | resourceRef: 7 | apiGroup: v1 8 | kind: Pod 9 | selector: 10 | matchExpressions: 11 | - key: '.metadata.deletionTimestamp' 12 | operator: 'DoesNotExist' 13 | - key: '.status.conditions.[] | select( .type == "Initialized" ) | .status' 14 | operator: 'In' 15 | values: 16 | - 'True' 17 | - key: '.status.containerStatuses.[].state.running.startedAt' 18 | operator: 'DoesNotExist' 19 | weight: 1 20 | weightFrom: 21 | 
expressionFrom: '.metadata.annotations["pod-ready.stage.kwok.x-k8s.io/weight"]' 22 | delay: 23 | durationMilliseconds: 1000 24 | durationFrom: 25 | expressionFrom: '.metadata.annotations["pod-ready.stage.kwok.x-k8s.io/delay"]' 26 | jitterDurationMilliseconds: 5000 27 | jitterDurationFrom: 28 | expressionFrom: '.metadata.annotations["pod-ready.stage.kwok.x-k8s.io/jitter-delay"]' 29 | next: 30 | delete: false 31 | statusTemplate: | 32 | {{ `{{ $now := Now }}` }} 33 | {{ `{{ $root := . }}` }} 34 | conditions: 35 | - lastProbeTime: null 36 | lastTransitionTime: {{ `{{ $now | Quote }}` }} 37 | message: '' 38 | reason: '' 39 | status: "True" 40 | type: Ready 41 | - lastProbeTime: null 42 | lastTransitionTime: {{ `{{ $now | Quote }}` }} 43 | message: '' 44 | reason: '' 45 | status: "True" 46 | type: ContainersReady 47 | containerStatuses: 48 | {{ `{{ range $index, $item := .spec.containers }}` }} 49 | {{ `{{ $origin := index $root.status.containerStatuses $index }}` }} 50 | - image: {{ `{{ $item.image | Quote }}` }} 51 | name: {{ `{{ $item.name | Quote }}` }} 52 | ready: true 53 | restartCount: 0 54 | started: true 55 | state: 56 | running: 57 | startedAt: {{ `{{ $now | Quote }}` }} 58 | {{ `{{ end }}` }} 59 | phase: Running 60 | startTime: {{ `{{ $now | Quote }}` }} 61 | -------------------------------------------------------------------------------- /manifests/virtualcluster/nodes/values.yaml: -------------------------------------------------------------------------------- 1 | name: "vc-testing" 2 | nodeLabels: {} 3 | replicas: 0 4 | cpu: 0 5 | memory: 0 6 | maxPods: 0 7 | -------------------------------------------------------------------------------- /metrics/request.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | package metrics 5 | 6 | import ( 7 | "container/list" 8 | "sync" 9 | "sync/atomic" 10 | "time" 11 | 12 | "github.com/Azure/kperf/api/types" 13 | ) 14 | 15 | // ResponseMetric is a measurement related to http response. 16 | type ResponseMetric interface { 17 | // ObserveLatency observes latency. 18 | ObserveLatency(url string, seconds float64) 19 | // ObserveFailure observes failure response. 20 | ObserveFailure(url string, now time.Time, seconds float64, err error) 21 | // ObserveReceivedBytes observes the bytes read from apiserver. 22 | ObserveReceivedBytes(bytes int64) 23 | // Gather returns the summary. 24 | Gather() types.ResponseStats 25 | } 26 | 27 | type responseMetricImpl struct { 28 | mu sync.Mutex 29 | errors *list.List 30 | receivedBytes int64 31 | latenciesByURLs map[string]*list.List 32 | } 33 | 34 | func NewResponseMetric() ResponseMetric { 35 | return &responseMetricImpl{ 36 | errors: list.New(), 37 | latenciesByURLs: map[string]*list.List{}, 38 | } 39 | } 40 | 41 | // ObserveLatency implements ResponseMetric. 42 | func (m *responseMetricImpl) ObserveLatency(url string, seconds float64) { 43 | m.mu.Lock() 44 | defer m.mu.Unlock() 45 | 46 | l, ok := m.latenciesByURLs[url] 47 | if !ok { 48 | m.latenciesByURLs[url] = list.New() 49 | l = m.latenciesByURLs[url] 50 | } 51 | l.PushBack(seconds) 52 | } 53 | 54 | // ObserveFailure implements ResponseMetric. 
55 | func (m *responseMetricImpl) ObserveFailure(url string, now time.Time, seconds float64, err error) { 56 | if err == nil { 57 | return 58 | } 59 | 60 | m.mu.Lock() 61 | defer m.mu.Unlock() 62 | 63 | oerr := types.ResponseError{ 64 | URL: url, 65 | Timestamp: now, 66 | Duration: seconds, 67 | } 68 | 69 | // HTTP Code -> HTTP2 -> Connection -> Unknown 70 | code := codeFromHTTP(err) 71 | http2Err, isHTTP2Err := isHTTP2Error(err) 72 | connErr, isConnErr := isConnectionError(err) 73 | switch { 74 | case code != 0: 75 | oerr.Type = types.ResponseErrorTypeHTTP 76 | oerr.Code = code 77 | case isHTTP2Err: 78 | oerr.Type = types.ResponseErrorTypeHTTP2Protocol 79 | oerr.Message = http2Err 80 | case isConnErr: 81 | oerr.Type = types.ResponseErrorTypeConnection 82 | oerr.Message = connErr 83 | default: 84 | oerr.Type = types.ResponseErrorTypeUnknown 85 | oerr.Message = err.Error() 86 | } 87 | m.errors.PushBack(oerr) 88 | } 89 | 90 | // ObserveReceivedBytes implements ResponseMetric. 91 | func (m *responseMetricImpl) ObserveReceivedBytes(bytes int64) { 92 | atomic.AddInt64(&m.receivedBytes, bytes) 93 | } 94 | 95 | // Gather implements ResponseMetric. 
96 | func (m *responseMetricImpl) Gather() types.ResponseStats { 97 | return types.ResponseStats{ 98 | Errors: m.dumpErrors(), 99 | LatenciesByURL: m.dumpLatencies(), 100 | TotalReceivedBytes: atomic.LoadInt64(&m.receivedBytes), 101 | } 102 | } 103 | 104 | func (m *responseMetricImpl) dumpLatencies() map[string][]float64 { 105 | m.mu.Lock() 106 | defer m.mu.Unlock() 107 | 108 | res := make(map[string][]float64) 109 | for u, latencies := range m.latenciesByURLs { 110 | res[u] = make([]float64, 0, latencies.Len()) 111 | 112 | for e := latencies.Front(); e != nil; e = e.Next() { 113 | res[u] = append(res[u], e.Value.(float64)) 114 | } 115 | } 116 | return res 117 | } 118 | 119 | func (m *responseMetricImpl) dumpErrors() []types.ResponseError { 120 | m.mu.Lock() 121 | defer m.mu.Unlock() 122 | 123 | res := make([]types.ResponseError, 0, m.errors.Len()) 124 | for e := m.errors.Front(); e != nil; e = e.Next() { 125 | res = append(res, e.Value.(types.ResponseError)) 126 | } 127 | return res 128 | } 129 | -------------------------------------------------------------------------------- /metrics/request_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | package metrics 5 | 6 | import ( 7 | "context" 8 | "errors" 9 | "fmt" 10 | "io" 11 | "syscall" 12 | "testing" 13 | "time" 14 | 15 | "github.com/Azure/kperf/api/types" 16 | 17 | "github.com/stretchr/testify/assert" 18 | "golang.org/x/net/http2" 19 | apierrors "k8s.io/apimachinery/pkg/api/errors" 20 | ) 21 | 22 | func TestResponseMetric_ObserveFailure(t *testing.T) { 23 | observedAt := time.Now() 24 | dur := 10 * time.Second 25 | 26 | expectedErrors := []types.ResponseError{ 27 | { 28 | URL: "0", 29 | Timestamp: observedAt, 30 | Duration: dur.Seconds(), 31 | Type: types.ResponseErrorTypeHTTP, 32 | Code: 429, 33 | }, 34 | { 35 | URL: "1", 36 | Timestamp: observedAt, 37 | Duration: dur.Seconds(), 38 | Type: types.ResponseErrorTypeHTTP, 39 | Code: 500, 40 | }, 41 | { 42 | URL: "2", 43 | Timestamp: observedAt, 44 | Duration: dur.Seconds(), 45 | Type: types.ResponseErrorTypeHTTP, 46 | Code: 504, 47 | }, 48 | { 49 | URL: "3", 50 | Timestamp: observedAt, 51 | Duration: dur.Seconds(), 52 | Type: types.ResponseErrorTypeHTTP2Protocol, 53 | Message: "http2: server sent GOAWAY and closed the connection; ErrCode=NO_ERROR, debug=", 54 | }, 55 | { 56 | URL: "4", 57 | Timestamp: observedAt, 58 | Duration: dur.Seconds(), 59 | Type: types.ResponseErrorTypeHTTP2Protocol, 60 | Message: "http2: server sent GOAWAY and closed the connection; ErrCode=PROTOCOL_ERROR, debug=", 61 | }, 62 | { 63 | URL: "5", 64 | Timestamp: observedAt, 65 | Duration: dur.Seconds(), 66 | Type: types.ResponseErrorTypeHTTP2Protocol, 67 | Message: "http2: client connection lost", 68 | }, 69 | { 70 | URL: "6", 71 | Timestamp: observedAt, 72 | Duration: dur.Seconds(), 73 | Type: types.ResponseErrorTypeHTTP2Protocol, 74 | Message: "http2: client connection lost", 75 | }, 76 | { 77 | URL: "7", 78 | Timestamp: observedAt, 79 | Duration: dur.Seconds(), 80 | Type: types.ResponseErrorTypeHTTP2Protocol, 81 | Message: http2.ErrCode(10).String(), 82 | }, 83 | { 84 | URL: "8", 85 | Timestamp: observedAt, 86 | Duration: 
dur.Seconds(), 87 | Type: types.ResponseErrorTypeConnection, 88 | Message: "net/http: TLS handshake timeout", 89 | }, 90 | { 91 | URL: "9", 92 | Timestamp: observedAt, 93 | Duration: dur.Seconds(), 94 | Type: types.ResponseErrorTypeConnection, 95 | Message: "net/http: TLS handshake timeout", 96 | }, 97 | { 98 | URL: "10", 99 | Timestamp: observedAt, 100 | Duration: dur.Seconds(), 101 | Type: types.ResponseErrorTypeConnection, 102 | Message: "context deadline exceeded", 103 | }, 104 | { 105 | URL: "11", 106 | Timestamp: observedAt, 107 | Duration: dur.Seconds(), 108 | Type: types.ResponseErrorTypeConnection, 109 | Message: syscall.ECONNRESET.Error(), 110 | }, 111 | { 112 | URL: "12", 113 | Timestamp: observedAt, 114 | Duration: dur.Seconds(), 115 | Type: types.ResponseErrorTypeConnection, 116 | Message: syscall.ECONNREFUSED.Error(), 117 | }, 118 | { 119 | URL: "13", 120 | Timestamp: observedAt, 121 | Duration: dur.Seconds(), 122 | Type: types.ResponseErrorTypeConnection, 123 | Message: io.ErrUnexpectedEOF.Error(), 124 | }, 125 | { 126 | URL: "14", 127 | Timestamp: observedAt, 128 | Duration: dur.Seconds(), 129 | Type: types.ResponseErrorTypeUnknown, 130 | Message: "unknown", 131 | }, 132 | } 133 | 134 | errs := []error{ 135 | // http code 136 | apierrors.NewTooManyRequestsError("retry it later"), 137 | apierrors.NewInternalError(errors.New("oops")), 138 | apierrors.NewTimeoutError("timeout in test", 100), 139 | // http2 140 | http2.GoAwayError{ 141 | LastStreamID: 1000, 142 | ErrCode: 0, 143 | }, 144 | fmt.Errorf("oops: %w", 145 | http2.GoAwayError{ 146 | LastStreamID: 1000, 147 | ErrCode: 1, 148 | }, 149 | ), 150 | errHTTP2ClientConnectionLost, 151 | fmt.Errorf("oops: %w", errHTTP2ClientConnectionLost), 152 | http2.StreamError{ 153 | StreamID: 100, 154 | Code: 10, 155 | }, 156 | // net 157 | errTLSHandshakeTimeout, 158 | fmt.Errorf("oops: %w", errTLSHandshakeTimeout), 159 | context.DeadlineExceeded, // i/o timeout 160 | fmt.Errorf("oops: %w", syscall.ECONNRESET), 
161 | fmt.Errorf("oops: %w", syscall.ECONNREFUSED), 162 | fmt.Errorf("oops: %w", io.ErrUnexpectedEOF), 163 | // unknown 164 | fmt.Errorf("unknown"), 165 | } 166 | 167 | m := NewResponseMetric() 168 | for idx, err := range errs { 169 | m.ObserveFailure(fmt.Sprintf("%d", idx), observedAt, dur.Seconds(), err) 170 | } 171 | errors := m.Gather().Errors 172 | assert.Equal(t, expectedErrors, errors) 173 | } 174 | -------------------------------------------------------------------------------- /metrics/utils.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package metrics 5 | 6 | import ( 7 | "errors" 8 | "fmt" 9 | "io" 10 | "math" 11 | "net" 12 | "net/http" 13 | "sort" 14 | "strings" 15 | "syscall" 16 | 17 | "github.com/Azure/kperf/api/types" 18 | "golang.org/x/net/http2" 19 | apierrors "k8s.io/apimachinery/pkg/api/errors" 20 | ) 21 | 22 | // BuildPercentileLatencies builds percentile latencies. 23 | func BuildPercentileLatencies(latencies []float64) [][2]float64 { 24 | if len(latencies) == 0 { 25 | return nil 26 | } 27 | 28 | var percentiles = []float64{0, 0.5, 0.90, 0.95, 0.99, 1} 29 | 30 | res := make([][2]float64, len(percentiles)) 31 | 32 | n := len(latencies) 33 | sort.Float64s(latencies) 34 | for pi, pv := range percentiles { 35 | idx := int(math.Ceil(float64(n) * pv)) 36 | if idx > 0 { 37 | idx-- 38 | } 39 | res[pi] = [2]float64{pv, latencies[idx]} 40 | } 41 | return res 42 | } 43 | 44 | // BuildErrorStatsGroupByType summaries total count for each type of errors. 
45 | func BuildErrorStatsGroupByType(errors []types.ResponseError) map[string]int32 { 46 | res := map[string]int32{} 47 | 48 | for _, err := range errors { 49 | var key string 50 | switch err.Type { 51 | case types.ResponseErrorTypeHTTP: 52 | key = fmt.Sprintf("%s/%d", err.Type, err.Code) 53 | default: 54 | key = fmt.Sprintf("%s/%s", err.Type, err.Message) 55 | } 56 | res[key]++ 57 | } 58 | return res 59 | } 60 | 61 | var ( 62 | // errHTTP2ClientConnectionLost is used to track unexported http2 error. 63 | errHTTP2ClientConnectionLost = errors.New("http2: client connection lost") 64 | 65 | // errTLSHandshakeTimeout is used to track unexported tlsHandshakeTimeoutError from net/http. 66 | errTLSHandshakeTimeout = errors.New("net/http: TLS handshake timeout") 67 | ) 68 | 69 | // codeFromHTTP parses error to get http code. 70 | func codeFromHTTP(err error) int { 71 | if err == nil { 72 | return 0 73 | } 74 | 75 | switch { 76 | case apierrors.IsBadRequest(err): 77 | return http.StatusBadRequest // 400 78 | case apierrors.IsUnauthorized(err): 79 | return http.StatusUnauthorized // 401 80 | case apierrors.IsForbidden(err): 81 | return http.StatusForbidden // 403 82 | case apierrors.IsNotFound(err): 83 | return http.StatusNotFound // 404 84 | case apierrors.IsMethodNotSupported(err): 85 | return http.StatusMethodNotAllowed // 405 86 | case apierrors.IsNotAcceptable(err): 87 | return http.StatusNotAcceptable // 406 88 | case apierrors.IsAlreadyExists(err): 89 | return http.StatusConflict // 409 90 | case apierrors.IsGone(err): 91 | return http.StatusGone // 410 92 | case apierrors.IsRequestEntityTooLargeError(err): 93 | return http.StatusRequestEntityTooLarge // 413 94 | case apierrors.IsUnsupportedMediaType(err): 95 | return http.StatusUnsupportedMediaType // 415 96 | case apierrors.IsInvalid(err): 97 | return http.StatusUnprocessableEntity // 422 98 | case apierrors.IsTooManyRequests(err): 99 | return http.StatusTooManyRequests // 429 100 | case 
apierrors.IsInternalError(err): 101 | return http.StatusInternalServerError // 500 102 | case apierrors.IsServiceUnavailable(err): 103 | return http.StatusServiceUnavailable // 503 104 | case apierrors.IsTimeout(err): 105 | return http.StatusGatewayTimeout // 504 106 | default: 107 | if status, ok := err.(apierrors.APIStatus); ok || errors.As(err, &status) { 108 | return int(status.Status().Code) 109 | } 110 | return 0 111 | } 112 | } 113 | 114 | // isHTTP2Error returns true if it's related to http2 error. 115 | func isHTTP2Error(err error) (string, bool) { 116 | if err == nil { 117 | return "", false 118 | } 119 | 120 | if connErr, ok := err.(http2.ConnectionError); ok || errors.As(err, &connErr) { 121 | return (http2.ErrCode(connErr)).String(), true 122 | } 123 | 124 | if streamErr, ok := err.(http2.StreamError); ok || errors.As(err, &streamErr) { 125 | return streamErr.Code.String(), true 126 | } 127 | 128 | if connErr, ok := err.(http2.GoAwayError); ok || errors.As(err, &connErr) { 129 | return fmt.Sprintf("http2: server sent GOAWAY and closed the connection; ErrCode=%v, debug=%s", 130 | connErr.ErrCode, connErr.DebugData), true 131 | } 132 | 133 | if strings.Contains(err.Error(), errHTTP2ClientConnectionLost.Error()) { 134 | return errHTTP2ClientConnectionLost.Error(), true 135 | } 136 | return "", false 137 | } 138 | 139 | // isConnectionError returns true if it's related to connection error. 
140 | func isConnectionError(err error) (string, bool) { 141 | if err == nil { 142 | return "", false 143 | } 144 | 145 | switch { 146 | case isTimeoutError(err): 147 | return err.Error(), true 148 | case isConnectionRefused(err): 149 | return syscall.ECONNREFUSED.Error(), true 150 | case isConnectionResetByPeer(err): 151 | return syscall.ECONNRESET.Error(), true 152 | case errors.Is(err, io.ErrUnexpectedEOF): 153 | return io.ErrUnexpectedEOF.Error(), true 154 | case errors.Is(err, io.EOF): 155 | return io.EOF.Error(), true 156 | case strings.Contains(err.Error(), errTLSHandshakeTimeout.Error()): 157 | return errTLSHandshakeTimeout.Error(), true 158 | default: 159 | return "", false 160 | } 161 | } 162 | 163 | // isTimeoutError returns true if it's related to golang standard library 164 | // net's timeout error. 165 | func isTimeoutError(err error) bool { 166 | if err == nil { 167 | return false 168 | } 169 | 170 | terr, ok := err.(net.Error) 171 | if !ok { 172 | if !errors.As(err, &terr) { 173 | return false 174 | } 175 | } 176 | return terr.Timeout() 177 | } 178 | 179 | // isConnectionRefused returns true if the error is connection refused 180 | func isConnectionRefused(err error) bool { 181 | if err == nil { 182 | return false 183 | } 184 | 185 | var errno syscall.Errno 186 | if errors.As(err, &errno) { 187 | return errno == syscall.ECONNREFUSED 188 | } 189 | return false 190 | } 191 | 192 | // isConnectionResetByPeer returns true if the error is "connection reset by peer". 193 | func isConnectionResetByPeer(err error) bool { 194 | if err == nil { 195 | return false 196 | } 197 | 198 | var errno syscall.Errno 199 | if errors.As(err, &errno) { 200 | return errno == syscall.ECONNRESET 201 | } 202 | return false 203 | } 204 | -------------------------------------------------------------------------------- /metrics/utils_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 
2 | // Licensed under the MIT License. 3 | 4 | package metrics 5 | 6 | import ( 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func TestBuildPercentileLatencies(t *testing.T) { 13 | ls := make([]float64, 100) 14 | ls[0] = 50 15 | ls[1] = 49 16 | ls[2] = 1 17 | res := BuildPercentileLatencies(ls) 18 | assert.Equal(t, [2]float64{0, 0}, res[0]) 19 | assert.Equal(t, [2]float64{0.5, 0}, res[1]) 20 | assert.Equal(t, [2]float64{0.9, 0}, res[2]) 21 | assert.Equal(t, [2]float64{0.95, 0}, res[3]) 22 | assert.Equal(t, [2]float64{0.99, 49}, res[4]) 23 | assert.Equal(t, [2]float64{1, 50}, res[5]) 24 | 25 | ls = make([]float64, 1000) 26 | ls[0] = 50 27 | ls[1] = 49 28 | ls[2] = -1 29 | res = BuildPercentileLatencies(ls) 30 | assert.Equal(t, [2]float64{0, -1}, res[0]) 31 | assert.Equal(t, [2]float64{0.5, 0}, res[1]) 32 | assert.Equal(t, [2]float64{0.9, 0}, res[2]) 33 | assert.Equal(t, [2]float64{0.95, 0}, res[3]) 34 | assert.Equal(t, [2]float64{0.99, 0}, res[4]) 35 | assert.Equal(t, [2]float64{1, 50}, res[5]) 36 | } 37 | -------------------------------------------------------------------------------- /portforward/portforward.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package portforward 5 | 6 | import ( 7 | "context" 8 | "fmt" 9 | "net/http" 10 | "net/url" 11 | "time" 12 | 13 | corev1 "k8s.io/api/core/v1" 14 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 15 | "k8s.io/client-go/kubernetes" 16 | "k8s.io/client-go/rest" 17 | "k8s.io/client-go/tools/clientcmd" 18 | kubepf "k8s.io/client-go/tools/portforward" 19 | "k8s.io/client-go/transport/spdy" 20 | "k8s.io/klog/v2" 21 | ) 22 | 23 | // PodPortForwarder is used to forward traffic to specific pod's TCP port from 24 | // local listener. 25 | type PodPortForwarder struct { 26 | // targetPort is the target TCP port. 
27 | targetPort uint16 28 | // portforwardURL is the pod's portforward URL. 29 | portforwardURL *url.URL 30 | // restCfg is used to create spdy transport. 31 | restCfg *rest.Config 32 | 33 | portForwarder *kubepf.PortForwarder 34 | } 35 | 36 | // NewPodPortForwarder return a new instance of PodPortForwarder. 37 | func NewPodPortForwarder(kubeCfgPath string, namespace, podName string, targetPort uint16) (*PodPortForwarder, error) { 38 | restCfg, err := clientcmd.BuildConfigFromFlags("", kubeCfgPath) 39 | if err != nil { 40 | return nil, err 41 | } 42 | restCfg.ContentType = "application/vnd.kubernetes.protobuf" 43 | 44 | restCli, err := kubernetes.NewForConfig(restCfg) 45 | if err != nil { 46 | return nil, err 47 | } 48 | 49 | if err := ensurePodIsRunning(restCli, namespace, podName); err != nil { 50 | return nil, err 51 | } 52 | 53 | u := restCli.CoreV1().RESTClient().Post(). 54 | Namespace(namespace). 55 | Resource("pods"). 56 | Name(podName). 57 | SubResource("portforward").URL() 58 | 59 | return &PodPortForwarder{ 60 | targetPort: targetPort, 61 | portforwardURL: u, 62 | restCfg: restCfg, 63 | }, nil 64 | } 65 | 66 | // Start is to start local listener to forward traffic. 67 | func (pf *PodPortForwarder) Start() error { 68 | transport, upgrader, err := spdy.RoundTripperFor(pf.restCfg) 69 | if err != nil { 70 | return fmt.Errorf("failed to create spdy transport: %w", err) 71 | } 72 | 73 | dialer := spdy.NewDialer( 74 | upgrader, 75 | &http.Client{Transport: transport}, 76 | "POST", 77 | pf.portforwardURL, 78 | ) 79 | 80 | startCh := make(chan struct{}) 81 | 82 | // pick available local port randomly. 
83 | kubePortForwarder, err := kubepf.New( 84 | dialer, 85 | []string{fmt.Sprintf("0:%d", pf.targetPort)}, 86 | nil, 87 | startCh, 88 | &debugLogger{}, 89 | &debugLogger{}, 90 | ) 91 | if err != nil { 92 | return fmt.Errorf("failed to init kube port forward: %w", err) 93 | } 94 | 95 | errCh := make(chan error, 1) 96 | go func() { 97 | errCh <- kubePortForwarder.ForwardPorts() 98 | }() 99 | 100 | select { 101 | case <-startCh: 102 | case err := <-errCh: 103 | return fmt.Errorf("failed to start kube port forward: %w", err) 104 | case <-time.After(120 * time.Second): 105 | return fmt.Errorf("timeout to start kube port forward") 106 | } 107 | 108 | pf.portForwarder = kubePortForwarder 109 | return nil 110 | } 111 | 112 | // GetLocalPort returns the local listener's port. 113 | func (pf *PodPortForwarder) GetLocalPort() (uint16, error) { 114 | if pf.portForwarder == nil { 115 | return 0, fmt.Errorf("kube port forwarder doesn't start") 116 | } 117 | 118 | ports, err := pf.portForwarder.GetPorts() 119 | if err != nil { 120 | return 0, fmt.Errorf("failed to get local port: %w", err) 121 | } 122 | return ports[0].Local, nil 123 | } 124 | 125 | // Stop stops port forward. 126 | func (pf *PodPortForwarder) Stop() { 127 | defer klog.Flush() 128 | if pf.portForwarder != nil { 129 | pf.portForwarder.Close() 130 | } 131 | } 132 | 133 | // ensurePodIsRunning is to check if the target pod is still running. 134 | func ensurePodIsRunning(restCli kubernetes.Interface, namespace, podName string) error { 135 | pod, err := restCli.CoreV1(). 136 | Pods(namespace). 
137 | Get(context.TODO(), podName, metav1.GetOptions{}) 138 | if err != nil { 139 | return fmt.Errorf("failed to ensure if %s in %s exists: %w", 140 | podName, namespace, err) 141 | } 142 | 143 | if pod.Status.Phase != corev1.PodRunning { 144 | return fmt.Errorf("unable to forward port because pod is not running (status=%s)", pod.Status.Phase) 145 | } 146 | return nil 147 | } 148 | 149 | type debugLogger struct{} 150 | 151 | func (l *debugLogger) Write(data []byte) (int, error) { 152 | klog.V(2).InfoS(string(data)) 153 | return len(data), nil 154 | } 155 | -------------------------------------------------------------------------------- /request/client.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package request 5 | 6 | import ( 7 | "fmt" 8 | "math" 9 | "net/http" 10 | 11 | "github.com/Azure/kperf/api/types" 12 | "github.com/Azure/kperf/request/unstructuredscheme" 13 | 14 | "k8s.io/client-go/rest" 15 | "k8s.io/client-go/tools/clientcmd" 16 | ) 17 | 18 | // NewClients creates N rest.Interface. 19 | // 20 | // FIXME(weifu): 21 | // 22 | // 1. Is it possible to build one http2 client with multiple connections? 23 | // 2. How to monitor HTTP2 GOAWAY frame? 24 | func NewClients(kubeCfgPath string, connsNum int, opts ...ClientCfgOpt) ([]rest.Interface, error) { 25 | var cfg = defaultClientCfg 26 | for _, opt := range opts { 27 | opt(&cfg) 28 | } 29 | 30 | restCfg, err := clientcmd.BuildConfigFromFlags("", kubeCfgPath) 31 | if err != nil { 32 | return nil, err 33 | } 34 | restCfg.NegotiatedSerializer = unstructuredscheme.NewNegotiatedSerializer() 35 | 36 | // NOTE: 37 | // 38 | // Make transport uncacheable. With default proxy function, client-go 39 | // will create new transport even if multiple clients use the same TLS 40 | // configuration. If not, all the clients will share one transport. 
41 | // If protocol is HTTP2, there will be only one connection. 42 | // 43 | // REF: https://github.com/kubernetes/client-go/blob/c5938c6876a62f53c1f4ee55b879ca5c74253ae8/transport/cache.go#L154 44 | restCfg.Proxy = http.ProxyFromEnvironment 45 | 46 | err = cfg.apply(restCfg) 47 | if err != nil { 48 | return nil, err 49 | } 50 | 51 | restClients := make([]rest.Interface, 0, connsNum) 52 | for i := 0; i < connsNum; i++ { 53 | cfgShallowCopy := *restCfg 54 | 55 | restCli, err := rest.UnversionedRESTClientFor(&cfgShallowCopy) 56 | if err != nil { 57 | return nil, err 58 | } 59 | restClients = append(restClients, restCli) 60 | } 61 | return restClients, nil 62 | } 63 | 64 | // defaultClientCfg is default setting for http client. 65 | var defaultClientCfg = clientCfg{ 66 | qps: float64(math.MaxInt32), 67 | contentType: types.ContentTypeJSON, 68 | } 69 | 70 | type clientCfg struct { 71 | userAgent string 72 | qps float64 73 | contentType types.ContentType 74 | disableHTTP2 bool 75 | } 76 | 77 | // apply sets value to k8s.io/client-go/rest.Config. 78 | func (cfg *clientCfg) apply(restCfg *rest.Config) error { 79 | // set qps 80 | restCfg.QPS = float32(cfg.qps) 81 | 82 | // set user agent 83 | restCfg.UserAgent = cfg.userAgent 84 | if restCfg.UserAgent == "" { 85 | restCfg.UserAgent = rest.DefaultKubernetesUserAgent() 86 | } 87 | 88 | // set the content type 89 | switch cfg.contentType { 90 | case types.ContentTypeJSON: 91 | restCfg.ContentType = "application/json" 92 | case types.ContentTypeProtobuffer: 93 | restCfg.ContentType = "application/vnd.kubernetes.protobuf" 94 | default: 95 | return fmt.Errorf("invalid content type: %s", cfg.contentType) 96 | } 97 | 98 | // disable HTTP2 99 | if cfg.disableHTTP2 { 100 | restCfg.NextProtos = []string{"http/1.1"} 101 | } 102 | return nil 103 | } 104 | 105 | // ClientCfgOpt is used to update default client setting. 106 | type ClientCfgOpt func(*clientCfg) 107 | 108 | // WithClientQPSOpt updates QPS value. 
109 | func WithClientQPSOpt(qps float64) ClientCfgOpt { 110 | return func(cfg *clientCfg) { 111 | if qps > 0 { 112 | cfg.qps = qps 113 | } 114 | } 115 | } 116 | 117 | // WithClientUserAgentOpt updates user agent. 118 | func WithClientUserAgentOpt(ua string) ClientCfgOpt { 119 | return func(cfg *clientCfg) { 120 | cfg.userAgent = ua 121 | } 122 | } 123 | 124 | // WithClientContentTypeOpt updates content type of response. 125 | func WithClientContentTypeOpt(ct types.ContentType) ClientCfgOpt { 126 | return func(cfg *clientCfg) { 127 | cfg.contentType = ct 128 | } 129 | } 130 | 131 | // WithClientDisableHTTP2Opt disables HTTP2 protocol. 132 | func WithClientDisableHTTP2Opt(b bool) ClientCfgOpt { 133 | return func(cfg *clientCfg) { 134 | cfg.disableHTTP2 = b 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /request/client_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package request 5 | 6 | import ( 7 | "fmt" 8 | "testing" 9 | 10 | "github.com/stretchr/testify/assert" 11 | "k8s.io/client-go/tools/metrics" 12 | ) 13 | 14 | type transportCacheTracker struct{} 15 | 16 | // Increment implements k8s.io/client-go/tools/metrics.TransportCreateCallsMetric interface. 
17 | func (t *transportCacheTracker) Increment(result string) { 18 | if result != "uncacheable" { 19 | panic(fmt.Errorf("unexpected use cache transport: %s", result)) 20 | } 21 | fmt.Printf("transport cache: %s\n", result) 22 | } 23 | 24 | func init() { 25 | metrics.Register(metrics.RegisterOpts{ 26 | TransportCreateCalls: &transportCacheTracker{}, 27 | }) 28 | } 29 | 30 | func TestNewClientShouldNotPanic(t *testing.T) { 31 | defer func() { 32 | if r := recover(); r != nil { 33 | t.Fatalf("should not reuse transport: %v", r) 34 | } 35 | }() 36 | _, err := NewClients("testdata/dummy_nonexistent_kubeconfig.yaml", 10) 37 | assert.NoError(t, err) 38 | } 39 | -------------------------------------------------------------------------------- /request/requester.go: -------------------------------------------------------------------------------- 1 | package request 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "net/url" 8 | "reflect" 9 | "time" 10 | _ "unsafe" // unsafe to use internal function from client-go 11 | 12 | "k8s.io/apimachinery/pkg/runtime/schema" 13 | "k8s.io/apimachinery/pkg/watch" 14 | "k8s.io/client-go/rest" 15 | "k8s.io/client-go/tools/cache" 16 | "k8s.io/utils/clock" 17 | ) 18 | 19 | type Requester interface { 20 | Method() string 21 | URL() *url.URL 22 | Timeout(time.Duration) 23 | Do(context.Context) (bytes int64, err error) 24 | } 25 | 26 | type BaseRequester struct { 27 | method string 28 | req *rest.Request 29 | } 30 | 31 | func (reqr *BaseRequester) Method() string { 32 | return reqr.method 33 | } 34 | 35 | func (reqr *BaseRequester) URL() *url.URL { 36 | return reqr.req.URL() 37 | } 38 | 39 | func (reqr *BaseRequester) Timeout(timeout time.Duration) { 40 | reqr.req.Timeout(timeout) 41 | } 42 | 43 | type DiscardRequester struct { 44 | BaseRequester 45 | } 46 | 47 | func (reqr *DiscardRequester) Do(ctx context.Context) (bytes int64, err error) { 48 | respBody, err := reqr.req.Stream(ctx) 49 | if err != nil { 50 | return 0, err 51 | } 52 | defer 
respBody.Close() 53 | 54 | return io.Copy(io.Discard, respBody) 55 | } 56 | 57 | type WatchListRequester struct { 58 | BaseRequester 59 | } 60 | 61 | func (reqr *WatchListRequester) Do(ctx context.Context) (zero int64, _ error) { 62 | cl := clock.RealClock{} 63 | temporaryStore := cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc) 64 | 65 | start := time.Now() 66 | 67 | w, err := reqr.req.Watch(ctx) 68 | if err != nil { 69 | return zero, err 70 | } 71 | watchListBookmarkReceived, err := handleAnyWatch(start, w, temporaryStore, nil, nil, "", "", func(_ string) {}, true, cl, make(chan error), ctx.Done()) 72 | w.Stop() 73 | if err != nil { 74 | return zero, err 75 | } 76 | 77 | if watchListBookmarkReceived { 78 | return zero, nil 79 | } 80 | return zero, fmt.Errorf("don't receive bookmark") 81 | } 82 | 83 | //go:linkname handleAnyWatch k8s.io/client-go/tools/cache.handleAnyWatch 84 | func handleAnyWatch(start time.Time, 85 | w watch.Interface, 86 | store cache.Store, 87 | expectedType reflect.Type, 88 | expectedGVK *schema.GroupVersionKind, 89 | name string, 90 | expectedTypeName string, 91 | setLastSyncResourceVersion func(string), 92 | exitOnWatchListBookmarkReceived bool, 93 | clock clock.Clock, 94 | errCh chan error, 95 | stopCh <-chan struct{}, 96 | ) (bool, error) 97 | -------------------------------------------------------------------------------- /request/schedule.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
package request

import (
	"context"
	"errors"
	"math"
	"sync"
	"time"

	"github.com/Azure/kperf/api/types"
	"github.com/Azure/kperf/metrics"

	"golang.org/x/net/http2"
	"golang.org/x/time/rate"
	"k8s.io/client-go/rest"
	"k8s.io/klog/v2"
)

// defaultTimeout is the per-request timeout set on every request before it
// is issued.
const defaultTimeout = 60 * time.Second

// Result contains ResponseStats values from Gather() and adds Duration and
// Total values separately.
type Result struct {
	types.ResponseStats
	// Duration means the time of benchmark.
	Duration time.Duration
	// Total means the total number of requests.
	Total int
}

// Schedule files requests to apiserver based on LoadProfileSpec.
//
// It starts spec.Client worker goroutines (falling back to spec.Conns when
// spec.Client is zero) which share the restCli connections round-robin, pull
// request builders from the weighted-random generator, and throttle through
// one shared token bucket (spec.Rate; unlimited when 0). Latency, received
// bytes and failures are aggregated into the returned Result.
func Schedule(ctx context.Context, spec *types.LoadProfileSpec, restCli []rest.Interface) (*Result, error) {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	rndReqs, err := NewWeightedRandomRequests(spec)
	if err != nil {
		return nil, err
	}

	qps := spec.Rate
	if qps == 0 {
		qps = float64(math.MaxInt32)
	}
	limiter := rate.NewLimiter(rate.Limit(qps), 1)

	clients := spec.Client
	if clients == 0 {
		clients = spec.Conns
	}

	reqBuilderCh := rndReqs.Chan()
	var wg sync.WaitGroup

	respMetric := metrics.NewResponseMetric()
	for i := 0; i < clients; i++ {
		// reuse connection if clients > conns
		cli := restCli[i%len(restCli)]
		wg.Add(1)
		go func(cli rest.Interface) {
			defer wg.Done()

			for builder := range reqBuilderCh {
				req := builder.Build(cli)

				// Blocks until a token is available; fails only when ctx
				// is cancelled, in which case the whole run is torn down.
				if err := limiter.Wait(ctx); err != nil {
					klog.V(5).Infof("Rate limiter wait failed: %v", err)
					cancel()
					return
				}

				klog.V(5).Infof("Request URL: %s", req.URL())

				req.Timeout(defaultTimeout)
				func() {
					start := time.Now()

					var bytes int64
					// NOTE(review): uses context.Background(), so an
					// in-flight request is bounded only by defaultTimeout
					// rather than by ctx cancellation — presumably so a
					// started request is always allowed to finish; confirm.
					bytes, err := req.Do(context.Background())
					// Based on HTTP2 Spec Section 8.1 [1],
					//
					// A server can send a complete response prior to the client
					// sending an entire request if the response does not depend
					// on any portion of the request that has not been sent and
					// received. When this is true, a server MAY request that the
					// client abort transmission of a request without error by
					// sending a RST_STREAM with an error code of NO_ERROR after
					// sending a complete response (i.e., a frame with the END_STREAM
					// flag). Clients MUST NOT discard responses as a result of receiving
					// such a RST_STREAM, though clients can always discard responses
					// at their discretion for other reasons.
					//
					// We should mark NO_ERROR as nil here.
					//
					// [1]: https://httpwg.org/specs/rfc7540.html#HttpSequence
					if err != nil && isHTTP2StreamNoError(err) {
						err = nil
					}

					end := time.Now()
					latency := end.Sub(start).Seconds()

					respMetric.ObserveReceivedBytes(bytes)
					if err != nil {
						respMetric.ObserveFailure(req.URL().String(), end, latency, err)
						klog.V(5).Infof("Request stream failed: %v", err)
						return
					}
					respMetric.ObserveLatency(req.URL().String(), latency)
				}()
			}
		}(cli)
	}

	klog.V(2).InfoS("Setting",
		"clients", clients,
		"connections", len(restCli),
		"rate", qps,
		"total", spec.Total,
		"duration", spec.Duration,
		"http2", !spec.DisableHTTP2,
		"content-type", spec.ContentType,
	)

	start := time.Now()

	if spec.Duration > 0 {
		// If duration is set, we will run for duration.
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, time.Duration(spec.Duration)*time.Second)
		defer cancel()
	}
	rndReqs.Run(ctx, spec.Total)

	// Run has returned; closing the generator ends the workers' range
	// loops, then wait for all in-flight requests to drain.
	rndReqs.Stop()
	wg.Wait()

	totalDuration := time.Since(start)
	responseStats := respMetric.Gather()
	return &Result{
		ResponseStats: responseStats,
		Duration:      totalDuration,
		Total:         spec.Total,
	}, nil
}

// isHTTP2StreamNoError returns true if it's NO_ERROR.
func isHTTP2StreamNoError(err error) bool {
	if err == nil {
		return false
	}

	if streamErr, ok := err.(http2.StreamError); ok || errors.As(err, &streamErr) {
		return streamErr.Code == http2.ErrCodeNo
	}
	return false
}
package unstructuredscheme

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/runtime/serializer/json"
)

var (
	// scheme only registers the meta v1 types; everything else is handled
	// by the unstructured fallbacks below.
	scheme = runtime.NewScheme()
)

func init() {
	metav1.AddToGroupVersion(scheme, schema.GroupVersion{Version: "v1"})
}

// NewNegotiatedSerializer returns a JSON-only NegotiatedSerializer that can
// decode arbitrary group/version/kinds by falling back to unstructured
// objects when the kind is not registered in the local scheme.
func NewNegotiatedSerializer() runtime.NegotiatedSerializer {
	return &negotiatedSerializer{}
}

type negotiatedSerializer struct{}

// SupportedMediaTypes implements runtime.NegotiatedSerializer; only
// application/json (with streaming support) is offered.
func (s negotiatedSerializer) SupportedMediaTypes() []runtime.SerializerInfo {
	return []runtime.SerializerInfo{
		{
			MediaType:        "application/json",
			MediaTypeType:    "application",
			MediaTypeSubType: "json",
			EncodesAsText:    true,
			Serializer:       json.NewSerializer(json.DefaultMetaFactory, creator{scheme}, typer{scheme}, false),
			StreamSerializer: &runtime.StreamSerializerInfo{
				EncodesAsText: true,
				Serializer:    json.NewSerializer(json.DefaultMetaFactory, scheme, scheme, false),
				Framer:        json.Framer,
			},
		},
	}
}

// EncoderForVersion implements runtime.NegotiatedSerializer.
func (s negotiatedSerializer) EncoderForVersion(encoder runtime.Encoder, gv runtime.GroupVersioner) runtime.Encoder {
	return runtime.WithVersionEncoder{
		Version:     gv,
		Encoder:     encoder,
		ObjectTyper: typer{scheme},
	}
}

// DecoderToVersion implements runtime.NegotiatedSerializer; no version
// conversion is performed.
func (s negotiatedSerializer) DecoderToVersion(decoder runtime.Decoder, _ runtime.GroupVersioner) runtime.Decoder {
	return decoder
}

// creator wraps an ObjectCreater and falls back to an Unstructured object
// for kinds the underlying creator does not know.
type creator struct {
	objCreator runtime.ObjectCreater
}

// New returns a typed object when the kind is registered, otherwise an
// *unstructured.Unstructured stamped with the requested kind.
func (c creator) New(kind schema.GroupVersionKind) (runtime.Object, error) {
	obj, err := c.objCreator.New(kind)
	if err == nil {
		return obj, nil
	}

	obj = &unstructured.Unstructured{}
	obj.GetObjectKind().SetGroupVersionKind(kind)
	return obj, nil
}

// typer wraps an ObjectTyper and falls back to the object's own
// GroupVersionKind for unstructured objects.
type typer struct {
	typer runtime.ObjectTyper
}

// ObjectKinds returns the kinds known to the wrapped typer, or the
// object's self-declared kind for unstructured objects.
func (t typer) ObjectKinds(obj runtime.Object) ([]schema.GroupVersionKind, bool, error) {
	kinds, unversioned, err := t.typer.ObjectKinds(obj)
	if err == nil {
		return kinds, unversioned, nil
	}

	if _, ok := obj.(runtime.Unstructured); ok && !obj.GetObjectKind().GroupVersionKind().Empty() {
		return []schema.GroupVersionKind{obj.GetObjectKind().GroupVersionKind()}, false, nil
	}
	return nil, false, err
}

// Recognizes implements runtime.ObjectTyper; every kind is accepted since
// unknown kinds decode into unstructured objects.
func (t typer) Recognizes(_ schema.GroupVersionKind) bool {
	return true
}
42 | func NewRunnerGroupSpecFromURI(clientset kubernetes.Interface, specURI string) (*types.RunnerGroupSpec, error) { 43 | u, err := url.Parse(specURI) 44 | if err != nil { 45 | return nil, fmt.Errorf("invalid runner group uri %s: %w", specURI, err) 46 | } 47 | 48 | switch typ := SpecURIType(u.Scheme); typ { 49 | case SpecURITypeFile: 50 | return parseRunnerGroupSpecFromFile(u.Path) 51 | case SpecURITypeConfigMap: 52 | var ( 53 | namespace = "default" 54 | specName = "spec" 55 | ) 56 | 57 | if ns := u.Query().Get("namespace"); len(ns) > 0 { 58 | namespace = ns 59 | } 60 | 61 | if name := u.Query().Get("specName"); len(name) > 0 { 62 | specName = name 63 | } 64 | return parseRunnerGroupSpecFromConfigMap(clientset, namespace, u.Host, specName) 65 | default: 66 | return nil, fmt.Errorf("unsupported RunnerGroupSpec's URI scheme: %v", typ) 67 | } 68 | } 69 | 70 | func parseRunnerGroupSpecFromFile(specPath string) (*types.RunnerGroupSpec, error) { 71 | specInRaw, err := os.ReadFile(specPath) 72 | if err != nil { 73 | return nil, fmt.Errorf("failed to read runner group spec from %s: %w", specPath, err) 74 | } 75 | 76 | return parseRunnerGroupSpecFromBinary(specInRaw) 77 | } 78 | 79 | func parseRunnerGroupSpecFromConfigMap(clientset kubernetes.Interface, namespace, name, specName string) (*types.RunnerGroupSpec, error) { 80 | ctx := context.Background() 81 | 82 | cli := clientset.CoreV1().ConfigMaps(namespace) 83 | 84 | cm, err := cli.Get(ctx, name, metav1.GetOptions{}) 85 | if err != nil { 86 | return nil, fmt.Errorf("failed to load configmap %s from namespace %s: %w", 87 | name, namespace, err) 88 | } 89 | 90 | specInStr, ok := cm.Data[specName] 91 | if !ok { 92 | return nil, fmt.Errorf("no such data (%s) in configmap %s from namespace %s", 93 | specName, name, namespace) 94 | } 95 | 96 | return parseRunnerGroupSpecFromBinary([]byte(specInStr)) 97 | } 98 | 99 | func parseRunnerGroupSpecFromBinary(data []byte) (*types.RunnerGroupSpec, error) { 100 | var spec 
types.RunnerGroupSpec 101 | 102 | if err := yaml.Unmarshal(data, &spec); err != nil { 103 | return nil, fmt.Errorf("failed to parse RunnerGroupSpec from YAML: %s\nerror: %w", string(data), err) 104 | } 105 | return &spec, nil 106 | } 107 | -------------------------------------------------------------------------------- /runner/localstore/reader.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package localstore 5 | 6 | import "os" 7 | 8 | // sizeReadCloser implements Reader interface. 9 | type sizeReadCloser struct { 10 | *os.File 11 | size int64 12 | } 13 | 14 | // Size returns file's size. 15 | func (r *sizeReadCloser) Size() int64 { 16 | return r.size 17 | } 18 | -------------------------------------------------------------------------------- /runner/localstore/store.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package localstore 5 | 6 | import ( 7 | "fmt" 8 | "io" 9 | "os" 10 | "path/filepath" 11 | "sync" 12 | ) 13 | 14 | // Store is a filesystem-like key/value storage. 15 | // 16 | // Each key/value has committed and ingesting status. When OpenWriter returns 17 | // ingestion transcation, the Store opens rootDir/ingest/$random file to 18 | // receive value data. Once all the data is written, the Commit(ref) moves the 19 | // file into rootDir/data/ref. 20 | type Store struct { 21 | sync.Mutex 22 | 23 | dataDir string 24 | ingestDir string 25 | } 26 | 27 | // NewStore returns new instance of Store. 
28 | func NewStore(rootDir string) (*Store, error) { 29 | if !filepath.IsAbs(rootDir) { 30 | return nil, fmt.Errorf("%s is not absolute path", rootDir) 31 | } 32 | 33 | dataDir := filepath.Join(rootDir, "data") 34 | if err := os.MkdirAll(dataDir, 0600); err != nil { 35 | return nil, fmt.Errorf("failed to ensure data dir %s: %w", dataDir, err) 36 | } 37 | 38 | ingestDir := filepath.Join(rootDir, "ingest") 39 | if err := os.MkdirAll(ingestDir, 0600); err != nil { 40 | return nil, fmt.Errorf("failed to ensure ingest dir %s: %w", ingestDir, err) 41 | } 42 | 43 | return &Store{ 44 | dataDir: dataDir, 45 | ingestDir: ingestDir, 46 | }, nil 47 | } 48 | 49 | // OpenWriter is to initiate a writing operation, ingestion transcation. A 50 | // single ingestion transcation is to open temporary file and allow caller to 51 | // write data into the temporary file. Once all the data is written, the caller 52 | // should call Commit to complete ingestion transcation. 53 | func (s *Store) OpenWriter() (Writer, error) { 54 | f, err := os.CreateTemp(s.ingestDir, "ingest-*") 55 | if err != nil { 56 | return nil, fmt.Errorf("failed to create ingest file: %w", err) 57 | } 58 | 59 | return &writer{ 60 | s: s, 61 | name: f.Name(), 62 | f: f, 63 | }, nil 64 | } 65 | 66 | // OpenReader is to open committed content named by ref. 67 | func (s *Store) OpenReader(ref string) (Reader, error) { 68 | s.Lock() 69 | defer s.Unlock() 70 | 71 | target := filepath.Join(s.dataDir, ref) 72 | 73 | stat, err := os.Stat(target) 74 | if err != nil { 75 | return nil, fmt.Errorf("failed to ensure if ref %s exists: %w", ref, err) 76 | } 77 | 78 | size := stat.Size() 79 | f, err := os.Open(target) 80 | if err != nil { 81 | return nil, fmt.Errorf("failed to open ref %s: %w", ref, err) 82 | } 83 | 84 | return &sizeReadCloser{ 85 | File: f, 86 | size: size, 87 | }, nil 88 | } 89 | 90 | // Delete is to delete committed content named by ref. 
91 | func (s *Store) Delete(ref string) error { 92 | s.Lock() 93 | defer s.Unlock() 94 | 95 | target := filepath.Join(s.dataDir, ref) 96 | _, err := os.Stat(target) 97 | if err != nil { 98 | if os.IsNotExist(err) { 99 | return nil 100 | } 101 | return fmt.Errorf("failed to ensure if ref %s exists: %w", ref, err) 102 | } 103 | return os.Remove(target) 104 | } 105 | 106 | // Writer handles writing of content into local store 107 | type Writer interface { 108 | // Close closes the writer. 109 | // 110 | // If the writer has not been committed, this allows aborting. 111 | // Calling Close on a closed writer will not error. 112 | io.WriteCloser 113 | 114 | // Commit commits data as file named by ref. 115 | // 116 | // Commit always close Writer. If ref already exists, it will return 117 | // error. 118 | Commit(ref string) error 119 | } 120 | 121 | // Reader extends io.ReadCloser interface with io.ReaderAt and reporting of Size. 122 | type Reader interface { 123 | io.ReaderAt 124 | io.ReadCloser 125 | Size() int64 126 | } 127 | -------------------------------------------------------------------------------- /runner/localstore/writer.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package localstore 5 | 6 | import ( 7 | "errors" 8 | "fmt" 9 | "os" 10 | "path/filepath" 11 | ) 12 | 13 | // ErrAlreadyExists returns the content exists. 14 | // 15 | // TODO(weifu): move it into common pkg. 16 | var ErrAlreadyExists = errors.New("already exists") 17 | 18 | // writer implements Writer interface. 19 | type writer struct { 20 | s *Store 21 | 22 | name string 23 | f *os.File 24 | } 25 | 26 | // Write writes data into underlying file. 27 | func (w *writer) Write(data []byte) (int, error) { 28 | return w.f.Write(data) 29 | } 30 | 31 | // Close closes file and remove it. 
32 | func (w *writer) Close() error { 33 | w.f.Close() 34 | if err := os.Remove(w.name); err != nil { 35 | if !os.IsNotExist(err) { 36 | return err 37 | } 38 | } 39 | return nil 40 | } 41 | 42 | // Commit commits data as file named by ref. 43 | func (w *writer) Commit(ref string) error { 44 | w.s.Lock() 45 | defer w.s.Unlock() 46 | 47 | defer w.Close() 48 | 49 | if err := w.f.Sync(); err != nil { 50 | return fmt.Errorf("failed to fsync: %w", err) 51 | } 52 | 53 | target := filepath.Join(w.s.dataDir, ref) 54 | _, err := os.Stat(target) 55 | if err == nil { 56 | return fmt.Errorf("ref %s already exists: %w", ref, ErrAlreadyExists) 57 | } 58 | 59 | if !os.IsNotExist(err) { 60 | return fmt.Errorf("failed to ensure if ref %s exists: %w", ref, err) 61 | } 62 | return os.Rename(w.name, target) 63 | } 64 | -------------------------------------------------------------------------------- /runner/runnergroup_common.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package runner 5 | 6 | import ( 7 | "fmt" 8 | 9 | "github.com/Azure/kperf/portforward" 10 | ) 11 | 12 | var ( 13 | // runnerGroupReleaseLabels is used to mark that helm chart release 14 | // is managed by kperf. 15 | runnerGroupReleaseLabels = map[string]string{ 16 | "runnergroups.kperf.io/managed": "true", 17 | } 18 | ) 19 | 20 | const ( 21 | // runnerGroupServerChartName should be aligned with ../manifests/runnergroup/server. 22 | runnerGroupServerChartName = "runnergroup/server" 23 | 24 | // runnerGroupServerReleaseName is the helm releas name for runner groups's server. 25 | runnerGroupServerReleaseName = "runnergroup-server" 26 | 27 | // runnerGroupServerPort should be aligned with ../manifests/runnergroup/server/templates/pod.yaml. 28 | runnerGroupServerPort uint16 = 8080 29 | 30 | // runnerGroupReleaseNamespace is used to host runner groups. 
31 | runnerGroupReleaseNamespace = "runnergroups-kperf-io" 32 | ) 33 | 34 | // initPortForwardToServer creates local listener to forward traffic to runner 35 | // groups' server. 36 | func initPortForwardToServer(kubecfgPath string) (_localhost string, _cleanup func(), retErr error) { 37 | pf, err := portforward.NewPodPortForwarder( 38 | kubecfgPath, 39 | runnerGroupReleaseNamespace, 40 | runnerGroupServerReleaseName, 41 | runnerGroupServerPort, 42 | ) 43 | if err != nil { 44 | return "", nil, fmt.Errorf("failed to init pod portforward: %w", err) 45 | } 46 | defer func() { 47 | if retErr != nil { 48 | pf.Stop() 49 | } 50 | }() 51 | 52 | if err = pf.Start(); err != nil { 53 | return "", nil, fmt.Errorf("failed to start pod port forward: %w", err) 54 | } 55 | 56 | localPort, err := pf.GetLocalPort() 57 | if err != nil { 58 | return "", nil, fmt.Errorf("failed to get local port: %w", err) 59 | } 60 | return fmt.Sprintf("localhost:%d", localPort), pf.Stop, nil 61 | } 62 | -------------------------------------------------------------------------------- /runner/runnergroup_delete.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package runner 5 | 6 | import ( 7 | "context" 8 | "fmt" 9 | 10 | "github.com/Azure/kperf/helmcli" 11 | ) 12 | 13 | // DeleteRunnerGroupServer delete existing long running server. 
14 | func DeleteRunnerGroupServer(_ context.Context, kubeconfigPath string) error { 15 | delCli, err := helmcli.NewDeleteCli(kubeconfigPath, runnerGroupReleaseNamespace) 16 | if err != nil { 17 | return fmt.Errorf("failed to create helm delete client: %w", err) 18 | } 19 | 20 | return delCli.Delete(runnerGroupServerReleaseName) 21 | } 22 | -------------------------------------------------------------------------------- /runner/runnergroup_list.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package runner 5 | 6 | import ( 7 | "context" 8 | "encoding/json" 9 | "fmt" 10 | "io" 11 | "net/http" 12 | 13 | "github.com/Azure/kperf/api/types" 14 | ) 15 | 16 | // ListRunnerGroups lists RunnerGroups from server. 17 | func ListRunnerGroups(ctx context.Context, kubeCfgPath string) ([]*types.RunnerGroup, error) { 18 | host, done, err := initPortForwardToServer(kubeCfgPath) 19 | if err != nil { 20 | return nil, err 21 | } 22 | defer done() 23 | 24 | targetURL := fmt.Sprintf("http://%s/v1/runnergroups", host) 25 | 26 | req, err := http.NewRequestWithContext(ctx, "GET", targetURL, nil) 27 | if err != nil { 28 | return nil, fmt.Errorf("failed to init GET request: %w", err) 29 | } 30 | 31 | resp, err := http.DefaultClient.Do(req) 32 | if err != nil { 33 | return nil, fmt.Errorf("failed to access %s by portforward: %w", targetURL, err) 34 | } 35 | defer resp.Body.Close() 36 | 37 | if resp.StatusCode != http.StatusOK { 38 | errInRaw, err := io.ReadAll(resp.Body) 39 | if err != nil { 40 | return nil, fmt.Errorf("failed to read error message when http code = %v: %w", 41 | resp.Status, err) 42 | } 43 | 44 | herr := types.HTTPError{} 45 | err = json.Unmarshal(errInRaw, &herr) 46 | if err != nil { 47 | return nil, fmt.Errorf("failed to get error when http code = %v: %w", 48 | resp.Status, err) 49 | } 50 | return nil, herr 51 | } 52 | 53 | dataInRaw, err := 
io.ReadAll(resp.Body) 54 | if err != nil { 55 | return nil, fmt.Errorf("failed to read data: %w", err) 56 | } 57 | 58 | res := []*types.RunnerGroup{} 59 | err = json.Unmarshal(dataInRaw, &res) 60 | if err != nil { 61 | return nil, fmt.Errorf("failed to unmarshal to get RunnerGroup slice: %w\n\n%s", 62 | err, string(dataInRaw)) 63 | } 64 | return res, nil 65 | } 66 | -------------------------------------------------------------------------------- /runner/runnergroup_result.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package runner 5 | 6 | import ( 7 | "context" 8 | "encoding/json" 9 | "fmt" 10 | "io" 11 | "net/http" 12 | 13 | "github.com/Azure/kperf/api/types" 14 | ) 15 | 16 | // GetRunnerGroupResult gets runner group's aggregated report. 17 | func GetRunnerGroupResult(ctx context.Context, kubecfgPath string, wait bool) (*types.RunnerGroupsReport, error) { 18 | host, done, err := initPortForwardToServer(kubecfgPath) 19 | if err != nil { 20 | return nil, err 21 | } 22 | defer done() 23 | 24 | targetURL := fmt.Sprintf("http://%s/v1/runnergroups/summary", host) 25 | if wait { 26 | targetURL += "?wait=true" 27 | } 28 | 29 | req, err := http.NewRequestWithContext(ctx, "GET", targetURL, nil) 30 | if err != nil { 31 | return nil, fmt.Errorf("failed to init GET request: %w", err) 32 | } 33 | 34 | resp, err := http.DefaultClient.Do(req) 35 | if err != nil { 36 | return nil, fmt.Errorf("failed to access %s by portforward: %w", targetURL, err) 37 | } 38 | defer resp.Body.Close() 39 | 40 | if resp.StatusCode != http.StatusOK { 41 | errInRaw, err := io.ReadAll(resp.Body) 42 | if err != nil { 43 | return nil, fmt.Errorf("failed to read error message when http code = %v: %w", 44 | resp.Status, err) 45 | } 46 | 47 | herr := types.HTTPError{} 48 | err = json.Unmarshal(errInRaw, &herr) 49 | if err != nil { 50 | return nil, fmt.Errorf("failed to get error 
when http code = %v: %w", 51 | resp.Status, err) 52 | } 53 | return nil, herr 54 | } 55 | 56 | dataInRaw, err := io.ReadAll(resp.Body) 57 | if err != nil { 58 | return nil, fmt.Errorf("failed to read data: %w", err) 59 | } 60 | 61 | res := types.RunnerGroupsReport{} 62 | err = json.Unmarshal(dataInRaw, &res) 63 | if err != nil { 64 | return nil, fmt.Errorf("failed to unmarshal to get result: %w\n\n%s", 65 | err, string(dataInRaw)) 66 | } 67 | return &res, nil 68 | } 69 | -------------------------------------------------------------------------------- /runner/runnergroup_run.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package runner 5 | 6 | import ( 7 | "context" 8 | "fmt" 9 | "time" 10 | 11 | "github.com/Azure/kperf/api/types" 12 | "github.com/Azure/kperf/helmcli" 13 | "github.com/Azure/kperf/manifests" 14 | 15 | "gopkg.in/yaml.v3" 16 | ) 17 | 18 | var ( 19 | defaultRunCmdCfg = runCmdConfig{ 20 | runnerGroupFlowcontrol: struct { 21 | priorityLevel string 22 | matchingPrecedence int 23 | }{ 24 | priorityLevel: "workload-low", 25 | matchingPrecedence: 1000, 26 | }, 27 | } 28 | ) 29 | 30 | // CreateRunnerGroupServer creates a long running server to deploy runner groups. 31 | // 32 | // TODO: 33 | // 1. create a new package to define ErrNotFound, ErrAlreadyExists, ... errors. 34 | // 2. support configurable timeout. 
35 | func CreateRunnerGroupServer(ctx context.Context, 36 | kubeconfigPath string, 37 | runnerImage string, 38 | rgSpec *types.RunnerGroupSpec, 39 | runnerVerbosity int, 40 | opts ...RunCmdOpt, 41 | ) error { 42 | specInStr, err := tweakAndMarshalSpec(rgSpec) 43 | if err != nil { 44 | return err 45 | } 46 | 47 | cfg := defaultRunCmdCfg 48 | for _, opt := range opts { 49 | opt(&cfg) 50 | } 51 | 52 | appiler, err := cfg.toServerHelmValuesAppiler() 53 | if err != nil { 54 | return err 55 | } 56 | 57 | getCli, err := helmcli.NewGetCli(kubeconfigPath, runnerGroupReleaseNamespace) 58 | if err != nil { 59 | return fmt.Errorf("failed to create helm get client: %w", err) 60 | } 61 | 62 | _, err = getCli.Get(runnerGroupServerReleaseName) 63 | if err == nil { 64 | return fmt.Errorf("runner group server already exists") 65 | } 66 | 67 | ch, err := manifests.LoadChart(runnerGroupServerChartName) 68 | if err != nil { 69 | return fmt.Errorf("failed to load runner group server chart: %w", err) 70 | } 71 | 72 | releaseCli, err := helmcli.NewReleaseCli( 73 | kubeconfigPath, 74 | runnerGroupReleaseNamespace, 75 | runnerGroupServerReleaseName, 76 | ch, 77 | runnerGroupReleaseLabels, 78 | helmcli.StringPathValuesApplier( 79 | "name="+runnerGroupServerReleaseName, 80 | "image="+runnerImage, 81 | "runnerGroupSpec="+specInStr, 82 | // runnerVerbosity needs to be surrounded by quotes, so that YAML parse it as a string. 83 | fmt.Sprintf("runnerVerbosity=\"%d\"", runnerVerbosity), 84 | ), 85 | appiler, 86 | ) 87 | if err != nil { 88 | return fmt.Errorf("failed to create helm release client: %w", err) 89 | } 90 | return releaseCli.Deploy(ctx, 120*time.Second) 91 | } 92 | 93 | // tweakAndMarshalSpec updates spec's service account if not set and marshals 94 | // it into string. 95 | func tweakAndMarshalSpec(spec *types.RunnerGroupSpec) (string, error) { 96 | // NOTE: It should be aligned with ../manifests/runnergroup/server/templates/pod.yaml. 
97 | if spec.ServiceAccount == nil { 98 | var sa = runnerGroupServerReleaseName 99 | spec.ServiceAccount = &sa 100 | } 101 | 102 | data, err := yaml.Marshal(spec) 103 | if err != nil { 104 | return "", fmt.Errorf("failed to marshal spec: %w", err) 105 | } 106 | return string(data), nil 107 | } 108 | 109 | type runCmdConfig struct { 110 | // serverNodeSelectors forces to schedule server to nodes with that specific labels. 111 | serverNodeSelectors map[string][]string 112 | // runnerGroupFlowcontrol applies flowcontrol settings to runners. 113 | // 114 | // NOTE: Please align with ../manifests/runnergroup/server/values.yaml 115 | // 116 | // FIXME(weifu): before v1.0.0, we should define type in ../manifests. 117 | runnerGroupFlowcontrol struct { 118 | priorityLevel string 119 | matchingPrecedence int 120 | } 121 | 122 | // TODO(weifu): merge name/image/specs into this 123 | } 124 | 125 | // RunCmdOpt is used to update default run command's setting. 126 | type RunCmdOpt func(*runCmdConfig) 127 | 128 | // WithRunCmdServerNodeSelectorsOpt updates server's node selectors. 129 | func WithRunCmdServerNodeSelectorsOpt(labels map[string][]string) RunCmdOpt { 130 | return func(cfg *runCmdConfig) { 131 | cfg.serverNodeSelectors = labels 132 | } 133 | } 134 | 135 | // WithRunCmdRunnerGroupFlowControl updates runner groups' flowcontrol. 136 | func WithRunCmdRunnerGroupFlowControl(priorityLevel string, matchingPrecedence int) RunCmdOpt { 137 | return func(cfg *runCmdConfig) { 138 | cfg.runnerGroupFlowcontrol.priorityLevel = priorityLevel 139 | cfg.runnerGroupFlowcontrol.matchingPrecedence = matchingPrecedence 140 | } 141 | } 142 | 143 | // toServerHelmValuesAppiler creates ValuesApplier. 144 | // 145 | // NOTE: It should be aligned with ../manifests/runnergroup/server/values.yaml. 
146 | func (cfg *runCmdConfig) toServerHelmValuesAppiler() (helmcli.ValuesApplier, error) { 147 | values := map[string]interface{}{ 148 | "nodeSelectors": cfg.serverNodeSelectors, 149 | "flowcontrol": map[string]interface{}{ 150 | "priorityLevelConfiguration": cfg.runnerGroupFlowcontrol.priorityLevel, 151 | "matchingPrecedence": cfg.runnerGroupFlowcontrol.matchingPrecedence, 152 | }, 153 | } 154 | 155 | rawData, err := yaml.Marshal(values) 156 | if err != nil { 157 | return nil, fmt.Errorf("failed to render run command config into YAML: %w", err) 158 | } 159 | 160 | appiler, err := helmcli.YAMLValuesApplier(string(rawData)) 161 | if err != nil { 162 | return nil, fmt.Errorf("failed to prepare value appiler for run command config: %w", err) 163 | } 164 | return appiler, nil 165 | } 166 | -------------------------------------------------------------------------------- /runner/server.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package runner 5 | 6 | import ( 7 | "encoding/json" 8 | "errors" 9 | "fmt" 10 | "io" 11 | "net" 12 | "net/http" 13 | "sync" 14 | 15 | "github.com/Azure/kperf/api/types" 16 | "github.com/Azure/kperf/runner/group" 17 | "github.com/Azure/kperf/runner/localstore" 18 | 19 | "github.com/gorilla/mux" 20 | ) 21 | 22 | // Server is to deploy runner groups and expose endpoints for runner report. 23 | type Server struct { 24 | store *localstore.Store 25 | listeners []net.Listener 26 | groups []*group.Handler 27 | readyCh chan struct{} 28 | report *types.RunnerMetricReport 29 | } 30 | 31 | // NewServer returns new instance of server. 
32 | func NewServer(dataDir string, addrs []string, groups ...*group.Handler) (*Server, error) { 33 | s, err := localstore.NewStore(dataDir) 34 | if err != nil { 35 | return nil, err 36 | } 37 | 38 | listeners, err := buildNetListeners(addrs) 39 | if err != nil { 40 | return nil, err 41 | } 42 | 43 | return &Server{ 44 | listeners: listeners, 45 | groups: groups, 46 | store: s, 47 | readyCh: make(chan struct{}), 48 | }, nil 49 | } 50 | 51 | // Run is to expose endpoints. 52 | func (s *Server) Run() error { 53 | if err := s.deployRunnerGroups(); err != nil { 54 | return fmt.Errorf("failed to deploy runner group %w", err) 55 | } 56 | 57 | go s.waitForRunnerGroups() 58 | 59 | r := mux.NewRouter() 60 | // NOTE: Please update ./runnergroup_list.go if endpoint has been changed. 61 | r.HandleFunc("/v1/runnergroups", s.listRunnerGroupsHandler).Methods("GET") 62 | // NOTE: Please update ./runnergroup_result.go if endpoint has been changed. 63 | r.HandleFunc("/v1/runnergroups/summary", s.getRunnerGroupsSummary).Methods("GET") 64 | r.HandleFunc("/v1/runnergroups/{runner_name}/result", s.postRunnerGroupsRunnerResult).Methods("POST") 65 | 66 | errCh := make(chan error, len(s.listeners)) 67 | var wg sync.WaitGroup 68 | for _, lis := range s.listeners { 69 | wg.Add(1) 70 | go func(l net.Listener) { 71 | defer wg.Done() 72 | //nolint:gosec 73 | errCh <- http.Serve(l, r) 74 | }(lis) 75 | } 76 | wg.Wait() 77 | 78 | for err := range errCh { 79 | if err != nil { 80 | return err 81 | } 82 | } 83 | return nil 84 | } 85 | 86 | // listRunnerGroupsHandler lists all the runner groups. 
87 | func (s *Server) listRunnerGroupsHandler(w http.ResponseWriter, r *http.Request) { 88 | ctx := r.Context() 89 | 90 | res := make([]*types.RunnerGroup, 0, len(s.groups)) 91 | for _, g := range s.groups { 92 | res = append(res, g.Info(ctx)) 93 | } 94 | 95 | data, _ := json.Marshal(res) 96 | w.WriteHeader(http.StatusOK) 97 | _, _ = w.Write(data) 98 | } 99 | 100 | // getRunnerGroupsSummary returns summary report. 101 | func (s *Server) getRunnerGroupsSummary(w http.ResponseWriter, r *http.Request) { 102 | wait := r.URL.Query().Has("wait") 103 | 104 | select { 105 | case <-s.readyCh: 106 | default: 107 | if !wait { 108 | renderErrorResponse(w, http.StatusNotFound, fmt.Errorf("summary is not ready")) 109 | return 110 | } 111 | } 112 | 113 | ctx := r.Context() 114 | select { 115 | case <-s.readyCh: 116 | case <-ctx.Done(): 117 | renderErrorResponse(w, http.StatusRequestTimeout, fmt.Errorf("request has been canceled")) 118 | return 119 | } 120 | 121 | data, _ := json.Marshal(s.report) 122 | w.WriteHeader(http.StatusOK) 123 | _, _ = w.Write(data) 124 | } 125 | 126 | // postRunnerGroupsRunnerResult receives summary result from runner. 
127 | func (s *Server) postRunnerGroupsRunnerResult(w http.ResponseWriter, r *http.Request) { 128 | runnerName := mux.Vars(r)["runner_name"] 129 | ctx := r.Context() 130 | 131 | var found = false 132 | var err error 133 | for _, g := range s.groups { 134 | found, err = g.IsControlled(ctx, runnerName) 135 | if err != nil { 136 | renderErrorResponse(w, http.StatusInternalServerError, err) 137 | return 138 | } 139 | if found { 140 | break 141 | } 142 | } 143 | 144 | if !found { 145 | renderErrorResponse(w, http.StatusNotFound, fmt.Errorf("no such runner %s", runnerName)) 146 | return 147 | } 148 | 149 | writer, err := s.store.OpenWriter() 150 | if err != nil { 151 | renderErrorResponse(w, http.StatusInternalServerError, err) 152 | return 153 | } 154 | defer writer.Close() 155 | 156 | _, err = io.Copy(writer, r.Body) 157 | if err != nil { 158 | renderErrorResponse(w, http.StatusInternalServerError, err) 159 | return 160 | } 161 | 162 | err = writer.Commit(runnerName) 163 | if err != nil { 164 | code := http.StatusInternalServerError 165 | if errors.Is(err, localstore.ErrAlreadyExists) { 166 | code = http.StatusConflict 167 | } 168 | renderErrorResponse(w, code, err) 169 | return 170 | } 171 | w.WriteHeader(http.StatusCreated) 172 | } 173 | -------------------------------------------------------------------------------- /runner/server_runnergroup.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package runner 5 | 6 | import ( 7 | "context" 8 | "fmt" 9 | "sync" 10 | ) 11 | 12 | // deployRunnerGroups deploys runner groups. 13 | // 14 | // FIXME(weifu): should decouple URL from runner group. 
15 | func (s *Server) deployRunnerGroups() error { 16 | targetAddr, err := s.firstNonLocalAddr() 17 | if err != nil { 18 | return err 19 | } 20 | 21 | uploadURL := fmt.Sprintf("http://%s/v1/runnergroups/$(POD_NAME)/result", targetAddr) 22 | 23 | var wg sync.WaitGroup 24 | errCh := make(chan error, len(s.groups)) 25 | for idx := range s.groups { 26 | wg.Add(1) 27 | g := s.groups[idx] 28 | go func() { 29 | defer wg.Done() 30 | 31 | errCh <- g.Deploy(context.Background(), uploadURL) 32 | }() 33 | } 34 | wg.Wait() 35 | 36 | close(errCh) 37 | for err := range errCh { 38 | if err != nil { 39 | return err 40 | } 41 | } 42 | return nil 43 | } 44 | 45 | // waitForRunnerGroups watches all runner groups and marks summary ready until 46 | // all runner groups finish. 47 | func (s *Server) waitForRunnerGroups() { 48 | var wg sync.WaitGroup 49 | 50 | for idx := range s.groups { 51 | wg.Add(1) 52 | g := s.groups[idx] 53 | go func() { 54 | defer wg.Done() 55 | 56 | // FIXME(weifu): remove panic here 57 | if err := g.Wait(context.TODO()); err != nil { 58 | panic(err) 59 | } 60 | }() 61 | } 62 | wg.Wait() 63 | 64 | s.report = buildRunnerGroupSummary(s.store, s.groups) 65 | close(s.readyCh) 66 | } 67 | 68 | // firstNoLocalAddr returns first non-local address. 69 | func (s *Server) firstNonLocalAddr() (string, error) { 70 | for _, lis := range s.listeners { 71 | addr := lis.Addr().String() 72 | 73 | local, err := isLocalhost(addr) 74 | if err != nil { 75 | return "", err 76 | } 77 | 78 | if !local { 79 | return addr, nil 80 | } 81 | } 82 | return "", fmt.Errorf("there is no non-local address") 83 | } 84 | -------------------------------------------------------------------------------- /runner/utils.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | package runner 5 | 6 | import ( 7 | "container/list" 8 | "context" 9 | "encoding/json" 10 | "fmt" 11 | "io" 12 | "net" 13 | "net/http" 14 | "strings" 15 | "time" 16 | 17 | "github.com/Azure/kperf/api/types" 18 | "github.com/Azure/kperf/metrics" 19 | "github.com/Azure/kperf/runner/group" 20 | "github.com/Azure/kperf/runner/localstore" 21 | 22 | "k8s.io/klog/v2" 23 | ) 24 | 25 | // renderErrorResponse renders error into types.HTTPError format. 26 | func renderErrorResponse(w http.ResponseWriter, code int, err error) { 27 | if err == nil { 28 | panic("unexpected error") 29 | } 30 | 31 | w.WriteHeader(code) 32 | 33 | data, _ := json.Marshal(types.HTTPError{ 34 | ErrorMessage: err.Error(), 35 | }) 36 | _, _ = w.Write(data) 37 | } 38 | 39 | // buildNetListeners returns slice of net.Listeners. 40 | func buildNetListeners(addrs []string) (_ []net.Listener, retErr error) { 41 | res := make([]net.Listener, 0, len(addrs)) 42 | 43 | defer func() { 44 | if retErr != nil { 45 | for _, l := range res { 46 | l.Close() 47 | } 48 | } 49 | }() 50 | 51 | for _, addr := range addrs { 52 | lis, err := net.Listen("tcp", addr) 53 | if err != nil { 54 | return nil, fmt.Errorf("failed to listen on %s: %w", addr, err) 55 | } 56 | res = append(res, lis) 57 | } 58 | return res, nil 59 | } 60 | 61 | // buildRunnerGroupSummary returns aggrecated summary from runner groups' report. 
62 | func buildRunnerGroupSummary(s *localstore.Store, groups []*group.Handler) *types.RunnerMetricReport { 63 | totalBytes := int64(0) 64 | totalResp := 0 65 | latenciesByURL := map[string]*list.List{} 66 | errs := []types.ResponseError{} 67 | errStats := map[string]int32{} 68 | maxDuration := 0 * time.Second 69 | 70 | for idx := range groups { 71 | g := groups[idx] 72 | 73 | pods, err := g.Pods(context.TODO()) 74 | if err != nil { 75 | klog.V(2).ErrorS(err, "failed to list runners", "runner-group", g.Name()) 76 | continue 77 | } 78 | 79 | for _, pod := range pods { 80 | data, err := readBlob(s, pod.Name) 81 | if err != nil { 82 | klog.V(2).ErrorS(err, "failed to read report", "runner", pod.Name) 83 | continue 84 | } 85 | 86 | report := types.RunnerMetricReport{} 87 | 88 | err = json.Unmarshal(data, &report) 89 | if err != nil { 90 | klog.V(2).ErrorS(err, "failed to unmarshal", "runner", pod.Name) 91 | continue 92 | } 93 | 94 | // update totalReceivedBytes 95 | totalBytes += report.TotalReceivedBytes 96 | 97 | // update latencies 98 | for u, l := range report.LatenciesByURL { 99 | latencies, ok := latenciesByURL[u] 100 | if !ok { 101 | latenciesByURL[u] = list.New() 102 | latencies = latenciesByURL[u] 103 | } 104 | for _, v := range l { 105 | totalResp++ 106 | latencies.PushBack(v) 107 | } 108 | } 109 | 110 | // update error stats 111 | mergeErrorStat(errStats, report.ErrorStats) 112 | errs = append(errs, report.Errors...) 
113 | report.Errors = nil 114 | 115 | // update max duration 116 | rDur, err := time.ParseDuration(report.Duration) 117 | if err != nil { 118 | klog.V(2).ErrorS(err, "failed to parse duration", "runner", 119 | pod.Name, "duration", report.Duration) 120 | } 121 | if rDur > maxDuration { 122 | maxDuration = rDur 123 | } 124 | } 125 | } 126 | 127 | percentileLatenciesByURL := map[string][][2]float64{} 128 | 129 | latencies := make([]float64, 0, totalResp) 130 | for u, l := range latenciesByURL { 131 | lInSlice := listToSliceFloat64(l) 132 | 133 | latencies = append(latencies, lInSlice...) 134 | percentileLatenciesByURL[u] = metrics.BuildPercentileLatencies(lInSlice) 135 | } 136 | 137 | return &types.RunnerMetricReport{ 138 | Total: totalResp, 139 | Errors: errs, 140 | ErrorStats: errStats, 141 | Duration: maxDuration.String(), 142 | TotalReceivedBytes: totalBytes, 143 | PercentileLatencies: metrics.BuildPercentileLatencies(latencies), 144 | PercentileLatenciesByURL: percentileLatenciesByURL, 145 | } 146 | } 147 | 148 | // listToSliceFloat64 converts list.List into []float64. 149 | func listToSliceFloat64(l *list.List) []float64 { 150 | res := make([]float64, 0, l.Len()) 151 | for e := l.Front(); e != nil; e = e.Next() { 152 | res = append(res, e.Value.(float64)) 153 | } 154 | return res 155 | } 156 | 157 | // mergeErrorStat merges two error stats. 158 | func mergeErrorStat(s, d map[string]int32) { 159 | for e, n := range d { 160 | s[e] += n 161 | } 162 | } 163 | 164 | // readBlob reads blob data from localstore. 165 | func readBlob(s *localstore.Store, ref string) ([]byte, error) { 166 | r, err := s.OpenReader(ref) 167 | if err != nil { 168 | return nil, err 169 | } 170 | defer r.Close() 171 | 172 | return io.ReadAll(r) 173 | } 174 | 175 | // isLocalhost returns true if addr is local address. 
176 | func isLocalhost(addr string) (bool, error) { 177 | h, p, err := net.SplitHostPort(addr) 178 | if err != nil { 179 | if !strings.Contains(err.Error(), "missing port in address") { 180 | return false, fmt.Errorf("invalid address %s: %w", addr, err) 181 | } 182 | h = addr 183 | } 184 | 185 | if len(p) == 0 { 186 | return false, fmt.Errorf("invalid host name format %s", addr) 187 | } 188 | 189 | if h == "localhost" { 190 | h = "127.0.0.1" 191 | } 192 | 193 | ip := net.ParseIP(h) 194 | return ip.IsLoopback(), nil 195 | } 196 | -------------------------------------------------------------------------------- /scripts/run_runner.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Copyright (c) Microsoft Corporation. 4 | # Licensed under the MIT License. 5 | 6 | set -euo pipefail 7 | 8 | result_file=/data/${POD_NAMESPACE}-${POD_NAME}-${POD_UID}.json 9 | 10 | /kperf -v=${RUNNER_VERBOSITY} runner run --config=/config/load_profile.yaml \ 11 | --user-agent=${POD_NAME} \ 12 | --result=${result_file} \ 13 | --raw-data 14 | 15 | while true; do 16 | set +e 17 | http_code=$(curl -s -o /dev/null -w "%{http_code}" -XPOST -d "@${result_file}" ${TARGET_URL} || "50X") 18 | set -e 19 | 20 | case $http_code in 21 | 201) 22 | echo "Uploaded it" 23 | exit 0 24 | ;; 25 | 409) 26 | echo "Has been uploaded, skip" 27 | exit 0; 28 | ;; 29 | 404) 30 | echo "Leaking pod? skip" 31 | exit 1; 32 | ;; 33 | *) 34 | echo "Need to retry after received http code ${http_code} (or failed to connect)" 35 | sleep 5s 36 | ;; 37 | esac 38 | done 39 | -------------------------------------------------------------------------------- /virtualcluster/nodes_create.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | package virtualcluster 5 | 6 | import ( 7 | "context" 8 | "fmt" 9 | "time" 10 | 11 | "github.com/Azure/kperf/helmcli" 12 | "github.com/Azure/kperf/manifests" 13 | ) 14 | 15 | // CreateNodepool creates a new node pool. 16 | // 17 | // TODO: 18 | // 1. create a new package to define ErrNotFound, ErrAlreadyExists, ... errors. 19 | // 2. support configurable timeout. 20 | // 21 | // FIXME: 22 | // 23 | // Some cloud providers will delete unknown or not-ready nodes. If we render 24 | // both nodes and controllers in one helm release, helm won't wait for 25 | // controller ready before creating nodes. The nodes will be deleted by cloud 26 | // providers. The helm's post-install or post-upgrade hook can ensure that it 27 | // won't deploy nodes until controllers ready. However, resources created by 28 | // helm hook aren't part of helm release. We need extra step to cleanup nodes 29 | // resources when we delete nodepool's helm release. Based on this fact, we 30 | // separate one helm release into two. One is for controllers and other one 31 | // is for nodes. 32 | // 33 | // However, it's not a guarantee. When controller was deleted and it takes long 34 | // time to restart, the node will be marked NotReady and deleted by cloud providers. 35 | // Maybe we can consider to contribute to difference cloud providers with 36 | // workaround. For example, if node.Spec.ProviderID contains `?ignore=virtual`, 37 | // the cloud providers should ignore this kind of nodes. 
38 | func CreateNodepool(ctx context.Context, kubeCfgPath string, nodepoolName string, opts ...NodepoolOpt) (retErr error) { 39 | cfg := defaultNodepoolCfg 40 | for _, opt := range opts { 41 | opt(&cfg) 42 | } 43 | cfg.name = nodepoolName 44 | 45 | if err := cfg.validate(); err != nil { 46 | return err 47 | } 48 | 49 | getCli, err := helmcli.NewGetCli(kubeCfgPath, virtualnodeReleaseNamespace) 50 | if err != nil { 51 | return fmt.Errorf("failed to create helm get client: %w", err) 52 | } 53 | 54 | _, err = getCli.Get(cfg.nodeHelmReleaseName()) 55 | if err == nil { 56 | return fmt.Errorf("nodepool %s already exists", cfg.nodeHelmReleaseName()) 57 | } 58 | 59 | cleanupFn, err := createNodepoolController(ctx, kubeCfgPath, &cfg) 60 | if err != nil { 61 | return err 62 | } 63 | defer func() { 64 | // NOTE: Try best to cleanup. If there is leaky resources after 65 | // force stop, like kill process, it needs cleanup manually. 66 | if retErr != nil { 67 | _ = cleanupFn() 68 | } 69 | }() 70 | 71 | ch, err := manifests.LoadChart(virtualnodeChartName) 72 | if err != nil { 73 | return fmt.Errorf("failed to load virtual node chart: %w", err) 74 | } 75 | 76 | valueAppliers, err := cfg.toNodeHelmValuesAppliers() 77 | if err != nil { 78 | return err 79 | } 80 | 81 | releaseCli, err := helmcli.NewReleaseCli( 82 | kubeCfgPath, 83 | virtualnodeReleaseNamespace, 84 | cfg.nodeHelmReleaseName(), 85 | ch, 86 | virtualnodeReleaseLabels, 87 | valueAppliers..., 88 | ) 89 | if err != nil { 90 | return fmt.Errorf("failed to create helm release client: %w", err) 91 | } 92 | return releaseCli.Deploy(ctx, 30*time.Minute) 93 | } 94 | 95 | // createNodepoolController creates node controller release. 
96 | func createNodepoolController(ctx context.Context, kubeCfgPath string, cfg *nodepoolConfig) (_cleanup func() error, _ error) { 97 | ch, err := manifests.LoadChart(virtualnodeControllerChartName) 98 | if err != nil { 99 | return nil, fmt.Errorf("failed to load virtual node controller chart: %w", err) 100 | } 101 | 102 | appliers, err := cfg.toNodeControllerHelmValuesAppliers() 103 | if err != nil { 104 | return nil, err 105 | } 106 | 107 | releaseCli, err := helmcli.NewReleaseCli( 108 | kubeCfgPath, 109 | virtualnodeReleaseNamespace, 110 | cfg.nodeControllerHelmReleaseName(), 111 | ch, 112 | virtualnodeReleaseLabels, 113 | appliers..., 114 | ) 115 | if err != nil { 116 | return nil, fmt.Errorf("failed to create helm release client: %w", err) 117 | } 118 | 119 | if err := releaseCli.Deploy(ctx, 30*time.Minute); err != nil { 120 | return nil, fmt.Errorf("failed to deploy virtual node controller: %w", err) 121 | } 122 | return releaseCli.Uninstall, nil 123 | } 124 | -------------------------------------------------------------------------------- /virtualcluster/nodes_delete.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package virtualcluster 5 | 6 | import ( 7 | "context" 8 | "errors" 9 | "fmt" 10 | 11 | "github.com/Azure/kperf/helmcli" 12 | 13 | "helm.sh/helm/v3/pkg/storage/driver" 14 | ) 15 | 16 | // DeleteNodepool deletes a node pool with a given name. 
17 | func DeleteNodepool(_ context.Context, kubeconfigPath string, nodepoolName string) error { 18 | cfg := defaultNodepoolCfg 19 | cfg.name = nodepoolName 20 | 21 | if err := cfg.validate(); err != nil { 22 | return err 23 | } 24 | 25 | delCli, err := helmcli.NewDeleteCli(kubeconfigPath, virtualnodeReleaseNamespace) 26 | if err != nil { 27 | return fmt.Errorf("failed to create helm delete client: %w", err) 28 | } 29 | 30 | err = delCli.Delete(cfg.nodeHelmReleaseName()) 31 | if err != nil && !errors.Is(err, driver.ErrReleaseNotFound) { 32 | return fmt.Errorf("failed to cleanup virtual nodes: %w", err) 33 | } 34 | 35 | err = delCli.Delete(cfg.nodeControllerHelmReleaseName()) 36 | if err != nil && !errors.Is(err, driver.ErrReleaseNotFound) { 37 | return fmt.Errorf("failed to cleanup virtual node controller: %w", err) 38 | } 39 | 40 | return nil 41 | } 42 | -------------------------------------------------------------------------------- /virtualcluster/nodes_list.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | package virtualcluster 5 | 6 | import ( 7 | "context" 8 | "fmt" 9 | "strings" 10 | 11 | "helm.sh/helm/v3/pkg/release" 12 | 13 | "github.com/Azure/kperf/helmcli" 14 | ) 15 | 16 | // ListNodeppol lists nodepools added by the vc nodeppool add command. 
17 | func ListNodepools(_ context.Context, kubeconfigPath string) ([]*release.Release, error) { 18 | listCli, err := helmcli.NewListCli(kubeconfigPath, virtualnodeReleaseNamespace) 19 | if err != nil { 20 | return nil, fmt.Errorf("failed to create helm list client: %w", err) 21 | } 22 | 23 | releases, err := listCli.List() 24 | if err != nil { 25 | return nil, fmt.Errorf("failed to list nodepool: %w", err) 26 | } 27 | 28 | // NOTE: Skip node controllers 29 | res := make([]*release.Release, 0, len(releases)/2) 30 | for idx := range releases { 31 | r := releases[idx] 32 | if strings.HasSuffix(r.Name, reservedNodepoolSuffixName) { 33 | continue 34 | } 35 | res = append(res, r) 36 | } 37 | return res, nil 38 | } 39 | --------------------------------------------------------------------------------