├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── cmd ├── caelus │ ├── app │ │ ├── api.go │ │ └── server.go │ ├── caelus.go │ └── context │ │ └── context.go └── nm-operator │ ├── app │ ├── api.go │ └── server.go │ └── nm_operator.go ├── contrib ├── lighthouse-plugin │ ├── .code.yml │ ├── .gitignore │ ├── .gitmodules │ ├── Makefile │ ├── README.md │ ├── VERSION │ ├── build │ │ ├── Dockerfile │ │ ├── config │ │ ├── plugin-server.service │ │ └── plugin-server.spec │ ├── cmd │ │ └── plugin-server │ │ │ ├── app │ │ │ └── server.go │ │ │ └── plugin-server.go │ ├── go.mod │ ├── go.sum │ ├── hack │ │ ├── binary │ │ ├── clean │ │ ├── common.sh │ │ └── rpm │ └── pkg │ │ ├── plugin │ │ ├── docker │ │ │ ├── common.go │ │ │ ├── offline │ │ │ │ ├── offline.go │ │ │ │ └── util.go │ │ │ ├── pid-limit │ │ │ │ └── pid_limit.go │ │ │ ├── prehook_create_plugin.go │ │ │ ├── prehook_update_plugin.go │ │ │ ├── storage-opts │ │ │ │ └── storage_opts.go │ │ │ └── uts-mode │ │ │ │ └── uts_mode.go │ │ ├── meta.go │ │ ├── plugin.go │ │ └── util │ │ │ └── util.go │ │ └── server │ │ ├── register_plugin.go │ │ └── server.go └── lighthouse │ ├── Makefile │ ├── README.md │ ├── VERSION │ ├── build │ ├── Dockerfile │ ├── config │ ├── config.yaml │ ├── lighthouse.service │ └── lighthouse.spec │ ├── cmd │ └── lighthouse │ │ ├── app │ │ └── server.go │ │ └── lighthouse.go │ ├── doc │ └── LighthouseDesign.png │ ├── go.mod │ ├── go.sum │ ├── hack │ ├── binary │ ├── boilerplate.go.txt │ ├── clean │ ├── codegen │ ├── common.sh │ ├── generate-groups.sh │ ├── rpm │ ├── test │ ├── update-codegen.sh │ └── verify-codegen.sh │ └── pkg │ ├── apis │ └── componentconfig.lighthouse.io │ │ ├── register.go │ │ └── v1alpha1 │ │ ├── defaults.go │ │ ├── doc.go │ │ ├── register.go │ │ ├── types.go │ │ ├── zz_generated.deepcopy.go │ │ └── zz_generated.defaulter.go │ ├── hook │ ├── hook_connector.go │ ├── hook_connector_test.go │ ├── hook_manager.go │ ├── hook_manager_test.go │ ├── proxy.go │ ├── types.go │ └── 
util.go │ ├── httputil │ └── proxy.go │ ├── scheme │ └── register.go │ ├── test │ └── test_util.go │ └── util │ └── util.go ├── doc ├── config.md ├── contributing.md ├── images │ ├── cgroup.png │ ├── lighthouse.png │ ├── modules.png │ └── yarn.png ├── nm_operator.md ├── rules.md ├── start.md └── tutorial.md ├── go.mod ├── go.sum ├── hack ├── Dockerfile ├── build.sh ├── clean.sh ├── common.sh ├── config │ ├── caelus.json │ └── rules.json ├── format.sh ├── lib │ └── version.sh ├── test.sh └── yaml │ ├── caelus.yaml │ └── nodemanager.yaml └── pkg ├── cadvisor └── cadvisor.go ├── caelus ├── alarm │ ├── alarm.go │ └── alarm_test.go ├── checkpoint │ ├── checkpoint.go │ └── checkpoint_test.go ├── cpi │ ├── manager.go │ └── time_series.go ├── detection │ ├── detector.go │ ├── ewma.go │ ├── ewma_test.go │ ├── expression.go │ ├── mock │ │ └── mock.go │ ├── ring │ │ ├── ring.go │ │ └── ring_test.go │ ├── types.go │ └── union.go ├── diskquota │ ├── diskquota.go │ ├── diskquota_test.go │ ├── interface.go │ ├── manager │ │ ├── manager.go │ │ ├── manager_fake.go │ │ └── projectquota │ │ │ ├── projectfile.go │ │ │ ├── projectfile_test.go │ │ │ ├── projectquota.go │ │ │ └── projectquota_test.go │ └── volumes │ │ ├── emptydir.go │ │ ├── emptydir_test.go │ │ ├── hostpath.go │ │ ├── hostpath_test.go │ │ ├── rootfs.go │ │ ├── rootfs_test.go │ │ ├── volume.go │ │ └── volume_fake.go ├── healthcheck │ ├── action │ │ ├── action.go │ │ ├── action_adjust.go │ │ ├── action_adjust_test.go │ │ ├── action_evict.go │ │ ├── action_log.go │ │ ├── action_schedule.go │ │ ├── action_schedule_test.go │ │ └── action_test.go │ ├── cgroupnotify │ │ ├── cgroup_notify.go │ │ ├── notify.go │ │ └── notify_memory.go │ ├── conflict │ │ ├── conflict.go │ │ ├── conflict_test.go │ │ └── mock │ │ │ └── mock.go │ ├── dispatcher │ │ ├── dispatcher.go │ │ └── dispatcher_test.go │ ├── health_check.go │ ├── health_check_test.go │ └── rulecheck │ │ ├── common.go │ │ ├── common_test.go │ │ ├── correlation │ │ ├── 
correlation.go │ │ ├── doc.go │ │ └── types.go │ │ ├── rule_check.go │ │ ├── rule_check_app.go │ │ ├── rule_check_container.go │ │ └── rule_check_node.go ├── metrics │ ├── common_metrics.go │ ├── outer │ │ ├── serverrequest │ │ │ └── serverrequest.go │ │ ├── textfile │ │ │ ├── textfile.go │ │ │ └── textfile_metrics.go │ │ └── utils.go │ ├── prometheus_config.go │ └── prometheus_metrics.go ├── mock │ ├── mock_pod_informer.go │ └── util.go ├── online │ ├── online.go │ └── online_test.go ├── predict │ ├── interface.go │ ├── logic.go │ ├── predict.go │ ├── predict_local.go │ ├── predict_local_test.go │ └── predict_vpa.go ├── qos │ ├── manager │ │ ├── manage_cpu.go │ │ ├── manage_cpu_test.go │ │ ├── manage_diskio.go │ │ ├── manage_memory.go │ │ ├── manage_memory_test.go │ │ ├── manage_netio.go │ │ ├── manager.go │ │ └── netio │ │ │ ├── log.go │ │ │ └── netio.go │ ├── mock │ │ └── mock.go │ ├── qos.go │ └── qos_k8s.go ├── resource │ ├── interface.go │ ├── interface_mock.go │ ├── k8s │ │ ├── pod_sort.go │ │ └── types.go │ ├── resource.go │ ├── resource_checkpoint.go │ ├── resource_k8s.go │ ├── resource_test.go │ ├── resource_yarn.go │ └── yarn │ │ ├── adapter.go │ │ ├── adapter_test.go │ │ ├── container_sort.go │ │ ├── disks.go │ │ ├── ginit.go │ │ ├── ginit_mock.go │ │ ├── metrics.go │ │ ├── port.go │ │ ├── port_test.go │ │ └── utils.go ├── statestore │ ├── cgroup │ │ ├── cgroup.go │ │ ├── cgroup_test.go │ │ ├── types.go │ │ └── utils.go │ ├── common │ │ ├── customize │ │ │ ├── app.go │ │ │ └── types.go │ │ ├── node │ │ │ ├── node.go │ │ │ └── type.go │ │ ├── perf │ │ │ ├── perf.go │ │ │ └── pmu │ │ │ │ └── pmu.go │ │ ├── prometheus │ │ │ └── prometheus.go │ │ ├── rdt │ │ │ ├── rdt.go │ │ │ └── rdt │ │ │ │ └── rdt.go │ │ └── state_common.go │ ├── mock │ │ ├── stats_cgroup_mock.go │ │ ├── stats_common_mock.go │ │ └── stats_mock.go │ └── state_store.go ├── types │ ├── diskquota.go │ ├── health_check.go │ ├── predict.go │ └── types.go └── util │ ├── appclass │ ├── 
appclass.go │ └── appclass_test.go │ ├── cgroup │ ├── blkio.go │ ├── blkio_test.go │ ├── cgroup.go │ ├── cgroup_test.go │ ├── cpu.go │ ├── cpu_test.go │ ├── cpubt.go │ ├── cpubt_test.go │ ├── cpuset.go │ ├── cpuset_test.go │ ├── memory.go │ ├── perf_event.go │ ├── topology.go │ └── topology_test.go │ ├── errors.go │ ├── machine │ ├── machine.go │ ├── machine_test.go │ └── speed.go │ ├── mountpoint │ └── mountpoint.go │ ├── ports │ └── ports.go │ ├── runtime │ ├── docker │ │ ├── docker.go │ │ └── docker_fake.go │ └── runtime.go │ ├── sets │ └── pods.go │ └── util.go ├── nm-operator ├── hadoop │ ├── hadoop_conf.go │ └── hadoop_conf_test.go ├── nmoperator │ └── nm_operator.go ├── types │ └── types.go └── util │ ├── util.go │ └── util_test.go ├── types ├── type.go └── types_yarn.go ├── util ├── times │ ├── times.go │ └── times_test.go └── util.go └── version ├── base.go ├── verflag └── verflag.go └── version.go /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | _output/ 3 | VERSION 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | REGISTRY ?= caelus 2 | 3 | .PHONY: build 4 | build: image 5 | image=${REGISTRY}/caelus:$$(cat VERSION); \ 6 | mkdir -p _output/bin/; \ 7 | docker run --rm $$image tar -cvf - -C /binaries . | tar -xvf - -C _output/bin/ 8 | 9 | .PHONY: format 10 | format: 11 | ./hack/format.sh 12 | 13 | .PHONY: test 14 | test: 15 | ./hack/test.sh 16 | 17 | .PHONY: clean 18 | clean: 19 | ./hack/clean.sh 20 | 21 | version: 22 | @version=$(VERSION); \ 23 | [[ "$$version" != "" ]] || version="$$(git describe --dirty --always --tags | sed 's/-/./g')"; \ 24 | touch VERSION && echo $$version > VERSION && echo image version is $$version 25 | 26 | image: version 27 | image=${REGISTRY}/caelus:$$(cat VERSION); \ 28 | echo building $$image;\ 29 | docker build . 
-t $$image -f hack/Dockerfile 30 | -------------------------------------------------------------------------------- /cmd/caelus/caelus.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package main 17 | 18 | import ( 19 | goflag "flag" 20 | "math/rand" 21 | "os" 22 | "time" 23 | 24 | "github.com/tencent/caelus/cmd/caelus/app" 25 | "github.com/tencent/caelus/pkg/version/verflag" 26 | 27 | "github.com/spf13/pflag" 28 | cliflag "k8s.io/component-base/cli/flag" 29 | "k8s.io/component-base/logs" 30 | ) 31 | 32 | // This is the main function for caelus 33 | func main() { 34 | rand.Seed(time.Now().UnixNano()) 35 | command := app.NewServerCommand() 36 | 37 | pflag.CommandLine.SetNormalizeFunc(cliflag.WordSepNormalizeFunc) 38 | pflag.CommandLine.AddGoFlagSet(goflag.CommandLine) 39 | logs.InitLogs() 40 | defer logs.FlushLogs() 41 | 42 | // support checking code version, like "--version" 43 | verflag.PrintAndExitIfRequested() 44 | 45 | if err := command.Execute(); err != nil { 46 | os.Exit(1) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /cmd/nm-operator/app/server.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package app 17 | 18 | import ( 19 | "github.com/tencent/caelus/pkg/version/verflag" 20 | 21 | "github.com/spf13/cobra" 22 | "github.com/spf13/pflag" 23 | "k8s.io/klog/v2" 24 | ) 25 | 26 | // options describe supported flags 27 | type options struct { 28 | ApiOption ApiOption 29 | } 30 | 31 | // printFlags show all flags 32 | func printFlags(flags *pflag.FlagSet) { 33 | flags.VisitAll(func(flag *pflag.Flag) { 34 | klog.V(1).Infof("FLAG: --%s=%q", flag.Name, flag.Value) 35 | }) 36 | } 37 | 38 | // NewServerCommand construct server execution context 39 | func NewServerCommand() *cobra.Command { 40 | opts := &options{} 41 | 42 | cmd := &cobra.Command{ 43 | Use: "nm_operator", 44 | Run: func(cmd *cobra.Command, args []string) { 45 | verflag.PrintAndExitIfRequested() 46 | printFlags(cmd.Flags()) 47 | 48 | if err := opts.Run(); err != nil { 49 | klog.Exitf("can't run command, %v", err) 50 | } 51 | }, 52 | } 53 | 54 | opts.AddFlags(cmd.Flags()) 55 | 56 | return cmd 57 | } 58 | 59 | // AddFlags describe supported flags 60 | func (o *options) AddFlags(fs *pflag.FlagSet) { 61 | o.ApiOption.AddFlags(fs) 62 | } 63 | 64 | // Run starts the main loop 65 | func (o *options) Run() error { 66 | if err := o.ApiOption.RegisterServerAndListen(); err != nil { 67 | return err 68 | } 69 | 70 | return nil 71 | } 72 | 
-------------------------------------------------------------------------------- /cmd/nm-operator/nm_operator.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package main 17 | 18 | import ( 19 | goflag "flag" 20 | "math/rand" 21 | "os" 22 | "time" 23 | 24 | "github.com/tencent/caelus/cmd/nm-operator/app" 25 | "github.com/tencent/caelus/pkg/version/verflag" 26 | 27 | "github.com/spf13/pflag" 28 | cliflag "k8s.io/component-base/cli/flag" 29 | "k8s.io/component-base/logs" 30 | ) 31 | 32 | // This is the main function for nm_operator server 33 | func main() { 34 | rand.Seed(time.Now().UnixNano()) 35 | // init command context 36 | command := app.NewServerCommand() 37 | 38 | pflag.CommandLine.SetNormalizeFunc(cliflag.WordSepNormalizeFunc) 39 | pflag.CommandLine.AddGoFlagSet(goflag.CommandLine) 40 | logs.InitLogs() 41 | defer logs.FlushLogs() 42 | 43 | // support version flag, such as "--version" 44 | verflag.PrintAndExitIfRequested() 45 | 46 | if err := command.Execute(); err != nil { 47 | os.Exit(1) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/.code.yml: -------------------------------------------------------------------------------- 1 | source: 2 | test_source: 3 | filepath_regex: [".*/"] 4 
| -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | _output/ -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/caelus/2ade1eaa29a5bca5369d0980fa1423049522366f/contrib/lighthouse-plugin/.gitmodules -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/Makefile: -------------------------------------------------------------------------------- 1 | TARGETS := $(shell ls hack | grep -v \\.sh | grep -v \\.txt) 2 | 3 | $(TARGETS): 4 | ./hack/$@ 5 | -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/README.md: -------------------------------------------------------------------------------- 1 | # Lighthouse Plugin 2 | 3 | This repository implements a bundle of plugins that enhance Kubernetes functionality. The provided features are 4 | listed below. 5 | 6 | 7 | - Docker runtime backend 8 | 9 | *1.* Storage Options 10 | 11 | Any annotation that starts with `mixer.kubernetes.io/storage-opt-` will be added to the docker storage options. 12 | 13 | ``` 14 | apiVersion: v1 15 | kind: Pod 16 | metadata: 17 | ... 18 | annotations: 19 | mixer.kubernetes.io/storage-opt-size: "1G" 20 | ``` 21 | 22 | The above example adds the storage option `size=1G` to your container. 23 | 24 | *2.* uts Options 25 | 26 | A pod with an annotation that starts with `mixer.kubernetes.io/uts-mode` will not use the host UTS namespace when host network is true. 27 | 28 | ``` 29 | apiVersion: v1 30 | kind: Pod 31 | metadata: 32 | ... 
33 | annotations: 34 | mixer.kubernetes.io/uts-mode: "" 35 | ``` 36 | 37 | *3.* Pid limit 38 | 39 | To prevent a user's pids from flooding the host machine, we provide a way to set a pid limit for the user's containers. To use this feature, 40 | add `mixer.kubernetes.io/pids-limit: "<number>"` to your pod's annotations. 41 | 42 | ``` 43 | apiVersion: v1 44 | kind: Pod 45 | metadata: 46 | ... 47 | annotations: 48 | mixer.kubernetes.io/pids-limit: "40" 49 | spec: 50 | containers: 51 | - name: test 52 | ``` 53 | 54 | *4.* Offline Options 55 | 56 | Offline pods with the annotation `mixer.kubernetes.io/app-class:greedy` will have their cgroup path changed to "kubepods/offline". 57 | 58 | ``` 59 | apiVersion: v1 60 | kind: Pod 61 | metadata: 62 | ... 63 | annotations: 64 | mixer.kubernetes.io/app-class: "greedy" 65 | ``` 66 | -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/VERSION: -------------------------------------------------------------------------------- 1 | v0.3.0 2 | -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/build/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | 3 | # install rpm build tools 4 | RUN yum update -y && \ 5 | yum install -y rpm-build make && \ 6 | mkdir -p /root/rpmbuild/SPECS && \ 7 | mkdir -p /root/rpmbuild/SOURCES 8 | 9 | # install golang 10 | RUN curl -fsSL https://dl.google.com/go/go1.14.6.linux-amd64.tar.gz | tar -xzC /usr/local 11 | ENV PATH=/usr/local/go/bin:$PATH 12 | -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/build/config: -------------------------------------------------------------------------------- 1 | ARGS="--feature-gates=AllAlpha=true --v=3 --listen-address=unix://@plugin-server" -------------------------------------------------------------------------------- 
/contrib/lighthouse-plugin/build/plugin-server.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Lighthouse plugin server 3 | Before=kubelet.service 4 | 5 | [Service] 6 | Type=notify 7 | EnvironmentFile=-/etc/plugin-server/config 8 | ExecStart=/usr/bin/plugin-server $ARGS 9 | Restart=always 10 | 11 | LimitNOFILE=infinity 12 | LimitNPROC=infinity 13 | LimitCORE=infinity 14 | # Uncomment TasksMax if your systemd version supports it. 15 | # Only systemd 226 and above support this version. 16 | TasksMax=infinity 17 | 18 | [Install] 19 | WantedBy=multi-user.target 20 | -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/build/plugin-server.spec: -------------------------------------------------------------------------------- 1 | Name: plugin-server 2 | Version: %{version} 3 | Release: %{commit}%{?dist} 4 | Summary: lighthouse plugin 5 | 6 | Group: Development/GAIA 7 | License: MIT 8 | Source: plugin-server.tar.gz 9 | 10 | Requires: systemd-units 11 | 12 | %define pkgname %{name}-%{version}-%{release} 13 | 14 | %description 15 | lighthouse plugin 16 | 17 | %prep 18 | %setup -n plugin-server-%{version} 19 | 20 | %build 21 | make binary 22 | 23 | %install 24 | install -d $RPM_BUILD_ROOT/%{_bindir} 25 | install -d $RPM_BUILD_ROOT/%{_unitdir} 26 | install -d $RPM_BUILD_ROOT/etc/plugin-server 27 | 28 | install -p -m 755 ./_output/bin/plugin-server $RPM_BUILD_ROOT/%{_bindir}/plugin-server 29 | install -p -m 644 ./build/config $RPM_BUILD_ROOT/etc/plugin-server/config 30 | install -p -m 644 ./build/plugin-server.service $RPM_BUILD_ROOT/%{_unitdir}/ 31 | 32 | %clean 33 | rm -rf $RPM_BUILD_ROOT 34 | 35 | %files 36 | %config(noreplace,missingok) /etc/plugin-server/config 37 | 38 | /%{_unitdir}/plugin-server.service 39 | 40 | /%{_bindir}/plugin-server 41 | -------------------------------------------------------------------------------- 
/contrib/lighthouse-plugin/cmd/plugin-server/plugin-server.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package main 17 | 18 | import ( 19 | goflag "flag" 20 | "math/rand" 21 | "os" 22 | "time" 23 | 24 | "github.com/spf13/pflag" 25 | cliflag "k8s.io/component-base/cli/flag" 26 | "k8s.io/component-base/logs" 27 | 28 | "github.com/tencent/lighthouse-plugin/cmd/plugin-server/app" 29 | ) 30 | 31 | // This the main function for the server 32 | func main() { 33 | rand.Seed(time.Now().UnixNano()) 34 | // init command context 35 | command := app.NewPluginServerCommand() 36 | 37 | // init flags 38 | pflag.CommandLine.SetNormalizeFunc(cliflag.WordSepNormalizeFunc) 39 | pflag.CommandLine.AddGoFlagSet(goflag.CommandLine) 40 | 41 | // init logs 42 | logs.InitLogs() 43 | defer logs.FlushLogs() 44 | 45 | if err := command.Execute(); err != nil { 46 | os.Exit(1) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/tencent/lighthouse-plugin 2 | 3 | go 1.14 4 | 5 | require ( 6 | github.com/Microsoft/hcsshim v0.8.14 // indirect 7 | github.com/containerd/cgroups v0.0.0-20201119153540-4cbc285b3327 // 
indirect 8 | github.com/containerd/containerd v1.4.3 // indirect 9 | github.com/containerd/continuity v0.0.0-20201208142359-180525291bb7 // indirect 10 | github.com/coreos/go-systemd/v22 v22.1.0 11 | github.com/docker/distribution v2.7.1+incompatible // indirect 12 | github.com/docker/docker v20.10.1+incompatible 13 | github.com/docker/go-connections v0.4.0 // indirect 14 | github.com/evanphx/json-patch v4.2.0+incompatible 15 | github.com/gorilla/mux v1.7.0 16 | github.com/json-iterator/go v1.1.8 17 | github.com/mYmNeo/version v0.0.0-20200424030557-30e59e77cc3e 18 | github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 // indirect 19 | github.com/morikuni/aec v1.0.0 // indirect 20 | github.com/opencontainers/image-spec v1.0.1 // indirect 21 | github.com/opencontainers/runc v1.0.0-rc9 22 | github.com/spf13/cobra v0.0.5 23 | github.com/spf13/pflag v1.0.5 24 | gotest.tools/v3 v3.0.3 // indirect 25 | k8s.io/api v0.17.4 26 | k8s.io/apimachinery v0.17.4 27 | k8s.io/apiserver v0.17.4 28 | k8s.io/client-go v0.17.4 29 | k8s.io/component-base v0.17.4 30 | k8s.io/klog v1.0.0 31 | ) 32 | 33 | replace ( 34 | github.com/Sirupsen/logrus v1.7.0 => github.com/sirupsen/logrus v1.7.0 35 | github.com/sirupsen/logrus v1.7.0 => github.com/Sirupsen/logrus v1.7.0 36 | ) 37 | -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/hack/binary: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o pipefail 5 | set -o nounset 6 | 7 | BASE_DIR=$(cd $(dirname $0)/.. 
&& pwd) 8 | source ${BASE_DIR}/hack/common.sh 9 | 10 | go build -o ${OUTPUT_PATH}/bin/plugin-server \ 11 | -ldflags "-X github.com/mYmNeo/version/verflag.ReleaseName=${PACKAGE_NAME} \ 12 | -X github.com/mYmNeo/version.gitCommit=${GIT_COMMIT} \ 13 | -X github.com/mYmNeo/version.gitTreeState=${TREE_STATE} \ 14 | -X github.com/mYmNeo/version.gitVersion=${GIT_VERSION} \ 15 | -X github.com/mYmNeo/version.gitMajor=${GIT_MAJOR} \ 16 | -X github.com/mYmNeo/version.gitMinor=${GIT_MINOR} \ 17 | -X github.com/mYmNeo/version.buildDate=${BUILD_DATE}" \ 18 | ${PACKAGE}/cmd/plugin-server 19 | -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/hack/clean: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o pipefail 5 | set -o nounset 6 | 7 | BASE_DIR=$(cd $(dirname $0)/.. && pwd) 8 | source ${BASE_DIR}/hack/common.sh 9 | 10 | rm -rf ${OUTPUT_PATH} 11 | -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/hack/common.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -o errexit 3 | set -o nounset 4 | set -o pipefail 5 | 6 | PACKAGE_PREFIX="github.com/tencent" 7 | PACKAGE_NAME="lighthouse-plugin" 8 | PACKAGE="${PACKAGE_PREFIX}/${PACKAGE_NAME}" 9 | OUTPUT_PATH="${BASE_DIR}/_output" 10 | mkdir -p ${OUTPUT_PATH} 11 | USER_ID=$(id -u) 12 | GROUP_ID=$(id -g) 13 | 14 | 15 | if [[ -z ${GIT_COMMIT:-""} ]]; then 16 | GIT_COMMIT=$(git rev-parse "HEAD^{commit}") 17 | fi 18 | BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') 19 | 20 | GIT_VERSION=$(cat ${BASE_DIR}/VERSION) 21 | if [[ -z ${GIT_STATUS:-""} ]]; then 22 | if GIT_STATUS=$(git status --porcelain 2>/dev/null) && [[ -z ${GIT_STATUS} ]]; then 23 | TREE_STATE="clean" 24 | else 25 | TREE_STATE="-dirty" 26 | GIT_VERSION+=${TREE_STATE} 27 | fi 28 | fi 29 | 30 | if [[ "${GIT_VERSION}" =~ 
^v([0-9]+)\.([0-9]+)(\.[0-9]+)?([-].*)?([+].*)?$ ]]; then 31 | GIT_MAJOR=${BASH_REMATCH[1]} 32 | GIT_MINOR=${BASH_REMATCH[2]} 33 | if [[ -n "${BASH_REMATCH[4]}" ]]; then 34 | GIT_MINOR+="+" 35 | fi 36 | fi 37 | -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/hack/rpm: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o pipefail 5 | set -o nounset 6 | 7 | BASE_DIR=$(cd $(dirname $0)/.. && pwd) 8 | source ${BASE_DIR}/hack/common.sh 9 | 10 | COMMIT_NUM=$(git log --oneline | wc -l | sed -e 's/^[ \t]*//') 11 | RPM_VERSION=${GIT_VERSION:1} 12 | RPM_IMAGE="plugin-server-${RPM_VERSION}:${COMMIT_NUM}" 13 | 14 | cd $BASE_DIR 15 | 16 | git archive -o ${OUTPUT_PATH}/plugin-server.tar.gz \ 17 | --format=tar \ 18 | --prefix=plugin-server-${RPM_VERSION}/ \ 19 | HEAD 20 | 21 | cp ${BASE_DIR}/build/Dockerfile ${OUTPUT_PATH} 22 | cp ${BASE_DIR}/build/plugin-server.spec ${OUTPUT_PATH} 23 | 24 | cat >>${OUTPUT_PATH}/Dockerfile < /root/.rpmmacros \ 29 | && echo '%__os_install_post %{nil}' >> /root/.rpmmacros \ 30 | && echo '%debug_package %{nil}' >> /root/.rpmmacros 31 | WORKDIR /root/rpmbuild/SPECS 32 | 33 | ENV GIT_COMMIT=${GIT_COMMIT} 34 | ENV GIT_VERSION=${GIT_VERSION} 35 | ENV GIT_STATUS=${GIT_STATUS} 36 | 37 | RUN rpmbuild -bb \ 38 | --define 'version ${RPM_VERSION}' \ 39 | --define 'commit ${COMMIT_NUM}' \ 40 | plugin-server.spec 41 | EOF 42 | 43 | (cd "${OUTPUT_PATH}" && docker build --network=host -t "$RPM_IMAGE" .) 44 | docker run --rm "$RPM_IMAGE" bash -c 'cd /root/rpmbuild && tar -c *RPMS' | tar xvC "${OUTPUT_PATH}" 45 | -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/pkg/plugin/docker/common.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package docker 17 | 18 | import ( 19 | gjson "encoding/json" 20 | "fmt" 21 | "github.com/tencent/lighthouse-plugin/pkg/plugin/util" 22 | 23 | dockerapi "github.com/docker/docker/client" 24 | "github.com/evanphx/json-patch" 25 | "k8s.io/client-go/kubernetes" 26 | "k8s.io/client-go/tools/events" 27 | ) 28 | 29 | var ( 30 | noChangeResponse = []byte(`{}`) 31 | ) 32 | 33 | // ToolKits group some clients 34 | type ToolKits struct { 35 | K8sClient kubernetes.Interface 36 | EventRecorder events.EventRecorder 37 | DockerClient *dockerapi.Client 38 | } 39 | 40 | // PatchData, copied from lighthouse/pkg/hook/types.go 41 | type PatchData struct { 42 | PatchType string `json:"patchType,omitempty"` 43 | PatchData []byte `json:"patchData,omitempty"` 44 | } 45 | 46 | // PostHookData, copied from lighthouse/pkg/hook/types.go 47 | type PostHookData struct { 48 | StatusCode int `json:"statusCode,omitempty"` 49 | Body gjson.RawMessage `json:"body,omitempty"` 50 | } 51 | 52 | func groupPatchData(config interface{}, bodyBytes []byte) ([]byte, error) { 53 | newBodyBytes, err := util.Json.Marshal(config) 54 | if err != nil { 55 | return []byte{}, fmt.Errorf("cannit marshal, :%v", err) 56 | } 57 | 58 | patchBytes, err := jsonpatch.CreateMergePatch(bodyBytes, newBodyBytes) 59 | if err != nil { 60 | return []byte{}, fmt.Errorf("can't create patch, %v", err) 61 | } 62 | 63 | return patchBytes, nil 
64 | } 65 | -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/pkg/plugin/docker/offline/util.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package offline 17 | 18 | import ( 19 | "github.com/tencent/lighthouse-plugin/pkg/plugin" 20 | ) 21 | 22 | const ( 23 | AnnotationKey = plugin.PodAnnotationPrefix + "app-class" 24 | AnnotationOfflineValue = "greedy" 25 | ) 26 | 27 | // IsOffline return if a pod is offline pod 28 | // if offline is nodemanager, should be added the annotation key with greedy 29 | func IsOffline(labels map[string]string) bool { 30 | if labels == nil { 31 | return false 32 | } 33 | if value, ok := labels[AnnotationKey]; ok { 34 | if value == AnnotationOfflineValue { 35 | return true 36 | } 37 | } 38 | return false 39 | } 40 | -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/pkg/plugin/docker/pid-limit/pid_limit.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package pidlimit 17 | 18 | import ( 19 | "context" 20 | "os" 21 | "strconv" 22 | 23 | "github.com/tencent/lighthouse-plugin/pkg/plugin" 24 | "github.com/tencent/lighthouse-plugin/pkg/plugin/docker" 25 | 26 | "github.com/docker/docker/runconfig" 27 | utilfeature "k8s.io/apiserver/pkg/util/feature" 28 | "k8s.io/klog" 29 | ) 30 | 31 | /* 32 | Pid limit is used to limit pid number for pod with assigned label. Kubernetes could limit pid number for all pods 33 | on the node, but not for signal pod. 34 | */ 35 | func init() { 36 | _, err := os.Stat("/sys/fs/cgroup/pids/") 37 | if err == nil { 38 | docker.PreHookCreateContainer.RegisterSubPlugin(opt.handle) 39 | } 40 | } 41 | 42 | const ( 43 | annotationPidsLimit = plugin.PodAnnotationPrefix + "pids-limit" 44 | ) 45 | 46 | var ( 47 | opt = &pidsLimit{} 48 | ) 49 | 50 | type pidsLimit struct{} 51 | 52 | func (p *pidsLimit) handle( 53 | toolKits *docker.ToolKits, 54 | containerConfig *runconfig.ContainerConfigWrapper, 55 | metadata *plugin.PodMetadata) error { 56 | if !utilfeature.DefaultMutableFeatureGate.Enabled(plugin.DockerPidsLimit) { 57 | return nil 58 | } 59 | 60 | if metadata.ContainerType != plugin.ContainerTypeLabelContainer { 61 | return nil 62 | } 63 | 64 | containerJson, err := toolKits.DockerClient.ContainerInspect(context.Background(), metadata.SandBoxID) 65 | if err != nil { 66 | klog.Errorf("can't get sandbox container %s, %v", metadata.SandBoxID, err) 67 | return err 68 | } 69 | 70 | // read pid limit number from label and pass to docker daemon 71 | for k, 
v := range containerJson.Config.Labels { 72 | if k == annotationPidsLimit { 73 | pidsNum, err := strconv.Atoi(v) 74 | if err != nil { 75 | klog.Errorf("invalid pid number format(%s): %v", v, err) 76 | return nil 77 | } 78 | klog.V(2).Infof("limit container %s/%s(%s) pid number to %d", 79 | metadata.Namespace, metadata.PodName, metadata.ContainerName, pidsNum) 80 | pidsNum64 := int64(pidsNum) 81 | containerConfig.InnerHostConfig.Resources.PidsLimit = &pidsNum64 82 | return nil 83 | } 84 | } 85 | 86 | return nil 87 | } 88 | -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/pkg/plugin/docker/storage-opts/storage_opts.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package storageopts 17 | 18 | import ( 19 | "context" 20 | "strings" 21 | 22 | "github.com/docker/docker/runconfig" 23 | utilfeature "k8s.io/apiserver/pkg/util/feature" 24 | "k8s.io/klog" 25 | 26 | "github.com/tencent/lighthouse-plugin/pkg/plugin" 27 | "github.com/tencent/lighthouse-plugin/pkg/plugin/docker" 28 | ) 29 | 30 | /* 31 | Storage opts will set container root folder disk space limit size to docker daemon based on pod label. 
32 | */ 33 | func init() { 34 | docker.PreHookCreateContainer.RegisterSubPlugin(opt.handle) 35 | } 36 | 37 | var ( 38 | opt = &storageOption{} 39 | ) 40 | 41 | const ( 42 | optionPrefix = plugin.PodAnnotationPrefix + "storage-opt-" 43 | ) 44 | 45 | type storageOption struct{} 46 | 47 | func (p *storageOption) handle( 48 | toolKits *docker.ToolKits, 49 | containerConfig *runconfig.ContainerConfigWrapper, 50 | metadata *plugin.PodMetadata) error { 51 | if !utilfeature.DefaultMutableFeatureGate.Enabled(plugin.DockerStorageOption) { 52 | return nil 53 | } 54 | 55 | if metadata.ContainerType != plugin.ContainerTypeLabelContainer { 56 | return nil 57 | } 58 | 59 | if containerConfig.InnerHostConfig.StorageOpt == nil { 60 | containerConfig.InnerHostConfig.StorageOpt = make(map[string]string) 61 | } 62 | 63 | containerJson, err := toolKits.DockerClient.ContainerInspect(context.Background(), metadata.SandBoxID) 64 | if err != nil { 65 | klog.Errorf("can't get sandbox container %s, %v", metadata.SandBoxID, err) 66 | return err 67 | } 68 | 69 | // the keys must be supported by docker daemon, such as storage-opt-size = xxx 70 | for k, v := range containerJson.Config.Labels { 71 | if strings.HasPrefix(k, optionPrefix) { 72 | if optKey := strings.TrimPrefix(k, optionPrefix); len(optKey) > 0 { 73 | klog.V(2).Infof("Set pod %s/%s(%s) storage option %s=%s", 74 | metadata.Namespace, metadata.PodName, metadata.ContainerName, optKey, v) 75 | containerConfig.InnerHostConfig.StorageOpt[optKey] = v 76 | } 77 | } 78 | } 79 | 80 | return nil 81 | } 82 | -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/pkg/plugin/docker/uts-mode/uts_mode.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package utsmode 17 | 18 | import ( 19 | "context" 20 | 21 | "github.com/docker/docker/api/types/container" 22 | "github.com/docker/docker/runconfig" 23 | utilfeature "k8s.io/apiserver/pkg/util/feature" 24 | "k8s.io/klog" 25 | 26 | "github.com/tencent/lighthouse-plugin/pkg/plugin" 27 | "github.com/tencent/lighthouse-plugin/pkg/plugin/docker" 28 | ) 29 | 30 | /* 31 | UTS mode will create a different UTS namespace from host namespace when label is set. 32 | This is useful when setting host network for kubernetes, but do not want to share host UTS namespace. 
33 | */ 34 | func init() { 35 | docker.PreHookCreateContainer.RegisterSubPlugin(opt.handle) 36 | } 37 | 38 | var ( 39 | opt = &utsMode{} 40 | ) 41 | 42 | const ( 43 | annotationUTSMode = plugin.PodAnnotationPrefix + "uts-mode" 44 | ) 45 | 46 | type utsMode struct{} 47 | 48 | func (p *utsMode) handle( 49 | toolKits *docker.ToolKits, 50 | containerConfig *runconfig.ContainerConfigWrapper, 51 | metadata *plugin.PodMetadata) error { 52 | if !utilfeature.DefaultMutableFeatureGate.Enabled(plugin.DockerUTSMode) { 53 | return nil 54 | } 55 | 56 | if metadata.ContainerType != plugin.ContainerTypeLabelContainer { 57 | return nil 58 | } 59 | 60 | containerJson, err := toolKits.DockerClient.ContainerInspect(context.Background(), metadata.SandBoxID) 61 | if err != nil { 62 | klog.Errorf("can't get sandbox container %s, %v", metadata.SandBoxID, err) 63 | return err 64 | } 65 | 66 | // create an new UTS namespace 67 | for k, v := range containerJson.Config.Labels { 68 | if k == annotationUTSMode { 69 | klog.V(2).Infof("Set pod %s/%s(%s) uts mode to :%s", 70 | metadata.Namespace, metadata.PodName, metadata.ContainerName, v) 71 | containerConfig.InnerHostConfig.UTSMode = container.UTSMode(v) 72 | } 73 | } 74 | 75 | return nil 76 | } 77 | -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/pkg/plugin/meta.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package plugin 17 | 18 | // PodMetadata describe pod info 19 | type PodMetadata struct { 20 | PodName string 21 | PodUID string 22 | SandBoxID string 23 | Namespace string 24 | ContainerName string 25 | ContainerType string 26 | } 27 | 28 | // GetPodMetadata get pod info from labels 29 | func GetPodMetadata(labels map[string]string) *PodMetadata { 30 | return &PodMetadata{ 31 | PodName: labels[PodNameLabelKey], 32 | PodUID: labels[PodUIDLabelKey], 33 | Namespace: labels[PodNamespaceLabelKey], 34 | ContainerName: labels[ContainerNameLabelKey], 35 | ContainerType: labels[ContainerTypeLabelKey], 36 | SandBoxID: labels[ContainerSandBoxKey], 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /contrib/lighthouse-plugin/pkg/plugin/plugin.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
// Plugin describes the functions every plugin exposes.
type Plugin interface {
	// Method returns the plugin's method string (presumably the HTTP method it
	// serves — confirm against the server registration code).
	Method() string
	// Path returns the plugin's path string (presumably the URL path it serves —
	// confirm against the server registration code).
	Path() string
	// Handler builds the http.HandlerFunc for this plugin from a kubernetes
	// client and the docker endpoint and version strings.
	Handler(client kubernetes.Interface, dockerEndpoint, dockerVersion string) http.HandlerFunc
	// SetIgnored installs the function used to decide what is ignored.
	SetIgnored(IgnoreNamespacesFunc)
}

// IgnoreNamespacesFunc takes a string and returns a bool; by its name it
// presumably reports whether a namespace should be ignored — confirm with callers.
type IgnoreNamespacesFunc func(string) bool

// BasePlugin holds the variables shared by plugins.
// NOTE(review): its field names (Method, Path, Handler) equal the Plugin
// interface's method names, so BasePlugin cannot itself declare those methods.
type BasePlugin struct {
	Method  string
	Path    string
	Ignored IgnoreNamespacesFunc
	Handler http.HandlerFunc
}

var (
	// AllPlugins is the package-level plugin map, keyed by string. The key
	// semantics are defined by the registering code, which is not in this file.
	AllPlugins = make(map[string]Plugin)
)

// Container label keys and values read by GetPodMetadata, plus the label
// prefixes under which pod annotations appear.
const (
	ContainerTypeLabelKey       = "io.kubernetes.docker.type"
	ContainerTypeLabelContainer = "container"
	ContainerSandBoxKey         = "io.kubernetes.sandbox.id"
	ContainerNameLabelKey       = "io.kubernetes.container.name"
	PodNamespaceLabelKey        = "io.kubernetes.pod.namespace"
	PodNameLabelKey             = "io.kubernetes.pod.name"
	PodUIDLabelKey              = "io.kubernetes.pod.uid"
	// sandbox container will add prefix with value "annotation."
	PodAutoAnnotationPrefix = "annotation."
	// PodAnnotationPrefix is the full label prefix of the annotations handled by
	// the plugins in this project.
	PodAnnotationPrefix = PodAutoAnnotationPrefix + "mixer.kubernetes.io/"
)

// Feature gate names, one per docker mutation.
const (
	// docker storage option
	DockerStorageOption featuregate.Feature = "DockerStorageOption"
	// UTS mode support, create a new UTS namespace different with host namespace
	DockerUTSMode featuregate.Feature = "DockerUTSMode"
	// Offline support, create cgroup path different with kubernetes
	DockerOfflineMutate featuregate.Feature = "DockerOfflineMutate"
	// limit pod pid number
	DockerPidsLimit featuregate.Feature = "DockerPidsLimit"
)

// feature gate support
// Every feature is registered as Alpha and disabled by default.
var defaultFeatureGate = map[featuregate.Feature]featuregate.FeatureSpec{
	DockerStorageOption: {Default: false, PreRelease: featuregate.Alpha},
	DockerUTSMode:       {Default: false, PreRelease: featuregate.Alpha},
	DockerOfflineMutate: {Default: false, PreRelease: featuregate.Alpha},
	DockerPidsLimit:     {Default: false, PreRelease: featuregate.Alpha},
}

// init adds the features above to the default mutable feature gate; the result
// of Add is passed to runtime.Must.
func init() {
	runtime.Must(utilfeature.DefaultMutableFeatureGate.Add(defaultFeatureGate))
}
var (
	// Json describe how to parse json struct data
	// It is a frozen json-iterator configuration with HTML escaping disabled,
	// map key sorting enabled and json.RawMessage validation enabled.
	Json = jsoniter.Config{
		EscapeHTML:             false,
		SortMapKeys:            true,
		ValidateJsonRawMessage: true,
	}.Froze()
)
14 | */ 15 | 16 | package server 17 | 18 | import ( 19 | // init offline mutator 20 | _ "github.com/tencent/lighthouse-plugin/pkg/plugin/docker/offline" 21 | // init pid limit mutator 22 | _ "github.com/tencent/lighthouse-plugin/pkg/plugin/docker/pid-limit" 23 | // init storage opts mutator 24 | _ "github.com/tencent/lighthouse-plugin/pkg/plugin/docker/storage-opts" 25 | // init uts mode mutator 26 | _ "github.com/tencent/lighthouse-plugin/pkg/plugin/docker/uts-mode" 27 | ) 28 | -------------------------------------------------------------------------------- /contrib/lighthouse/Makefile: -------------------------------------------------------------------------------- 1 | TARGETS := $(shell ls hack | grep -v \\.sh | grep -v \\.txt) 2 | 3 | $(TARGETS): 4 | ./hack/$@ 5 | -------------------------------------------------------------------------------- /contrib/lighthouse/README.md: -------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | Lighthouse is a framework to pre-hook/post-hook runtime request/response. With this hook, we can dynamically add options to any other OCI 4 | arguments which aren't supported in Kubernetes. 
5 | 6 | # Architecture 7 | 8 | ![LighthouseDesign.png](doc/LighthouseDesign.png) 9 | 10 | # Hook Configuration 11 | 12 | ``` 13 | apiVersion: componentconfig.lighthouse.io/v1alpha1 14 | kind: HookConfiguration 15 | timeout: 10 16 | listenAddress: unix:///var/run/lighthouse.sock 17 | webhooks: 18 | - name: lighthouse.io 19 | endpoint: unix://@lighthouse-hook 20 | failurePolicy: Fail 21 | stages: 22 | - urlPattern: /containers/create 23 | method: post 24 | type: PreHook 25 | - urlPattern: /containers/create 26 | method: post 27 | type: PostHook 28 | ``` 29 | -------------------------------------------------------------------------------- /contrib/lighthouse/VERSION: -------------------------------------------------------------------------------- 1 | v0.2.1 2 | -------------------------------------------------------------------------------- /contrib/lighthouse/build/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | 3 | # install rpm build tools 4 | RUN yum update -y && \ 5 | yum install -y rpm-build make && \ 6 | mkdir -p /root/rpmbuild/SPECS && \ 7 | mkdir -p /root/rpmbuild/SOURCES 8 | 9 | # install golang 10 | RUN curl -fsSL https://dl.google.com/go/go1.15.6.linux-amd64.tar.gz | tar -xzC /usr/local 11 | ENV PATH=/usr/local/go/bin:$PATH 12 | -------------------------------------------------------------------------------- /contrib/lighthouse/build/config: -------------------------------------------------------------------------------- 1 | ARGS="--config=/etc/lighthouse/config.yaml --v=3" -------------------------------------------------------------------------------- /contrib/lighthouse/build/config.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: componentconfig.lighthouse.io/v1alpha1 2 | kind: HookConfiguration 3 | timeout: 10 4 | listenAddress: unix:///var/run/lighthouse.sock 5 | webhooks: 6 | - name: versioned 7 | endpoint: unix://@plugin-server 8 
| failurePolicy: Fail 9 | stages: 10 | - urlPattern: /{id:v[.0-9]+}/containers/create 11 | method: post 12 | type: PreHook 13 | - name: non-versioned 14 | endpoint: unix://@plugin-server 15 | failurePolicy: Fail 16 | stages: 17 | - urlPattern: /containers/create 18 | method: post 19 | type: PreHook -------------------------------------------------------------------------------- /contrib/lighthouse/build/lighthouse.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Lighthouse server 3 | Before=kubelet.service 4 | 5 | [Service] 6 | Type=notify 7 | EnvironmentFile=-/etc/lighthouse/config 8 | ExecStart=/usr/bin/lighthouse $ARGS 9 | Restart=always 10 | 11 | LimitNOFILE=infinity 12 | LimitNPROC=infinity 13 | LimitCORE=infinity 14 | # Uncomment TasksMax if your systemd version supports it. 15 | # Only systemd 226 and above support this version. 16 | TasksMax=infinity 17 | 18 | [Install] 19 | WantedBy=multi-user.target 20 | -------------------------------------------------------------------------------- /contrib/lighthouse/build/lighthouse.spec: -------------------------------------------------------------------------------- 1 | Name: lighthouse 2 | Version: %{version} 3 | Release: %{commit}%{?dist} 4 | Summary: lighthouse plugin 5 | 6 | Group: Development/GAIA 7 | License: MIT 8 | Source: lighthouse.tar.gz 9 | 10 | Requires: systemd-units 11 | 12 | %define pkgname %{name}-%{version}-%{release} 13 | 14 | %description 15 | lighthouse plugin 16 | 17 | %prep 18 | %setup -n lighthouse-%{version} 19 | 20 | %build 21 | make binary 22 | 23 | %install 24 | install -d $RPM_BUILD_ROOT/%{_bindir} 25 | install -d $RPM_BUILD_ROOT/%{_unitdir} 26 | install -d $RPM_BUILD_ROOT/etc/lighthouse 27 | 28 | install -p -m 755 ./_output/bin/lighthouse $RPM_BUILD_ROOT/%{_bindir}/lighthouse 29 | install -p -m 644 ./build/config $RPM_BUILD_ROOT/etc/lighthouse/config 30 | install -p -m 644 ./build/config.yaml 
$RPM_BUILD_ROOT/etc/lighthouse/config.yaml 31 | install -p -m 644 ./build/lighthouse.service $RPM_BUILD_ROOT/%{_unitdir}/ 32 | 33 | %clean 34 | rm -rf $RPM_BUILD_ROOT 35 | 36 | %files 37 | %config(noreplace,missingok) /etc/lighthouse/config 38 | %config(noreplace,missingok) /etc/lighthouse/config.yaml 39 | 40 | /%{_unitdir}/lighthouse.service 41 | 42 | /%{_bindir}/lighthouse 43 | -------------------------------------------------------------------------------- /contrib/lighthouse/cmd/lighthouse/lighthouse.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package main 17 | 18 | import ( 19 | goflag "flag" 20 | "math/rand" 21 | "os" 22 | "time" 23 | 24 | "github.com/spf13/pflag" 25 | cliflag "k8s.io/component-base/cli/flag" 26 | "k8s.io/component-base/logs" 27 | 28 | "github.com/tencent/lighthouse/cmd/lighthouse/app" 29 | ) 30 | 31 | // This is the main function for the server 32 | func main() { 33 | rand.Seed(time.Now().UnixNano()) 34 | // init command context 35 | command := app.NewLighthouseCommand() 36 | 37 | pflag.CommandLine.SetNormalizeFunc(cliflag.WordSepNormalizeFunc) 38 | pflag.CommandLine.AddGoFlagSet(goflag.CommandLine) 39 | logs.InitLogs() 40 | defer logs.FlushLogs() 41 | 42 | if err := command.Execute(); err != nil { 43 | os.Exit(1) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /contrib/lighthouse/doc/LighthouseDesign.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/caelus/2ade1eaa29a5bca5369d0980fa1423049522366f/contrib/lighthouse/doc/LighthouseDesign.png -------------------------------------------------------------------------------- /contrib/lighthouse/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/tencent/lighthouse 2 | 3 | go 1.14 4 | 5 | require ( 6 | github.com/Microsoft/go-winio v0.4.16 // indirect 7 | github.com/coreos/go-systemd/v22 v22.0.0 8 | github.com/docker/go-connections v0.4.0 9 | github.com/evanphx/json-patch v4.2.0+incompatible 10 | github.com/google/uuid v1.1.1 11 | github.com/gorilla/mux v1.7.0 12 | github.com/json-iterator/go v1.1.8 13 | github.com/mYmNeo/version v0.0.0-20200424030557-30e59e77cc3e 14 | github.com/spf13/cobra v0.0.5 15 | github.com/spf13/pflag v1.0.5 16 | golang.org/x/net v0.0.0-20191004110552-13f9640d40b9 17 | k8s.io/apimachinery v0.17.4 18 | k8s.io/component-base v0.17.4 19 | k8s.io/klog v1.0.0 20 | ) 21 | 
-------------------------------------------------------------------------------- /contrib/lighthouse/hack/binary: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o pipefail 5 | set -o nounset 6 | 7 | BASE_DIR=$(cd $(dirname $0)/.. && pwd) 8 | source ${BASE_DIR}/hack/common.sh 9 | 10 | go build -o ${OUTPUT_PATH}/bin/${PACKAGE_NAME} \ 11 | -ldflags "-X github.com/mYmNeo/version/verflag.ReleaseName=${PACKAGE_NAME} \ 12 | -X github.com/mYmNeo/version.gitCommit=${GIT_COMMIT} \ 13 | -X github.com/mYmNeo/version.gitTreeState=${TREE_STATE} \ 14 | -X github.com/mYmNeo/version.gitVersion=${GIT_VERSION} \ 15 | -X github.com/mYmNeo/version.gitMajor=${GIT_MAJOR} \ 16 | -X github.com/mYmNeo/version.gitMinor=${GIT_MINOR} \ 17 | -X github.com/mYmNeo/version.buildDate=${BUILD_DATE}" \ 18 | ${PACKAGE}/cmd/lighthouse 19 | -------------------------------------------------------------------------------- /contrib/lighthouse/hack/boilerplate.go.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/caelus/2ade1eaa29a5bca5369d0980fa1423049522366f/contrib/lighthouse/hack/boilerplate.go.txt -------------------------------------------------------------------------------- /contrib/lighthouse/hack/clean: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o pipefail 5 | set -o nounset 6 | 7 | BASE_DIR=$(cd $(dirname $0)/.. && pwd) 8 | source ${BASE_DIR}/hack/common.sh 9 | 10 | rm -rf ${OUTPUT_PATH} 11 | -------------------------------------------------------------------------------- /contrib/lighthouse/hack/codegen: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o pipefail 5 | set -o nounset 6 | 7 | BASE_DIR=$(cd $(dirname $0)/.. 
#!/usr/bin/env bash
set -o errexit
set -o nounset
set -o pipefail

# Shared build settings, sourced by the other hack/ scripts.
# The sourcing script must set BASE_DIR before sourcing this file.
# GIT_COMMIT and GIT_STATUS may be supplied through the environment; when they
# are empty they are computed with git.

PACKAGE_PREFIX="github.com/tencent"
PACKAGE_NAME="lighthouse"
PACKAGE="${PACKAGE_PREFIX}/${PACKAGE_NAME}"
OUTPUT_PATH="${BASE_DIR}/_output"
mkdir -p "${OUTPUT_PATH}"
USER_ID=$(id -u)
GROUP_ID=$(id -g)

if [[ -z ${GIT_COMMIT:-""} ]]; then
  GIT_COMMIT=$(git rev-parse "HEAD^{commit}")
fi
BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ')

GIT_VERSION=$(cat "${BASE_DIR}/VERSION")

# TREE_STATE must always be assigned: the scripts that source this file run with
# `nounset` and expand ${TREE_STATE}. The tree is "clean" only when no status was
# supplied and `git status --porcelain` succeeds with empty output. A supplied
# non-empty GIT_STATUS is treated as a dirty tree
# (NOTE(review): assumed intent of passing GIT_STATUS in; confirm).
if [[ -z ${GIT_STATUS:-""} ]] \
  && GIT_STATUS=$(git status --porcelain 2>/dev/null) \
  && [[ -z ${GIT_STATUS} ]]; then
  TREE_STATE="clean"
else
  TREE_STATE="-dirty"
  GIT_VERSION+=${TREE_STATE}
fi

# Default to empty so that a VERSION string that does not match the pattern
# below cannot leave these unbound for `nounset` consumers.
GIT_MAJOR=""
GIT_MINOR=""
if [[ "${GIT_VERSION}" =~ ^v([0-9]+)\.([0-9]+)(\.[0-9]+)?([-].*)?([+].*)?$ ]]; then
  GIT_MAJOR=${BASH_REMATCH[1]}
  GIT_MINOR=${BASH_REMATCH[2]}
  # A "-suffix" after the numeric version marks the minor version with "+".
  if [[ -n "${BASH_REMATCH[4]}" ]]; then
    GIT_MINOR+="+"
  fi
fi
&& pwd) 8 | source ${BASE_DIR}/hack/common.sh 9 | 10 | COMMIT_NUM=$(git log --oneline | wc -l | sed -e 's/^[ \t]*//') 11 | RPM_VERSION=${GIT_VERSION:1} 12 | RPM_IMAGE="lighthouse-${RPM_VERSION}:${COMMIT_NUM}" 13 | 14 | cd $BASE_DIR 15 | 16 | git archive -o ${OUTPUT_PATH}/lighthouse.tar.gz \ 17 | --format=tar \ 18 | --prefix=lighthouse-${RPM_VERSION}/ \ 19 | HEAD 20 | 21 | cp ${BASE_DIR}/build/Dockerfile ${OUTPUT_PATH} 22 | cp ${BASE_DIR}/build/lighthouse.spec ${OUTPUT_PATH} 23 | 24 | cat >>${OUTPUT_PATH}/Dockerfile < /root/.rpmmacros \ 29 | && echo '%__os_install_post %{nil}' >> /root/.rpmmacros \ 30 | && echo '%debug_package %{nil}' >> /root/.rpmmacros 31 | WORKDIR /root/rpmbuild/SPECS 32 | 33 | ENV GIT_COMMIT=${GIT_COMMIT} 34 | ENV GIT_VERSION=${GIT_VERSION} 35 | ENV GIT_STATUS=${GIT_STATUS} 36 | 37 | RUN rpmbuild -bb \ 38 | --define 'version ${RPM_VERSION}' \ 39 | --define 'commit ${COMMIT_NUM}' \ 40 | lighthouse.spec 41 | EOF 42 | 43 | (cd "${OUTPUT_PATH}" && docker build --network=host -t "$RPM_IMAGE" .) 44 | docker run --rm "$RPM_IMAGE" bash -c 'cd /root/rpmbuild && tar -c *RPMS' | tar xvC "${OUTPUT_PATH}" 45 | -------------------------------------------------------------------------------- /contrib/lighthouse/hack/test: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o pipefail 5 | set -o nounset 6 | 7 | BASE_DIR=$(cd $(dirname $0)/.. && pwd) 8 | source ${BASE_DIR}/hack/common.sh 9 | 10 | go test -v -count=1 ./... -------------------------------------------------------------------------------- /contrib/lighthouse/hack/update-codegen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2017 The Kubernetes Authors. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline.
set -o errexit
set -o nounset
set -o pipefail

# Parent directory of the directory containing this script.
SCRIPT_ROOT=$(dirname ${BASH_SOURCE})/..

# generate the code with:
# --output-base    because this script should also be able to run inside the vendor dir of
#                  k8s.io/kubernetes. The output-base is needed for the generators to output into the vendor dir
#                  instead of the $GOPATH directly. For normal projects this can be dropped.
# Only the "deepcopy" and "defaulter" generators are requested, for the
# componentconfig.lighthouse.io group at version v1alpha1.
${SCRIPT_ROOT}/hack/generate-groups.sh "deepcopy,defaulter" \
  github.com/tencent/lighthouse/pkg/client github.com/tencent/lighthouse/pkg/apis \
  componentconfig.lighthouse.io:v1alpha1 \
  --go-header-file "$(dirname ${BASH_SOURCE})/boilerplate.go.txt" \
  --output-base "$(dirname ${BASH_SOURCE})/../_output"

# To use your own boilerplate text append:
#   --go-header-file ${SCRIPT_ROOT}/hack/custom-boilerplate.go.txt
# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline.
set -o errexit
set -o nounset
set -o pipefail

# Parent directory of the directory containing this script.
SCRIPT_ROOT=$(dirname "${BASH_SOURCE}")/..

# DIFFROOT is the source tree being verified; TMP_DIFFROOT holds a snapshot of it.
DIFFROOT="${SCRIPT_ROOT}/pkg"
TMP_DIFFROOT="${SCRIPT_ROOT}/_tmp/pkg"
_tmp="${SCRIPT_ROOT}/_tmp"

# Remove the scratch directory; also run on exit and on SIGINT via the trap.
cleanup() {
  rm -rf "${_tmp}"
}
trap "cleanup" EXIT SIGINT

# Start from a clean scratch directory in case an earlier run left one behind.
cleanup

# Snapshot the current tree before the generator runs.
mkdir -p "${TMP_DIFFROOT}"
cp -a "${DIFFROOT}"/* "${TMP_DIFFROOT}"

"${SCRIPT_ROOT}/hack/update-codegen.sh"
echo "diffing ${DIFFROOT} against freshly generated codegen"
# Capture diff's exit status instead of letting errexit abort the script.
ret=0
diff -Naupr "${DIFFROOT}" "${TMP_DIFFROOT}" || ret=$?
# Copy the snapshot back over the tree so the check leaves the sources as it found them.
cp -a "${TMP_DIFFROOT}"/* "${DIFFROOT}"
if [[ $ret -eq 0 ]]
then
  echo "${DIFFROOT} up to date."
else
  echo "${DIFFROOT} is out of date. Please run hack/update-codegen.sh"
  exit 1
fi
const (
	// lighthouse group name
	// GroupName is the API group string "componentconfig.lighthouse.io".
	GroupName = "componentconfig.lighthouse.io"
)
14 | */ 15 | 16 | package v1alpha1 17 | 18 | import ( 19 | "net/http" 20 | ) 21 | 22 | // SetDefaults_HookConfiguration set default value for hook configuration 23 | func SetDefaults_HookConfiguration(obj *HookConfiguration) { 24 | if obj.Timeout == 0 { 25 | obj.Timeout = 5 26 | } 27 | 28 | if obj.RemoteEndpoint == "" { 29 | obj.RemoteEndpoint = "unix:///var/run/docker.sock" 30 | } 31 | } 32 | 33 | // SetDefaults_HookConfigurationItem set default value for HookConfigurationItem 34 | func SetDefaults_HookConfigurationItem(obj *HookConfigurationItem) { 35 | if obj.FailurePolicy == "" { 36 | obj.FailurePolicy = PolicyFail 37 | } 38 | } 39 | 40 | // SetDefaults_HookStage set default value for HookStage 41 | func SetDefaults_HookStage(obj *HookStage) { 42 | if obj.Method == "" { 43 | obj.Method = http.MethodPost 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /contrib/lighthouse/pkg/apis/componentconfig.lighthouse.io/v1alpha1/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | /* +k8s:deepcopy-gen=package,register 17 | * 18 | * Package v1alpha1 is the v1alpha1 version of the lighthouse's componentconfig API 19 | * +groupName=componentconfig.lighthouse.io 20 | * +versionName=v1alpha1 21 | */ 22 | 23 | package v1alpha1 24 | -------------------------------------------------------------------------------- /contrib/lighthouse/pkg/apis/componentconfig.lighthouse.io/v1alpha1/register.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package v1alpha1 17 | 18 | import ( 19 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 20 | "k8s.io/apimachinery/pkg/runtime" 21 | "k8s.io/apimachinery/pkg/runtime/schema" 22 | 23 | "github.com/tencent/lighthouse/pkg/apis/componentconfig.lighthouse.io" 24 | ) 25 | 26 | var ( 27 | SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) 28 | AddToScheme = SchemeBuilder.AddToScheme 29 | ) 30 | 31 | const Version = "v1alpha1" 32 | 33 | // SchemeGroupVersion is group version used to register these objects 34 | var SchemeGroupVersion = schema.GroupVersion{Group: componentconfig.GroupName, Version: Version} 35 | 36 | // Resource takes an unqualified resource and returns a Group qualified GroupResource 37 | func Resource(resource string) schema.GroupResource { 38 | return SchemeGroupVersion.WithResource(resource).GroupResource() 39 | } 40 | 41 | func addKnownTypes(scheme *runtime.Scheme) error { 42 | // TODO this will get cleaned up with the scheme types are fixed 43 | scheme.AddKnownTypes(SchemeGroupVersion, 44 | &HookConfiguration{}, 45 | ) 46 | metav1.AddToGroupVersion(scheme, SchemeGroupVersion) 47 | return nil 48 | } 49 | -------------------------------------------------------------------------------- /contrib/lighthouse/pkg/apis/componentconfig.lighthouse.io/v1alpha1/zz_generated.defaulter.go: -------------------------------------------------------------------------------- 1 | // +build !ignore_autogenerated 2 | 3 | // Code generated by defaulter-gen. DO NOT EDIT. 4 | 5 | package v1alpha1 6 | 7 | import ( 8 | runtime "k8s.io/apimachinery/pkg/runtime" 9 | ) 10 | 11 | // RegisterDefaults adds defaulters functions to the given scheme. 12 | // Public to allow building arbitrary schemes. 13 | // All generated defaulters are covering - they call all nested defaulters. 
14 | func RegisterDefaults(scheme *runtime.Scheme) error { 15 | scheme.AddTypeDefaultingFunc(&HookConfiguration{}, func(obj interface{}) { SetObjectDefaults_HookConfiguration(obj.(*HookConfiguration)) }) 16 | return nil 17 | } 18 | 19 | func SetObjectDefaults_HookConfiguration(in *HookConfiguration) { 20 | SetDefaults_HookConfiguration(in) 21 | for i := range in.WebHooks { 22 | a := &in.WebHooks[i] 23 | SetDefaults_HookConfigurationItem(a) 24 | for j := range a.Stages { 25 | b := &a.Stages[j] 26 | SetDefaults_HookStage(b) 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /contrib/lighthouse/pkg/hook/proxy.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package hook 17 | 18 | import ( 19 | "net/http" 20 | 21 | "github.com/docker/go-connections/sockets" 22 | "k8s.io/klog" 23 | 24 | "github.com/tencent/lighthouse/pkg/httputil" 25 | "github.com/tencent/lighthouse/pkg/util" 26 | ) 27 | 28 | type reverseProxy struct { 29 | proxy *httputil.ReverseProxy 30 | } 31 | 32 | func newReverseProxy(remoteEndpoint string) *reverseProxy { 33 | proto, addr, err := util.GetProtoAndAddress(remoteEndpoint) 34 | if err != nil { 35 | klog.Fatalf("can't parse remote endpoint %s, %v", remoteEndpoint, err) 36 | } 37 | 38 | tr := new(http.Transport) 39 | sockets.ConfigureTransport(tr, proto, addr) 40 | 41 | rp := &reverseProxy{ 42 | proxy: &httputil.ReverseProxy{ 43 | Director: func(req *http.Request) { 44 | req.URL.Scheme = "http" 45 | req.URL.Host = addr 46 | if _, ok := req.Header["User-Agent"]; !ok { 47 | // explicitly disable User-Agent so it's not set to default value 48 | req.Header.Set("User-Agent", "") 49 | } 50 | }, 51 | Transport: tr, 52 | }, 53 | } 54 | 55 | return rp 56 | } 57 | 58 | // ServeHTTP remote request handle interface 59 | func (rp *reverseProxy) ServeHTTP(w http.ResponseWriter, req *http.Request) { 60 | rp.proxy.ServeHTTP(w, req) 61 | } 62 | -------------------------------------------------------------------------------- /contrib/lighthouse/pkg/hook/types.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
type (
	// PatchData describes a patch: its type and its raw payload.
	PatchData struct {
		PatchType string `json:"patchType,omitempty"`
		PatchData []byte `json:"patchData,omitempty"`
	}

	// PostHookData holds a response's status code and raw JSON body for post hooks.
	PostHookData struct {
		StatusCode int              `json:"statusCode,omitempty"`
		Body       gjson.RawMessage `json:"body,omitempty"`
	}

	// HookHandler is the interface implemented by hook handlers.
	HookHandler interface {
		// PreHook handles the pre hook for the request identified by method, path and body.
		PreHook(ctx context.Context, patch *PatchData, method, path string, body []byte) error
		// PostHook handles the post hook for the request identified by method, path and body.
		PostHook(ctx context.Context, patch *PatchData, method, path string, body []byte) error
	}

	// PreHookFunc is the function signature of a pre hook.
	PreHookFunc func(w http.ResponseWriter, r *http.Request) error

	// PostHookFunc is the function signature of a post hook; it receives the
	// recorded response instead of a live response writer.
	PostHookFunc func(w *httptest.ResponseRecorder, r *http.Request)
)
14 | */ 15 | 16 | package hook 17 | 18 | import ( 19 | "strings" 20 | 21 | jsoniter "github.com/json-iterator/go" 22 | 23 | "github.com/tencent/lighthouse/pkg/apis/componentconfig.lighthouse.io/v1alpha1" 24 | ) 25 | 26 | var json = jsoniter.Config{ 27 | EscapeHTML: false, 28 | SortMapKeys: true, 29 | ValidateJsonRawMessage: true, 30 | }.Froze() 31 | 32 | // HookPath group hook request path 33 | func HookPath(hookType v1alpha1.HookType, path string) string { 34 | return strings.ToLower(strings.Join([]string{"/", string(hookType), path}, "")) 35 | } 36 | 37 | // fixUnexpectedEscape fix Escape error 38 | func fixUnexpectedEscape(d []byte) []byte { 39 | return []byte(strings.ReplaceAll(strings.ReplaceAll(string(d), `\u003c`, "<"), `\u003e`, ">")) 40 | } 41 | -------------------------------------------------------------------------------- /contrib/lighthouse/pkg/scheme/register.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
// Scheme is the runtime scheme the lighthouse component configuration types
// are registered in (see init).
var Scheme = runtime.NewScheme()

// Codecs is the codec factory built on top of Scheme.
var Codecs = serializer.NewCodecFactory(Scheme)

// localSchemeBuilder lists the registration functions applied by AddToScheme.
var localSchemeBuilder = runtime.SchemeBuilder{
	componentconfigv1alpha1.AddToScheme,
}

// copied from github.com/kubernetes/client-go/blob/master/kubernetes/fake/register.go

// AddToScheme adds all types of this clientset into the given scheme. This allows composition
// of clientsets, like in:
//
//	import (
//	  "k8s.io/client-go/kubernetes"
//	  clientsetscheme "k8s.io/client-go/kubernetes/scheme"
//	  aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme"
//	)
//
//	kclientset, _ := kubernetes.NewForConfig(c)
//	_ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme)
//
// After this, RawExtensions in Kubernetes types will serialize kube-aggregator types
// correctly.
var AddToScheme = localSchemeBuilder.AddToScheme

// init calls v1.AddToGroupVersion for the group-less "v1" version and then
// applies AddToScheme to Scheme; any error from AddToScheme is handed to
// utilruntime.Must.
func init() {
	v1.AddToGroupVersion(Scheme, schema.GroupVersion{Version: "v1"})
	utilruntime.Must(AddToScheme(Scheme))
}
6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package test 17 | 18 | import ( 19 | "fmt" 20 | "net" 21 | "net/http" 22 | 23 | "github.com/google/uuid" 24 | ) 25 | 26 | // UnixSocketServer mock socket server 27 | type UnixSocketServer struct { 28 | addr string 29 | mux *http.ServeMux 30 | l net.Listener 31 | } 32 | 33 | // NewUnixSocketServer new mock socket server instance 34 | func NewUnixSocketServer() *UnixSocketServer { 35 | return &UnixSocketServer{ 36 | addr: fmt.Sprintf("@%s", uuid.New().String()), 37 | mux: http.NewServeMux(), 38 | } 39 | } 40 | 41 | // GetAddress return socket address 42 | func (uss *UnixSocketServer) GetAddress() string { 43 | return fmt.Sprintf("unix://%s", uss.addr) 44 | } 45 | 46 | // RegisterHandler register to mock server 47 | func (uss *UnixSocketServer) RegisterHandler(path string, handler http.HandlerFunc) { 48 | uss.mux.HandleFunc(path, handler) 49 | } 50 | 51 | // Start main loop 52 | func (uss *UnixSocketServer) Start() error { 53 | l, err := net.Listen("unix", uss.addr) 54 | if err != nil { 55 | return err 56 | } 57 | 58 | uss.l = l 59 | 60 | return http.Serve(l, uss.mux) 61 | } 62 | 63 | // Stop stop mock socket server 64 | func (uss *UnixSocketServer) Stop() error { 65 | if uss.l != nil { 66 | return uss.l.Close() 67 | } 68 | return nil 69 | } 70 | -------------------------------------------------------------------------------- /contrib/lighthouse/pkg/util/util.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package util 17 | 18 | import ( 19 | "fmt" 20 | "net/http" 21 | "strings" 22 | "syscall" 23 | 24 | "github.com/docker/go-connections/sockets" 25 | "github.com/spf13/pflag" 26 | "k8s.io/klog" 27 | ) 28 | 29 | const ( 30 | // socket protocol 31 | UnixProto = "unix" 32 | ) 33 | 34 | // BuildClientOrDie build http client 35 | func BuildClientOrDie(endpoint string) *http.Client { 36 | proto, addr, err := GetProtoAndAddress(endpoint) 37 | if err != nil { 38 | klog.Fatalf("can't parse endpoint %s, %v", endpoint, err) 39 | } 40 | 41 | if proto != UnixProto { 42 | klog.Fatalf("only support unix socket") 43 | } 44 | 45 | tr := new(http.Transport) 46 | sockets.ConfigureTransport(tr, proto, addr) 47 | return &http.Client{ 48 | Transport: tr, 49 | } 50 | } 51 | 52 | // GetProtoAndAddress get protocol and address from url 53 | func GetProtoAndAddress(endpoint string) (string, string, error) { 54 | seps := strings.SplitN(endpoint, "://", 2) 55 | if len(seps) != 2 { 56 | return "", "", fmt.Errorf("malformed unix socket") 57 | } 58 | 59 | if len(seps[1]) > len(syscall.RawSockaddrUnix{}.Path) { 60 | return "", "", fmt.Errorf("unix socket path %q is too long", seps[1]) 61 | } 62 | 63 | return seps[0], seps[1], nil 64 | } 65 | 66 | // PrintFlags print flags 67 | func PrintFlags(flags *pflag.FlagSet) { 68 | flags.VisitAll(func(flag *pflag.Flag) { 69 | klog.V(1).Infof("FLAG: --%s=%q", 
flag.Name, flag.Value) 70 | }) 71 | } 72 | -------------------------------------------------------------------------------- /doc/contributing.md: -------------------------------------------------------------------------------- 1 | ## Contributing to Caelus 2 | You are welcome to report issues or send pull requests. It's recommended to read the following Contributing Guide first before 3 | contributing. 4 | 5 | ## Issues 6 | 7 | ### Search Known Issues First 8 | Please search the existing issues to see if any similar issue or feature request has already been filed. You should 9 | make sure your issue isn't redundant. 10 | 11 | ### Reporting New Issues 12 | If you open an issue, the more information the better. For example, include a detailed description, a screenshot or video of 13 | the problem, and logcat output or code blocks for any crash. 14 | 15 | ## Pull Requests 16 | We strongly welcome your pull request to make Caelus better. 17 | 18 | ### Branch Management 19 | There are two main branches here: 20 | 21 | - master branch 22 | 23 | The developing branch. We welcome bug fixes, features, typo fixes and any other improvements on this branch. 24 | 25 | - branch-* branch 26 | 27 | The releasing branch. It is our stable release branch. Please submit only bug fixes to these branches. 28 | 29 | ### Make Pull Requests 30 | The code team will monitor all pull requests. Before submitting a pull request, please make sure the following are done: 31 | 32 | 1. Fork the repo and create your branch from master. 33 | 2. Update code or documentation if you have changed APIs. 34 | 3. Check your code lints and checkstyles. 35 | 4. Test your code. 36 | 5. Submit your pull request to master branch. 37 | 38 | ## Code Style Guide 39 | We use gofmt to check code styles. Make sure gofmt cmd/... pkg/... contrib/... passes. 40 | 41 | ## License 42 | Caelus is under the Apache License 2.0. See the [License](../LICENSE) file for details.
43 | -------------------------------------------------------------------------------- /doc/images/cgroup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/caelus/2ade1eaa29a5bca5369d0980fa1423049522366f/doc/images/cgroup.png -------------------------------------------------------------------------------- /doc/images/lighthouse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/caelus/2ade1eaa29a5bca5369d0980fa1423049522366f/doc/images/lighthouse.png -------------------------------------------------------------------------------- /doc/images/modules.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/caelus/2ade1eaa29a5bca5369d0980fa1423049522366f/doc/images/modules.png -------------------------------------------------------------------------------- /doc/images/yarn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/caelus/2ade1eaa29a5bca5369d0980fa1423049522366f/doc/images/yarn.png -------------------------------------------------------------------------------- /doc/nm_operator.md: -------------------------------------------------------------------------------- 1 | # nm_operator 2 | 3 | ## description 4 | 5 | nm_operator is an HTTP server that wraps YARN execution commands. By calling its API, users can execute YARN commands 6 | remotely, such as starting NodeManager or updating a configuration file.
The currently supported APIs are as follows: 7 | - /v1/nm/status, check if NodeManager is running 8 | - /v1/nm/capacity, get or update NodeManager resource capacity, including vcores and memory 9 | - /v1/nm/property, get or update configuration file, such as yarn-site.xml 10 | - /v1/nm/start, start NodeManager process 11 | - /v1/nm/stop, stop NodeManager process 12 | - /v1/nm/schedule/disable, stop NodeManager from accepting new jobs 13 | - /v1/nm/schedule/enable, let NodeManager accept new jobs again 14 | - /v1/nm/capacity/update, notify ResourceManager to update resource 15 | 16 | ## How to use 17 | Users prepare a base image that includes the NodeManager process, and use a Dockerfile to copy the nm_operator binary into it as the 18 | ENTRYPOINT or CMD to build a new image. Users can then submit a workload on Kubernetes with the new image. The workload 19 | must be scheduled on the nodes where caelus is running, so that caelus can call the API to execute YARN commands. Users can follow 20 | the example [YAML file](../hack/yaml/nodemanager.yaml).
# Build stage: compile the caelus, lighthouse and lighthouse-plugin binaries
FROM golang:1.20 as builder

WORKDIR /workspace
# Copy the Go Modules manifests
COPY go.mod go.mod
COPY go.sum go.sum
# cache deps before building and copying source so that we don't need to re-download as much
# and so that source changes don't invalidate our downloaded layer
RUN go mod download

# Copy the go source
COPY cmd/ cmd/
COPY pkg/ pkg/
COPY hack/ hack/
COPY contrib/ contrib/
COPY .git/ .git/

# Create the folder that collects every built binary
RUN mkdir -p binaries

# Build caelus
RUN hack/build.sh && \
    cp _output/bin/* /workspace/binaries/

# Build lighthouse
RUN cd /workspace/contrib/lighthouse && \
    ./hack/binary && \
    cp _output/bin/* /workspace/binaries/
# Build lighthouse-plugin
RUN cd /workspace/contrib/lighthouse-plugin && \
    ./hack/binary && \
    cp _output/bin/* /workspace/binaries/

# Runtime stage: CentOS 7 base image holding the caelus binary at / and all
# built binaries under /binaries
FROM centos:centos7
WORKDIR /
COPY --from=builder /workspace/_output/bin/caelus ./
COPY --from=builder /workspace/binaries /binaries/
#USER nonroot:nonroot

# Used to quickly reclaim memory
ENV GODEBUG="madvdontneed=1"
CMD ["/caelus"]
#!/usr/bin/env bash

# Runs the Go tests below the current directory (go test ./...).

# Same strict mode as the other hack/ scripts. The original listed nounset
# twice and left out errexit.
set -o errexit
set -o nounset
set -o pipefail

# Repository root: the parent of the directory holding this script.
BASE_DIR=$(cd "$(dirname "$0")/.." && pwd)
# common.sh reads BASE_DIR, so it must be set before sourcing.
source "${BASE_DIR}/hack/common.sh"

# -count=1 keeps go from reusing cached test results.
go test -v -count=1 ./...
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package checkpoint 17 | 18 | import ( 19 | "encoding/json" 20 | 21 | "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" 22 | "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum" 23 | ) 24 | 25 | var _ checkpointmanager.Checkpoint = &checkpointData{} 26 | 27 | var cpm checkpointmanager.CheckpointManager 28 | 29 | func InitCheckpointManager(checkpointDir string) error { 30 | var err error 31 | cpm, err = checkpointmanager.NewCheckpointManager(checkpointDir) 32 | return err 33 | } 34 | 35 | func Save(key string, obj interface{}) error { 36 | data := &checkpointData{ 37 | Data: obj, 38 | } 39 | return cpm.CreateCheckpoint(key, data) 40 | } 41 | 42 | func Restore(key string, receiver interface{}) error { 43 | data := &checkpointData{Data: receiver} 44 | return cpm.GetCheckpoint(key, data) 45 | } 46 | 47 | type checkpointData struct { 48 | Data interface{} 49 | Checksum checksum.Checksum 50 | } 51 | 52 | // MarshalCheckpoint returns marshalled checkpoing 53 | func (r *checkpointData) MarshalCheckpoint() ([]byte, error) { 54 | // should set checksum the same value(zero) when calculating, 55 | // or will get different sum value after restoring 56 | r.Checksum = 0 57 | r.Checksum = checksum.New(r) 58 | return json.Marshal(*r) 59 | } 60 | 61 | // UnmarshalCheckpoint tries to unmarshal passed types to checkpoing 62 | func (r *checkpointData) UnmarshalCheckpoint(blob []byte) 
error { 63 | return json.Unmarshal(blob, r) 64 | } 65 | 66 | // VerifyChecksum verifies that current checksum of checkpoint is valid 67 | func (r *checkpointData) VerifyChecksum() error { 68 | ck := r.Checksum 69 | // set checksum the same value(zero) as storing value before verify 70 | r.Checksum = 0 71 | err := ck.Verify(r) 72 | r.Checksum = ck 73 | return err 74 | } 75 | -------------------------------------------------------------------------------- /pkg/caelus/checkpoint/checkpoint_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package checkpoint 17 | 18 | import ( 19 | "testing" 20 | ) 21 | 22 | type Data struct { 23 | A string 24 | B int 25 | } 26 | 27 | func TestCheckpoint(t *testing.T) { 28 | data := Data{A: "AAA", B: 10} 29 | InitCheckpointManager("/tmp") 30 | Save("key1", &data) 31 | reveiver := &Data{} 32 | Restore("key1", reveiver) 33 | if reveiver.A != data.A || reveiver.B != data.B { 34 | t.Errorf("expect %v, got %v", data, reveiver) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /pkg/caelus/detection/detector.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 
// Detector is the interface implemented by anomaly detectors: data points are
// fed in with Add/AddAll and the verdict is read with IsAnomaly.
type Detector interface {
	// Name returns the detector name.
	Name() string
	// Add adds a single data point.
	Add(data TimedData)
	// AddAll adds a slice of data points.
	AddAll(vals []TimedData)
	// IsAnomaly reports whether the current values are anomalous.
	IsAnomaly() (bool, error)
	// Metrics returns the metrics of the detector.
	Metrics() []string
	// SampleCount returns the current data count.
	SampleCount() int
	// SampleDuration returns the time range of the current data.
	SampleDuration() time.Duration
	// Reason returns the anomaly reason.
	Reason() string
}
6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package detection 17 | 18 | import ( 19 | "time" 20 | 21 | "github.com/tencent/caelus/pkg/caelus/detection" 22 | ) 23 | 24 | // MockDetection detection mock type 25 | type MockDetection struct { 26 | anomaly *bool 27 | sampleCount int 28 | warningDuration time.Duration 29 | metrics []string 30 | vals []detection.TimedData 31 | } 32 | 33 | // NewMockDetection new detection mock manager 34 | func NewMockDetection(anomaly *bool, metrics []string) detection.Detector { 35 | return &MockDetection{anomaly: anomaly, metrics: metrics} 36 | } 37 | 38 | // Name show detector name 39 | func (m *MockDetection) Name() string { 40 | return "detectionTesting" 41 | } 42 | 43 | // Add add detect data 44 | func (m *MockDetection) Add(data detection.TimedData) { 45 | m.vals = append(m.vals, data) 46 | } 47 | 48 | // AddAll add detect data array 49 | func (m *MockDetection) AddAll(vals []detection.TimedData) { 50 | m.vals = vals 51 | } 52 | 53 | // IsAnomaly checks if current values is anomaly 54 | func (m *MockDetection) IsAnomaly() (bool, error) { 55 | return *m.anomaly, nil 56 | } 57 | 58 | // Metrics return current detector metrics 59 | func (m *MockDetection) Metrics() []string { 60 | return m.metrics 61 | } 62 | 63 | // SampleCount get current data count 64 | func (m *MockDetection) SampleCount() int { 65 | return m.sampleCount 66 | } 67 | 68 | // SampleDuration get current data time range 69 | func (m *MockDetection) SampleDuration() time.Duration { 70 | return m.warningDuration 71 | } 72 | 73 | // Reason get anomaly reason 74 | 
func (m *MockDetection) Reason() string { 75 | return "mock testing" 76 | } 77 | -------------------------------------------------------------------------------- /pkg/caelus/detection/ring/ring.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package ring 17 | 18 | // Ring is a ring buffer with fixed size. 19 | type Ring interface { 20 | Add(v float64) 21 | Peek() (v float64) 22 | Values() []float64 23 | Ready() bool 24 | Mean() float64 25 | } 26 | 27 | type defaultRing struct { 28 | data []float64 29 | capacity int 30 | tail int 31 | ready bool 32 | } 33 | 34 | // NewRing returns a ring buffer with fixed size. 
35 | func NewRing(n int) Ring { 36 | return &defaultRing{ 37 | data: make([]float64, n, n), 38 | capacity: n, 39 | ready: false, 40 | } 41 | } 42 | 43 | // Add add values to ring 44 | func (ring *defaultRing) Add(v float64) { 45 | i := ring.tail 46 | ring.data[i] = v 47 | ring.tail++ 48 | if ring.tail == ring.capacity { 49 | ring.tail = 0 50 | ring.ready = true 51 | } 52 | } 53 | 54 | // Values return current ring buffer values 55 | func (ring *defaultRing) Values() []float64 { 56 | return ring.data 57 | } 58 | 59 | // Peek returns latest value 60 | func (ring *defaultRing) Peek() float64 { 61 | i := ring.tail - 1 62 | if i < 0 { 63 | i = ring.capacity - 1 64 | } 65 | return ring.data[i] 66 | } 67 | 68 | // Ready returns if the ring is ready to be used 69 | func (ring *defaultRing) Ready() bool { 70 | return ring.ready 71 | } 72 | 73 | // Mean returns the mean value of current ring values 74 | func (ring *defaultRing) Mean() float64 { 75 | var sum float64 = 0 76 | for _, v := range ring.data { 77 | sum += v 78 | } 79 | 80 | return sum / float64(ring.capacity) 81 | } 82 | -------------------------------------------------------------------------------- /pkg/caelus/detection/ring/ring_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package ring 17 | 18 | import ( 19 | "math" 20 | "testing" 21 | ) 22 | 23 | var epsilon = 0.00000001 24 | 25 | func TestRing(t *testing.T) { 26 | ring := NewRing(5) 27 | data := []float64{1, 2, 3, 4} 28 | for _, d := range data { 29 | ring.Add(d) 30 | } 31 | 32 | cases := []struct { 33 | newData float64 34 | expected []float64 35 | }{ 36 | { 37 | newData: 5, 38 | expected: []float64{1, 2, 3, 4, 5}, 39 | }, 40 | { 41 | newData: 6, 42 | expected: []float64{6, 2, 3, 4, 5}, 43 | }, 44 | { 45 | newData: 7, 46 | expected: []float64{6, 7, 3, 4, 5}, 47 | }, 48 | } 49 | 50 | for _, test := range cases { 51 | ring.Add(test.newData) 52 | if !isEqualFloat64(ring.Peek(), test.newData) { 53 | t.Errorf("Failed to peek, expected %+v, got %+v", test.newData, ring.Peek()) 54 | } 55 | if !isEqual(ring.Values(), test.expected) { 56 | t.Errorf("Failed, expected %+v, got %+v", test.expected, ring.Values()) 57 | } 58 | } 59 | } 60 | 61 | func isEqual(a, b []float64) bool { 62 | if len(a) != len(b) { 63 | return false 64 | } 65 | for i := range a { 66 | if !isEqualFloat64(a[i], b[i]) { 67 | return false 68 | } 69 | } 70 | return true 71 | } 72 | 73 | func isEqualFloat64(a, b float64) bool { 74 | // The following algorithm is not right, but it is OK for test now. 75 | // For more information, refer https://floating-point-gui.de/errors/comparison/#look-out-for-edge-cases. 76 | return math.Abs(a-b) < epsilon 77 | } 78 | -------------------------------------------------------------------------------- /pkg/caelus/detection/types.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
// nilTime is the zero time.Time value.
// NOTE(review): presumably used as an "unset timestamp" marker by the
// detectors in this package; its users are not visible here - confirm.
var nilTime = time.Time{}

// TimedData is the data struct used by detector
type TimedData struct {
	// Ts is the timestamp attached to this sample.
	Ts time.Time
	// Vals holds the sample's float values by string key.
	// NOTE(review): keys are presumably metric names - confirm against the
	// detector implementations.
	Vals map[string]float64
}
14 | */ 15 | 16 | package detection 17 | 18 | import ( 19 | "strings" 20 | "time" 21 | 22 | "github.com/tencent/caelus/pkg/caelus/types" 23 | "k8s.io/apimachinery/pkg/util/sets" 24 | ) 25 | 26 | type unionDetector struct { 27 | metrics []string 28 | detectors []Detector 29 | 30 | reason []string 31 | } 32 | 33 | var _ Detector = (*unionDetector)(nil) 34 | 35 | // NewUnionDetector union multiple detectors 36 | func NewUnionDetector(detectors []Detector) Detector { 37 | metrics := sets.NewString() 38 | for _, d := range detectors { 39 | metrics.Insert(d.Metrics()...) 40 | } 41 | return &unionDetector{ 42 | metrics: metrics.List(), 43 | detectors: detectors, 44 | } 45 | } 46 | 47 | // Name show detector name 48 | func (u *unionDetector) Name() string { 49 | return types.DetectionUnion 50 | } 51 | 52 | // Add add detect data 53 | func (u *unionDetector) Add(data TimedData) { 54 | for _, d := range u.detectors { 55 | d.Add(data) 56 | } 57 | } 58 | 59 | // AddAll add detect data array 60 | func (u *unionDetector) AddAll(vals []TimedData) { 61 | for _, d := range u.detectors { 62 | d.AddAll(vals) 63 | } 64 | } 65 | 66 | // IsAnomaly checks if current values is anomaly 67 | func (u *unionDetector) IsAnomaly() (bool, error) { 68 | var reason []string 69 | for _, d := range u.detectors { 70 | ret, err := d.IsAnomaly() 71 | if err != nil { 72 | return false, err 73 | } 74 | if !ret { 75 | return false, nil 76 | } 77 | reason = append(reason, d.Reason()) 78 | } 79 | u.reason = reason 80 | return true, nil 81 | } 82 | 83 | // Metrics return current detector metrics 84 | func (u *unionDetector) Metrics() []string { 85 | return u.metrics 86 | } 87 | 88 | // Reason get anomaly reason 89 | func (u *unionDetector) Reason() string { 90 | return strings.Join(u.reason, ";") 91 | } 92 | 93 | // SampleCount get current data count 94 | func (u *unionDetector) SampleCount() int { 95 | var n int 96 | for _, d := range u.detectors { 97 | nn := d.SampleCount() 98 | if nn > n { 99 | n = nn 100 
| } 101 | } 102 | 103 | return n 104 | } 105 | 106 | // SampleDuration get current data time range 107 | func (u *unionDetector) SampleDuration() time.Duration { 108 | var duration time.Duration 109 | for _, d := range u.detectors { 110 | dd := d.SampleDuration() 111 | if dd > duration { 112 | duration = dd 113 | } 114 | } 115 | 116 | return duration 117 | } 118 | -------------------------------------------------------------------------------- /pkg/caelus/diskquota/interface.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
// DiskQuotaInterface describes the operations of the disk quota module.
type DiskQuotaInterface interface {
	// Name returns the module name.
	Name() string
	// Run runs the main loop; stop is the channel used to signal shutdown.
	Run(stop <-chan struct{})
	// GetPodDiskQuota returns all volumes of the pod, grouped by volume type.
	GetPodDiskQuota(pod *v1.Pod) (map[types.VolumeType]*VolumeInfo, error)
	// GetAllPodsDiskQuota returns all volumes for all the pods on the node.
	GetAllPodsDiskQuota() ([]*PodVolumes, error)
}

// PathInfoWrapper wraps types.PathInfo with the quota-setting state of the
// path.
type PathInfoWrapper struct {
	// setQuotaSuccess records whether the quota was set successfully.
	setQuotaSuccess bool
	types.PathInfo
}

// VolumeInfo describes volume paths and their quota state.
type VolumeInfo struct {
	// NOTE(review): getPathsSuccess and setQuotasSuccess presumably record
	// whether path discovery and quota setting succeeded for this volume
	// type; they are not assigned in the visible code - confirm.
	getPathsSuccess  bool
	setQuotasSuccess bool
	// Paths maps volume name => path info.
	Paths map[string]*PathInfoWrapper
}

// PodVolumes describes the volume info of one pod, such as path and quota.
type PodVolumes struct {
	// Pod is the pod the volumes belong to.
	Pod *v1.Pod
	// AppClass is the application class of the pod.
	AppClass appclass.AppClass
	// Volumes holds the volume info of the pod by volume type.
	Volumes map[types.VolumeType]*VolumeInfo
	// lastTime records the set quota timestamp, to check if the pod has exited.
	lastTime time.Time
}
6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package manager 17 | 18 | import ( 19 | "errors" 20 | 21 | "github.com/tencent/caelus/pkg/caelus/types" 22 | 23 | "k8s.io/apimachinery/pkg/util/sets" 24 | ) 25 | 26 | var NotSupported = errors.New("not suppported") 27 | 28 | // QuotaManager describes functions for quota manager 29 | type QuotaManager interface { 30 | // GetQuota get disk quota for the target path 31 | GetQuota(targetPath string) (*types.DiskQuotaSize, error) 32 | // SetQuota set disk quota for path. pathFlag shows which the path is, such as emptyDir or hostPath 33 | // if sharedInfo is not nil, quota size is for a group of path of this pod 34 | SetQuota(targetPath string, pathFlag types.VolumeType, size *types.DiskQuotaSize, sharedInfo *types.SharedInfo) error 35 | // ClearQuota clears quota 36 | ClearQuota(targetPath string) error 37 | // GetAllQuotaPath return all paths which has set quota, and classified by path flag 38 | GetAllQuotaPath() map[types.VolumeType]sets.String 39 | } 40 | 41 | // IsNotSupported check if is not supported error 42 | func IsNotSupported(err error) bool { 43 | return err == NotSupported 44 | } 45 | -------------------------------------------------------------------------------- /pkg/caelus/diskquota/manager/manager_fake.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package manager 17 | 18 | import ( 19 | "fmt" 20 | "github.com/tencent/caelus/pkg/caelus/types" 21 | 22 | "k8s.io/apimachinery/pkg/util/sets" 23 | ) 24 | 25 | var ( 26 | QuotaSizeTimes uint64 = 2 27 | ) 28 | 29 | type fakeQuotaManager struct { 30 | quotas map[string]*types.DiskQuotaSize 31 | volumeTypes map[string]types.VolumeType 32 | } 33 | 34 | // NewFakeQuotaManager new fake quota manager instance 35 | func NewFakeQuotaManager() QuotaManager { 36 | return &fakeQuotaManager{ 37 | quotas: make(map[string]*types.DiskQuotaSize), 38 | volumeTypes: make(map[string]types.VolumeType), 39 | } 40 | } 41 | 42 | // GetQuota get disk quota for the target path 43 | func (f *fakeQuotaManager) GetQuota(targetPath string) (*types.DiskQuotaSize, error) { 44 | quota, ok := f.quotas[targetPath] 45 | if !ok { 46 | return nil, fmt.Errorf("not found") 47 | } 48 | // twice during call 49 | quota.QuotaUsed = quota.QuotaUsed * QuotaSizeTimes 50 | return quota, nil 51 | } 52 | 53 | // SetQuota set disk quota for path. 
pathFlag shows which the path is, such as emptyDir or hostPath 54 | func (f *fakeQuotaManager) SetQuota(targetPath string, pathFlag types.VolumeType, 55 | size *types.DiskQuotaSize, sharedInfo *types.SharedInfo) error { 56 | f.quotas[targetPath] = size 57 | f.volumeTypes[targetPath] = pathFlag 58 | 59 | return nil 60 | } 61 | 62 | // ClearQuota clears quota 63 | func (f *fakeQuotaManager) ClearQuota(targetPath string) error { 64 | delete(f.quotas, targetPath) 65 | delete(f.volumeTypes, targetPath) 66 | 67 | return nil 68 | } 69 | 70 | // GetAllQuotaPath return all paths which has set quota, and classified by path flag 71 | func (f *fakeQuotaManager) GetAllQuotaPath() map[types.VolumeType]sets.String { 72 | allPaths := make(map[types.VolumeType]sets.String) 73 | for p, t := range f.volumeTypes { 74 | paths, ok := allPaths[t] 75 | if !ok { 76 | paths = sets.NewString() 77 | } 78 | paths.Insert(p) 79 | allPaths[t] = paths 80 | } 81 | 82 | return allPaths 83 | } 84 | -------------------------------------------------------------------------------- /pkg/caelus/diskquota/volumes/volume_fake.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package volume 17 | 18 | import ( 19 | "github.com/tencent/caelus/pkg/caelus/types" 20 | 21 | "k8s.io/api/core/v1" 22 | apiType "k8s.io/apimachinery/pkg/types" 23 | ) 24 | 25 | var ( 26 | FakeVolumeQuotaManagerName types.VolumeType = "fakeVolumeQuotaManager" 27 | ) 28 | 29 | type fakeVolumeManager struct { 30 | pathInfos map[apiType.UID]map[string]*types.PathInfo 31 | } 32 | 33 | // NewFakeVolumeQuotaManager creates fake volume quota manager instance 34 | func NewFakeVolumeQuotaManager(pathInfos map[apiType.UID]map[string]*types.PathInfo) VolumeQuotaManager { 35 | return &fakeVolumeManager{ 36 | pathInfos: pathInfos, 37 | } 38 | } 39 | 40 | // Name return volume name 41 | func (f *fakeVolumeManager) Name() types.VolumeType { 42 | return FakeVolumeQuotaManagerName 43 | } 44 | 45 | // GetVolumes return paths, which need to set quota 46 | func (f *fakeVolumeManager) GetVolumes(pod *v1.Pod) (map[string]*types.PathInfo, error) { 47 | return f.pathInfos[pod.UID], nil 48 | } 49 | -------------------------------------------------------------------------------- /pkg/caelus/healthcheck/action/action_evict.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package action 17 | 18 | import "math" 19 | 20 | type evictAction struct{} 21 | 22 | // NewEvictAction get action instance to evict app 23 | func NewEvictAction() Action { 24 | return &evictAction{} 25 | } 26 | 27 | // ActionType return current action type 28 | func (e *evictAction) ActionType() ActionType { 29 | return Evict 30 | } 31 | 32 | // DoAction handle check result 33 | func (e *evictAction) DoAction(conflicting bool, data interface{}) (*ActionResult, error) { 34 | return nil, nil 35 | } 36 | 37 | // https://github.com/golang/go/issues/11660, we need to limit 2 decimal 38 | func float64Round(v float64) float64 { 39 | return math.Round(v*100) / 100 40 | } 41 | -------------------------------------------------------------------------------- /pkg/caelus/healthcheck/action/action_log.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package action 17 | 18 | import ( 19 | "github.com/tencent/caelus/pkg/caelus/alarm" 20 | ) 21 | 22 | type logAction struct { 23 | handler func() string 24 | } 25 | 26 | // NewLogAction get Action instance to send alarms 27 | func NewLogAction() Action { 28 | return &logAction{} 29 | } 30 | 31 | // ActionType return action type 32 | func (l *logAction) ActionType() ActionType { 33 | return Log 34 | } 35 | 36 | // DoAction handle check result 37 | func (l *logAction) DoAction(conflicting bool, data interface{}) (*ActionResult, error) { 38 | if conflicting { 39 | msg := data.(string) 40 | alarm.SendAlarm(msg) 41 | } 42 | return nil, nil 43 | } 44 | -------------------------------------------------------------------------------- /pkg/caelus/healthcheck/action/action_schedule.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package action 17 | 18 | import ( 19 | "k8s.io/api/core/v1" 20 | ) 21 | 22 | type scheduleAction struct { 23 | name string 24 | } 25 | 26 | // NewScheduleAction get Action instance to unschedule offline job 27 | func NewScheduleAction(name string) Action { 28 | return &scheduleAction{name: name} 29 | } 30 | 31 | // ActionType return current action type 32 | func (s *scheduleAction) ActionType() ActionType { 33 | return Schedule 34 | } 35 | 36 | // DoAction handle check result 37 | func (s *scheduleAction) DoAction(conflicting bool, data interface{}) (*ActionResult, error) { 38 | var ac = &ActionResult{ 39 | UnscheduleMap: map[string]bool{s.name: conflicting}, 40 | AdjustResources: make(map[v1.ResourceName]ActionResource), 41 | Messages: []string{data.(string)}, 42 | } 43 | 44 | return ac, nil 45 | } 46 | -------------------------------------------------------------------------------- /pkg/caelus/healthcheck/action/action_schedule_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package action 17 | 18 | import "testing" 19 | 20 | // TestScheduleAction_DoAction test how schedule action working 21 | func TestScheduleAction_DoAction(t *testing.T) { 22 | actionName := "testing" 23 | describe := "schedule action testing" 24 | scheduleDisabled := true 25 | 26 | scheduleAction := NewScheduleAction(actionName) 27 | acRet, err := scheduleAction.DoAction(scheduleDisabled, "just testing") 28 | if err != nil { 29 | t.Fatalf("%s: DoAction return err: %v", describe, err) 30 | } 31 | 32 | schState, ok := acRet.UnscheduleMap[actionName] 33 | if !ok { 34 | t.Fatalf("%s: key(%s) not found in UnscheduleMap: %v", describe, actionName, acRet.UnscheduleMap) 35 | } 36 | if schState != scheduleDisabled { 37 | t.Fatalf("%s: schedule state is %v, expect %v", describe, schState, scheduleDisabled) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /pkg/caelus/healthcheck/cgroupnotify/notify.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package notify 17 | 18 | import ( 19 | "github.com/tencent/caelus/pkg/caelus/resource" 20 | "github.com/tencent/caelus/pkg/caelus/types" 21 | 22 | "k8s.io/api/core/v1" 23 | "k8s.io/klog/v2" 24 | ) 25 | 26 | // ResourceNotify describe resource monitor by notify method 27 | type ResourceNotify interface { 28 | // Run main loop 29 | Run(stop <-chan struct{}) 30 | Stop() 31 | } 32 | 33 | // notifier describe all kinds of resource notifier 34 | type notifier interface { 35 | name() string 36 | start(stop <-chan struct{}) error 37 | stop() error 38 | } 39 | 40 | // notifyManager describe notify monitor data 41 | // Now just support memory cgroup notify, need to add more 42 | type notifyManager struct { 43 | notifiers []notifier 44 | } 45 | 46 | // NewNotifyManager creates notify manager instance, initializing all resource notifiers 47 | func NewNotifyManager(cfg *types.NotifyConfig, nodeResource resource.Interface) ResourceNotify { 48 | var notifiers []notifier 49 | 50 | if cfg.MemoryCgroup != nil { 51 | memNotifier := newMemoryNotifier(cfg.MemoryCgroup, func() { 52 | nodeResource.KillOfflineJob(v1.ResourceMemory) 53 | }) 54 | notifiers = append(notifiers, memNotifier) 55 | } 56 | 57 | return ¬ifyManager{ 58 | notifiers: notifiers, 59 | } 60 | } 61 | 62 | // Run starts all resource notifier 63 | func (n *notifyManager) Run(stop <-chan struct{}) { 64 | klog.V(2).Infof("cgroup notifier running") 65 | for _, nt := range n.notifiers { 66 | err := nt.start(stop) 67 | if err == nil { 68 | klog.Infof("notifier(%s) start successfully", nt.name()) 69 | } else { 70 | klog.Errorf("notifier(%s) start failed: %v", nt.name(), err) 71 | } 72 | } 73 | } 74 | 75 | // Stop stops all resource notifier 76 | func (n *notifyManager) Stop() { 77 | for _, nt := range n.notifiers { 78 | nt.stop() 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /pkg/caelus/healthcheck/conflict/mock/mock.go: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package conflict 17 | 18 | import ( 19 | "github.com/tencent/caelus/pkg/caelus/healthcheck/action" 20 | "github.com/tencent/caelus/pkg/caelus/healthcheck/conflict" 21 | 22 | "k8s.io/api/core/v1" 23 | ) 24 | 25 | // MockConflict mock conflict manager 26 | type MockConflict struct { 27 | conflictingResources map[v1.ResourceName]action.ActionResource 28 | } 29 | 30 | // NewMockConflict new a conflict mock manager 31 | func NewMockConflict(res map[v1.ResourceName]action.ActionResource) conflict.Manager { 32 | return &MockConflict{conflictingResources: res} 33 | } 34 | 35 | // CheckAndSubConflictResource mock function 36 | func (m *MockConflict) CheckAndSubConflictResource(predictList v1.ResourceList) (map[v1.ResourceName]bool, error) { 37 | panic("implement me") 38 | } 39 | 40 | // UpdateConflictList mock function 41 | func (m *MockConflict) UpdateConflictList(conflictList map[v1.ResourceName]action.ActionResource) (bool, error) { 42 | for k, v := range conflictList { 43 | vv, ok := m.conflictingResources[k] 44 | if !ok { 45 | return true, nil 46 | } 47 | 48 | if !v.Equal(&vv) { 49 | return true, nil 50 | } 51 | } 52 | 53 | return false, nil 54 | } 55 | 
-------------------------------------------------------------------------------- /pkg/caelus/healthcheck/health_check_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package health 17 | 18 | import ( 19 | "io/ioutil" 20 | "os" 21 | "testing" 22 | 23 | "github.com/tencent/caelus/pkg/caelus/types" 24 | "github.com/tencent/caelus/pkg/caelus/util" 25 | ) 26 | 27 | var ( 28 | testNodeName = "testNode" 29 | testHash = "9589f334c6f4987fc5ddb8e0ac1c096b" 30 | ) 31 | 32 | // TestNeedReload test if the rule config need to reload 33 | func TestNeedReload(t *testing.T) { 34 | configFile := "/tmp/testing" 35 | err := ioutil.WriteFile(configFile, []byte("just testing"), 0644) 36 | if err != nil { 37 | t.Skipf("creating testing file %s err: %v", configFile, err) 38 | } 39 | defer os.Remove(configFile) 40 | 41 | healthManager := &manager{ 42 | configHash: "123", 43 | configUpdateFunc: func(string) (*types.HealthCheckConfig, error) { 44 | return &types.HealthCheckConfig{ 45 | Disable: false, 46 | RuleNodes: []string{testNodeName}, 47 | }, nil 48 | }, 49 | } 50 | 51 | util.SetNodeIP(testNodeName) 52 | reload, hash, _ := healthManager.checkNeedReload(configFile) 53 | if !reload || hash != testHash { 54 | t.Fatalf("health check manager reload not expected, got reload:%v, hash:%s, 
expect reload:true, hash:%s", 55 | reload, hash, testHash) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /pkg/caelus/healthcheck/rulecheck/correlation/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | /* 17 | 发现异常后的关联分析 18 | 同一个(类)指标可能同时会有多个container/pod出现异常,此时优先在这些pod之间进行筛选处理 19 | 当然也可能是其他pod影响的,此时主要看最近有无新启动的pod在该指标上有load 20 | 21 | 发现某个指标有(多个)pod发生异常,首先筛选出其他有load的pod,筛选过滤条件为优先级比较低或者负载比较高的(过滤掉kube-system) 22 | 如果最近(5m)有新起的pod,则优先处理这类pod中load最大的一个,处理结束 23 | 如果没有这类候选项,则选择候选人中优先级较低且load较大的进行处理 24 | 25 | 对于io异常,各个pod的负载可以通过读写次数来反应 26 | 对于内存/cpu,由于用户有各自的申请值,所以无法直接用负载值,优先选择使用值超出起request比例来进行排序 27 | 过滤条件,对使用值绝对值有一定要求,比如内存使用大于512M,cpu大于2c 28 | 29 | TODO: 30 | 1. 处理成功后,对该指标进入一个冷却期(一段时间内不对该指标异常进行分析处理) 31 | 2. 记录上报当前结果 32 | */ 33 | 34 | package correlation 35 | -------------------------------------------------------------------------------- /pkg/caelus/healthcheck/rulecheck/correlation/types.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
// suspect bundles the attributes tracked for a single cgroup in this package.
// NOTE(review): the per-field notes marked "presumably" are inferred from the
// field names and types only; this file does not show how the fields are
// populated or consumed — confirm against the code that builds suspects.
type suspect struct {
	ref       *cgroupstore.CgroupRef // reference to the cgroup this entry describes
	priority  int                    // presumably the priority of the pod — TODO confirm
	numKilled int                    // presumably how many times it has been killed — TODO confirm
	runtime   time.Duration          // running time
	replicas  int                    // presumably the replica count of the owning workload — TODO confirm
	stateful  bool                   // presumably true for stateful workloads — TODO confirm
	// TODO: also consider pod attributes like volume/fixed-ip
	val float64 // presumably the metric value used when comparing suspects — TODO confirm
}
14 | */ 15 | 16 | package serverrequest 17 | 18 | import ( 19 | "github.com/tencent/caelus/pkg/caelus/metrics/outer" 20 | "github.com/tencent/caelus/pkg/caelus/statestore" 21 | 22 | "github.com/prometheus/client_golang/prometheus" 23 | dto "github.com/prometheus/client_model/go" 24 | "k8s.io/klog/v2" 25 | ) 26 | 27 | const ( 28 | metricsServerRequest = "caelus_server_request_collector" 29 | metricsServerRequestDesc = "external metrics from server request" 30 | metricPrefix = "caelus_" 31 | ) 32 | 33 | type requestMetrics struct { 34 | desc *prometheus.Desc 35 | stStore statestore.StateStore 36 | } 37 | 38 | func newRequestMetrics(stStore statestore.StateStore) (prometheus.Collector, error) { 39 | desc := prometheus.NewDesc(metricsServerRequest, 40 | metricsServerRequestDesc, []string{"node"}, nil) 41 | 42 | return &requestMetrics{ 43 | desc: desc, 44 | stStore: stStore, 45 | }, nil 46 | } 47 | 48 | // RegisterRequestMetrics registers external metrics from server request 49 | func RegisterRequestMetrics(reg *prometheus.Registry, store statestore.StateStore) { 50 | if metrics, err := newRequestMetrics(store); err == nil { 51 | reg.MustRegister(metrics) 52 | klog.Infof("Register server request metrics successfully") 53 | } else { 54 | klog.Fatalf("Register server request metrics failed, err: %v", err) 55 | } 56 | } 57 | 58 | // Collect implements the prometheus.Collector interface. 
59 | func (o *requestMetrics) Collect(ch chan<- prometheus.Metric) { 60 | metricFamilies, err := o.stStore.GetPromDirectMetrics() 61 | if err != nil { 62 | klog.Errorf("fetch prometheus metrics failed: %v", err) 63 | return 64 | } 65 | o.handleMetrics(metricFamilies, ch) 66 | } 67 | 68 | // Describe puts metrics desc to prometheus.Desc chan 69 | func (o *requestMetrics) Describe(ch chan<- *prometheus.Desc) { 70 | ch <- o.desc 71 | } 72 | 73 | func (o *requestMetrics) handleMetrics(metricFamilies map[string]*dto.MetricFamily, ch chan<- prometheus.Metric) { 74 | for _, mf := range metricFamilies { 75 | // avoid duplication with the original metric 76 | if mf.Name != nil { 77 | newName := metricPrefix + *mf.Name 78 | mf.Name = &newName 79 | } 80 | 81 | for _, metric := range mf.Metric { 82 | var values, names []string 83 | for _, label := range metric.GetLabel() { 84 | names = append(names, label.GetName()) 85 | values = append(values, label.GetValue()) 86 | } 87 | utils.HandleMetricType(mf, metric, values, names, ch) 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /pkg/caelus/metrics/outer/textfile/textfile_metrics.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package textfile 17 | 18 | import ( 19 | "github.com/prometheus/client_golang/prometheus" 20 | klog "k8s.io/klog/v2" 21 | ) 22 | 23 | const ( 24 | metricsTextfile = "caelus_textfile_collector" 25 | metricsTextfileDesc = "external metrics from textfile" 26 | ) 27 | 28 | type textFileMetrics struct { 29 | coll *textFileCollector 30 | desc *prometheus.Desc 31 | } 32 | 33 | // Collect implements the prometheus.Collector interface. 34 | func (tm *textFileMetrics) Collect(ch chan<- prometheus.Metric) { 35 | tm.coll.Update(ch) 36 | } 37 | 38 | // Describe puts metrics descs to prometheus.Desc chan 39 | func (tm *textFileMetrics) Describe(ch chan<- *prometheus.Desc) { 40 | ch <- tm.desc 41 | } 42 | 43 | func newTextFileMetrics() (prometheus.Collector, error) { 44 | desc := prometheus.NewDesc(metricsTextfile, 45 | metricsTextfileDesc, []string{"node"}, nil) 46 | 47 | coll, err := newTextFileCollector() 48 | if err != nil { 49 | return nil, err 50 | 51 | } 52 | 53 | return &textFileMetrics{ 54 | coll: coll, 55 | desc: desc, 56 | }, nil 57 | } 58 | 59 | // RegisterTextFileMetrics registers external metrics from textfile 60 | func RegisterTextFileMetrics(reg *prometheus.Registry) { 61 | if metrics, err := newTextFileMetrics(); err == nil { 62 | reg.MustRegister(metrics) 63 | klog.Infof("Register text file metrics of directory successfully") 64 | } else { 65 | klog.Fatalf("Register text file metrics failed, err: %s", err) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /pkg/caelus/metrics/outer/utils.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package utils 17 | 18 | import ( 19 | "github.com/prometheus/client_golang/prometheus" 20 | dto "github.com/prometheus/client_model/go" 21 | ) 22 | 23 | // HandleMetricType handle different prometheus metric types 24 | func HandleMetricType(metricFamily *dto.MetricFamily, metric *dto.Metric, values, names []string, 25 | ch chan<- prometheus.Metric) { 26 | var valType prometheus.ValueType 27 | var val float64 28 | metricType := metricFamily.GetType() 29 | switch metricType { 30 | case dto.MetricType_COUNTER: 31 | valType = prometheus.CounterValue 32 | val = metric.Counter.GetValue() 33 | 34 | case dto.MetricType_GAUGE: 35 | valType = prometheus.GaugeValue 36 | val = metric.Gauge.GetValue() 37 | 38 | case dto.MetricType_UNTYPED: 39 | valType = prometheus.UntypedValue 40 | val = metric.Untyped.GetValue() 41 | 42 | case dto.MetricType_SUMMARY: 43 | quantiles := map[float64]float64{} 44 | for _, q := range metric.Summary.Quantile { 45 | quantiles[q.GetQuantile()] = q.GetValue() 46 | } 47 | ch <- prometheus.MustNewConstSummary( 48 | prometheus.NewDesc( 49 | *metricFamily.Name, 50 | metricFamily.GetHelp(), 51 | names, nil, 52 | ), 53 | metric.Summary.GetSampleCount(), 54 | metric.Summary.GetSampleSum(), 55 | quantiles, values..., 56 | ) 57 | case dto.MetricType_HISTOGRAM: 58 | buckets := map[float64]uint64{} 59 | for _, b := range metric.Histogram.Bucket { 60 | buckets[b.GetUpperBound()] = b.GetCumulativeCount() 61 | } 62 | ch <- prometheus.MustNewConstHistogram( 63 | prometheus.NewDesc( 64 | *metricFamily.Name, 65 | 
metricFamily.GetHelp(), 66 | names, nil, 67 | ), 68 | metric.Histogram.GetSampleCount(), 69 | metric.Histogram.GetSampleSum(), 70 | buckets, values..., 71 | ) 72 | default: 73 | panic("unknown metric type") 74 | } 75 | 76 | if metricType == dto.MetricType_GAUGE || metricType == dto.MetricType_COUNTER || metricType == dto.MetricType_UNTYPED { 77 | ch <- prometheus.MustNewConstMetric( 78 | prometheus.NewDesc( 79 | *metricFamily.Name, 80 | metricFamily.GetHelp(), 81 | names, nil, 82 | ), 83 | valType, val, values..., 84 | ) 85 | } 86 | 87 | return 88 | } 89 | -------------------------------------------------------------------------------- /pkg/caelus/mock/mock_pod_informer.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package mock 17 | 18 | import ( 19 | "k8s.io/api/core/v1" 20 | "k8s.io/client-go/tools/cache" 21 | fcache "k8s.io/client-go/tools/cache/testing" 22 | "time" 23 | ) 24 | 25 | // NewMockPodInformer news a fake pod lister for testing 26 | // Users should assign pod info to the mock server 27 | func NewMockPodInformer(pods []*v1.Pod) cache.SharedIndexInformer { 28 | source := fcache.NewFakeControllerSource() 29 | for i := range pods { 30 | p := pods[i] 31 | source.Add(p) 32 | } 33 | sii := cache.NewSharedInformer(source, &v1.Pod{}, 1*time.Second).(cache.SharedIndexInformer) 34 | return sii 35 | } 36 | -------------------------------------------------------------------------------- /pkg/caelus/mock/util.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package mock 17 | 18 | import ( 19 | jsoniter "github.com/json-iterator/go" 20 | ) 21 | 22 | var json = jsoniter.Config{ 23 | EscapeHTML: false, 24 | SortMapKeys: true, 25 | ValidateJsonRawMessage: true, 26 | }.Froze() 27 | 28 | // Jsonize encodes object in json format 29 | func Jsonize(o interface{}) string { 30 | b, _ := json.Marshal(o) 31 | return string(b) 32 | } 33 | -------------------------------------------------------------------------------- /pkg/caelus/predict/interface.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
// Predictor predicts resources used by online and system pods
type Predictor interface {
	// Predict returns the predicted resources used by online and system pods
	// as a resource list.
	// NOTE(review): the VPA based implementation returns nil when the
	// prediction cannot be fetched, so callers presumably have to cope with a
	// nil list — confirm for the other implementations.
	Predict() v1.ResourceList
	// AddSample lets the predictor gather the sample data it predicts from;
	// stop is the channel handed in to signal that it should finish.
	// NOTE(review): whether implementations block until stop is closed is not
	// visible from this interface — confirm at the call site.
	AddSample(stop <-chan struct{})
}
14 | */ 15 | 16 | package predict 17 | 18 | import ( 19 | "testing" 20 | "time" 21 | 22 | "github.com/tencent/caelus/pkg/caelus/statestore/cgroup" 23 | "github.com/tencent/caelus/pkg/caelus/statestore/common/node" 24 | mockStore "github.com/tencent/caelus/pkg/caelus/statestore/mock" 25 | "github.com/tencent/caelus/pkg/caelus/types" 26 | "github.com/tencent/caelus/pkg/caelus/util/appclass" 27 | ) 28 | 29 | func TestLocalPredictGet(t *testing.T) { 30 | gb := float64(1024 * 1024 * 1024) 31 | now := time.Now() 32 | mockCgrouprStats := &mockStore.MockCgroupStore{ 33 | CgStats: map[string]*cgroupstore.CgroupStats{ 34 | "c1": { 35 | Ref: &cgroupstore.CgroupRef{Name: "c1", AppClass: appclass.AppClassOnline}, 36 | CpuUsage: 1, 37 | MemoryWorkingSetUsage: 1 * gb, 38 | Timestamp: now, 39 | }, 40 | "c2": { 41 | Ref: &cgroupstore.CgroupRef{Name: "c2", AppClass: appclass.AppClassSystem}, 42 | CpuUsage: 2, 43 | MemoryWorkingSetUsage: 3 * gb, 44 | Timestamp: now, 45 | }, 46 | // c3 won't be counted as it is a offline pod 47 | "c3": { 48 | Ref: &cgroupstore.CgroupRef{ 49 | Name: "c2", PodName: "p1", 50 | AppClass: appclass.AppClassOffline}, 51 | CpuUsage: 3, 52 | MemoryWorkingSetUsage: 5 * gb, 53 | Timestamp: now, 54 | }, 55 | }, 56 | } 57 | mockCommonStore := &mockStore.MockCommonStore{ 58 | MockNodeStore: &mockStore.MockNodeStore{ 59 | Resources: []*nodestore.NodeResourceState{ 60 | { 61 | Timestamp: now, 62 | CPU: &nodestore.NodeCpu{ 63 | CpuTotal: 7, // suppose kernel+kubelet+docker used 1 core 64 | }, 65 | Memory: &nodestore.NodeMemory{ 66 | UsageRss: 11 * gb, // suppose kernel+kubelet+docker used 2 gb 67 | }, 68 | }, 69 | }, 70 | }, 71 | } 72 | 73 | mockStatStore := mockStore.NewMockStatStore(mockCommonStore, mockCgrouprStats) 74 | config := types.PredictConfig{PredictType: types.LocalPredictorType} 75 | types.InitPredictConfig(&config) 76 | localPredict := NewLocalPredict(config, mockStatStore).(*localPredict) 77 | localPredict.initSampleTimes = 1 78 | 
localPredict.addSample() 79 | rl := localPredict.Predict() 80 | if len(rl) == 0 { 81 | t.Fatal() 82 | } 83 | // cpu should be 4*1.15 safetyMarginFraction 84 | if rl.Cpu().MilliValue() < 4000 || rl.Cpu().MilliValue() > 5000 { 85 | t.Fatal(rl.Cpu().MilliValue()) 86 | } 87 | // mem should be 6*1.15 88 | mem := rl.Memory().Value() / int64(gb) 89 | if mem < 5 || mem > 6 { 90 | t.Fatal(mem) 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /pkg/caelus/predict/predict_vpa.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package predict 17 | 18 | import ( 19 | "fmt" 20 | "net/http" 21 | "time" 22 | 23 | "github.com/tencent/caelus/pkg/caelus/types" 24 | "github.com/tencent/caelus/pkg/caelus/util" 25 | 26 | "github.com/parnurzeal/gorequest" 27 | "k8s.io/api/core/v1" 28 | "k8s.io/klog/v2" 29 | ) 30 | 31 | const ( 32 | PredictPath = "/v1/predict/online/" 33 | ) 34 | 35 | // vpaPredict describe options for VPA predict 36 | type vpaPredict struct { 37 | types.PredictConfig 38 | rpcClient *gorequest.SuperAgent 39 | } 40 | 41 | // AddSample starts add sample data periodically 42 | func (p *vpaPredict) AddSample(stop <-chan struct{}) { 43 | // nothing need to do 44 | } 45 | 46 | // Predict predicts resources used by online pods 47 | func (p *vpaPredict) Predict() v1.ResourceList { 48 | res := v1.ResourceList{} 49 | nodeName := util.NodeName() 50 | 51 | // get predict result from remote VPA server 52 | client := p.rpcClient.Clone().Get(fmt.Sprintf("http://%s%s%s", 53 | p.PredictServerAddr, PredictPath, nodeName)) 54 | resp, _, errs := client.EndStruct(&res) 55 | if len(errs) > 0 { 56 | klog.Errorf("can't get node resource list, %v", errs[0]) 57 | return nil 58 | } 59 | 60 | if resp.StatusCode != http.StatusOK { 61 | klog.Errorf("can't get node resource list, status code: %d", resp.StatusCode) 62 | return nil 63 | } 64 | return res 65 | } 66 | 67 | // NewVpaPredictOrDie creates a vpa predictor 68 | func NewVpaPredictOrDie(config types.PredictConfig) Predictor { 69 | return &vpaPredict{ 70 | PredictConfig: config, 71 | rpcClient: gorequest.New().SetDebug(bool(klog.V(4).Enabled())).Timeout(time.Second * 10), 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /pkg/caelus/qos/manager/manage_diskio.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package manager 17 | 18 | import ( 19 | "github.com/tencent/caelus/pkg/caelus/types" 20 | "github.com/tencent/caelus/pkg/caelus/util/cgroup" 21 | ) 22 | 23 | const QosDiskIO = "diskio" 24 | 25 | // qosDiskIO manage offline job disk io resource 26 | type qosDiskIO struct { 27 | disks []string 28 | } 29 | 30 | // NewQosDiskIO creates disk io manager 31 | func NewQosDiskIO(disks []string) ResourceQosManager { 32 | return &qosDiskIO{ 33 | disks: disks, 34 | } 35 | } 36 | 37 | // Name returns resource policy name 38 | func (d *qosDiskIO) Name() string { 39 | return QosDiskIO 40 | } 41 | 42 | // PreInit do nothing 43 | func (d *qosDiskIO) PreInit() error { 44 | return nil 45 | } 46 | 47 | // Run starts nothing 48 | func (d *qosDiskIO) Run(stop <-chan struct{}) {} 49 | 50 | // ManageDiskIO isolates disk io resource for offline jobs 51 | func (d *qosDiskIO) Manage(cgResources *CgroupResourceConfig) error { 52 | offlineParent := types.CgroupOffline 53 | return cgroup.SetBlkioWeight(offlineParent, d.disks) 54 | } 55 | -------------------------------------------------------------------------------- /pkg/caelus/qos/manager/manage_memory.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package manager 17 | 18 | import ( 19 | "github.com/tencent/caelus/pkg/caelus/statestore" 20 | "github.com/tencent/caelus/pkg/caelus/types" 21 | "github.com/tencent/caelus/pkg/caelus/util/cgroup" 22 | "github.com/tencent/caelus/pkg/caelus/util/machine" 23 | 24 | "k8s.io/api/core/v1" 25 | "k8s.io/klog/v2" 26 | ) 27 | 28 | const QosMemory = "memory" 29 | 30 | type qosMemory struct { 31 | stStore statestore.StateStore 32 | } 33 | 34 | // NewQosMemory creates memory manager instance 35 | func NewQosMemory(stStore statestore.StateStore) ResourceQosManager { 36 | return &qosMemory{ 37 | stStore: stStore, 38 | } 39 | } 40 | 41 | // Name returns resource policy name 42 | func (m *qosMemory) Name() string { 43 | return QosMemory 44 | } 45 | 46 | // PreInit do nothing 47 | func (m *qosMemory) PreInit() error { 48 | return nil 49 | } 50 | 51 | // Run starts nothing 52 | func (m *qosMemory) Run(stop <-chan struct{}) {} 53 | 54 | // ManageMemory isolates memory resource for offline jobs 55 | func (m *qosMemory) Manage(cgResources *CgroupResourceConfig) error { 56 | quantity, ok := cgResources.Resources[v1.ResourceMemory] 57 | if !ok { 58 | klog.Warningf("no memory resource found for isolation") 59 | return nil 60 | } 61 | limit := quantity.Value() 62 | 63 | var usage, newLimit int64 64 | offlineParent := types.CgroupOffline 65 | state, err := 
m.stStore.GetCgroupResourceRecentStateByPath(offlineParent, false) 66 | if err != nil { 67 | // just warning, no need to return 68 | klog.Errorf("get cgroup(%s) memory usage err: %v", offlineParent, err) 69 | newLimit = limit 70 | } else { 71 | usage = int64(state.MemoryTotalUsage) 72 | var reason string 73 | newLimit, reason = machine.GetMemoryCgroupLimitByUsage(limit, usage) 74 | if len(reason) != 0 { 75 | klog.Warningf("mem cgroup(%s) limit setting changed: %s", offlineParent, reason) 76 | } 77 | klog.V(4).Infof("isolating memory for offline jobs, origin limit:%d, current usage:%d, new limit:%d", 78 | limit, usage, newLimit) 79 | } 80 | 81 | err = cgroup.SetMemoryLimit(offlineParent, newLimit) 82 | if err != nil { 83 | klog.Errorf("set memory cgroup(%s) offline limit value(%d, %d) failed: %v", 84 | offlineParent, newLimit, usage, err) 85 | } 86 | 87 | return err 88 | } 89 | -------------------------------------------------------------------------------- /pkg/caelus/qos/manager/manager.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package manager 17 | 18 | import ( 19 | "fmt" 20 | 21 | "k8s.io/api/core/v1" 22 | ) 23 | 24 | // ResourceQosManager describe resource manager interface functions 25 | type ResourceQosManager interface { 26 | Name() string 27 | PreInit() error 28 | Run(stop <-chan struct{}) 29 | Manage(cgResources *CgroupResourceConfig) error 30 | } 31 | 32 | // CgroupResourceConfig group options for offline pods 33 | type CgroupResourceConfig struct { 34 | OnlineCgroups []string 35 | OfflineCgroups []string 36 | Resources v1.ResourceList 37 | PodList []*v1.Pod 38 | } 39 | 40 | // String formats cgroup resource output 41 | func (c *CgroupResourceConfig) String() string { 42 | pods := []string{} 43 | for _, pod := range c.PodList { 44 | pods = append(pods, fmt.Sprintf("%s-%s", pod.Namespace, pod.Name)) 45 | } 46 | return fmt.Sprintf("pods: %v, onlineCgroups: %v, offlineCgroups: %v,"+ 47 | " resources: %v", pods, c.OnlineCgroups, c.OfflineCgroups, c.Resources) 48 | } 49 | -------------------------------------------------------------------------------- /pkg/caelus/qos/manager/netio/log.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
// compile-time check that ipsetLog satisfies the ipset log.LOG interface
var _ log.LOG = ipsetLog{}

// ipsetLog implements the logger interface of the ipset library (log.LOG) by
// forwarding every call to klog. It carries no state.
type ipsetLog struct {
}

// Debugf forwards the message to klog at verbosity level 4.
func (i ipsetLog) Debugf(format string, args ...interface{}) {
	klog.V(4).Infof(format, args...)
}

// Infof forwards the message to klog.Infof.
func (i ipsetLog) Infof(format string, args ...interface{}) {
	klog.Infof(format, args...)
}

// Fatalf forwards the message to klog.Fatalf.
func (i ipsetLog) Fatalf(format string, args ...interface{}) {
	klog.Fatalf(format, args...)
}
// MockQOS is a mock implementation of qos.Manager for tests.
type MockQOS struct {
	// ReceivedEvent is set to true once UpdateEvent has been called.
	ReceivedEvent bool
}

// NewMockQOS returns a new MockQOS as a qos.Manager.
func NewMockQOS() qos.Manager {
	return &MockQOS{}
}

// Name returns the fixed module name "mockQOS".
func (m *MockQOS) Name() string {
	return "mockQOS"
}

// Run is not implemented by the mock and always panics.
func (m *MockQOS) Run(stop <-chan struct{}) {
	panic("implement me")
}

// UpdateEvent records that an event was received by setting ReceivedEvent;
// the event itself is ignored and nil is always returned.
func (m *MockQOS) UpdateEvent(event *types.ResourceUpdateEvent) error {
	m.ReceivedEvent = true
	return nil
}
// Manager is the manager used to isolate offline resources
type Manager interface {
	// Name returns the module name.
	Name() string
	// Run starts the main loop; stop is the channel handed in to signal that
	// the manager should finish.
	Run(stop <-chan struct{})
	// UpdateEvent receives event, which notifies the manager to isolate
	// offline resources. A non-nil error reports that handling the event
	// failed.
	UpdateEvent(event *types.ResourceUpdateEvent) error
}
// commonResourceInterface describes the functions shared by the resource
// manager (Interface) and its clients (clientInterface), both of which embed
// it.
type commonResourceInterface interface {
	// Name returns the module name.
	Name() string
	// Run starts the main loop; stopCh is the channel handed in to signal
	// that it should finish.
	Run(stopCh <-chan struct{})
	// DisableOfflineSchedule disables scheduling of offline jobs.
	DisableOfflineSchedule() error
	// EnableOfflineSchedule enables scheduling of offline jobs.
	EnableOfflineSchedule() error
	// OfflineScheduleDisabled returns true if schedule is disabled for
	// offline jobs.
	OfflineScheduleDisabled() bool
	// GetOfflineJobs returns the current offline job list.
	GetOfflineJobs() ([]types.OfflineJobs, error)
	// KillOfflineJob kills an offline job based on the conflicting resource.
	KillOfflineJob(conflictingResource v1.ResourceName)

	// implementations are also prometheus collectors
	prometheus.Collector
}
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package resource 17 | 18 | import ( 19 | "k8s.io/klog/v2" 20 | "time" 21 | 22 | "github.com/tencent/caelus/pkg/caelus/checkpoint" 23 | ) 24 | 25 | const ( 26 | timeAvailableSeconds = 300 27 | disableScheduleDuration = time.Duration(5 * time.Minute) 28 | checkpointKey = "node_resource" 29 | ) 30 | 31 | // NodeResourceCheckPoint struct is used to store schedule state in a checkpoint 32 | type NodeResourceCheckPoint struct { 33 | Timeseconds int64 34 | ScheduleDisable bool 35 | } 36 | 37 | // checkScheduleDisable will check schedule state from local check point file when agent restarted, 38 | // if the schedule is disabled, this will set schedule disable again and enable in future time. 
39 | func checkScheduleDisable(checkTime bool, 40 | enableSchedule, disableSchedule func() error) error { 41 | nodeCheckpoint := &NodeResourceCheckPoint{} 42 | err := checkpoint.Restore(checkpointKey, nodeCheckpoint) 43 | if err != nil { 44 | klog.Errorf("restore node resource check point err: %v", err) 45 | return err 46 | } 47 | 48 | if nodeCheckpoint.ScheduleDisable == true { 49 | klog.Warningf("found schedule state is disabled from checkpoint") 50 | // check if the state is too old 51 | if checkTime && time.Now().Unix()-nodeCheckpoint.Timeseconds > timeAvailableSeconds { 52 | klog.Warningf("schedule state checkpoint is too old, ignore!") 53 | } else { 54 | klog.Warningf("disable node schedule based on checkpoint") 55 | // disable schedule again 56 | disableSchedule() 57 | // should enable schedule if everything is ok 58 | klog.Warningf("enable node schedule based on checkpoint after %v", disableScheduleDuration) 59 | time.AfterFunc(disableScheduleDuration, func() { enableSchedule() }) 60 | } 61 | } 62 | 63 | return nil 64 | } 65 | 66 | // storeCheckpoint store schedule state into checkout point file 67 | func storeCheckpoint(scheduleState bool) error { 68 | nodeCheckPoint := &NodeResourceCheckPoint{ 69 | ScheduleDisable: scheduleState, 70 | Timeseconds: time.Now().Unix(), 71 | } 72 | 73 | return checkpoint.Save(checkpointKey, nodeCheckPoint) 74 | } 75 | -------------------------------------------------------------------------------- /pkg/caelus/resource/yarn/container_sort.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package yarn 17 | 18 | import ( 19 | global "github.com/tencent/caelus/pkg/types" 20 | ) 21 | 22 | type byAmAndTime []global.NMContainer 23 | 24 | // Len output the length of container list 25 | func (s byAmAndTime) Len() int { 26 | return len(s) 27 | } 28 | 29 | // Swap implement the swap function 30 | func (s byAmAndTime) Swap(i, j int) { 31 | s[i], s[j] = s[j], s[i] 32 | } 33 | 34 | // Less implement compare function 35 | func (s byAmAndTime) Less(i, j int) bool { 36 | if s[i].IsAM { 37 | if !s[j].IsAM { 38 | return false 39 | } 40 | } else if s[j].IsAM { 41 | return true 42 | } 43 | //TODO consider time weight and resource weight together 44 | return s[i].StartTime.After(s[j].StartTime) 45 | } 46 | 47 | type byAmAndCPU []global.NMContainer 48 | 49 | // Len output the length of container list 50 | func (s byAmAndCPU) Len() int { 51 | return len(s) 52 | } 53 | 54 | // Swap implement swap function 55 | func (s byAmAndCPU) Swap(i, j int) { 56 | s[i], s[j] = s[j], s[i] 57 | } 58 | 59 | // Less implement compare function 60 | func (s byAmAndCPU) Less(i, j int) bool { 61 | if s[i].IsAM { 62 | if !s[j].IsAM { 63 | return false 64 | } 65 | } else if s[j].IsAM { 66 | return true 67 | } 68 | if s[i].UsedVCores == s[j].UsedVCores { 69 | return s[i].StartTime.After(s[j].StartTime) 70 | } else { 71 | return s[i].UsedVCores > s[j].UsedVCores 72 | } 73 | } 74 | 75 | type byAmAndMemory []global.NMContainer 76 | 77 | // Len output the length of container list 78 | func (s byAmAndMemory) Len() int { 79 | return len(s) 80 | } 81 | 
82 | // Swap implement swap function 83 | func (s byAmAndMemory) Swap(i, j int) { 84 | s[i], s[j] = s[j], s[i] 85 | } 86 | 87 | // Less implement compare function 88 | func (s byAmAndMemory) Less(i, j int) bool { 89 | if s[i].IsAM { 90 | if !s[j].IsAM { 91 | return false 92 | } 93 | } else if s[j].IsAM { 94 | return true 95 | } 96 | if s[i].UsedMemoryMB == s[j].UsedMemoryMB { 97 | return s[i].StartTime.After(s[j].StartTime) 98 | } else { 99 | return s[i].UsedMemoryMB > s[j].UsedMemoryMB 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /pkg/caelus/resource/yarn/port_test.go: -------------------------------------------------------------------------------- 1 | package yarn 2 | 3 | import ( 4 | "github.com/tencent/caelus/pkg/caelus/checkpoint" 5 | "testing" 6 | ) 7 | 8 | func TestPortCheckpoint(t *testing.T) { 9 | checkpoint.InitCheckpointManager("/tmp") 10 | defaultPort[yarnNodeManagerWebappAddress] = 12345 11 | storeCheckpoint() 12 | defaultPort[yarnNodeManagerWebappAddress] = 0 13 | restorePortsCheckpoint() 14 | if defaultPort[yarnNodeManagerWebappAddress] != 12345 { 15 | t.Errorf("expect restore %v to 12345, got %d", yarnNodeManagerWebappAddress, defaultPort[yarnNodeManagerWebappAddress]) 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /pkg/caelus/statestore/cgroup/utils.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
const (
	// KubernetesPodNameLabel is the container label key holding the pod name.
	KubernetesPodNameLabel = "io.kubernetes.pod.name"
	// KubernetesPodNamespaceLabel is the container label key holding the pod namespace.
	KubernetesPodNamespaceLabel = "io.kubernetes.pod.namespace"
	// KubernetesPodUIDLabel is the container label key holding the pod uid.
	KubernetesPodUIDLabel = "io.kubernetes.pod.uid"
	// KubernetesContainerNameLabel is the container label key holding the container name.
	KubernetesContainerNameLabel = "io.kubernetes.container.name"

	// PodInfraContainerName is the container name "POD".
	// NOTE(review): presumably the name of the pod infra (pause) container — confirm.
	PodInfraContainerName = "POD"
)

// GetContainerName looks up the pod container name in docker container
// labels; it yields "" when the label is absent.
func GetContainerName(labels map[string]string) string {
	containerName := labels[KubernetesContainerNameLabel]
	return containerName
}

// GetPodName looks up the pod name in docker container labels; it yields ""
// when the label is absent.
func GetPodName(labels map[string]string) string {
	podName := labels[KubernetesPodNameLabel]
	return podName
}

// GetPodUID looks up the pod uid in docker container labels; it yields ""
// when the label is absent.
func GetPodUID(labels map[string]string) string {
	podUID := labels[KubernetesPodUIDLabel]
	return podUID
}

// GetPodNamespace looks up the pod namespace in docker container labels; it
// yields "" when the label is absent.
func GetPodNamespace(labels map[string]string) string {
	podNamespace := labels[KubernetesPodNamespaceLabel]
	return podNamespace
}
// podMetric is a single metric sample of one pod as it appears on the wire.
type podMetric struct {
	PodName    string  `json:"pod_name"`
	Namespace  string  `json:"namespace"`
	MetricName string  `json:"metric_name"`
	Slo        float64 `json:"slo"`
	Value      float64 `json:"value"`
}

// podMetrics wraps a list of podMetric under the "data" key.
type podMetrics struct {
	Data []podMetric `json:"data"`
}

// Metric describe online job's SLO and current metrics value
type Metric struct {
	JobName string
	Values  map[string]float64 `json:"values"`

	// Ts is excluded from JSON encoding.
	Ts time.Time `json:"-"`
}

// MetricsResponseData show online job metric response data
type MetricsResponseData struct {
	Code int                  `json:"code"`
	Msg  string               `json:"msg"`
	Data []MetricResponseList `json:"data"`
}

// MetricResponseList group online job metric data
//
// On the wire it is a flat JSON object: "job_name" and "metric_name" carry the
// two named fields and every other numeric member is an entry of Values.
type MetricResponseList struct {
	JobName    string `json:"job_name"`
	MetricName string `json:"metric_name"`
	Values     map[string]float64
}

// MarshalJSON encodes the list as a flat JSON object containing "job_name",
// "metric_name" and one member per entry of Values. A nil receiver encodes as
// JSON null.
func (m *MetricResponseList) MarshalJSON() ([]byte, error) {
	if m == nil {
		return []byte("null"), nil
	}
	tmp := make(map[string]interface{}, len(m.Values)+2)
	for k, v := range m.Values {
		tmp[k] = v
	}
	// Written after the values so that a Values entry named "job_name" or
	// "metric_name" can never replace the real fields in the output.
	tmp["job_name"] = m.JobName
	tmp["metric_name"] = m.MetricName
	return json.Marshal(tmp)
}

// UnmarshalJSON decodes a flat JSON object produced by MarshalJSON.
//
// "job_name" and "metric_name" are stored in JobName and MetricName when they
// are strings. Every other member holding a JSON number is stored in Values;
// members of any other type are ignored. Entries already present in Values
// are kept. An error is returned only when b is not a valid JSON object.
func (m *MetricResponseList) UnmarshalJSON(b []byte) error {
	tmp := make(map[string]interface{})
	if err := json.Unmarshal(b, &tmp); err != nil {
		return err
	}
	if m.Values == nil {
		m.Values = make(map[string]float64)
	}
	for k, v := range tmp {
		// Dispatch on the member KEY. The reserved keys are never treated as
		// metric values, and a string value that merely happens to equal
		// "job_name"/"metric_name" must not touch the named fields.
		switch k {
		case "job_name":
			if name, ok := v.(string); ok {
				m.JobName = name
			}
		case "metric_name":
			if name, ok := v.(string); ok {
				m.MetricName = name
			}
		default:
			if num, ok := v.(float64); ok {
				m.Values[k] = num
			}
		}
	}
	return nil
}
32 | type MockCgroupStore struct { 33 | CgStats map[string]*cgroupstore.CgroupStats 34 | ExtraCgs []string 35 | } 36 | 37 | // mock function 38 | func (cg *MockCgroupStore) MachineInfo() (*cadvisorapi.MachineInfo, error) { 39 | panic("implement me") 40 | } 41 | 42 | // mock function 43 | func (cg *MockCgroupStore) GetCgroupResourceRangeStats(podName, podNamespace string, start, end time.Time, 44 | count int) ([]*cgroupstore.CgroupStats, error) { 45 | panic("implement me") 46 | } 47 | 48 | // mock function 49 | func (cg *MockCgroupStore) GetCgroupResourceRecentState(podName, podNamespace string, 50 | updateStats bool) (*cgroupstore.CgroupStats, error) { 51 | panic("implement me") 52 | 53 | } 54 | 55 | // mock function 56 | func (cg *MockCgroupStore) GetCgroupResourceRecentStateByPath(cgPath string, 57 | updateStats bool) (*cgroupstore.CgroupStats, error) { 58 | for _, cgStat := range cg.CgStats { 59 | if cgStat.Ref.Name == cgPath { 60 | return cgStat, nil 61 | } 62 | } 63 | return nil, fmt.Errorf("no cgroup stats found for path: %s", cgPath) 64 | } 65 | 66 | // mock function 67 | func (cg *MockCgroupStore) ListCgroupResourceRangeStats(start, end time.Time, count int, 68 | classFilter sets.String) (map[string][]*cgroupstore.CgroupStats, error) { 69 | panic("implement me") 70 | } 71 | 72 | // mock function 73 | func (cg *MockCgroupStore) ListCgroupResourceRecentState(updateStats bool, 74 | classFilter sets.String) (map[string]*cgroupstore.CgroupStats, error) { 75 | return cg.CgStats, nil 76 | } 77 | 78 | // mock function 79 | func (cg *MockCgroupStore) ListAllCgroups(classFilter sets.String) (map[string]*cgroupstore.CgroupRef, error) { 80 | panic("implement me") 81 | } 82 | 83 | // mock function 84 | func (cg *MockCgroupStore) AddExtraCgroups(extraCgs []string) error { 85 | cg.ExtraCgs = extraCgs 86 | return nil 87 | } 88 | 89 | // mock function 90 | func (cg *MockCgroupStore) GetCgroupStoreSupportedTags() []string { 91 | panic("implement me") 92 | } 93 | 
-------------------------------------------------------------------------------- /pkg/caelus/statestore/mock/stats_mock.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package mock 17 | 18 | import ( 19 | "github.com/tencent/caelus/pkg/caelus/statestore" 20 | ) 21 | 22 | // MockStatStore describe fake resource stats type 23 | type MockStatStore struct { 24 | *MockCommonStore 25 | *MockCgroupStore 26 | } 27 | 28 | // NewMockStatStore create fake resource stats instance 29 | func NewMockStatStore(common *MockCommonStore, cgroup *MockCgroupStore) statestore.StateStore { 30 | return &MockStatStore{ 31 | MockCommonStore: common, 32 | MockCgroupStore: cgroup, 33 | } 34 | } 35 | 36 | // mock function 37 | func (ss *MockStatStore) Run(stopCh <-chan struct{}) { 38 | panic("implement me") 39 | } 40 | 41 | // mock function 42 | func (ss *MockStatStore) Name() string { 43 | panic("implement me") 44 | } 45 | -------------------------------------------------------------------------------- /pkg/caelus/statestore/state_store.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package statestore 17 | 18 | import ( 19 | "github.com/tencent/caelus/pkg/caelus/statestore/cgroup" 20 | "github.com/tencent/caelus/pkg/caelus/statestore/common" 21 | "github.com/tencent/caelus/pkg/caelus/types" 22 | 23 | "k8s.io/client-go/tools/cache" 24 | "k8s.io/klog/v2" 25 | ) 26 | 27 | // StateStore describe interface for resource state 28 | type StateStore interface { 29 | Run(stop <-chan struct{}) 30 | Name() string 31 | commonstore.CommonStoreInterface 32 | cgroupstore.CgroupStoreInterface 33 | } 34 | 35 | type stateStoreManager struct { 36 | cgroupstore.CgroupStore 37 | commonstore.CommonStore 38 | } 39 | 40 | // NewStateStoreManager new a resource state instance 41 | func NewStateStoreManager(config *types.MetricsCollectConfig, onlineCfg *types.OnlineConfig, 42 | podInformer cache.SharedIndexInformer) StateStore { 43 | cgroupSt := cgroupstore.NewCgroupStoreManager(config.Container, podInformer) 44 | publicSt := commonstore.NewCommonStoreManager(config, onlineCfg, podInformer, cgroupSt) 45 | return &stateStoreManager{ 46 | CgroupStore: cgroupSt, 47 | CommonStore: publicSt, 48 | } 49 | } 50 | 51 | // Run main loop 52 | func (ss *stateStoreManager) Run(stop <-chan struct{}) { 53 | ss.CommonStore.Run(stop) 54 | klog.V(2).Infof("public resource state collection started") 55 | ss.CgroupStore.Run(stop) 56 | klog.V(2).Infof("cgroup resource state collection started") 
57 | } 58 | 59 | // Name describe module name 60 | func (ss *stateStoreManager) Name() string { 61 | return "ModuleStateStore" 62 | } 63 | -------------------------------------------------------------------------------- /pkg/caelus/types/diskquota.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package types 17 | 18 | import ( 19 | "github.com/tencent/caelus/pkg/util/times" 20 | 21 | "k8s.io/apimachinery/pkg/util/sets" 22 | "k8s.io/klog/v2" 23 | ) 24 | 25 | const ( 26 | // container runtime 27 | ContainerRuntimeDocker = "docker" 28 | ) 29 | 30 | var ( 31 | defaultContainerRuntime = ContainerRuntimeDocker 32 | availableContainerRuntime = sets.NewString(ContainerRuntimeDocker) 33 | 34 | VolumeTypeRootFs VolumeType = "rootFs" 35 | VolumeTypeEmptyDir VolumeType = "emptyDir" 36 | VolumeTypeHostPath VolumeType = "hostPath" 37 | AvailableVolumeTypes = sets.NewString( 38 | VolumeTypeRootFs.String(), 39 | VolumeTypeEmptyDir.String(), 40 | VolumeTypeHostPath.String()) 41 | ) 42 | 43 | type VolumeType string 44 | 45 | // String output volume type to string 46 | func (vt VolumeType) String() string { 47 | return string(vt) 48 | } 49 | 50 | // DiskQuotaConfig group disk quota configurations 51 | type DiskQuotaConfig struct { 52 | Enabled bool `json:"enabled"` 53 | CheckPeriod 
times.Duration `json:"check_period"` 54 | // such as docker or containerd 55 | ContainerRuntime string `json:"container_runtime"` 56 | // quota size just for offline job, online jobs need to announce in annotations 57 | VolumeSizes map[VolumeType]*DiskQuotaSize `json:"volume_sizes"` 58 | } 59 | 60 | // shall we support soft feature ? 61 | type DiskQuotaSize struct { 62 | Quota uint64 `json:"quota"` 63 | Inodes uint64 `json:"inodes"` 64 | QuotaUsed uint64 `json:"-"` 65 | InodesUsed uint64 `json:"-"` 66 | } 67 | 68 | func initDiskQuotaManager(config *DiskQuotaConfig) { 69 | if config.CheckPeriod.Seconds() == 0 { 70 | config.CheckPeriod = defaultCheckPeriod 71 | } 72 | if len(config.ContainerRuntime) == 0 { 73 | config.ContainerRuntime = defaultContainerRuntime 74 | } 75 | if !availableContainerRuntime.Has(config.ContainerRuntime) { 76 | klog.Fatalf("invalid container runtime %s, should be one of the: %v", 77 | config.ContainerRuntime, availableContainerRuntime) 78 | } 79 | } 80 | 81 | // SharedInfo indicate a path has shared quota or not 82 | type SharedInfo struct { 83 | PodName string 84 | } 85 | 86 | // PathInfo group path and quota options 87 | type PathInfo struct { 88 | Path string 89 | Size *DiskQuotaSize 90 | //if we set share limit, SharedInfo containers project id name 91 | //if not, SharedInfo is nil 92 | SharedInfo *SharedInfo 93 | } 94 | -------------------------------------------------------------------------------- /pkg/caelus/util/appclass/appclass_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package appclass 17 | 18 | import ( 19 | "testing" 20 | 21 | "k8s.io/api/core/v1" 22 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 23 | ) 24 | 25 | var appClassCases = []struct { 26 | describe string 27 | pod *v1.Pod 28 | expect AppClass 29 | isOffline bool 30 | }{ 31 | { 32 | describe: "test " + AppClassSystem, 33 | pod: &v1.Pod{ 34 | ObjectMeta: metav1.ObjectMeta{ 35 | Namespace: "kube-system", 36 | }, 37 | }, 38 | expect: AppClassSystem, 39 | isOffline: false, 40 | }, 41 | { 42 | describe: "test " + AppClassUnknown, 43 | pod: &v1.Pod{ 44 | ObjectMeta: metav1.ObjectMeta{}, 45 | }, 46 | expect: AppClassUnknown, 47 | isOffline: false, 48 | }, 49 | { 50 | describe: "test " + AppClassOffline, 51 | pod: &v1.Pod{ 52 | ObjectMeta: metav1.ObjectMeta{ 53 | Annotations: map[string]string{ 54 | AnnotationOfflineKey: AnnotationOfflineValue, 55 | }, 56 | }, 57 | }, 58 | expect: AppClassOffline, 59 | isOffline: true, 60 | }, 61 | { 62 | describe: "test " + AppClassOnline, 63 | pod: &v1.Pod{ 64 | ObjectMeta: metav1.ObjectMeta{ 65 | Namespace: "default", 66 | Annotations: map[string]string{ 67 | AnnotationOfflineKey: "online", 68 | }, 69 | }, 70 | }, 71 | expect: AppClassOnline, 72 | isOffline: false, 73 | }, 74 | } 75 | 76 | // TestGetAppClass tests app class 77 | func TestGetAppClass(t *testing.T) { 78 | for _, ac := range appClassCases { 79 | t.Logf("start case: %s", ac.describe) 80 | result := GetAppClass(ac.pod) 81 | if result != ac.expect { 82 | t.Fatalf("get app class error, expect %s, but get %s", ac.expect, result) 83 
| } 84 | } 85 | } 86 | 87 | // TestIsOffline tests if the app is offline 88 | func TestIsOffline(t *testing.T) { 89 | for _, ac := range appClassCases { 90 | t.Logf("start case: %s", ac.describe) 91 | result := IsOffline(ac.pod) 92 | if result != ac.isOffline { 93 | t.Fatalf("offline check err, expect %v, but get %v", ac.isOffline, result) 94 | } 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /pkg/caelus/util/cgroup/blkio_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package cgroup 17 | 18 | import ( 19 | "io/ioutil" 20 | "os" 21 | "path" 22 | "strings" 23 | "testing" 24 | ) 25 | 26 | // TestBlkioLimitSet test blkio limit 27 | func TestBlkioLimitSet(t *testing.T) { 28 | root := GetRoot() 29 | 30 | subsysteCgroup := "/docker" 31 | cgroupPath := path.Join(root, "blkio", subsysteCgroup) 32 | 33 | _, err := os.Stat(cgroupPath) 34 | if err != nil { 35 | if err = os.MkdirAll(cgroupPath, 0755); err != nil { 36 | t.Skipf("Create blkio cgroup failed") 37 | } 38 | } 39 | 40 | disks, err := ioutil.ReadDir(blockDir) 41 | if err != nil { 42 | t.Skipf("Read disks from %v failed", blockDir) 43 | } 44 | 45 | targetdisk := "" 46 | for _, disk := range disks { 47 | if strings.Contains(disk.Name(), "da") { 48 | targetdisk = disk.Name() 49 | break 50 | } 51 | } 52 | if targetdisk == "" { 53 | t.Skipf("Cannot find disk from %v", blockDir) 54 | } 55 | 56 | writeKbps := map[string]uint64{ 57 | targetdisk: 20480, 58 | } 59 | readKbps := map[string]uint64{ 60 | targetdisk: 10240, 61 | } 62 | 63 | BlkioLimitSet(subsysteCgroup, writeKbps, readKbps) 64 | writevalue, err := ioutil.ReadFile(path.Join(cgroupPath, "blkio.throttle.write_bps_device")) 65 | if err != nil { 66 | t.Fatalf("get write limit failed, %v", path.Join(cgroupPath, "blkio.throttle.write_bps_device")) 67 | } 68 | if !strings.Contains(string(writevalue), "20971520") { 69 | t.Fatalf("write limit set failed, the setted value is %s, target value is %v", string(writevalue), writeKbps) 70 | } 71 | 72 | readvalue, err := ioutil.ReadFile(path.Join(cgroupPath, "blkio.throttle.read_bps_device")) 73 | if err != nil { 74 | t.Fatalf("get read limit failed, %v", path.Join(cgroupPath, "blkio.throttle.read_bps_device")) 75 | } 76 | if !strings.Contains(string(readvalue), "10485760") { 77 | t.Fatalf("read limit set failed, the setted value is %s, target value is %v", string(readvalue), readKbps) 78 | } 79 | } 80 | 
-------------------------------------------------------------------------------- /pkg/caelus/util/cgroup/cpu.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package cgroup 17 | 18 | import ( 19 | "fmt" 20 | "path" 21 | "strconv" 22 | 23 | "github.com/opencontainers/runc/libcontainer/cgroups" 24 | "github.com/opencontainers/runc/libcontainer/cgroups/fs" 25 | ) 26 | 27 | const ( 28 | CPUSubsystem = "cpu" 29 | ) 30 | 31 | // GetCPUTotalUsage get cpu usage for cgroup 32 | func GetCPUTotalUsage(pathInCgroup string) (uint64, error) { 33 | root := GetRoot() 34 | cgPath := path.Join(root, CPUSubsystem, pathInCgroup) 35 | g := new(fs.CpuacctGroup) 36 | stats := cgroups.NewStats() 37 | if err := g.GetStats(cgPath, stats); err != nil { 38 | return 0, fmt.Errorf("get cpuacct cgroup stats failed: %v", err) 39 | } 40 | return stats.CpuStats.CpuUsage.TotalUsage, nil 41 | } 42 | 43 | // SetCPUShares set cpu.shares for cgroup 44 | func SetCPUShares(pathInCgroup string, value uint64) error { 45 | root := GetRoot() 46 | return WriteFile([]byte(strconv.FormatUint(value, 10)), 47 | path.Join(root, CPUSubsystem, pathInCgroup), "cpu.shares") 48 | } 49 | 50 | // SetCpuQuota set cpu.cfs_quota_us for cgroup 51 | func SetCpuQuota(pathInCgroup string, cores float64) error { 52 | root := 
GetRoot() 53 | // changing default value 100000 to 1000000 for cpu.cfs_period_us 54 | err := WriteFile([]byte("1000000"), 55 | path.Join(root, CPUSubsystem, pathInCgroup), "cpu.cfs_period_us") 56 | if err != nil { 57 | return err 58 | } 59 | value := int64(cores * 1000000) 60 | if cores == -1 { 61 | value = -1 62 | } 63 | return WriteFile([]byte(strconv.FormatInt(value, 10)), 64 | path.Join(root, CPUSubsystem, pathInCgroup), "cpu.cfs_quota_us") 65 | } 66 | -------------------------------------------------------------------------------- /pkg/caelus/util/cgroup/cpu_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package cgroup 17 | 18 | import ( 19 | "io/ioutil" 20 | "os" 21 | "path" 22 | "strings" 23 | "testing" 24 | ) 25 | 26 | var ( 27 | tmpCpuCgroupPath = "/sys/fs/cgroup/cpu,cpuacct/test" 28 | tmpCpuSubPath = "test" 29 | cores = 2 30 | expectQuota = "2000000" 31 | ) 32 | 33 | // TestSetCpuQuota test cpu quota 34 | func TestSetCpuQuota(t *testing.T) { 35 | err := os.MkdirAll(tmpCpuCgroupPath, 0700) 36 | if err != nil { 37 | t.Skipf("mkdir cgroup path(%s) err: %v", tmpCpuCgroupPath, err) 38 | } 39 | defer os.RemoveAll(tmpCpuCgroupPath) 40 | 41 | err = SetCpuQuota(tmpCpuSubPath, float64(cores)) 42 | if err != nil { 43 | t.Fatalf("set cpu quota for %s err: %v", tmpCpuCgroupPath, err) 44 | } 45 | 46 | quotaBytes, err := ioutil.ReadFile(path.Join(tmpCpuCgroupPath, "cpu.cfs_quota_us")) 47 | if err != nil { 48 | t.Fatalf("read cpu quota for %s err: %v", tmpCpuCgroupPath, err) 49 | } 50 | 51 | if strings.Trim(string(quotaBytes), "\n") != expectQuota { 52 | t.Fatalf("unexpect quot value: %s, expect: %s", string(quotaBytes), expectQuota) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /pkg/caelus/util/cgroup/cpuset_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package cgroup 17 | 18 | import ( 19 | "os" 20 | "testing" 21 | ) 22 | 23 | var ( 24 | tmpCpuSetCgroupPath = "/sys/fs/cgroup/cpuset/test" 25 | tmpCpuSetSub = "test" 26 | ) 27 | 28 | // TestGetCpuSet test getting cpuset value 29 | func TestGetCpuSet(t *testing.T) { 30 | err := os.MkdirAll(tmpCpuSetCgroupPath, 0711) 31 | if err != nil { 32 | t.Skipf("create cpuset path(%s) failed: %v", tmpCpuSetCgroupPath, err) 33 | } 34 | defer os.RemoveAll(tmpCpuSetCgroupPath) 35 | 36 | cpusetStr := "0,2-3" 37 | cpusetSlice := []int{0, 2, 3} 38 | cpusetStrSlice := []string{"0", "2", "3"} 39 | err = WriteCpuSetCores(tmpCpuSetSub, cpusetSlice) 40 | if err != nil { 41 | t.Fatalf("write cpuset value(%s) to %s err: %v", cpusetStr, tmpCpuSetCgroupPath, err) 42 | } 43 | 44 | resultSlice, err := GetCpuSet(tmpCpuSetSub, true) 45 | if err != nil { 46 | t.Fatalf("get cpuset value from %s failed: %v", tmpCpuSetCgroupPath, err) 47 | } 48 | for i, v := range cpusetStrSlice { 49 | if v != resultSlice[i] { 50 | t.Fatalf("get unexpect cpuset value from %s, expect %v, got %v", 51 | tmpCpuSetCgroupPath, cpusetStrSlice, resultSlice) 52 | } 53 | } 54 | 55 | resultStr, err := GetCpuSet(tmpCpuSetSub, false) 56 | if err != nil { 57 | t.Fatalf("get cpuset value from %s failed: %v", tmpCpuSetCgroupPath, err) 58 | } 59 | if resultStr[0] != cpusetStr { 60 | t.Fatalf("get unexpect cpuset value from %s, expect %s, got %s", 61 | tmpCpuSetCgroupPath, cpusetStr, resultStr) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /pkg/caelus/util/cgroup/perf_event.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package cgroup 17 | 18 | import ( 19 | "path" 20 | ) 21 | 22 | const ( 23 | PerfEventSubsystem = "perf_event" 24 | ) 25 | 26 | // GetPerfEventCgroupPath return perf_event cgroup path 27 | func GetPerfEventCgroupPath(pathInCgroup string) (string, error) { 28 | root := GetRoot() 29 | return path.Join(root, PerfEventSubsystem, pathInCgroup), nil 30 | } 31 | -------------------------------------------------------------------------------- /pkg/caelus/util/cgroup/topology.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package cgroup 17 | 18 | import ( 19 | "sort" 20 | 21 | "github.com/shirou/gopsutil/cpu" 22 | "k8s.io/klog/v2" 23 | ) 24 | 25 | var ( 26 | threadSiblings map[int]int 27 | ) 28 | 29 | // GenerateTheadSiblings will parse /proc/cpuinfo, and generate maps, showing which cores are belong to the same thead 30 | // silbings. The map is nil when HT is disabled on the machine. 31 | func GenerateTheadSiblings() error { 32 | cpuinfo, err := cpu.Info() 33 | if err != nil { 34 | return err 35 | } 36 | 37 | threadSiblings = make(map[int]int) 38 | 39 | coreIdToThead := make(map[string]int) 40 | for i := range cpuinfo { 41 | processorid := int(cpuinfo[i].CPU) 42 | nodeid := cpuinfo[i].PhysicalID 43 | coreid := cpuinfo[i].CoreID 44 | 45 | if tid, ok := coreIdToThead[nodeid+coreid]; !ok { 46 | coreIdToThead[nodeid+coreid] = processorid 47 | } else { 48 | threadSiblings[processorid] = tid 49 | threadSiblings[tid] = processorid 50 | } 51 | } 52 | klog.Infof("thread sibling generated: %v", threadSiblings) 53 | 54 | return nil 55 | } 56 | 57 | // ChooseNumaCores will choose number cores from total cores based on NUMA struct 58 | func ChooseNumaCores(totalCores []int, chosenNum int) (chosen []int, left []int) { 59 | if len(totalCores) == 0 { 60 | return 61 | } 62 | if chosenNum == 0 { 63 | left = totalCores 64 | return 65 | } 66 | 67 | // sort the cores, and select from high to low 68 | sort.Ints(totalCores) 69 | 70 | ifChosen := make(map[int]bool) 71 | for _, c := range totalCores { 72 | ifChosen[c] = false 73 | } 74 | 75 | i := len(totalCores) - 1 76 | for { 77 | threadId := totalCores[i] 78 | if !ifChosen[threadId] { 79 | chosen = append(chosen, threadId) 80 | ifChosen[threadId] = true 81 | if len(chosen) == chosenNum { 82 | break 83 | } 84 | 85 | // also choose sibling thread 86 | if sibling, ok := threadSiblings[threadId]; ok { 87 | if selected, okk := ifChosen[sibling]; okk && !selected { 88 | chosen = append(chosen, sibling) 89 | ifChosen[sibling] = true 90 | if 
len(chosen) == chosenNum { 91 | break 92 | } 93 | } 94 | } 95 | } 96 | 97 | i-- 98 | if i < 0 { 99 | break 100 | } 101 | } 102 | 103 | for _, c := range totalCores { 104 | if !ifChosen[c] { 105 | left = append(left, c) 106 | } 107 | } 108 | 109 | return chosen, left 110 | } 111 | -------------------------------------------------------------------------------- /pkg/caelus/util/errors.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package util 17 | 18 | import "errors" 19 | 20 | var ( 21 | // ErrNotFound show the error meaning "not found" 22 | ErrNotFound = errors.New("not found") 23 | ) 24 | -------------------------------------------------------------------------------- /pkg/caelus/util/machine/machine.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package machine 17 | 18 | import ( 19 | "fmt" 20 | 21 | "github.com/guillermo/go.procmeminfo" 22 | "github.com/shirou/gopsutil/cpu" 23 | "k8s.io/api/core/v1" 24 | "k8s.io/apimachinery/pkg/api/resource" 25 | "k8s.io/klog/v2" 26 | ) 27 | 28 | var ( 29 | // memoryBuffer allow the buffer size when the memory limit value is equal to the usage value 30 | memoryBuffer = int64(1024 * 1020 * 1024) 31 | minUsage = int64(100 * 1024 * 1024) 32 | ) 33 | 34 | // totalResource is the capacity resource of the node, no lock is ok. 35 | var totalResource = v1.ResourceList{} 36 | 37 | // GetTotalResource return node capacity resource 38 | func GetTotalResource() (v1.ResourceList, error) { 39 | if len(totalResource) == 0 { 40 | cores, err := cpu.Info() 41 | if err != nil { 42 | return totalResource, err 43 | } 44 | mem := procmeminfo.MemInfo{} 45 | mem.Update() 46 | mems := mem.Total() 47 | totalResource[v1.ResourceCPU] = *resource.NewMilliQuantity(int64(len(cores)*1000), resource.DecimalSI) 48 | totalResource[v1.ResourceMemory] = *resource.NewQuantity(int64(mems), resource.DecimalSI) 49 | } 50 | 51 | return totalResource, nil 52 | } 53 | 54 | // GetMemoryCgroupLimitByUsage check limit value and current usage value for memory cgroup, 55 | // and will change limit value if necessary 56 | func GetMemoryCgroupLimitByUsage(limit, usage int64) (newLimit int64, reason string) { 57 | newLimit = limit 58 | // if usage is too small(100Mi), just set origin limit value, which min value is 128Mi 59 | if usage <= minUsage { 60 | return 61 | } 62 | 
63 | if limit-usage < memoryBuffer { 64 | newLimit = usage + memoryBuffer 65 | 66 | if limit >= usage { 67 | // for memory cgroup, if usage value is nearly to limit, it may be hang for sometimes, so we add the buffer 68 | reason = fmt.Sprintf("mem cgroup limit value(%d) is nearly to usage(%d), add buffer: %d", 69 | limit, usage, newLimit) 70 | } else { 71 | // memory setting will failed when limited value is smaller than current usage. 72 | // there is no better way except dropping cache in cgroup level. 73 | // Now we just try to set limit value as current usage 74 | reason = fmt.Sprintf("mem cgroup limit value(%d) is less than current usage(%d),"+ 75 | "try to set current value added buffer: %d", limit, usage, newLimit) 76 | } 77 | } else { 78 | klog.V(5).Infof("mem cgroup limit value(%d) is bigger than usage(%d)", limit, usage) 79 | } 80 | 81 | return newLimit, reason 82 | } 83 | -------------------------------------------------------------------------------- /pkg/caelus/util/machine/machine_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package machine 17 | 18 | import ( 19 | "strings" 20 | "testing" 21 | ) 22 | 23 | type memoryLimitTestData struct { 24 | describe string 25 | limit int64 26 | usage int64 27 | expect struct { 28 | limit int64 29 | reason string 30 | } 31 | } 32 | 33 | // TestGetMemoryCgroupLimitByUsage test memory cgroup limit setting 34 | func TestGetMemoryCgroupLimitByUsage(t *testing.T) { 35 | testCases := []memoryLimitTestData{ 36 | { 37 | describe: "small usage", 38 | limit: 10 * 1024 * 1024 * 1024, 39 | usage: 10 * 1024 * 1024, 40 | expect: struct { 41 | limit int64 42 | reason string 43 | }{ 44 | limit: 10 * 1024 * 1024 * 1024, 45 | reason: "", 46 | }, 47 | }, 48 | { 49 | describe: "usage bigger than limit", 50 | limit: 10 * 1024 * 1024 * 1024, 51 | usage: 11 * 1024 * 1024 * 1024, 52 | expect: struct { 53 | limit int64 54 | reason string 55 | }{ 56 | limit: 12880707584, 57 | reason: "less than current usage", 58 | }, 59 | }, 60 | { 61 | describe: "usage nearly reach to limit", 62 | limit: 10.5 * 1024 * 1024 * 1024, 63 | usage: 10 * 1024 * 1024 * 1024, 64 | expect: struct { 65 | limit int64 66 | reason string 67 | }{ 68 | limit: 11806965760, 69 | reason: "nearly to usage", 70 | }, 71 | }, 72 | { 73 | describe: "limit bigger than usage", 74 | limit: 12 * 1024 * 1024 * 1024, 75 | usage: 10 * 1024 * 1024, 76 | expect: struct { 77 | limit int64 78 | reason string 79 | }{ 80 | limit: 12 * 1024 * 1024 * 1024, 81 | reason: "", 82 | }, 83 | }, 84 | } 85 | 86 | for _, tc := range testCases { 87 | limit, reason := GetMemoryCgroupLimitByUsage(tc.limit, tc.usage) 88 | if tc.expect.limit != limit { 89 | t.Fatalf("memory cgroup get limit case(%s) failed, expect %d, got %d", 90 | tc.describe, tc.expect.limit, limit) 91 | } 92 | if reason != tc.expect.reason && !strings.Contains(reason, tc.expect.reason) { 93 | t.Fatalf("memory cgroup get limit case(%s) failed, expect reason: %s, got: %s", 94 | tc.describe, tc.expect.reason, reason) 95 | } 96 | } 97 | } 98 | 
-------------------------------------------------------------------------------- /pkg/caelus/util/machine/speed.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package machine 17 | 18 | import ( 19 | "io/ioutil" 20 | "os" 21 | "path" 22 | "strconv" 23 | "strings" 24 | 25 | "k8s.io/klog/v2" 26 | ) 27 | 28 | const NET_SPEED = "NET_SPEED" 29 | 30 | var ( 31 | // accessing the map is not very frequency, no need to add lock 32 | devSpeedMap = make(map[string]int) 33 | ) 34 | 35 | // GetIfaceSpeed get speed of ifName from /sys/class/net/$ifName/speed, or environment 36 | func GetIfaceSpeed(ifName string) int { 37 | if v, ok := devSpeedMap[ifName]; ok { 38 | return v 39 | } 40 | 41 | speed, err := getSpeed(ifName) 42 | if err != nil { 43 | klog.Errorf("get speed of %s from local file err: %v", ifName, err) 44 | } 45 | if speed <= 0 || speed > 100*1000 { // assume max speed is 100Gbit 46 | speed, _ = strconv.Atoi(os.Getenv(NET_SPEED)) 47 | } 48 | if speed <= 0 { 49 | klog.Fatalf("bad speed of %s %d", ifName, speed) 50 | } 51 | devSpeedMap[ifName] = speed 52 | return speed 53 | } 54 | 55 | // getSpeed get speed of ifName from /sys/class/net/$ifName/speed 56 | func getSpeed(ifName string) (int, error) { 57 | file := path.Join("/sys/class/net/", ifName, "/speed") 58 | speedStr, 
err := ioutil.ReadFile(file) 59 | if err != nil { 60 | return 0, err 61 | } 62 | value := strings.Replace(string(speedStr), "\n", "", -1) 63 | return strconv.Atoi(value) 64 | } 65 | -------------------------------------------------------------------------------- /pkg/caelus/util/ports/ports.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package ports 17 | 18 | import ( 19 | "fmt" 20 | "net" 21 | 22 | "k8s.io/apimachinery/pkg/util/sets" 23 | "k8s.io/klog/v2" 24 | ) 25 | 26 | // Unused check if a host port is available 27 | func Unused(hp *Hostport) bool { 28 | closer, err := openLocalPort(hp) 29 | if err != nil { 30 | return false 31 | } 32 | if err := closer.Close(); err != nil { 33 | klog.Warningf("can't close port %v", hp) 34 | } 35 | return true 36 | } 37 | 38 | // FindUnusedPort find a host port that is available 39 | func FindUnusedPort(start int, blackList sets.Int, protocol string) (int, error) { 40 | hp := &Hostport{ 41 | Port: 0, 42 | Protocol: protocol, 43 | } 44 | for i := start; i < 30000; i++ { 45 | if blackList.Has(i) { 46 | continue 47 | } 48 | hp.Port = i 49 | if Unused(hp) { 50 | return i, nil 51 | } 52 | } 53 | return 0, fmt.Errorf("can't find an unused port") 54 | } 55 | 56 | // closeable is a closable resource 57 | type closeable interface { 58 | Close() error 59 | } 60 | 61 | // Hostport is a host Port 62 | type Hostport struct { 63 | Port int 64 | Protocol string 65 | } 66 | 67 | // String print the host port 68 | func (hp *Hostport) String() string { 69 | return fmt.Sprintf("%s:%d", hp.Protocol, hp.Port) 70 | } 71 | 72 | // this function is copied from github.com/kubernetes/kubernetes/pkg/kubelet/network/kubenet/kubenet_linux.go 73 | func openLocalPort(hp *Hostport) (closeable, error) { 74 | var socket closeable 75 | switch hp.Protocol { 76 | case "tcp": 77 | listener, err := net.Listen("tcp", fmt.Sprintf(":%d", hp.Port)) 78 | if err != nil { 79 | return nil, err 80 | } 81 | socket = listener 82 | hp.Port = listener.Addr().(*net.TCPAddr).Port 83 | case "udp": 84 | addr, err := net.ResolveUDPAddr("udp", fmt.Sprintf(":%d", hp.Port)) 85 | if err != nil { 86 | return nil, err 87 | } 88 | conn, err := net.ListenUDP("udp", addr) 89 | if err != nil { 90 | return nil, err 91 | } 92 | socket = conn 93 | hp.Port = conn.LocalAddr().(*net.UDPAddr).Port 94 | default: 95 
| return nil, fmt.Errorf("unknown Protocol %q", hp.Protocol) 96 | } 97 | klog.V(4).Infof("Opened local Port %s", hp.String()) 98 | return socket, nil 99 | } 100 | -------------------------------------------------------------------------------- /pkg/caelus/util/runtime/docker/docker_fake.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package dockerclient 17 | 18 | import ( 19 | "fmt" 20 | "github.com/tencent/caelus/pkg/caelus/util/runtime" 21 | 22 | dockertypes "github.com/docker/engine-api/types" 23 | ) 24 | 25 | type fakeDockerClient struct { 26 | containers map[string]*dockertypes.ContainerJSON 27 | } 28 | 29 | // NewFakeDockerClient create a fake docker client 30 | func NewFakeDockerClient(cons map[string]*dockertypes.ContainerJSON) runtime.RuntimeClient { 31 | return &fakeDockerClient{ 32 | containers: cons, 33 | } 34 | } 35 | 36 | // InspectContainer inspect container 37 | func (f *fakeDockerClient) InspectContainer(id string) (*dockertypes.ContainerJSON, error) { 38 | con, ok := f.containers[id] 39 | if !ok { 40 | return nil, fmt.Errorf("not found") 41 | } 42 | 43 | return con, nil 44 | } 45 | 46 | // ContainerList list containers 47 | func (f *fakeDockerClient) ContainerList(options dockertypes.ContainerListOptions) ([]dockertypes.Container, error) { 48 | panic("implementing me") 49 | } 50 | -------------------------------------------------------------------------------- /pkg/caelus/util/runtime/runtime.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package runtime 17 | 18 | import ( 19 | dockertypes "github.com/docker/engine-api/types" 20 | ) 21 | 22 | // RuntimeClient implement interface interacting with runtime, such as docker 23 | // now just implement list method, no creating method 24 | type RuntimeClient interface { 25 | // InspectContainer inspect container 26 | InspectContainer(id string) (*dockertypes.ContainerJSON, error) 27 | // ContainerList list containers 28 | ContainerList(options dockertypes.ContainerListOptions) ([]dockertypes.Container, error) 29 | } 30 | -------------------------------------------------------------------------------- /pkg/caelus/util/sets/pods.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package sets 17 | 18 | import ( 19 | v1 "k8s.io/api/core/v1" 20 | "k8s.io/client-go/tools/cache" 21 | "k8s.io/klog/v2" 22 | ) 23 | 24 | type Pod map[string]*v1.Pod 25 | 26 | // NewPod create Pod instance 27 | func NewPod(items ...*v1.Pod) Pod { 28 | ps := Pod{} 29 | ps.Insert(items...) 
30 | return ps 31 | } 32 | 33 | // Insert add new pods 34 | func (p Pod) Insert(items ...*v1.Pod) Pod { 35 | for _, item := range items { 36 | key, err := cache.MetaNamespaceKeyFunc(item) 37 | if err != nil { 38 | klog.Warningf("can't get key for pod %+#v, %v", item, err) 39 | continue 40 | } 41 | p[key] = item 42 | } 43 | return p 44 | } 45 | 46 | // Delete delete old pods 47 | func (p Pod) Delete(items ...*v1.Pod) Pod { 48 | for _, item := range items { 49 | key, err := cache.MetaNamespaceKeyFunc(item) 50 | if err != nil { 51 | klog.Warningf("can't get key for pod %+#v, %v", item, err) 52 | continue 53 | } 54 | delete(p, key) 55 | } 56 | return p 57 | } 58 | 59 | // Has function check if the pod existed 60 | func (p Pod) Has(item *v1.Pod) bool { 61 | key, err := cache.MetaNamespaceKeyFunc(item) 62 | if err != nil { 63 | klog.Warningf("can't get key for pod %+#v, %v", item, err) 64 | return false 65 | } 66 | 67 | _, found := p[key] 68 | return found 69 | } 70 | 71 | // UnsortedList return pod list 72 | func (p Pod) UnsortedList() []*v1.Pod { 73 | l := make([]*v1.Pod, 0, len(p)) 74 | for _, v := range p { 75 | l = append(l, v) 76 | } 77 | return l 78 | } 79 | 80 | // Update update pod 81 | func (p Pod) Update(item *v1.Pod) bool { 82 | key, err := cache.MetaNamespaceKeyFunc(item) 83 | if err != nil { 84 | klog.Warningf("can't get key for pod %+#v, %v", item, err) 85 | return false 86 | } 87 | 88 | p[key] = item 89 | return true 90 | } 91 | -------------------------------------------------------------------------------- /pkg/caelus/util/util.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package util 17 | 18 | /* 19 | #include 20 | #include 21 | #include 22 | #include 23 | */ 24 | import "C" 25 | import ( 26 | "net" 27 | "sync/atomic" 28 | "unsafe" 29 | ) 30 | 31 | var ( 32 | // nodeName store the current node name 33 | nodeName unsafe.Pointer 34 | 35 | // nodeIP store the current node ip 36 | nodeIP unsafe.Pointer 37 | 38 | // InHostNamespace show if the current namespace is host 39 | InHostNamespace bool 40 | 41 | cpuTicksPerSecond int64 42 | 43 | // SilenceMode indicate do not running offline jobs 44 | SilenceMode bool 45 | ) 46 | 47 | // NodeName return current node name 48 | func NodeName() string { 49 | c := (*string)(atomic.LoadPointer(&nodeName)) 50 | if c != nil { 51 | return *c 52 | } 53 | return "" 54 | } 55 | 56 | // SetNodeName set current node name 57 | func SetNodeName(name string) { 58 | atomic.StorePointer( 59 | &nodeName, unsafe.Pointer(&name)) 60 | } 61 | 62 | // NodeIP return current node ip 63 | func NodeIP() string { 64 | c := (*string)(atomic.LoadPointer(&nodeIP)) 65 | if c != nil { 66 | return *c 67 | } 68 | return "" 69 | } 70 | 71 | // SetNodeIP set current node ip 72 | func SetNodeIP(ip string) { 73 | atomic.StorePointer( 74 | &nodeIP, unsafe.Pointer(&ip)) 75 | } 76 | 77 | // MatchIP check if the input string is an ip address 78 | func MatchIP(ip string) bool { 79 | return net.ParseIP(ip) != nil 80 | } 81 | 82 | // GetClockTicksPerSecond return clock ticks per second from unix 83 | func GetClockTicksPerSecond() int64 { 84 | if t := atomic.LoadInt64(&cpuTicksPerSecond); t 
> 0 { 85 | return t 86 | } 87 | atomic.StoreInt64(&cpuTicksPerSecond, int64(C.sysconf(C._SC_CLK_TCK))) 88 | return cpuTicksPerSecond 89 | } 90 | -------------------------------------------------------------------------------- /pkg/nm-operator/hadoop/hadoop_conf_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package hadoop 17 | 18 | import ( 19 | "strings" 20 | "testing" 21 | 22 | "gotest.tools/assert" 23 | ) 24 | 25 | const YARN_SITE = ` 26 | 27 | 28 | yarn.resourcemanager.address 29 | RM_IP:18032 30 | 31 | 32 | yarn.resourcemanager.resource-tracker.address 33 | RM_IP:18031 34 | 35 | 36 | yarn.resourcemanager.webapp.address 37 | RM_IP:8080 38 | 39 | 40 | yarn.resourcemanager.scheduler.address 41 | RM_IP:18030 42 | 43 | 44 | 45 | yarn.nodemanager.resource.memory-mb 46 | MEMORY 47 | 48 | 49 | 50 | yarn.nodemanager.resource.cpu-vcores 51 | VCORE 52 | 53 | 54 | yarn.nodemanager.local-dirs 55 | /data/nm-local 56 | 57 | 58 | 59 | yarn.nodemanager.log-dirs 60 | /data/nm-log 61 | 62 | ` 63 | 64 | // TestGetConfDataFromStream test if to get xml data from io reader 65 | func TestGetConfDataFromStream(t *testing.T) { 66 | reader := strings.NewReader(YARN_SITE) 67 | confData, err := LoadConfDataFromStream(reader) 68 | if err != nil { 69 | t.Fail() 70 | } 71 | 72 | assert.Equal(t, "MEMORY", confData.Get("yarn.nodemanager.resource.memory-mb")) 73 | assert.Equal(t, "VCORE", confData.Get("yarn.nodemanager.resource.cpu-vcores")) 74 | assert.Equal(t, confData.Get("xxx"), "") 75 | } 76 | 77 | // TestSetConfDataFromStream test if to get xml data from io reader 78 | func TestSetConfDataFromStream(t *testing.T) { 79 | reader := strings.NewReader(YARN_SITE) 80 | confData, err := LoadConfDataFromStream(reader) 81 | if err != nil { 82 | t.Fail() 83 | } 84 | 85 | assert.Equal(t, "MEMORY", confData.Get("yarn.nodemanager.resource.memory-mb")) 86 | confData.Set("yarn.nodemanager.resource.memory-mb", "1024") 87 | assert.Equal(t, "1024", confData.Get("yarn.nodemanager.resource.memory-mb")) 88 | } 89 | -------------------------------------------------------------------------------- /pkg/nm-operator/types/types.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 
const (
	// EnvPort is the name of the environment variable carrying the server port
	EnvPort = "GINIT_PORT"
)

var (
	// cadvisorMetrics describes which metrics need to be collected: only cpu and memory
	cadvisorMetrics = cadvisormetrics.MetricSet{
		"cpu":    struct{}{},
		"memory": struct{}{},
	}
	// cadvisorCacheDuration is the duration handed to the cadvisor memory cache
	cadvisorCacheDuration = 2 * time.Minute
	// cadvisorMaxHousekeepingInterval is used as MaxHousekeepingInterval of CadvisorParameters
	cadvisorMaxHousekeepingInterval = 15 * time.Second

	// HadoopPath is the cgroup path collected by default (see CgroupPath)
	HadoopPath = "/hadoop-yarn"
	// CgroupRoot is the cgroup root directory.
	// NOTE(review): presumably the cgroup v1 mount point; confirm against its users
	CgroupRoot = "/sys/fs/cgroup"

	// CgroupPath describes which cgroup paths to collect
	CgroupPath = []string{HadoopPath}

	// CadvisorParameters bundles the cadvisor settings built from the values above
	CadvisorParameters = cadvisor.CadvisorParameter{
		MemCache:                memory.New(cadvisorCacheDuration, nil),
		SysFs:                   sysfs.NewRealSysFs(),
		IncludeMetrics:          cadvisorMetrics,
		MaxHousekeepingInterval: cadvisorMaxHousekeepingInterval,
	}
)

// RMAppWrapper is the JSON envelope of a single application, holding it under
// the "app" key.
// NOTE(review): the original comment attributes this payload to the
// nodemanager API while the type prefix suggests the resource manager; confirm.
type RMAppWrapper struct {
	App RMApp `json:"app"`
}

// RMApp describes the application fields decoded from the JSON payload
type RMApp struct {
	ID          string  `json:"id"`
	User        string  `json:"user"`
	Name        string  `json:"name"`
	Queue       string  `json:"queue"`
	State       string  `json:"state"`
	FinalStatus string  `json:"finalStatus"`
	Progress    float32 `json:"progress"`
	TrackingUI  string  `json:"trackingUI"`
	AmLogs      string  `json:"amContainerLogs"`
}
14 | */ 15 | 16 | package util 17 | 18 | import ( 19 | "fmt" 20 | "io/ioutil" 21 | "os" 22 | "os/exec" 23 | "strings" 24 | "testing" 25 | ) 26 | 27 | func TestExecuteYarnCMD(t *testing.T) { 28 | user := "tester" 29 | binFile := "/tmp/test.sh" 30 | 31 | // add user 32 | output, err := exec.Command("useradd", user).Output() 33 | if err != nil { 34 | t.Skipf("create user(%s) err: %v, output: %s, just skip", user, err, string(output)) 35 | } 36 | defer exec.Command("userdel", "-rf", user).Output() 37 | 38 | // get uid & gid 39 | uidBytes, err := exec.Command("id", "-u", user).Output() 40 | if err != nil { 41 | t.Skipf("get user id fail for %s: %v, just skip", user, err) 42 | } 43 | uidStr := strings.Trim(string(uidBytes), "\n") 44 | gidBytes, err := exec.Command("id", "-g", user).Output() 45 | if err != nil { 46 | t.Skipf("get group id fail for %s: %v, just skip", user, err) 47 | } 48 | gidStr := strings.Trim(string(gidBytes), "\n") 49 | 50 | // generate executable file 51 | context := fmt.Sprintf(` 52 | #!/bin/sh 53 | 54 | if [ $# != 2 ]; then 55 | echo "should be two parameters" 56 | exit 1 57 | fi 58 | 59 | uid=$(id -u) 60 | if [ "$uid" != "%s" ]; then 61 | echo "bad uid $uid" 62 | exit 1 63 | fi 64 | 65 | gid=$(id -g) 66 | if [ "$gid" != "%s" ]; then 67 | echo "bad gid $gid" 68 | exit 1 69 | fi 70 | 71 | `, uidStr, gidStr) 72 | 73 | ioutil.WriteFile(binFile, []byte(context), 0755) 74 | os.Chmod(binFile, 0777) 75 | defer os.Remove(binFile) 76 | 77 | err = ExecuteYarnCMD([]string{binFile, "1", "2"}, user) 78 | if err != nil { 79 | t.Fatalf("execute yarn cmd err: %v", err) 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /pkg/types/type.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 
// DiskPartitionStats describes the space usage of one disk partition.
type DiskPartitionStats struct {
	// TotalSize is the total partition size in bytes
	TotalSize int64
	// UsedSize is the used size in bytes, computed as TotalSize - FreeSize
	UsedSize int64
	// FreeSize is the free size in bytes, derived from the free block count
	FreeSize int64
}

// GetDiskPartitionStats returns the disk space stats of the file system that
// contains partitionName, as reported by statfs.
// It returns a nil result together with the statfs error when the call fails.
func GetDiskPartitionStats(partitionName string) (*DiskPartitionStats, error) {
	var stat syscall.Statfs_t
	if err := syscall.Statfs(partitionName, &stat); err != nil {
		return nil, err
	}

	// The integer type of Bsize differs between platforms, so convert it
	// explicitly instead of relying on it being int64.
	blockSize := int64(stat.Bsize)
	total := int64(stat.Blocks) * blockSize
	free := int64(stat.Bfree) * blockSize

	return &DiskPartitionStats{
		TotalSize: total,
		UsedSize:  total - free,
		FreeSize:  free,
	}, nil
}
6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package times 17 | 18 | import ( 19 | "encoding/json" 20 | "testing" 21 | "time" 22 | ) 23 | 24 | // A show test struct 25 | type A struct { 26 | T Duration 27 | } 28 | 29 | // TestUnMarshal test unmarshal 30 | func TestUnMarshal(t *testing.T) { 31 | var a A 32 | if err := json.Unmarshal([]byte(`{"T":"5m"}`), &a); err != nil { 33 | t.Fatal(err) 34 | } 35 | if time.Duration(a.T).Seconds() != 300 { 36 | t.Fatal(a.T) 37 | } 38 | if err := json.Unmarshal([]byte(`{"T":""}`), &a); err != nil { 39 | t.Fatal(err) 40 | } 41 | if time.Duration(a.T).Seconds() != 0 { 42 | t.Fatal(a.T) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /pkg/util/util.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 THL A29 Limited, a Tencent company. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * 7 | * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
var (
	// onlyOneSignalHandler is closed by SetupSignalHandler; closing it a
	// second time panics, which rejects repeated setup calls
	onlyOneSignalHandler = make(chan struct{})
	// shutdownHandler receives the signals registered by SetupSignalHandler
	shutdownHandler chan os.Signal
	// shutdownSignals lists the signals that are treated as shutdown requests
	shutdownSignals = []os.Signal{os.Interrupt, syscall.SIGTERM}
)

// SetupSignalHandler registers for SIGTERM and SIGINT and returns a stop
// channel that is closed when the first of these signals arrives. A second
// signal terminates the program with exit code 1. Calling this function more
// than once panics.
func SetupSignalHandler() <-chan struct{} {
	// closing an already closed channel panics, so a second call fails here
	close(onlyOneSignalHandler)

	stopCh := make(chan struct{})

	// buffered for two signals: the one that stops and the one that exits
	shutdownHandler = make(chan os.Signal, 2)
	signal.Notify(shutdownHandler, shutdownSignals...)

	waitForSignals := func() {
		// first signal: tell the caller to stop
		<-shutdownHandler
		close(stopCh)

		// second signal: exit directly
		<-shutdownHandler
		os.Exit(1)
	}
	go waitForSignals()

	return stopCh
}
It provides an approximation of the Apiswitch 22 | // version for ad-hoc builds (e.g. `go build`) that cannot get the version 23 | // information from git. 24 | // 25 | // If you are looking at these fields in the git tree, they look 26 | // strange. They are modified on the fly by the build process. The 27 | // in-tree values are dummy values used for "git archive", which also 28 | // works for GitHub tar downloads. 29 | var ( 30 | gitBranch string = "Not a git repo" // branch of git 31 | gitCommit string = "$Format:%H$" // sha1 from git, output of $(git rev-parse HEAD) 32 | gitTreeState string = "Not a git tree" // state of git tree, either "clean" or "dirty" 33 | buildDate string = "1970-01-01T00:00:00Z" // build date in ISO8601 format 34 | ) 35 | -------------------------------------------------------------------------------- /pkg/version/version.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | This is copied from github.com/kubernetes/kubernetes/pkg/version/version.go, 17 | and does some modification as following: 18 | - remove gitMajor, gitMinor, gitVersion 19 | 20 | */ 21 | 22 | package version 23 | 24 | import ( 25 | "fmt" 26 | "runtime" 27 | ) 28 | 29 | // Info contain versioning information. 30 | // TODO: Add []string of api versions supported?
It's still unclear 31 | // how we'll want to distribute that information. 32 | type Info struct { 33 | GitBranch string `json:"gitBranch"` 34 | GitCommit string `json:"gitCommit"` 35 | GitTreeState string `json:"gitTreeState"` 36 | BuildDate string `json:"buildDate"` 37 | GoVersion string `json:"goVersion"` 38 | Compiler string `json:"compiler"` 39 | Platform string `json:"platform"` 40 | } 41 | 42 | // String return info as a human-friendly version string. 43 | func (info Info) String() string { 44 | return info.GitBranch + "-" + info.GitCommit 45 | } 46 | 47 | // Get return the overall codebase version. It's for detecting 48 | // what code a binary was built from. 49 | func Get() Info { 50 | // These variables typically come from -ldflags settings and in 51 | // their absence fallback to the settings in pkg/version/base.go 52 | return Info{ 53 | GitBranch: gitBranch, 54 | GitCommit: gitCommit, 55 | GitTreeState: gitTreeState, 56 | BuildDate: buildDate, 57 | GoVersion: runtime.Version(), 58 | Compiler: runtime.Compiler, 59 | Platform: fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH), 60 | } 61 | } 62 | --------------------------------------------------------------------------------