├── .gitignore ├── Dockerfile ├── README.md ├── cmd └── main.go ├── go.mod ├── go.sum └── pkg ├── exporter ├── exporter.go └── podcache.go ├── kubepods └── kubepods.go ├── metrics └── metrics.go ├── nvidia └── device.go ├── ptree ├── ptree.go ├── scanner.go ├── scanner_cgroup.go └── types.go └── util ├── config.go ├── kube.go └── util.go /.gitignore: -------------------------------------------------------------------------------- 1 | vendor 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.16-stretch as build 2 | 3 | 4 | ENV GO111MODULE=on 5 | ENV CGO_ENABLED=1 6 | ENV GOOS=linux 7 | ENV GOARCH=amd64 8 | # config 9 | WORKDIR /go/src/elastic-gpu-exporter 10 | COPY . . 11 | 12 | RUN go env -w GO111MODULE=on 13 | RUN go env -w GOPROXY=https://goproxy.cn,direct 14 | RUN GO111MODULE=on go mod download 15 | RUN go get github.com/prometheus/client_golang/prometheus@v1.0.0 16 | #RUN go mod download github.com/alex337/go-nvml 17 | RUN go build -o /go/bin/elastic-gpu-exporter cmd/main.go 18 | 19 | # runtime image 20 | FROM nvidia/cuda:10.0-base 21 | COPY --from=build /go/bin/elastic-gpu-exporter /usr/bin/elastic-gpu-exporter 22 | RUN ln -s /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1 /usr/lib/x86_64-linux-gnu/libnvidia-ml.so 23 | 24 | CMD ["elastic-gpu-exporter"] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # elastic-gpu-exporter 2 | A general-purpose GPU metrics exporter for collecting and exporting GPU resource usages at pod and container level. 3 | -------------------------------------------------------------------------------- /cmd/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "github.com/prometheus/client_golang/prometheus" 6 | "github.com/prometheus/client_golang/prometheus/promhttp" 7 | "github.com/prometheus/common/log" 8 | "elasticgpu.io/elastic-gpu-exporter/pkg/exporter" 9 | "elasticgpu.io/elastic-gpu-exporter/pkg/util" 10 | "net/http" 11 | "strings" 12 | "time" 13 | ) 14 | 15 | const Resources = "nvidia.com/gpu,tke.cloud.tencent.com/qgpu-core,tke.cloud.tencent.com/qgpu-memory,elastic-gpu/gpu-percent" 16 | 17 | var ( 18 | addr = flag.String("listen-address", ":8080", "The address to listen on for HTTP requests.") 19 | node string 20 | resources string 21 | interval int 22 | ) 23 | 24 | func init(){ 25 | flag.StringVar(&node, "node", "", "node name") 26 | flag.StringVar(&resources, "labels", Resources, "gpu resources name") 27 | flag.IntVar(&interval, "interval", 30, "monitor interval (second)") 28 | flag.Parse() 29 | } 30 | 31 | func main() { 32 | e := exporter.NewExporter(node, strings.Split(resources, ","), time.Duration(interval) * time.Second) 33 | go e.Run(util.NeverStop) 34 | 35 | http.Handle("/metrics", promhttp.HandlerFor( 36 | prometheus.DefaultGatherer, 37 | promhttp.HandlerOpts{ 38 | DisableCompression: true, 39 | }, 40 | )) 41 | http.Handle("/metrics", promhttp.Handler()) 42 | log.Fatal(http.ListenAndServe(*addr, nil)) 43 | } -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module elasticgpu.io/elastic-gpu-exporter 2 | 3 | go 1.16 4 | 5 | //replace tkestack.io/nvml => github.com/alex337/go-nvml v1 6 | replace tkestack.io/nvml => github.com/tkestack/go-nvml v0.0.0-20191217064248-7363e630a33e 7 | 8 | require ( 9 | github.com/NVIDIA/gpu-monitoring-tools v0.0.0-20210817155834-f476d8a022cf // indirect 10 | github.com/prometheus/client_golang v1.0.0 11 | github.com/prometheus/common v0.4.1 12 | k8s.io/api v0.17.4 13 | k8s.io/apimachinery v0.17.4 14 | k8s.io/client-go v0.17.4 15 | k8s.io/klog v1.0.0 16 | k8s.io/kubectl v0.17.4 17 | //github.com/alex337/go-nvml v1.0.0 18 | tkestack.io/nvml v0.0.0-00010101000000-000000000000 19 | ) 20 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 2 | cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 3 | cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= 4 | github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8= 5 | github.com/Azure/go-autorest/autorest v0.9.0/go.mod h1:xyHB1BMZT0cuDHU7I0+g046+BFDTQ8rEZB0s4Yfa6bI= 6 | github.com/Azure/go-autorest/autorest/adal v0.5.0/go.mod h1:8Z9fGy2MpX0PvDjB1pEgQTmVqjGhiHBW7RJJEciWzS0= 7 | github.com/Azure/go-autorest/autorest/date v0.1.0/go.mod h1:plvfp3oPSKwf2DNjlBjWF/7vwR+cUD/ELuzDCXwHUVA= 8 | github.com/Azure/go-autorest/autorest/mocks v0.1.0/go.mod h1:OTyCOPRA2IgIlWxVYxBee2F5Gr4kF2zd2J5cFRaIDN0= 9 | github.com/Azure/go-autorest/autorest/mocks v0.2.0/go.mod h1:OTyCOPRA2IgIlWxVYxBee2F5Gr4kF2zd2J5cFRaIDN0= 10 | github.com/Azure/go-autorest/logger v0.1.0/go.mod h1:oExouG+K6PryycPJfVSxi/koC6LSNgds39diKLz7Vrc= 11 | github.com/Azure/go-autorest/tracing v0.5.0/go.mod h1:r/s2XiOKccPW3HrqB+W0TQzfbtp2fGCgRFtBroKn4Dk= 12 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 13 | github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= 14 | github.com/MakeNowJust/heredoc v0.0.0-20170808103936-bb23615498cd/go.mod h1:64YHyfSL2R96J44Nlwm39UHepQbyR5q10x7iYa1ks2E= 15 | github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= 16 | github.com/NVIDIA/gpu-monitoring-tools v0.0.0-20210817155834-f476d8a022cf h1:t4TOchUA16QIQK8CoaMktjM6ZZbM3ODOfbLKKHs5a50= 17 | github.com/NVIDIA/gpu-monitoring-tools v0.0.0-20210817155834-f476d8a022cf/go.mod h1:oKPJa5eOTkWvlT4/Y4D8Nds44Fzmww5HUK+xwO+DwTA= 18 | github.com/NVIDIA/gpu-monitoring-tools/bindings/go/dcgm v0.0.0-20210325210537-29b4f1784f18/go.mod h1:8qXwltEzU3idjUcVpMOv3FNgxxbDeXZPGMLyc/khWiY= 19 | github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ= 20 | github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= 21 | github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= 22 | github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= 23 | github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= 24 | github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc h1:cAKDfWh5VpdgMhJosfJnn5/FoN2SRZ4p7fJNX58YPaU= 25 | github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= 26 | github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf h1:qet1QNfXsQxTZqLG4oE62mJzwPIB8+Tee4RNCL9ulrY= 27 | github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= 28 | github.com/antihax/optional v0.0.0-20180407024304-ca021399b1a6/go.mod h1:V8iCPQYkqmusNa815XgQio277wI47sdRh1dUOLdyC6Q= 29 | github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= 30 | github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= 31 | github.com/beorn7/perks v1.0.0 h1:HWo1m869IqiPhD389kmkxeTalrjNbbJTC8LXupb+sl0= 32 | github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= 33 | github.com/blang/semver v3.5.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= 34 | github.com/chai2010/gettext-go v0.0.0-20160711120539-c6fed771bfd5/go.mod h1:/iP1qXHoty45bqomnu2LM+VVyAEdWN+vtSHGlQgyxbw= 35 | github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= 36 | github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= 37 | github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= 38 | github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= 39 | github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= 40 | github.com/davecgh/go-spew v0.0.0-20151105211317-5215b55f46b2/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 41 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 42 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 43 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 44 | github.com/daviddengcn/go-colortext v0.0.0-20160507010035-511bcaf42ccd/go.mod h1:dv4zxwHi5C/8AeI+4gX4dCWOIvNi7I6JCSX0HvlKPgE= 45 | github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= 46 | github.com/docker/distribution v2.7.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= 47 | github.com/docker/docker v0.7.3-0.20190327010347-be7ac8be2ae0/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= 48 | github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= 49 | github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM= 50 | github.com/elazarl/goproxy v0.0.0-20170405201442-c4fc26588b6e/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc= 51 | github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= 52 | github.com/emicklei/go-restful v2.9.5+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= 53 | github.com/evanphx/json-patch v4.2.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= 54 | github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d/go.mod h1:ZZMPRZwes7CROmyNKgQzC3XPs6L/G2EJLHddWejkmf4= 55 | github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc= 56 | github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= 57 | github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= 58 | github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= 59 | github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= 60 | github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= 61 | github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= 62 | github.com/go-openapi/jsonpointer v0.0.0-20160704185906-46af16f9f7b1/go.mod h1:+35s3my2LFTysnkMfxsJBAMHj/DoqoB9knIWoYG/Vk0= 63 | github.com/go-openapi/jsonpointer v0.19.2/go.mod h1:3akKfEdA7DF1sugOqz1dVQHBcuDBPKZGEoHC/NkiQRg= 64 | github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= 65 | github.com/go-openapi/jsonreference v0.0.0-20160704190145-13c6e3589ad9/go.mod h1:W3Z9FmVs9qj+KR4zFKmDPGiLdk1D9Rlm7cyMvf57TTg= 66 | github.com/go-openapi/jsonreference v0.19.2/go.mod h1:jMjeRr2HHw6nAVajTXJ4eiUwohSTlpa0o73RUL1owJc= 67 | github.com/go-openapi/jsonreference v0.19.3/go.mod h1:rjx6GuL8TTa9VaixXglHmQmIL98+wF9xc8zWvFonSJ8= 68 | github.com/go-openapi/spec v0.0.0-20160808142527-6aced65f8501/go.mod h1:J8+jY1nAiCcj+friV/PDoE1/3eeccG9LYBs0tYvLOWc= 69 | github.com/go-openapi/spec v0.19.3/go.mod h1:FpwSN1ksY1eteniUU7X0N/BgJ7a4WvBFVA8Lj9mJglo= 70 | github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dpr1UfpPtxFw+EFuQ41HhCWZfha5jSVRG7C7I= 71 | github.com/go-openapi/swag v0.19.2/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= 72 | github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= 73 | github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= 74 | github.com/godbus/dbus v0.0.0-20181101234600-2ff6f7ffd60f/go.mod h1:/YcGZj5zSblfDWMMoOzV4fas9FZnQYTkDnsGvmh2Grw= 75 | github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= 76 | github.com/gogo/protobuf v1.2.2-0.20190723190241-65acae22fc9d h1:3PaI8p3seN09VjbTYC/QWlUZdZ1qS1zGjy7LH2Wt07I= 77 | github.com/gogo/protobuf v1.2.2-0.20190723190241-65acae22fc9d/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= 78 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= 79 | github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903 h1:LbsanbbD6LieFkXbj9YNNBupiGHJgFeLpO0j0Fza1h8= 80 | github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= 81 | github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= 82 | github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= 83 | github.com/golang/protobuf v0.0.0-20161109072736-4bd1920723d7/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 84 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 85 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 86 | github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs= 87 | github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 88 | github.com/golangplus/bytes v0.0.0-20160111154220-45c989fe5450/go.mod h1:Bk6SMAONeMXrxql8uvOKuAZSu8aM5RUGv+1C6IJaEho= 89 | github.com/golangplus/fmt v0.0.0-20150411045040-2a5d6d7d2995/go.mod h1:lJgMEyOkYFkPcDKwRXegd+iM6E7matEszMG5HhwytU8= 90 | github.com/golangplus/testing v0.0.0-20180327235837-af21d9c3145e/go.mod h1:0AA//k/eakGydO4jKRoRL2j92ZKSzTgj9tclaCrvXHk= 91 | github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= 92 | github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= 93 | github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= 94 | github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY= 95 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 96 | github.com/google/gofuzz v0.0.0-20161122191042-44d81051d367/go.mod h1:HP5RmnzzSNb993RKQDq4+1A4ia9nllfqcQFTQJedwGI= 97 | github.com/google/gofuzz v1.0.0 h1:A8PeW59pxE9IoFRqBp37U+mSNaQoZ46F1f0f863XSXw= 98 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 99 | github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= 100 | github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= 101 | github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY= 102 | github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 103 | github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= 104 | github.com/googleapis/gnostic v0.0.0-20170729233727-0c5108395e2d h1:7XGaL1e6bYS1yIonGp9761ExpPPV1ui0SAC59Yube9k= 105 | github.com/googleapis/gnostic v0.0.0-20170729233727-0c5108395e2d/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY= 106 | github.com/gophercloud/gophercloud v0.1.0/go.mod h1:vxM41WHh5uqHVBMZHzuwNOHh8XEoIEcSTewFxm1c5g8= 107 | github.com/gorilla/mux v1.7.4/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= 108 | github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= 109 | github.com/grpc-ecosystem/grpc-gateway v1.12.1/go.mod h1:8XEsbTttt/W+VvjtQhLACqCisSPWTxCZ7sBRjU6iH9c= 110 | github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= 111 | github.com/hashicorp/golang-lru v0.5.1 h1:0hERBMJE1eitiLkihrMvRVBYAkpHzc/J3QdDN+dAcgU= 112 | github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= 113 | github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= 114 | github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= 115 | github.com/imdario/mergo v0.3.5 h1:JboBksRwiiAJWvIYJVo46AfV+IAIKZpfrSzVKj42R4Q= 116 | github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= 117 | github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= 118 | github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= 119 | github.com/json-iterator/go v0.0.0-20180612202835-f2b4162afba3/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= 120 | github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= 121 | github.com/json-iterator/go v1.1.8 h1:QiWkFLKq0T7mpzwOTu6BzNDbfTE8OLrYhVKYMLF46Ok= 122 | github.com/json-iterator/go v1.1.8/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= 123 | github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= 124 | github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= 125 | github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= 126 | github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 127 | github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk= 128 | github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= 129 | github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= 130 | github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= 131 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 132 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 133 | github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA= 134 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= 135 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 136 | github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE= 137 | github.com/lithammer/dedent v1.1.0/go.mod h1:jrXYCQtgg0nJiN+StA2KgR7w6CiQNv9Fd/Z9BP0jIOc= 138 | github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= 139 | github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= 140 | github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= 141 | github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= 142 | github.com/mailru/easyjson v0.7.0/go.mod h1:KAzv3t3aY1NaHWoQz1+4F1ccyAH66Jk7yos7ldAVICs= 143 | github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= 144 | github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= 145 | github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= 146 | github.com/mitchellh/go-wordwrap v1.0.0/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo= 147 | github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= 148 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 149 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 150 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 151 | github.com/modern-go/reflect2 v0.0.0-20180320133207-05fbef0ca5da/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= 152 | github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= 153 | github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI= 154 | github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= 155 | github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= 156 | github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= 157 | github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= 158 | github.com/onsi/ginkgo v0.0.0-20170829012221-11459a886d9c/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= 159 | github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= 160 | github.com/onsi/ginkgo v1.10.1/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= 161 | github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= 162 | github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= 163 | github.com/opencontainers/go-digest v1.0.0-rc1/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= 164 | github.com/opencontainers/runc v1.0.0-rc9/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= 165 | github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= 166 | github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= 167 | github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= 168 | github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 169 | github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 170 | github.com/pmezard/go-difflib v0.0.0-20151028094244-d8ed2627bdf0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 171 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 172 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 173 | github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= 174 | github.com/prometheus/client_golang v1.0.0 h1:vrDKnkGzuGvhNAL56c7DBz29ZL+KxnoR0x7enabFceM= 175 | github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= 176 | github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= 177 | github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90 h1:S/YWwWx/RA8rT8tKFRuGUZhuA90OyIBpPCXkcbwU8DE= 178 | github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= 179 | github.com/prometheus/common v0.4.1 h1:K0MGApIoQvMw27RTdJkPbr3JZ7DNbtxQNyi5STVM6Kw= 180 | github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= 181 | github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= 182 | github.com/prometheus/procfs v0.0.2 h1:6LJUbpNm42llc4HRCuvApCSWB/WfhuNo9K98Q9sNGfs= 183 | github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= 184 | github.com/remyoudompheng/bigfft v0.0.0-20170806203942-52369c62f446/go.mod h1:uYEyJGbgTkfkS4+E/PavXkNJcbFIpEtjt2B0KDQ5+9M= 185 | github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= 186 | github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= 187 | github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= 188 | github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4= 189 | github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= 190 | github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= 191 | github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= 192 | github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= 193 | github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= 194 | github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= 195 | github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= 196 | github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= 197 | github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= 198 | github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 199 | github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= 200 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 201 | github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 202 | github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= 203 | github.com/stretchr/testify v0.0.0-20151208002404-e3a8ff8ce365/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 204 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 205 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 206 | github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= 207 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 208 | github.com/tkestack/go-nvml v0.0.0-20191217064248-7363e630a33e h1:0+K32iJJnNaL/0QxJjfk33QiF+9b9yVkHkuXJWDQPfI= 209 | github.com/tkestack/go-nvml v0.0.0-20191217064248-7363e630a33e/go.mod h1:6U/NbUmn/gt/d6yR3Qd4Hc9Yu3qyyREnz6hiYz9/qws= 210 | github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= 211 | github.com/xlab/handysort v0.0.0-20150421192137-fb3537ed64a1/go.mod h1:QcJo0QPSfTONNIgpN5RA8prR7fF8nkF6cTWTcNerRO8= 212 | github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= 213 | go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= 214 | golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= 215 | golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= 216 | golang.org/x/crypto v0.0.0-20190211182817-74369b46fc67/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= 217 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 218 | golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 219 | golang.org/x/crypto v0.0.0-20200220183623-bac4c82f6975 h1:/Tl7pH94bvbAAHBdZJT947M/+gp0+CqQXDtMRC0fseo= 220 | golang.org/x/crypto v0.0.0-20200220183623-bac4c82f6975/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 221 | golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 222 | golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 223 | golang.org/x/exp v0.0.0-20190312203227-4b39c73a6495/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= 224 | golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= 225 | golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= 226 | golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= 227 | golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= 228 | golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 229 | golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= 230 | golang.org/x/net v0.0.0-20170114055629-f2499483f923/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 231 | golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 232 | golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 233 | golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 234 | golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 235 | golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 236 | golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 237 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 238 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 239 | golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 240 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 241 | golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 242 | golang.org/x/net v0.0.0-20191002035440-2ec189313ef0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 243 | golang.org/x/net v0.0.0-20191004110552-13f9640d40b9 h1:rjwSpXsdiK0dV8/Naq3kAw9ymfAeJIyd0upUIElB+lI= 244 | golang.org/x/net v0.0.0-20191004110552-13f9640d40b9/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 245 | golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= 246 | golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= 247 | golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 h1:SVwTIAaPC2U/AvvLNZ2a7OVsmBpC8L5BlwK1whH3hm0= 248 | golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= 249 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 250 | golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 251 | golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 252 | golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 253 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 254 | golang.org/x/sys v0.0.0-20170830134202-bb24a47a89ea/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 255 | golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 256 | golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 257 | golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 258 | golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 259 | golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 260 | golang.org/x/sys v0.0.0-20190209173611-3b5209105503/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 261 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 262 | golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 263 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 264 | golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 265 | golang.org/x/sys v0.0.0-20190616124812-15dcb6c0061f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 266 | golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456 h1:ng0gs1AKnRRuEMZoTLLlbOd+C17zUDepwGQBb/n+JVg= 267 | golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 268 | golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 269 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 270 | golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 271 | golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= 272 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 273 | golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= 274 | golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 h1:SvFZT6jyqRaOeXpc5h/JSfZenJ2O330aBsf7JfSUXmQ= 275 | golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= 276 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 277 | golang.org/x/tools v0.0.0-20181011042414-1f849cf54d09/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 278 | golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 279 | golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 280 | golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 281 | golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= 282 | golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 283 | golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 284 | golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 285 | golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= 286 | golang.org/x/tools v0.0.0-20190614205625-5aca471b1d59/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= 287 | golang.org/x/tools v0.0.0-20190920225731-5eefd052ad72/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 288 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 289 | gonum.org/v1/gonum v0.0.0-20190331200053-3d26580ed485/go.mod h1:2ltnJ7xHfj0zHS40VVPYEAAMTa3ZGguvHGBSJeRWqE0= 290 | gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= 291 | gonum.org/v1/netlib v0.0.0-20190331212654-76723241ea4e/go.mod h1:kS+toOQn6AQKjmKJ7gzohV1XkqsFehRA2FbsbkopSuQ= 292 | google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= 293 | google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= 294 | google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 295 | google.golang.org/appengine v1.5.0 h1:KxkO13IPW4Lslp2bz+KHP2E3gtFlrIGNThxkZQ3g+4c= 296 | google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 297 | google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= 298 | google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= 299 | google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= 300 | google.golang.org/genproto v0.0.0-20190927181202-20e1ac93f88c/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= 301 | google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= 302 | google.golang.org/grpc v1.24.0/go.mod h1:XDChyiUovWa60DnaeDeZmSW86xtLtjtZbwvSiRnRtcA= 303 | gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc= 304 | gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= 305 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 306 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= 307 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 308 | gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= 309 | gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= 310 | gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= 311 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= 312 | gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 313 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 314 | gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 315 | gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= 316 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 317 | gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= 318 | honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 319 | honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 320 | honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 321 | k8s.io/api v0.17.4 h1:HbwOhDapkguO8lTAE8OX3hdF2qp8GtpC9CW/MQATXXo= 322 | k8s.io/api v0.17.4/go.mod h1:5qxx6vjmwUVG2nHQTKGlLts8Tbok8PzHl4vHtVFuZCA= 323 | k8s.io/apimachinery v0.17.4 h1:UzM+38cPUJnzqSQ+E1PY4YxMHIzQyCg29LOoGfo79Zw= 324 | k8s.io/apimachinery v0.17.4/go.mod h1:gxLnyZcGNdZTCLnq3fgzyg2A5BVCHTNDFrw8AmuJ+0g= 325 | k8s.io/cli-runtime v0.17.4/go.mod h1:IVW4zrKKx/8gBgNNkhiUIc7nZbVVNhc1+HcQh+PiNHc= 326 | k8s.io/client-go v0.17.4 h1:VVdVbpTY70jiNHS1eiFkUt7ZIJX3txd29nDxxXH4en8= 327 | k8s.io/client-go v0.17.4/go.mod h1:ouF6o5pz3is8qU0/qYL2RnoxOPqgfuidYLowytyLJmc= 328 | k8s.io/code-generator v0.17.4/go.mod h1:l8BLVwASXQZTo2xamW5mQNFCe1XPiAesVq7Y1t7PiQQ= 329 | k8s.io/component-base v0.17.4/go.mod h1:5BRqHMbbQPm2kKu35v3G+CpVq4K0RJKC7TRioF0I9lE= 330 | k8s.io/gengo v0.0.0-20190128074634-0689ccc1d7d6/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= 331 | k8s.io/gengo v0.0.0-20190822140433-26a664648505/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= 332 | k8s.io/klog v0.0.0-20181102134211-b9b56d5dfc92/go.mod h1:Gq+BEi5rUBO/HRz0bTSXDUcqjScdoY3a9IHpCEIOOfk= 333 | k8s.io/klog v0.3.0/go.mod h1:Gq+BEi5rUBO/HRz0bTSXDUcqjScdoY3a9IHpCEIOOfk= 334 | k8s.io/klog v1.0.0 h1:Pt+yjF5aB1xDSVbau4VsWe+dQNzA0qv1LlXdC2dF6Q8= 335 | k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I= 336 | k8s.io/kube-openapi v0.0.0-20191107075043-30be4d16710a/go.mod h1:1TqjTSzOxsLGIKfj0lK8EeCP7K1iUG65v09OM0/WG5E= 337 | k8s.io/kubectl v0.17.4 h1:Ts0CvqvIVceS4RTVXgWMH+YqtieLAzyS2T9eoz8uDQ0= 338 | k8s.io/kubectl v0.17.4/go.mod h1:im5QWmh6fvtmJkkNm4HToLe8z9aM3jihYK5X/wOybcY= 339 | k8s.io/metrics v0.17.4/go.mod h1:6rylW2iD3M9VppnEAAtJASY1XS8Pt9tcYh+tHxBeV3I= 340 | k8s.io/utils v0.0.0-20191114184206-e782cd3c129f h1:GiPwtSzdP43eI1hpPCbROQCCIgCuiMMNF8YUVLF3vJo= 341 | k8s.io/utils v0.0.0-20191114184206-e782cd3c129f/go.mod h1:sZAwmy6armz5eXlNoLmJcl4F1QuKu7sr+mFQ0byX7Ew= 342 | modernc.org/cc v1.0.0/go.mod h1:1Sk4//wdnYJiUIxnW8ddKpaOJCF37yAdqYnkxUpaYxw= 343 | modernc.org/golex v1.0.0/go.mod h1:b/QX9oBD/LhixY6NDh+IdGv17hgB+51fET1i2kPSmvk= 344 | modernc.org/mathutil v1.0.0/go.mod h1:wU0vUrJsVWBZ4P6e7xtFJEhFSNsfRLJ8H458uRjg03k= 345 | modernc.org/strutil v1.0.0/go.mod h1:lstksw84oURvj9y3tn8lGvRxyRC1S2+g5uuIzNfIOBs= 346 | modernc.org/xc v1.0.0/go.mod h1:mRNCo0bvLjGhHO9WsyuKVU4q0ceiDDDoEeWDJHrNx8I= 347 | sigs.k8s.io/kustomize v2.0.3+incompatible/go.mod h1:MkjgH3RdOWrievjo6c9T245dYlB5QeXV4WCbnt/PEpU= 348 | sigs.k8s.io/structured-merge-diff v0.0.0-20190525122527-15d366b2352e/go.mod h1:wWxsB5ozmmv/SG7nM11ayaAW51xMvak/t1r0CSlcokI= 349 | sigs.k8s.io/yaml v1.1.0 h1:4A07+ZFc2wgJwo8YNlQpr1rVlgUDlxXHhPJciaPY5gs= 350 | sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= 351 | vbom.ml/util v0.0.0-20160121211510-db5cfe13f5cc/go.mod h1:so/NYdZXCz+E3ZpW0uAoCj6uzU2+8OWDFv/HxUSs7kI= 352 | -------------------------------------------------------------------------------- /pkg/exporter/exporter.go: -------------------------------------------------------------------------------- 1 | package exporter 2 | 3 | import ( 4 | "fmt" 5 | "k8s.io/klog" 6 | "elasticgpu.io/elastic-gpu-exporter/pkg/kubepods" 7 | "elasticgpu.io/elastic-gpu-exporter/pkg/metrics" 8 | "elasticgpu.io/elastic-gpu-exporter/pkg/nvidia" 9 | tree "elasticgpu.io/elastic-gpu-exporter/pkg/ptree" 10 | "elasticgpu.io/elastic-gpu-exporter/pkg/util" 11 | "strconv" 12 | "time" 13 | //"github.com/alex337/go-nvml" 14 | "tkestack.io/nvml" 15 | v1 "k8s.io/api/core/v1" 16 | ) 17 | 18 | const ( 19 | HundredCore = 100 20 | GiBToMiB = 1024 21 | ) 22 | type Exporter struct { 23 | node string 24 | gpuLabels []string 25 | interval time.Duration 26 | 27 | podCache Cache 28 | ptree tree.PTree 29 | collector *metrics.Collector 30 | device *nvidia.DeviceImpl 31 | watcher kubepods.Watcher 32 | } 33 | 34 | func NewExporter(node string, gpuLabels []string, interval time.Duration) *Exporter { 35 | collector := metrics.NewCollector() 36 | collector.Register() 37 | ptree := tree.NewPTree(interval) 38 | podCache := NewCache() 39 | return &Exporter{ 40 | node: node, 41 | gpuLabels: gpuLabels, 42 | interval: interval, 43 | podCache: podCache, 44 | ptree: ptree, 45 | collector: collector, 46 | 47 | watcher: kubepods.NewWatcher(&kubepods.Handler{ 48 | AddFunc: func(pod *v1.Pod) { 49 | podCache.AddPod(string(pod.UID), pod) 50 | ptree.InterestPod(string(pod.UID), util.QoS(pod)) 51 | }, 52 | DelFunc: func(pod *v1.Pod) { 53 | podCache.DelPod(string(pod.UID)) 54 | klog.Info("pod",pod.Name) 55 | ptree.ForgetPod(string(pod.UID)) 56 | }, 57 | }, gpuLabels, node), 58 | } 59 | } 60 | 61 | func (e *Exporter) once() { 62 | nvml.Init() 63 | defer nvml.Shutdown() 64 | cardCount, err := nvml.DeviceGetCount() 65 | klog.Info("Exporter run") 66 | if err != nil{ 67 | klog.Error("Cannot get DeviceGetCount by nvml") 68 | klog.Info(err) 69 | } 70 | 71 | cardUsages := make([]tree.CardUsage, cardCount) 72 | processUsages := make([]map[int]*tree.ProcessUsage, cardCount) 73 | 74 | for i := 0; i < int(cardCount); i++ { 75 | processUsages[i], err = e.device.GetDeviceUsage(i) 76 | if err != nil{ 77 | klog.Errorf("Cannot get processusage in GPU %d", i) 78 | } 79 | } 80 | klog.Info("e.podCache:",e.podCache) 81 | klog.Info("e.ptree.Snapshot():",e.ptree.Snapshot()) 82 | 83 | var totalMem uint64 84 | for i := 0; i < int(cardCount); i++ { 85 | dev, err := nvml.DeviceGetHandleByIndex(uint(i)) 86 | if err != nil{ 87 | klog.Error("DeviceGetHandleByIndex", err) 88 | } 89 | _, _, memTotal, err := dev.DeviceGetMemoryInfo() 90 | totalMem += memTotal >> 20 91 | } 92 | 93 | node := e.ptree.Snapshot() 94 | for _, pod := range node.Pods{ 95 | var podCore, podMem, podCoreRequest, podMemRequest float64 96 | 97 | p, _ := e.podCache.GetPod(pod.UID) 98 | klog.Info("p.Spec.Containers-------:",p.Spec.Containers) 99 | for _,cont := range p.Spec.Containers{ 100 | klog.Info("cont.id",cont.Name) 101 | } 102 | klog.Info("p.Status.ContainerStatuses-------:",p.Status.ContainerStatuses) 103 | containerMap := make(map[string]string) 104 | for _, cont := range p.Status.ContainerStatuses{ 105 | klog.Info("cont.ContainerID",cont.ContainerID) 106 | klog.Info("cont.Name",cont.Name) 107 | 108 | containerMap[cont.ContainerID] = cont.Name 109 | } 110 | ns := p.Namespace 111 | klog.Info("containerMap:",containerMap) 112 | for _, container := range pod.Containers{ 113 | 114 | contName, exist := containerMap[fmt.Sprintf(util.ContainerID,container.ID)] 115 | klog.Info("container.parent",container.Parent) 116 | if !exist { 117 | continue 118 | } 119 | var contCore, contMem float64 120 | for _, proc := range container.Processes{ 121 | for i := 0; i < int(cardCount); i++ { 122 | procUsage, exist := processUsages[i][proc.Pid] 123 | if exist { 124 | contMem += procUsage.GPUMem 125 | contCore += procUsage.GPUCore 126 | //klog.Info("contCore----------------:", contCore) 127 | cardUsages[i].Mem += procUsage.GPUMem 128 | cardUsages[i].Core += procUsage.GPUCore 129 | } 130 | } 131 | } 132 | //contMem /= float64(1024) 133 | podCore += contCore 134 | podMem += contMem 135 | var memRequest, coreRequest float64 136 | for _,cont := range p.Spec.Containers{ 137 | if contName == cont.Name { 138 | memRequest = float64(util.GetGPUMemoryFromContainer(&cont)) 139 | coreRequest = float64(util.GetGPUCoreFromContainer(&cont)) 140 | } 141 | } 142 | podCoreRequest += coreRequest 143 | podMemRequest += memRequest 144 | 145 | var contCoreUtil float64 146 | var contMemUtil float64 147 | if contCore != 0 && coreRequest != 0{ 148 | contCoreUtil = contCore / coreRequest 149 | } 150 | if contMem != 0 && memRequest != 0{ 151 | contMemUtil = (contMem / GiBToMiB) / memRequest 152 | } 153 | klog.Info("contMem:",contMem) 154 | klog.Info("memRequest:",memRequest) 155 | klog.Info("contCore:",contCore) 156 | klog.Info("coreRequest:",coreRequest) 157 | 158 | e.collector.Container(e.node, ns, pod.UID, contName, contCore, contMem, util.Decimal(contCoreUtil * 100), util.Decimal(contMemUtil * 100)) 159 | } 160 | var podMemUtil, podCoreUtil float64 161 | if podMemRequest != 0 && podMem != 0 { 162 | podMemUtil = (podMem / GiBToMiB) / podMemRequest 163 | } 164 | if podCoreRequest != 0 && podCore != 0 { 165 | podCoreUtil = podCore / podCoreRequest 166 | } 167 | 168 | e.collector.Pod(e.node, ns, pod.UID, podCore, podMem, util.Decimal(podCoreUtil * 100), util.Decimal(podMemUtil * 100), podMemRequest * GiBToMiB, util.Decimal(podCore / float64(cardCount * HundredCore) * 100), util.Decimal(podMem / float64(totalMem) * 100)) 169 | } 170 | for i := 0; i < int(cardCount); i++ { 171 | dev, err := nvml.DeviceGetHandleByIndex(uint(i)) 172 | _, memUsed, memTotal, err := dev.DeviceGetMemoryInfo() 173 | //util1, _ := dev.DeviceGetAverageGPUUsage(time.Second) 174 | //klog.Info("util1----------",util1) 175 | 176 | 177 | utilization, err := dev.DeviceGetUtilizationRates() 178 | //klog.Info("util:",utilization.GPU) 179 | 180 | if err != nil { 181 | klog.Error("DeviceGetMemoryInfo", err) 182 | } 183 | 184 | if cardUsages[i].Mem >= 0 || cardUsages[i].Core >= 0 { 185 | e.collector.Card(e.node, strconv.Itoa(i), cardUsages[i].Core, float64(memUsed >> 20), util.Decimal(float64(utilization.GPU )), util.Decimal(float64(memUsed >> 20) / float64(memTotal >> 20) * 100)) 186 | } 187 | } 188 | } 189 | 190 | func (e *Exporter) Run(stop <-chan struct{}) { 191 | go e.ptree.Run(stop) 192 | e.watcher.Run(stop) 193 | util.Loop(e.once, e.interval, stop) 194 | } 195 | -------------------------------------------------------------------------------- /pkg/exporter/podcache.go: -------------------------------------------------------------------------------- 1 | package exporter 2 | 3 | import ( 4 | "sync" 5 | 6 | v1 "k8s.io/api/core/v1" 7 | ) 8 | 9 | type Cache interface { 10 | AddPod(UID string, pod *v1.Pod) 11 | DelPod(UID string) 12 | GetPod(UID string) (*v1.Pod, bool) 13 | } 14 | 15 | type PodCache struct { 16 | cache map[string]*v1.Pod 17 | mu sync.Mutex 18 | } 19 | 20 | func (p *PodCache) AddPod(UID string, pod *v1.Pod) { 21 | p.mu.Lock() 22 | defer p.mu.Unlock() 23 | p.cache[UID] = pod 24 | } 25 | 26 | func (p *PodCache) DelPod(UID string) { 27 | p.mu.Lock() 28 | defer p.mu.Unlock() 29 | delete(p.cache, UID) 30 | } 31 | 32 | func (p *PodCache) GetPod(UID string) (*v1.Pod, bool) { 33 | p.mu.Lock() 34 | defer p.mu.Unlock() 35 | pod, ok := p.cache[UID] 36 | return pod, ok 37 | } 38 | 39 | func NewCache() Cache { 40 | return &PodCache{ 41 | cache: make(map[string]*v1.Pod), 42 | mu: sync.Mutex{}, 43 | } 44 | } 45 | 46 | -------------------------------------------------------------------------------- /pkg/kubepods/kubepods.go: -------------------------------------------------------------------------------- 1 | package kubepods 2 | 3 | import ( 4 | v12 "k8s.io/client-go/listers/core/v1" 5 | "k8s.io/client-go/rest" 6 | "elastic-gpu-exporter/pkg/util" 7 | "time" 8 | 9 | v1 "k8s.io/api/core/v1" 10 | 11 | "k8s.io/client-go/tools/cache" 12 | 13 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 14 | "k8s.io/apimachinery/pkg/fields" 15 | "k8s.io/client-go/informers" 16 | "k8s.io/client-go/kubernetes" 17 | "k8s.io/klog" 18 | ) 19 | 20 | const( 21 | RecommendedKubeConfigPathEnv = "KUBECONFIG" 22 | ) 23 | 24 | type Handler struct { 25 | AddFunc func(pod *v1.Pod) 26 | DelFunc func(pod *v1.Pod) 27 | } 28 | 29 | type Watcher interface { 30 | Run(stop <-chan struct{}) 31 | } 32 | 33 | type KubeWatcher struct { 34 | labelSet map[string]struct{} 35 | node string 36 | client *kubernetes.Clientset 37 | informers informers.SharedInformerFactory 38 | podInformers cache.SharedIndexInformer 39 | podLister v12.PodLister 40 | handler *Handler 41 | } 42 | 43 | func NewWatcher(handler *Handler, gpuLabels []string, node string) Watcher { 44 | //var kubeconfig *string 45 | //if home := homedir.HomeDir(); home != "" { 46 | // kubeconfig = flag.String("kubeconfig", filepath.Join(home, ".kube", "config"), "(optional) absolute path to the kubeconfig file") 47 | //} else { 48 | // kubeconfig = flag.String("kubeconfig", "", "absolute path to the kubeconfig file") 49 | //} 50 | //flag.Parse() 51 | // 52 | //config, err := clientcmd.BuildConfigFromFlags("", *kubeconfig) 53 | //if err != nil { 54 | // klog.Fatalf("Could not get config") 55 | //} 56 | //------------------ 57 | // create the clientset 58 | //clientset, err = kubernetes.NewForConfig(restConfig) 59 | 60 | // Grab a dynamic interface that we can create informers from 61 | //dc, err := dynamic.NewForConfig(cfg) 62 | //if err != nil { 63 | // logrus.WithError(err).Fatal("could not generate dynamic client for config") 64 | //} 65 | 66 | 67 | config, err := rest.InClusterConfig() 68 | if err != nil { 69 | klog.Fatalf("create watcher failed: %s", err.Error()) 70 | } 71 | 72 | //------------ 73 | //kubeConfig := "" 74 | //if len(os.Getenv(RecommendedKubeConfigPathEnv)) > 0 { 75 | // // use the current context in kubeconfig 76 | // // This is very useful for running locally. 77 | // kubeConfig = os.Getenv(RecommendedKubeConfigPathEnv) 78 | //} 79 | // 80 | //// Get kubernetes config. 81 | //restConfig, err := clientcmd.BuildConfigFromFlags("", kubeConfig) 82 | //if err != nil { 83 | // klog.Fatalf("Error building kubeconfig: %s", err.Error()) 84 | //} 85 | // 86 | //// create the clientset 87 | //client, err := kubernetes.NewForConfig(restConfig) 88 | //if err != nil { 89 | // klog.Fatalf("Failed to init rest config due to %v", err) 90 | //} 91 | //-------------- 92 | client, _ := kubernetes.NewForConfig(config) 93 | informersFactory := informers.NewSharedInformerFactoryWithOptions(client, time.Second, informers.WithTweakListOptions(nodeNameFilter(node))) 94 | labelSet := make(map[string]struct{}) 95 | for _, label := range gpuLabels { 96 | labelSet[label] = struct{}{} 97 | } 98 | return &KubeWatcher{ 99 | labelSet: labelSet, 100 | node: node, 101 | client: client, 102 | informers: informersFactory, 103 | podInformers: informersFactory.Core().V1().Pods().Informer(), 104 | handler: handler, 105 | } 106 | } 107 | 108 | func (w *KubeWatcher) Run(stop <-chan struct{}) { 109 | w.podInformers.AddEventHandler(cache.ResourceEventHandlerFuncs{ 110 | AddFunc: func(obj interface{}) { 111 | pod, ok := obj.(*v1.Pod) 112 | if !ok { 113 | klog.Errorf("Cannot convert to *v1.Pod: %t %v", obj, obj) 114 | return 115 | } 116 | if !util.PodHasResource(pod, w.labelSet) { 117 | return 118 | } 119 | w.handler.AddFunc(pod) 120 | }, 121 | DeleteFunc: func(obj interface{}) { 122 | pod, ok := obj.(*v1.Pod) 123 | if !ok { 124 | klog.Errorf("Cannot convert to *v1.Pod: %t %v", obj, obj) 125 | return 126 | } 127 | if !util.PodHasResource(pod, w.labelSet) { 128 | return 129 | } 130 | w.handler.DelFunc(pod) 131 | }, 132 | }) 133 | w.informers.Start(stop) 134 | w.informers.WaitForCacheSync(stop) 135 | } 136 | 137 | func nodeNameFilter(nodeName string) func(options *metav1.ListOptions) { 138 | return func(options *metav1.ListOptions) { 139 | options.FieldSelector = fields.OneTermEqualSelector(util.NodeNameField, nodeName).String() 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /pkg/metrics/metrics.go: -------------------------------------------------------------------------------- 1 | package metrics 2 | 3 | import "github.com/prometheus/client_golang/prometheus" 4 | 5 | type Collector struct { 6 | GPUCore *prometheus.GaugeVec 7 | GPUCoreUtil *prometheus.GaugeVec 8 | GPUMem *prometheus.GaugeVec 9 | GPUMemUtil *prometheus.GaugeVec 10 | PodCore *prometheus.GaugeVec 11 | PodCoreUtil *prometheus.GaugeVec 12 | PodCoreOccupyNode *prometheus.GaugeVec 13 | PodMem *prometheus.GaugeVec 14 | PodMemUtil *prometheus.GaugeVec 15 | PodMemOccupyNode *prometheus.GaugeVec 16 | PodMemRequest *prometheus.GaugeVec 17 | ContainerCore *prometheus.GaugeVec 18 | ContainerCoreUtil *prometheus.GaugeVec 19 | ContainerMem *prometheus.GaugeVec 20 | ContainerMemUtil *prometheus.GaugeVec 21 | } 22 | 23 | func NewCollector() *Collector { 24 | return &Collector{ 25 | GPUCore: prometheus.NewGaugeVec( 26 | prometheus.GaugeOpts{ 27 | Name: "gpu_core_usage", 28 | Help: "Usage of gpu core per card", 29 | }, 30 | []string{"node","card"}, 31 | ), 32 | GPUCoreUtil: prometheus.NewGaugeVec( 33 | prometheus.GaugeOpts{ 34 | Name: "gpu_core_utilization", 35 | Help: "Utilization of gpu core per card", 36 | }, 37 | []string{"node","card"}, 38 | ), 39 | GPUMem: prometheus.NewGaugeVec( 40 | prometheus.GaugeOpts{ 41 | Name: "gpu_mem_usage", 42 | Help: "Usage of gpu memory per card", 43 | }, 44 | []string{"node","card"}, 45 | ), 46 | GPUMemUtil: prometheus.NewGaugeVec( 47 | prometheus.GaugeOpts{ 48 | Name: "gpu_mem_utilization", 49 | Help: "Utilization of gpu memory per card", 50 | }, 51 | []string{"node","card"}, 52 | ), 53 | PodCore: prometheus.NewGaugeVec( 54 | prometheus.GaugeOpts{ 55 | Name: "pod_core_usage", 56 | Help: "Usage of gpu core per pod", 57 | }, 58 | []string{"node","namespace", "pod"}, 59 | ), 60 | PodCoreUtil: prometheus.NewGaugeVec( 61 | prometheus.GaugeOpts{ 62 | Name: "pod_core_utilization", 63 | Help: "Utilization of gpu core", 64 | }, 65 | []string{"node","namespace", "pod"}, 66 | ), 67 | PodCoreOccupyNode: prometheus.NewGaugeVec( 68 | prometheus.GaugeOpts{ 69 | Name: "pod_core_occupy_node", 70 | Help: "Utilization of pod core occupied the node", 71 | }, 72 | []string{"node","namespace", "pod"}, 73 | ), 74 | PodMem: prometheus.NewGaugeVec( 75 | prometheus.GaugeOpts{ 76 | Name: "pod_mem_usage", 77 | Help: "Usage of gpu memory per pod", 78 | }, 79 | []string{"node","namespace", "pod"}, 80 | ), 81 | PodMemUtil: prometheus.NewGaugeVec( 82 | prometheus.GaugeOpts{ 83 | Name: "pod_mem_utilization", 84 | Help: "Utilization of pod memory", 85 | }, 86 | []string{"node","namespace", "pod"}, 87 | ), 88 | PodMemOccupyNode: prometheus.NewGaugeVec( 89 | prometheus.GaugeOpts{ 90 | Name: "pod_mem_occupy_node", 91 | Help: "Utilization of pod memory occupied the node", 92 | }, 93 | []string{"node","namespace", "pod"}, 94 | ), 95 | PodMemRequest: prometheus.NewGaugeVec( 96 | prometheus.GaugeOpts{ 97 | Name: "pod_mem_request", 98 | Help: "Request of pod memory", 99 | }, 100 | []string{"node","namespace", "pod"}, 101 | ), 102 | ContainerCore: prometheus.NewGaugeVec( 103 | prometheus.GaugeOpts{ 104 | Name: "container_core_usage", 105 | Help: "Usage of gpu computing per container", 106 | }, 107 | []string{"node","namespace", "pod", "container"}, 108 | ), 109 | ContainerCoreUtil: prometheus.NewGaugeVec( 110 | prometheus.GaugeOpts{ 111 | Name: "container_core_utilization", 112 | Help: "Utilization of container core", 113 | }, 114 | []string{"node","namespace", "pod", "container"}, 115 | ), 116 | ContainerMem: prometheus.NewGaugeVec( 117 | prometheus.GaugeOpts{ 118 | Name: "container_mem_usage", 119 | Help: "Usage of gpu memory per container", 120 | }, 121 | []string{"node","namespace", "pod", "container"}, 122 | ), 123 | ContainerMemUtil: prometheus.NewGaugeVec( 124 | prometheus.GaugeOpts{ 125 | Name: "container_mem_utilization", 126 | Help: "Utilization of container memory", 127 | }, 128 | []string{"node","namespace", "pod", "container"}, 129 | ), 130 | } 131 | } 132 | 133 | func (c *Collector) Register() { 134 | prometheus.MustRegister(c.PodMem) 135 | prometheus.MustRegister(c.PodMemUtil) 136 | prometheus.MustRegister(c.PodMemOccupyNode) 137 | prometheus.MustRegister(c.PodMemRequest) 138 | prometheus.MustRegister(c.PodCore) 139 | prometheus.MustRegister(c.PodCoreUtil) 140 | prometheus.MustRegister(c.PodCoreOccupyNode) 141 | prometheus.MustRegister(c.GPUMem) 142 | prometheus.MustRegister(c.GPUMemUtil) 143 | prometheus.MustRegister(c.GPUCore) 144 | prometheus.MustRegister(c.GPUCoreUtil) 145 | prometheus.MustRegister(c.ContainerCore) 146 | prometheus.MustRegister(c.ContainerCoreUtil) 147 | prometheus.MustRegister(c.ContainerMem) 148 | prometheus.MustRegister(c.ContainerMemUtil) 149 | 150 | 151 | } 152 | 153 | func (c *Collector) Card(node, id string, core, mem, coreUtil, memUtil float64) { 154 | c.GPUCore.WithLabelValues(node, id).Set(core) 155 | c.GPUMem.WithLabelValues(node,id).Set(mem) 156 | c.GPUCoreUtil.WithLabelValues(node,id).Set(coreUtil) 157 | c.GPUMemUtil.WithLabelValues(node,id).Set(memUtil) 158 | 159 | 160 | 161 | } 162 | 163 | func (c *Collector) Pod(node, namespace, name string, core, mem, coreUtil, memUtil, memRequest, coreOccupy, memOccupy float64) { 164 | c.PodCore.WithLabelValues(node, namespace, name).Set(core) 165 | c.PodMem.WithLabelValues(node, namespace, name).Set(mem) 166 | c.PodMemRequest.WithLabelValues(node, namespace, name).Set(memRequest) 167 | c.PodMemUtil.WithLabelValues(node, namespace, name).Set(memUtil) 168 | c.PodCoreUtil.WithLabelValues(node, namespace, name).Set(coreUtil) 169 | c.PodMemOccupyNode.WithLabelValues(node, namespace, name).Set(memOccupy) 170 | c.PodCoreOccupyNode.WithLabelValues(node, namespace, name).Set(coreOccupy) 171 | 172 | 173 | } 174 | 175 | func (c *Collector) Container(node, namespace, pod, container string, core, mem, coreUtil, memUtil float64) { 176 | c.ContainerCore.WithLabelValues(node, namespace, pod, container).Set(core) 177 | c.ContainerMem.WithLabelValues(node, namespace, pod, container).Set(mem) 178 | c.ContainerCoreUtil.WithLabelValues(node, namespace, pod, container).Set(coreUtil) 179 | c.ContainerMemUtil.WithLabelValues(node, namespace, pod, container).Set(memUtil) 180 | 181 | } 182 | 183 | 184 | -------------------------------------------------------------------------------- /pkg/nvidia/device.go: -------------------------------------------------------------------------------- 1 | package nvidia 2 | 3 | import ( 4 | "k8s.io/klog" 5 | process "elastic-gpu-exporter/pkg/ptree" 6 | "time" 7 | //"github.com/alex337/go-nvml" 8 | "tkestack.io/nvml" 9 | ) 10 | 11 | type Device interface { 12 | GetDeviceUsage(cardNum int) (map[int]*process.ProcessUsage,error) 13 | } 14 | 15 | type DeviceImpl struct { 16 | } 17 | 18 | func (device *DeviceImpl) GetDeviceUsage(cardNum int) (map[int]*process.ProcessUsage, error ){ 19 | nvml.Init() 20 | defer nvml.Shutdown() 21 | dev, _ := nvml.DeviceGetHandleByIndex(uint(cardNum)) 22 | processOnDevices, err := dev.DeviceGetComputeRunningProcesses(1024) 23 | if err != nil { 24 | klog.Warningf("Can't get processes info from device %d, error %s", uint(cardNum), err) 25 | return nil, err 26 | } 27 | usageMap := make(map[int]*process.ProcessUsage) 28 | for _, info := range processOnDevices { 29 | _, exit := usageMap[int(info.Pid)] 30 | if !exit { 31 | usageMap[int(info.Pid)] = new(process.ProcessUsage) 32 | } 33 | usageMap[int(info.Pid)].GPUMem = float64(info.UsedGPUMemory >> 20) 34 | } 35 | processUtilization, err := dev.DeviceGetProcessUtilization(1024, time.Second) 36 | if err != nil { 37 | klog.Warningf("Can't get processes utilization from device %d, error %s", uint(cardNum), err) 38 | return nil, err 39 | } 40 | for _, info := range processUtilization { 41 | _, exit := usageMap[int(info.Pid)] 42 | if !exit { 43 | usageMap[int(info.Pid)] = new(process.ProcessUsage) 44 | } 45 | usageMap[int(info.Pid)].GPUCore = float64(info.SmUtil) 46 | } 47 | return usageMap, nil 48 | } 49 | 50 | func (device *DeviceImpl) getPidUsage(pid int) (*process.ProcessUsage, error) { 51 | nvml.Init() 52 | defer nvml.Shutdown() 53 | num, err := nvml.DeviceGetCount() 54 | if err != nil { 55 | return nil, err 56 | } 57 | var usedMemory float64 58 | var usedCore float64 59 | 60 | for i := 0; i < int(num); i++ { 61 | dev, _ := nvml.DeviceGetHandleByIndex(uint(i)) 62 | processOnDevices, err := dev.DeviceGetComputeRunningProcesses(1024) 63 | if err != nil { 64 | klog.Warningf("Can't get processes info from device %d, error %s", uint(i), err) 65 | return nil, err 66 | } 67 | for _, info := range processOnDevices { 68 | if int(info.Pid) == pid { 69 | usedMemory = float64(info.UsedGPUMemory >> 20) 70 | } 71 | } 72 | processUtilizations, err := dev.DeviceGetProcessUtilization(1024, time.Second) 73 | if err != nil { 74 | klog.Warningf("Can't get processes utilization from device %d, error %s", uint(i), err) 75 | return nil, err 76 | } 77 | for _, info := range processUtilizations { 78 | if int(info.Pid) == pid { 79 | usedCore = float64(info.SmUtil) 80 | } 81 | } 82 | return &process.ProcessUsage{ 83 | GPUMem: usedMemory, 84 | GPUCore: usedCore, 85 | }, nil 86 | } 87 | return nil, err 88 | } -------------------------------------------------------------------------------- /pkg/ptree/ptree.go: -------------------------------------------------------------------------------- 1 | package ptree 2 | 3 | import ( 4 | "fmt" 5 | "elastic-gpu-exporter/pkg/util" 6 | "strings" 7 | "sync" 8 | "time" 9 | 10 | "k8s.io/klog" 11 | ) 12 | 13 | // PTree is a common interface to detect the tree such as: 14 | // node -> pods -> containers -> processes 15 | 16 | const count int = 0 17 | 18 | type PTree interface { 19 | Run(stop <-chan struct{}) 20 | InterestPod(UID, QOS string) 21 | ForgetPod(UID string) 22 | Snapshot() *Node 23 | LastUpdate() time.Time 24 | } 25 | 26 | type PTreeImpl struct { 27 | interval time.Duration 28 | mu sync.Mutex 29 | interestingPods map[string]string 30 | nodeSnapshot *Node 31 | lastUpdate time.Time 32 | scanner Scanner 33 | } 34 | 35 | func NewPTree(interval time.Duration) *PTreeImpl { 36 | return &PTreeImpl{ 37 | interval: interval, 38 | mu: sync.Mutex{}, 39 | interestingPods: make(map[string]string), 40 | nodeSnapshot: NewNode(), 41 | lastUpdate: time.Now(), 42 | scanner: NewScanner(), 43 | } 44 | } 45 | 46 | func (p *PTreeImpl) Run(stop <-chan struct{}) { 47 | util.Loop(func() { 48 | if err := p.nextSnapshot(); err != nil { 49 | klog.Error(err.Error()) 50 | } 51 | }, p.interval, stop) 52 | } 53 | 54 | func (p *PTreeImpl) InterestPod(UID string, QOS string) { 55 | p.mu.Lock() 56 | defer p.mu.Unlock() 57 | p.interestingPods[UID] = QOS 58 | } 59 | 60 | func (p *PTreeImpl) ForgetPod(UID string) { 61 | p.mu.Lock() 62 | defer p.mu.Unlock() 63 | //for { 64 | // 65 | //} 66 | delete(p.interestingPods, UID) 67 | } 68 | 69 | func (p *PTreeImpl) Snapshot() *Node { 70 | p.mu.Lock() 71 | defer p.mu.Unlock() 72 | return p.nodeSnapshot 73 | } 74 | 75 | func (p *PTreeImpl) LastUpdate() time.Time { 76 | p.mu.Lock() 77 | defer p.mu.Unlock() 78 | return p.lastUpdate 79 | } 80 | 81 | func (p *PTreeImpl) nextSnapshot() error { 82 | var ( 83 | pods = p.interesting() 84 | errors = []string{} 85 | snapshot = NewNode() 86 | ) 87 | for UID, QOS := range pods { 88 | if pod, err := p.scanner.Scan(UID, QOS); err != nil { 89 | errors = append(errors, err.Error()) 90 | } else { 91 | snapshot.addPod(&pod) 92 | } 93 | } 94 | klog.Info("time:",time.Now()) 95 | klog.Info("nextSnapshot-------",snapshot) 96 | 97 | p.mu.Lock() 98 | defer p.mu.Unlock() 99 | p.nodeSnapshot = snapshot 100 | p.lastUpdate = time.Now() 101 | if len(errors) == 0 { 102 | return nil 103 | } 104 | return fmt.Errorf("%d errors: %s", len(errors), strings.Join(errors, "; ")) 105 | } 106 | 107 | func (p *PTreeImpl) interesting() map[string]string { 108 | p.mu.Lock() 109 | defer p.mu.Unlock() 110 | pods := map[string]string{} 111 | for UID, QOS := range p.interestingPods { 112 | pods[UID] = QOS 113 | } 114 | return pods 115 | } 116 | -------------------------------------------------------------------------------- /pkg/ptree/scanner.go: -------------------------------------------------------------------------------- 1 | package ptree 2 | 3 | import ( 4 | "bufio" 5 | "io/ioutil" 6 | "k8s.io/klog" 7 | "os" 8 | "path" 9 | "path/filepath" 10 | "regexp" 11 | "strconv" 12 | ) 13 | const( 14 | QOSGuaranteed = "guaranteed" 15 | QOSBurstable = "burstable" 16 | QOSBestEffort = "besteffort" 17 | CgroupBase = "/host/sys/fs/cgroup/memory" 18 | PodPrefix = "pod" 19 | CgroupProcs = "cgroup.procs" 20 | kubeRoot = "kubepods" 21 | ) 22 | 23 | var ( 24 | validShortID = regexp.MustCompile("^[a-f0-9]{64}$") 25 | ) 26 | 27 | func IsContainerID(id string) bool { 28 | return validShortID.MatchString(id) 29 | } 30 | 31 | type Scanner interface { 32 | Scan(UID, QOS string) (Pod, error) 33 | } 34 | 35 | type ScannerImpl struct{ 36 | pod *Pod 37 | container *Container 38 | } 39 | 40 | func NewScanner() Scanner { 41 | return &ScannerImpl{ 42 | pod: NewP(), 43 | container: NewC(), 44 | } 45 | } 46 | 47 | type CgroupName []string 48 | 49 | func (scan *ScannerImpl) Scan(UID, QOS string) (Pod, error) { 50 | //scan.pod.Containers = make(map[string]*Container) 51 | //scan.container.Processes = make(map[int]*Process) 52 | pod, err := scan.getContainers(NewPod(QOS, UID)) 53 | if err != nil { 54 | klog.Errorf("Cannot scan pod: pod%s, %v", UID, err) 55 | return Pod{}, err 56 | } 57 | return *pod, nil 58 | } 59 | 60 | func (scan *ScannerImpl) getContainers(p *Pod) (*Pod, error) { 61 | podPath := scan.getPodPath(p.UID, p.QOS) 62 | basePodPath := filepath.Clean(filepath.Join(CgroupBase, podPath)) 63 | containers, err := scan.readContainerFile(basePodPath, p) 64 | if err !=nil { 65 | klog.Errorf("Cannot read the containers in the pod: pod%s, %v", p.UID, err) 66 | return nil, err 67 | } 68 | return &Pod{ 69 | UID: p.UID, 70 | QOS: p.QOS, 71 | Containers: containers, 72 | },nil 73 | } 74 | 75 | //getPodPath is to get the path of the pod ,such as:kubepods/besteffort/pod17eb80b0-6085-4d12-8e79-553e799d2f0b 76 | func (scan *ScannerImpl) getPodPath(UID string, QOS string) (podPath string) { 77 | var parentPath CgroupName 78 | switch QOS { 79 | case QOSGuaranteed: 80 | parentPath = append(parentPath,kubeRoot) 81 | case QOSBurstable: 82 | parentPath = append(parentPath, kubeRoot, QOSBurstable) 83 | case QOSBestEffort: 84 | parentPath = append(parentPath, kubeRoot, QOSBestEffort) 85 | } 86 | podContainer := PodPrefix + UID 87 | parentPath = append(parentPath,podContainer) 88 | podPath = scan.transformToPath(parentPath) 89 | return podPath 90 | } 91 | 92 | func (scan *ScannerImpl) transformToPath(cgroupName CgroupName) string { 93 | return "/" + path.Join(cgroupName...) 94 | } 95 | 96 | func (scan *ScannerImpl)readContainerFile(podPath string, pod *Pod) (map[string]*Container, error) { 97 | fileList, err := ioutil.ReadDir(podPath) 98 | klog.Info("podpath-------",podPath) 99 | if err != nil { 100 | klog.Errorf("Can't read %s, %v", podPath, err) 101 | return nil, err 102 | } 103 | for _,file :=range fileList { 104 | klog.Info("file-----------:",file) 105 | containerId := file.Name() 106 | if IsContainerID(containerId) { 107 | scan.pod.AddContainer(containerId) 108 | scan.pod.Containers[containerId] = &Container{ 109 | ID: containerId, 110 | Parent: pod, 111 | } 112 | procPath := filepath.Join(podPath, containerId, CgroupProcs) 113 | process, err := scan.readPidFile(procPath, scan.pod.Containers[containerId]) 114 | if err != nil { 115 | klog.Errorf("Cannot read the pid in the container: %s, %v", containerId, err) 116 | return nil, err 117 | } 118 | scan.pod.Containers[containerId].Processes = process 119 | } 120 | } 121 | return scan.pod.Containers, nil 122 | } 123 | 124 | func (scan *ScannerImpl)readPidFile(procPath string, container *Container) (map[int]*Process, error) { 125 | file, err := os.Open(procPath) 126 | if err != nil { 127 | klog.Errorf("Cannot read %s, %v", procPath, err) 128 | return nil, err 129 | } 130 | defer file.Close() 131 | scanner := bufio.NewScanner(file) 132 | for scanner.Scan() { 133 | line := scanner.Text() 134 | if pid, err := strconv.Atoi(line); err == nil { 135 | scan.container.AddProcess(pid) 136 | scan.container.Processes[pid] = &Process{ 137 | Pid: pid, 138 | Parent: container, 139 | } 140 | } 141 | } 142 | klog.V(4).Infof("Read from %s, pids", procPath, scan.container.Processes) 143 | return scan.container.Processes, nil 144 | } -------------------------------------------------------------------------------- /pkg/ptree/scanner_cgroup.go: -------------------------------------------------------------------------------- 1 | package ptree 2 | 3 | // TODO: implement Scanner here -------------------------------------------------------------------------------- /pkg/ptree/types.go: -------------------------------------------------------------------------------- 1 | package ptree 2 | 3 | type Node struct { 4 | Pods map[string]*Pod 5 | Containers map[string]*Container 6 | Processes map[int]*Process 7 | } 8 | 9 | func NewNode() *Node { 10 | return &Node{ 11 | Pods: make(map[string]*Pod), 12 | Containers: make(map[string]*Container), 13 | Processes: make(map[int]*Process), 14 | } 15 | } 16 | 17 | func NewP() *Pod { 18 | return &Pod{ 19 | Containers: make(map[string]*Container), 20 | } 21 | } 22 | 23 | func NewC() *Container { 24 | return &Container{ 25 | Processes: make(map[int]*Process), 26 | } 27 | } 28 | 29 | func (n *Node) GetProcessByPid(pid int) (p *Process, exist bool) { 30 | if process, ok := n.Processes[pid]; ok { 31 | return process, true 32 | } 33 | return nil, false 34 | } 35 | 36 | func (n *Node) addPod(pod *Pod) { 37 | for _, c := range pod.Containers { 38 | n.addContainer(c) 39 | } 40 | n.Pods[pod.UID] = pod 41 | pod.Parent = n 42 | } 43 | 44 | func (n *Node) addContainer(container *Container) { 45 | for _, p := range container.Processes { 46 | n.addProcess(p) 47 | } 48 | n.Containers[container.ID] = container 49 | } 50 | 51 | func (n *Node) addProcess(process *Process) { 52 | n.Processes[process.Pid] = process 53 | } 54 | 55 | type Pod struct { 56 | QOS string 57 | UID string 58 | Parent *Node 59 | Containers map[string]*Container 60 | } 61 | 62 | func NewPod(QOS, UID string) *Pod { 63 | return &Pod{ 64 | QOS: QOS, 65 | UID: UID, 66 | Containers: make(map[string]*Container), 67 | } 68 | } 69 | 70 | func (p *Pod) AddContainer(ID string) *Container { 71 | p.Containers[ID] = &Container{ 72 | ID: ID, 73 | Parent: p, 74 | Processes: make(map[int]*Process), 75 | } 76 | return p.Containers[ID] 77 | } 78 | 79 | type Container struct { 80 | ID string 81 | Parent *Pod 82 | Processes map[int]*Process 83 | } 84 | 85 | func (c *Container) AddProcess(pid int) { 86 | c.Processes[pid] = &Process{ 87 | Pid: pid, 88 | Parent: c, 89 | } 90 | } 91 | 92 | type Process struct { 93 | Pid int 94 | Parent *Container 95 | } 96 | 97 | type ProcessUsage struct { 98 | GPUCore float64 99 | GPUMem float64 100 | } 101 | 102 | type CardUsage struct { 103 | Core float64 104 | Mem float64 105 | } -------------------------------------------------------------------------------- /pkg/util/config.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | const ( 4 | NodeNameField = "spec.nodeName" 5 | ContainerID = "docker://%s" 6 | ResourceGPUMemory = "tke.cloud.tencent.com/qgpu-memory" 7 | ResourceGPUCore = "tke.cloud.tencent.com/qgpu-core" 8 | ) 9 | 10 | -------------------------------------------------------------------------------- /pkg/util/kube.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "strings" 5 | 6 | v1 "k8s.io/api/core/v1" 7 | "k8s.io/kubectl/pkg/util/qos" 8 | ) 9 | 10 | func QoS(pod *v1.Pod) string { 11 | podQoS := pod.Status.QOSClass 12 | if podQoS == "" { 13 | podQoS = qos.GetPodQOS(pod) 14 | } 15 | return strings.ToLower(string(podQoS)) 16 | } 17 | 18 | func PodHasResource(pod *v1.Pod, set map[string]struct{}) bool { 19 | for _, container := range pod.Spec.Containers { 20 | for name, _ := range container.Resources.Limits { 21 | if _, ok := set[name.String()]; ok { 22 | return true 23 | } 24 | } 25 | } 26 | return false 27 | } 28 | -------------------------------------------------------------------------------- /pkg/util/util.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "fmt" 5 | v1 "k8s.io/api/core/v1" 6 | "strconv" 7 | "time" 8 | ) 9 | 10 | var NeverStop = make(chan struct{}) 11 | 12 | // TODO: add recover 13 | func Loop(f func(), duration time.Duration, stop <-chan struct{}) { 14 | for range time.Tick(duration) { 15 | select { 16 | case <- stop: 17 | return 18 | default: 19 | f() 20 | } 21 | } 22 | } 23 | 24 | func GetGPUCoreFromContainer(container *v1.Container) int { 25 | val, ok := container.Resources.Limits[ResourceGPUCore] 26 | if !ok { 27 | return 0 28 | } 29 | return int(val.Value()) 30 | } 31 | 32 | func GetGPUMemoryFromContainer(container *v1.Container) int { 33 | val, ok := container.Resources.Limits[ResourceGPUMemory] 34 | if !ok { 35 | return 0 36 | } 37 | return int(val.Value()) 38 | } 39 | 40 | func Decimal(value float64) float64 { 41 | value, _ = strconv.ParseFloat(fmt.Sprintf("%.2f", value), 64) 42 | return value 43 | } 44 | 45 | --------------------------------------------------------------------------------