├── .gitignore ├── .github ├── ISSUE_TEMPLATE │ ├── question.md │ ├── bug-report.md │ └── feature-request..md └── workflows │ └── container.yaml ├── deploy ├── Dockerfile ├── example-pod.yaml └── biren-device-plugin.yaml ├── .pre-commit-config.yaml ├── license_header.txt ├── go.mod ├── pkg ├── brgpu │ ├── cdi_test.go │ ├── allocator.go │ ├── manager.go │ ├── runc.go │ ├── cdi.go │ ├── kata.go │ └── plugin.go └── utils │ ├── client.go │ ├── topo.go │ └── topo_test.go ├── Makefile ├── cmd ├── manager.go └── app │ └── server.go ├── debug └── topo │ └── topo.go ├── README.md └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | k8s-device* 2 | *.swp 3 | device-plugin.tar 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F680 Question" 3 | about: Ask a question about Kratos. 4 | title: "[Question]" 5 | labels: question 6 | --- 7 | 8 | ## Question 9 | -------------------------------------------------------------------------------- /deploy/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.20 AS builder 2 | 3 | ARG build_arch 4 | 5 | COPY . 
/src 6 | WORKDIR /src 7 | 8 | RUN if [ "$build_arch" = "arm64" ]; then \ 9 | make build-arm ; \ 10 | else \ 11 | make build ; \ 12 | fi 13 | 14 | 15 | FROM ubuntu:22.04 16 | 17 | COPY --from=builder /src/k8s-device-plugin /root/k8s-device-plugin 18 | 19 | WORKDIR /root/ 20 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/Lucas-C/pre-commit-hooks 3 | rev: v1.5.1 4 | hooks: 5 | - id: insert-license 6 | files: \.go$ 7 | args: 8 | - --license-filepath 9 | - license_header.txt 10 | - --comment-style 11 | - // 12 | - --use-current-year 13 | - --no-extra-eol -------------------------------------------------------------------------------- /deploy/example-pod.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | generateName: pod- 5 | spec: 6 | restartPolicy: OnFailure 7 | containers: 8 | - image: ubuntu 9 | name: pod1-ctr 10 | command: ["sleep"] 11 | args: ["infinity"] 12 | resources: 13 | requests: 14 | birentech.com/gpu: 4 15 | limits: 16 | birentech.com/gpu: 4 17 | # birentech.com/1-4-gpu: 1 18 | # birentech.com/1-2-gpu: 1 19 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F41B Bug Report" 3 | about: As a User, I want to report a Bug. 4 | title: "[Bug Report]" 5 | labels: bug 6 | --- 7 | 8 | 9 | ## Bug Report 10 | 11 | Please answer these questions before submitting your issue. Thanks! 12 | 13 | ### 1. What happened: 14 | 15 | ### 2. How to reproduce it (as minimally and precisely as possible): 16 | 17 | ### 3. Anything else we need to know?: 18 | 19 | ### 4. 
What is your version?: -------------------------------------------------------------------------------- /license_header.txt: -------------------------------------------------------------------------------- 1 | Copyright 2024 Shanghai Biren Technology Co., Ltd. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/BirenTechnology/k8s-device-plugin 2 | 3 | go 1.16 4 | 5 | require ( 6 | github.com/BirenTechnology/go-brml v0.0.0-20240612073547-7d6adadc1c0b 7 | github.com/imdario/mergo v0.3.13 // indirect 8 | github.com/kubevirt/device-plugin-manager v1.19.4 9 | github.com/onsi/ginkgo v1.16.5 // indirect 10 | github.com/sirupsen/logrus v1.9.0 11 | github.com/spf13/cobra v1.7.0 12 | github.com/spf13/pflag v1.0.5 13 | github.com/stretchr/testify v1.8.3 14 | google.golang.org/grpc v1.56.3 15 | k8s.io/client-go v0.28.4 16 | k8s.io/kubelet v0.28.4 17 | sigs.k8s.io/yaml v1.3.0 18 | tags.cncf.io/container-device-interface/specs-go v0.6.0 19 | ) 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request..md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F4A1 Feature Request" 3 | about: As a user, I want to request a New Feature on the product. 
4 | title: "[Feature Request]" 5 | labels: feature-request 6 | --- 7 | 8 | ## Feature Request 9 | 10 | ### Is your feature request related to a problem? Please describe: 11 | 12 | 13 | ### Describe the feature you'd like: 14 | 15 | 16 | ### Describe alternatives you've considered: 17 | 18 | 19 | ### Teachability, Documentation, Adoption, Migration Strategy: 20 | 21 | -------------------------------------------------------------------------------- /pkg/brgpu/cdi_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Shanghai Biren Technology Co., Ltd. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | package brgpu 15 | 16 | import ( 17 | "testing" 18 | 19 | "github.com/BirenTechnology/go-brml/brml" 20 | log "github.com/sirupsen/logrus" 21 | ) 22 | 23 | func TestGenerateFile(t *testing.T) { 24 | err := brml.Init() 25 | if err != nil { 26 | log.Error(err) 27 | } 28 | defer brml.Shutdown() 29 | 30 | err = generateConfigCdiFile(RuntimeRunc) 31 | if err != nil { 32 | t.Error(err) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | REPO ?= ghcr.io/BirenTechnology 2 | PROJECT ?= k8s-device-plugin 3 | BUILD_ENV?= 4 | tag=$(shell git describe --abbrev=0 --tags) 5 | VERSION=$(shell git describe --tags --always) 6 | 7 | image-build: 8 | docker build --build-arg build_arch=amd64 -t $(REPO)/$(PROJECT):$(VERSION) -f deploy/Dockerfile . 9 | 10 | image-build-arm: 11 | docker build --build-arg build_arch=arm64 -t $(REPO)/$(PROJECT):$(VERSION)-arm64 -f deploy/Dockerfile . 
12 | 13 | push: 14 | docker push $(REPO)/$(PROJECT):$(VERSION) 15 | 16 | 17 | build: 18 | ${BUILD_ENV} GOOS=linux GOARCH=amd64 CGO_ENABLED=1 go build -ldflags="-X 'main.Version=$(VERSION)' -X 'main.Time=$(shell LC_TIME=en_US.UTF-8 date)' -X 'main.Commit=$(shell git rev-parse --short HEAD)'" -o k8s-device-plugin cmd/manager.go 19 | ${BUILD_ENV} GOOS=linux GOARCH=amd64 CGO_ENABLED=1 go build -ldflags="-X 'main.Version=$(VERSION)' -X 'main.Time=$(shell LC_TIME=en_US.UTF-8 date)' -X 'main.Commit=$(shell git rev-parse --short HEAD)'" -o k8s-device-topo debug/topo/topo.go 20 | 21 | build-arm: 22 | ${BUILD_ENV} GOOS=linux GOARCH=arm64 CGO_ENABLED=1 go build -ldflags="-X 'main.Version=$(VERSION)' -X 'main.Time=$(shell LC_TIME=en_US.UTF-8 date)' -X 'main.Commit=$(shell git rev-parse --short HEAD)'" -o k8s-device-plugin cmd/manager.go -------------------------------------------------------------------------------- /cmd/manager.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Shanghai Biren Technology Co., Ltd. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | package main 15 | 16 | import ( 17 | "math/rand" 18 | "os" 19 | "time" 20 | 21 | "github.com/BirenTechnology/k8s-device-plugin/cmd/app" 22 | log "github.com/sirupsen/logrus" 23 | ) 24 | 25 | func init() { 26 | formatter := &log.TextFormatter{} 27 | formatter.DisableQuote = true 28 | log.SetFormatter(formatter) 29 | log.SetOutput(os.Stdout) 30 | } 31 | 32 | var ( 33 | Version string 34 | Commit string 35 | Time string 36 | ) 37 | 38 | func main() { 39 | log.Info("Biren GPU Device Plugin Start") 40 | log.Infof("Version: %s;Commit: %s; Build At: %s", Version, Commit, Time) 41 | rand.Seed(time.Now().UnixNano()) 42 | 43 | command := app.NewManagerCommand() 44 | 45 | if err := command.Execute(); err != nil { 46 | os.Exit(1) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /pkg/utils/client.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Shanghai Biren Technology Co., Ltd. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | // 15 | package utils 16 | 17 | import ( 18 | "os" 19 | "path/filepath" 20 | 21 | "k8s.io/client-go/kubernetes" 22 | "k8s.io/client-go/rest" 23 | "k8s.io/client-go/tools/clientcmd" 24 | "k8s.io/client-go/util/homedir" 25 | ) 26 | 27 | type Client struct { 28 | K8s *kubernetes.Clientset 29 | } 30 | 31 | func NewClient(inCluster bool) (Client, error) { 32 | var config *rest.Config 33 | var err error 34 | switch inCluster { 35 | case false: 36 | kubeconfig := filepath.Join(homedir.HomeDir(), ".kube", "config") 37 | config, err = clientcmd.BuildConfigFromFlags("", kubeconfig) 38 | case true: 39 | config, err = rest.InClusterConfig() 40 | } 41 | if err != nil { 42 | return Client{}, err 43 | } 44 | clientset, err := kubernetes.NewForConfig(config) 45 | if err != nil { 46 | return Client{}, err 47 | } 48 | return Client{ 49 | K8s: clientset, 50 | }, nil 51 | } 52 | 53 | func InCluster() bool { 54 | ic := false 55 | if os.Getenv("KUBERNETES_SERVICE_HOST") != "" { 56 | ic = true 57 | } 58 | return ic 59 | } 60 | -------------------------------------------------------------------------------- /debug/topo/topo.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "os/exec" 7 | "strings" 8 | 9 | "github.com/BirenTechnology/go-brml/brml" 10 | "github.com/BirenTechnology/k8s-device-plugin/pkg/brgpu" 11 | log "github.com/sirupsen/logrus" 12 | ) 13 | 14 | func main() { 15 | err := brml.Init() 16 | if err != nil { 17 | log.Errorf("brml init failed %v", err) 18 | } 19 | defer brml.Shutdown() 20 | 21 | cardsFiles, err := os.ReadDir("/dev/biren") 22 | if err != nil { 23 | log.Errorf("read dir /dev/biren failed %v", err) 24 | panic(err) 25 | } 26 | cards := []string{} 27 | for _, f := range cardsFiles { 28 | if strings.Contains(f.Name(), "card_") { 29 | cards = append(cards, f.Name()) 30 | } 31 | } 32 | devices, err := brgpu.DeviceDiscover() 33 | if err != nil { 34 | log.Errorf("discover 
devices failed %v", err) 35 | panic(err) 36 | } 37 | log.Info("discover devices:") 38 | for _, d := range devices { 39 | fmt.Println(d.PhysicalNum, d.Instances) 40 | } 41 | 42 | _, err = brgpu.Device2Graph(cards) 43 | if err != nil { 44 | log.Errorf("device %v to graph failed %v", cards, err) 45 | panic(err) 46 | } 47 | 48 | log.Info("/dev/biren/card_x -> gpu hw:") 49 | for _, c := range cards { 50 | gpu_id, err := os.ReadFile(fmt.Sprintf("/sys/class/biren/%s/device/physical_id", c)) 51 | if err != nil { 52 | log.Errorf("read sys/class/biren/%s/device/physical_id failed %v", c, err) 53 | } else { 54 | fmt.Printf("%s -> %v", c, string(gpu_id)) 55 | } 56 | } 57 | 58 | log.Info("brsmi gpu list:") 59 | cmd := exec.Command("brsmi", "gpu", "list") 60 | cmd.Stdout = os.Stdout 61 | if err := cmd.Run(); err != nil { 62 | log.Errorf("exec `brsmi gpu list` failed %v", err) 63 | } 64 | 65 | log.Info("brsmi gpu topo:") 66 | cmd = exec.Command("brsmi", "topo", "-m") 67 | cmd.Stdout = os.Stdout 68 | if err := cmd.Run(); err != nil { 69 | log.Errorf("exec `brsmi gpu list` failed %v", err) 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /pkg/brgpu/allocator.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Shanghai Biren Technology Co., Ltd. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | package brgpu 15 | 16 | import ( 17 | "math" 18 | 19 | log "github.com/sirupsen/logrus" 20 | 21 | "github.com/BirenTechnology/go-brml/brml" 22 | "github.com/BirenTechnology/k8s-device-plugin/pkg/utils" 23 | ) 24 | 25 | func Device2Graph(devices []string) (*utils.Graph, error) { 26 | res := &utils.Graph{} 27 | for _, v := range devices { 28 | diIndex, err := cardID2Index(v) 29 | if err != nil { 30 | return nil, err 31 | } 32 | 33 | cNode := &utils.Node{ 34 | Name: cardIDFormat(diIndex), 35 | } 36 | res.AddNode(cNode) 37 | di, err := brml.HandleByNodeID(diIndex) 38 | if err != nil { 39 | return nil, err 40 | } 41 | for _, v2 := range devices { 42 | djIndex, err := cardID2Index(v2) 43 | if err != nil { 44 | return nil, err 45 | } 46 | dj, err := brml.HandleByNodeID(djIndex) 47 | if err != nil { 48 | return nil, err 49 | } 50 | ps, err := brml.P2PStatusV2(di, dj) 51 | if err != nil { 52 | return nil, err 53 | } 54 | 55 | res.AddEdge(cNode, &utils.Node{ 56 | Name: cardIDFormat(djIndex), 57 | }, scoreEnlarge(int(ps.Type))) 58 | 59 | } 60 | } 61 | log.Infof("create topo for devices: %v; result: \n%s", devices, res.String()) 62 | return res, nil 63 | } 64 | 65 | func scoreEnlarge(num int) int { 66 | return int(math.Pow(float64(num)+1, 2)) 67 | } 68 | 69 | func Allocate(g utils.Graph, mustIncludeNodes []string, size int) []string { 70 | _, names := g.MaxValCount(size) 71 | if len(mustIncludeNodes) != 0 { 72 | log.Error("must include nodes not nil") 73 | } 74 | log.Infof("Select devices: %v from topo: %v", names, g.String()) 75 | return names 76 | } 77 | -------------------------------------------------------------------------------- /.github/workflows/container.yaml: -------------------------------------------------------------------------------- 1 | name: Publish Docker image 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | push_to_registries: 9 | name: Push Docker image to multiple registries 10 | runs-on: ubuntu-latest 11 | permissions: 12 | 
packages: write 13 | contents: read 14 | attestations: write 15 | id-token: write 16 | steps: 17 | - name: Set up QEMU 18 | uses: docker/setup-qemu-action@v3 19 | 20 | - name: Set up Docker Buildx 21 | uses: docker/setup-buildx-action@v3 22 | 23 | - name: Check out the repo 24 | uses: actions/checkout@v4 25 | 26 | - name: Log in to the Container registry 27 | uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 28 | with: 29 | registry: ghcr.io 30 | username: ${{ github.actor }} 31 | password: ${{ secrets.GITHUB_TOKEN }} 32 | 33 | - name: Extract metadata (tags, labels) for Docker - arm64 34 | id: meta-arm64 35 | uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 36 | with: 37 | images: | 38 | ghcr.io/${{ github.repository }} 39 | flavor: | 40 | latest=auto 41 | suffix=-arm64,onlatest=true 42 | 43 | - name: Extract metadata (tags, labels) for Docker - x86_64 44 | id: meta-x86_64 45 | uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 46 | with: 47 | images: | 48 | ghcr.io/${{ github.repository }} 49 | 50 | - name: Build and push Docker images - arm64 51 | id: push-arm64 52 | uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671 53 | with: 54 | context: . 55 | file: deploy/Dockerfile 56 | build-args: | 57 | build_arch=arm64 58 | platforms: linux/arm64 59 | push: true 60 | tags: ${{ steps.meta-arm64.outputs.tags }} 61 | labels: ${{ steps.meta-arm64.outputs.labels }} 62 | 63 | - name: Build and push Docker images - x86_64 64 | id: push-x86-64 65 | uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671 66 | with: 67 | context: . 
68 | file: deploy/Dockerfile 69 | build-args: | 70 | build_arch=amd64 71 | platforms: linux/amd64 72 | push: true 73 | tags: ${{ steps.meta-x86_64.outputs.tags }} 74 | labels: ${{ steps.meta-x86_64.outputs.labels }} -------------------------------------------------------------------------------- /cmd/app/server.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Shanghai Biren Technology Co., Ltd. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | package app 15 | 16 | import ( 17 | "fmt" 18 | "os" 19 | "os/signal" 20 | "syscall" 21 | 22 | "github.com/BirenTechnology/k8s-device-plugin/pkg/brgpu" 23 | log "github.com/sirupsen/logrus" 24 | "github.com/spf13/cobra" 25 | "github.com/spf13/pflag" 26 | ) 27 | 28 | type Options struct { 29 | mode string 30 | pluginMountPath string 31 | pulse int 32 | initModeTolerateLevel int 33 | mountAllDevice bool 34 | mountDriDevice bool 35 | runtime string 36 | } 37 | 38 | func NewOptions() *Options { 39 | return &Options{} 40 | } 41 | 42 | func (o *Options) AddFlags(fs *pflag.FlagSet) { 43 | fs.IntVar(&o.pulse, "pulse", o.pulse, "heart beating every seconds") 44 | fs.StringVar(&o.runtime, "container-runtime", o.runtime, "the container runtime;runc or kata, default is runc") 45 | fs.BoolVar(&brgpu.CdiFeature, "cdi-feature", brgpu.CdiFeature, "enable cdi feature") 46 | fs.BoolVar(&brgpu.OverwriteCdiConfig, "overwrite-cdi-config", brgpu.OverwriteCdiConfig, "overwrite cdi config") 47 | fs.BoolVar(&brgpu.MountHostPath, "mount-host-path", brgpu.MountHostPath, "mount lib and bin folder in host to container, default is false") 48 | } 49 | 50 | func (o *Options) Run() error { 51 | sigs := make(chan os.Signal, 1) 52 | signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) 53 | var gpuConfig brgpu.GPUConfig 54 | bgm := brgpu.NewBrGPUManager(o.pluginMountPath, gpuConfig) 55 | 56 | go func() { 57 | sig := <-sigs 58 | log.Infof("Get the signal %s", sig) 59 | bgm.Stop <- true 60 | }() 61 | 62 | bgm.Serve(o.pulse, o.mountAllDevice, o.mountDriDevice, o.runtime) 63 | return nil 64 | } 65 | 66 | func NewManagerCommand() *cobra.Command { 67 | opts := NewOptions() 68 | 69 | cmd := &cobra.Command{ 70 | Use: "br-gpu-device-plugin", 71 | Long: "Biren gpu device plugin", 72 | Run: func(cmd *cobra.Command, args []string) { 73 | opts.Run() 74 | }, 75 | Args: func(cmd *cobra.Command, args []string) error { 76 | for _, arg := range args { 77 | if len(arg) > 0 { 78 | return fmt.Errorf("%q does not take 
any arguments, got %q", cmd.CommandPath(), args) 79 | } 80 | } 81 | return nil 82 | }, 83 | } 84 | opts.AddFlags(cmd.Flags()) 85 | return cmd 86 | } 87 | -------------------------------------------------------------------------------- /deploy/biren-device-plugin.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: biren-gpu 5 | --- 6 | 7 | apiVersion: v1 8 | kind: ServiceAccount 9 | metadata: 10 | name: device-plugin-sa 11 | namespace: biren-gpu 12 | --- 13 | 14 | apiVersion: rbac.authorization.k8s.io/v1 15 | kind: ClusterRole 16 | metadata: 17 | name: birentech-device-plugin 18 | rules: 19 | - apiGroups: [""] 20 | resources: 21 | - nodes 22 | - pods 23 | verbs: ["get", "list", "watch", "update"] 24 | 25 | --- 26 | apiVersion: rbac.authorization.k8s.io/v1 27 | kind: ClusterRoleBinding 28 | metadata: 29 | name: birentech-device-plugin 30 | roleRef: 31 | apiGroup: rbac.authorization.k8s.io 32 | kind: ClusterRole 33 | name: birentech-device-plugin 34 | subjects: 35 | - kind: ServiceAccount 36 | name: device-plugin-sa 37 | namespace: biren-gpu 38 | 39 | --- 40 | apiVersion: apps/v1 41 | kind: DaemonSet 42 | metadata: 43 | name: biren-device-plugin-daemonset 44 | namespace: biren-gpu 45 | spec: 46 | selector: 47 | matchLabels: 48 | name: biren-device-plugin 49 | template: 50 | metadata: 51 | annotations: 52 | scheduler.alpha.kubernetes.io/critical-pod: "" 53 | labels: 54 | name: biren-device-plugin 55 | app.kubernetes.io/component: exporter 56 | app.kubernetes.io/name: gpu-exporter 57 | spec: 58 | nodeSelector: 59 | birentech.com: gpu 60 | tolerations: 61 | - key: CriticalAddonsOnly 62 | operator: Exists 63 | - key: birentech.com/gpu 64 | operator: Exists 65 | effect: NoSchedule 66 | priorityClassName: "system-node-critical" 67 | containers: 68 | - image: ghcr.io/birentechnology/k8s-device-plugin:v0.7.1 69 | name: k8s-device-plugin 70 | env: 71 | - name: LD_LIBRARY_PATH 72 | 
value: /opt/birentech/lib 73 | command: ["/root/k8s-device-plugin"] 74 | args: ["--pulse", "300", "--container-runtime", "runc"] 75 | securityContext: 76 | privileged: true 77 | volumeMounts: 78 | - name: dp 79 | mountPath: /var/lib/kubelet/device-plugins 80 | - name: sys 81 | mountPath: /sys 82 | - name: brml 83 | mountPath: /opt/birentech/lib 84 | - name: brsmi 85 | mountPath: /opt/birentech/bin 86 | - mountPath: /dev 87 | name: device 88 | - name: cdi-config 89 | mountPath: /etc/cdi 90 | serviceAccountName: device-plugin-sa 91 | volumes: 92 | - name: dp 93 | hostPath: 94 | path: /var/lib/kubelet/device-plugins 95 | - name: sys 96 | hostPath: 97 | path: /sys 98 | - name: brml 99 | hostPath: 100 | path: /usr/lib 101 | - name: brsmi 102 | hostPath: 103 | path: /usr/bin 104 | - name: device 105 | hostPath: 106 | path: /dev 107 | - name: cdi-config 108 | hostPath: 109 | path: /etc/cdi 110 | -------------------------------------------------------------------------------- /pkg/brgpu/manager.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Shanghai Biren Technology Co., Ltd. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | package brgpu 15 | 16 | import ( 17 | "os" 18 | "sync" 19 | 20 | "github.com/kubevirt/device-plugin-manager/pkg/dpm" 21 | log "github.com/sirupsen/logrus" 22 | "google.golang.org/grpc" 23 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 24 | ) 25 | 26 | type ContainerRuntime string 27 | 28 | const ( 29 | RuntimeKata ContainerRuntime = "kata" 30 | RuntimeRunc ContainerRuntime = "runc" 31 | ) 32 | 33 | const ( 34 | vendor = "birentech.com" 35 | ) 36 | 37 | type Lister struct { 38 | ResUpdateChan chan dpm.PluginNameList 39 | Heartbeat chan bool 40 | MountAllDevice bool 41 | MountDriDevice bool 42 | DevicesInfoList DevicesInfoList 43 | PFDeviceInfoList PFDeviceInfoList 44 | Runtime string 45 | MountHostPath bool 46 | } 47 | 48 | func (l *Lister) GetResourceNamespace() string { 49 | return vendor 50 | } 51 | 52 | func (l *Lister) NewPlugin(resourceLastName string) dpm.PluginInterface { 53 | return &Plugin{ 54 | Runtime: l.Runtime, 55 | PFDevices: l.PFDeviceInfoList.FilterByName(resourceLastName), 56 | BRGPUs: l.DevicesInfoList.FilterByName(resourceLastName), 57 | Heartbeat: l.Heartbeat, 58 | MountAllDevice: l.MountAllDevice, 59 | MountDriDevice: l.MountDriDevice, 60 | MountHostPath: l.MountHostPath, 61 | } 62 | } 63 | func (l *Lister) Discover(pluginListCh chan dpm.PluginNameList) { 64 | for { 65 | select { 66 | case newResourcesList := <-l.ResUpdateChan: // New resources found 67 | pluginListCh <- newResourcesList 68 | case <-pluginListCh: // Stop message received 69 | // Stop resourceUpdateCh 70 | return 71 | } 72 | } 73 | } 74 | 75 | type MountPath struct { 76 | HostPath string 77 | ContainerPath string 78 | } 79 | 80 | // GPUConfig stores the settings used to configure the GPUs on a node. 
81 | type GPUConfig struct { 82 | GPUPartitionSize string 83 | } 84 | 85 | type brGPUManager struct { 86 | devDirectory string 87 | defaultDevices []string 88 | devices map[string]pluginapi.Device 89 | grpcServer *grpc.Server 90 | socket string 91 | Stop chan bool 92 | devicesMutex sync.Mutex 93 | gpuConfig GPUConfig 94 | Health chan pluginapi.Device 95 | 96 | // 生成 cdi config 97 | generateCdiConfigFile func(runtime ContainerRuntime) error 98 | } 99 | 100 | func NewBrGPUManager(devDirectory string, gpuConfig GPUConfig) *brGPUManager { 101 | return &brGPUManager{ 102 | devDirectory: devDirectory, 103 | devices: make(map[string]pluginapi.Device), 104 | Stop: make(chan bool), 105 | gpuConfig: gpuConfig, 106 | Health: make(chan pluginapi.Device), 107 | generateCdiConfigFile: generateConfigCdiFile, 108 | } 109 | } 110 | 111 | func (bgm *brGPUManager) ListDevices() map[string]pluginapi.Device { 112 | if bgm.gpuConfig.GPUPartitionSize == "" { 113 | return bgm.devices 114 | } 115 | return map[string]pluginapi.Device{} 116 | } 117 | 118 | func (bgm *brGPUManager) Serve(pulse int, mountAllDev bool, mountDriDevice bool, runtime string) { 119 | log.Info("Container runtime: ", runtime) 120 | go func() { 121 | <-bgm.Stop 122 | close(bgm.Stop) 123 | os.Exit(1) 124 | }() 125 | 126 | switch runtime { 127 | case string(RuntimeKata): 128 | bgm.kataManager() 129 | case string(RuntimeRunc): 130 | bgm.runcManager(pulse, mountAllDev, mountDriDevice) 131 | } 132 | log.Error("Can't find any manager for runtime ", runtime) 133 | } 134 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Biren GPU Device plugin 2 | 3 | ![GitHub License](https://img.shields.io/github/license/BirenTechnology/k8s-device-plugin) ![GitHub Release](https://img.shields.io/github/v/release/BirenTechnology/k8s-device-plugin) [![Publish Docker 
image](https://github.com/BirenTechnology/k8s-device-plugin/actions/workflows/container.yaml/badge.svg)](https://github.com/BirenTechnology/k8s-device-plugin/actions/workflows/container.yaml) 4 | 5 | ## About 6 | The Biren GPU device plugin is a DaemonSet that allows you to automatically: 7 | 8 | 1. Expose the number of GPUs on each node of your cluster 9 | 2. Keep track of the health of your GPUs 10 | 3. Run GPU-enabled containers in your k8s cluster 11 | 12 | This repository contains Biren's official implementation of the [k8s device plugin](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/resource-management/device-plugin.md) 13 | ## Prerequisites 14 | The prerequisites for running the Biren device plugin are listed below: 15 | 1. Biren GPU Driver >= 1.2.2 16 | 2. Kubernetes >= 1.13 17 | 3. If you need to mount the DRI device, run `modprobe -v vgem` on every host that has GPUs 18 | 19 | ## SVI in Device plugin 20 | 1. SVI devices are not created dynamically anywhere within the k8s software stack (each GPU must be configured as an SVI card and split into SVI devices beforehand) 21 | 22 | 23 | ## SR-IOV in device plugin 24 | 1. Set up the SR-IOV vfio driver 25 | 2. Run the device plugin with `--container-runtime kata` 26 | 27 | 28 | ## Quick Start 29 | ### Deploy 30 | `kubectl create -f deploy/biren-device-plugin.yaml` 31 | ### Running GPU Pods 32 | ``` 33 | $ cat <= 1.28 81 | - containerd >= 1.7.0 82 | 83 | ### How to use it 84 | 85 | #### kubelet 86 | 87 | In kubelet version 1.28, the CDI feature is in alpha state, so it needs to be enabled manually. To do this, add the `--feature-gates=DevicePluginCDIDevices=true` argument to the kubelet startup command. 
88 | 89 | #### containerd 90 | 91 | Modify the containerd configuration file as follows: 92 | 93 | ```toml 94 | [plugins."io.containerd.grpc.v1.cri"] 95 | cdi_spec_dirs = ["/etc/cdi", "/var/run/cdi"] 96 | enable_cdi = true 97 | ``` 98 | 99 | #### k8s-device-plugin 100 | 101 | Add the startup command parameter `--cdi-feature` to enable the CDI feature. If the CDI feature is enabled, this will generate a biren.yaml file in the node's `/etc/cdi` directory, which defines the configuration of CDI. If the startup command parameter includes `--overwrite-cdi-config`, the configuration file will be overwritten each time it starts. Otherwise, if the biren.yaml configuration file already exists, it will not be overwritten. 102 | 103 | k8s-device-plugin startup command example: 104 | 105 | ```yaml 106 | command: 107 | - "/root/k8s-device-plugin" 108 | args: 109 | - "--pulse" 110 | - "300" 111 | - "--container-runtime" 112 | - "runc" 113 | - "--cdi-feature" # enable cdi feature 114 | - "--overwrite-cdi-config" # overwrite cdi config 115 | ``` 116 | -------------------------------------------------------------------------------- /pkg/utils/topo.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Shanghai Biren Technology Co., Ltd. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | // 15 | package utils 16 | 17 | import ( 18 | "fmt" 19 | "sort" 20 | "strings" 21 | ) 22 | 23 | type Node struct { 24 | Name string 25 | } 26 | 27 | type nodeWithVal struct { 28 | val int 29 | node *Node 30 | } 31 | 32 | type Graph struct { 33 | nodes []*Node 34 | edges map[string][]nodeWithVal 35 | } 36 | 37 | func (g *Graph) AddEdge(u, v *Node, val int) { 38 | if g.edges == nil { 39 | g.edges = make(map[string][]nodeWithVal) 40 | } 41 | if g.edgeExist(u, v) { 42 | return 43 | } 44 | g.edges[u.Name] = append(g.edges[u.Name], nodeWithVal{ 45 | val: val, 46 | node: v, 47 | }) 48 | g.edges[v.Name] = append(g.edges[v.Name], nodeWithVal{ 49 | val: val, 50 | node: u, 51 | }) 52 | } 53 | 54 | func (g *Graph) AddNode(n *Node) { 55 | g.nodes = append(g.nodes, n) 56 | } 57 | 58 | func (g *Graph) DeleteNode(n *Node) *Graph { 59 | newGraph := &Graph{ 60 | nodes: []*Node{}, 61 | edges: map[string][]nodeWithVal{}, 62 | } 63 | for _, v := range g.nodes { 64 | if v.Name != n.Name { 65 | newGraph.nodes = append(newGraph.nodes, v) 66 | } 67 | } 68 | for k, nodes := range g.edges { 69 | newVals := []nodeWithVal{} 70 | for _, v := range nodes { 71 | if v.node.Name != n.Name { 72 | newVals = append(newVals, v) 73 | } 74 | } 75 | newGraph.edges[k] = newVals 76 | } 77 | return newGraph 78 | } 79 | 80 | func (g *Graph) DeleteNodes(nodes []*Node) *Graph { 81 | tg := g 82 | for _, n := range nodes { 83 | tg = tg.DeleteNode(n) 84 | } 85 | return tg 86 | } 87 | 88 | func (g *Graph) SelectNodes(nodes []*Node) *Graph { 89 | nodeNames := []string{} 90 | for _, v := range nodes { 91 | nodeNames = append(nodeNames, v.Name) 92 | } 93 | delNodes := []*Node{} 94 | for _, v := range g.nodes { 95 | if !exist(v.Name, nodeNames) { 96 | delNodes = append(delNodes, v) 97 | } 98 | } 99 | return g.DeleteNodes(delNodes) 100 | } 101 | 102 | func (g *Graph) String() string { 103 | str := "" 104 | for k, iNode := range g.nodes { 105 | str += iNode.String() + " -> " 106 | nexts := g.edges[iNode.Name] 107 | 
sort.SliceStable(nexts, func(i, j int) bool { 108 | si := strings.Split(nexts[i].node.Name, "-") 109 | sj := strings.Split(nexts[j].node.Name, "-") 110 | return si[len(si)-1] < sj[len(sj)-1] 111 | }) 112 | for _, next := range nexts { 113 | str += next.node.String() + fmt.Sprintf("(%d) ", next.val) 114 | } 115 | if k != len(g.nodes)-1 { 116 | str += "\n" 117 | } 118 | } 119 | return str 120 | } 121 | 122 | func (n *Node) String() string { 123 | return n.Name 124 | } 125 | 126 | func (g *Graph) edgeExist(u, v *Node) bool { 127 | if u.Name == v.Name { 128 | return true 129 | } 130 | val, ok := g.edges[u.Name] 131 | if ok { 132 | for _, n := range val { 133 | if n.node.Name == v.Name { 134 | return true 135 | } 136 | } 137 | } 138 | return false 139 | } 140 | 141 | func (g *Graph) MaxValCount(x int) (int, []string) { 142 | if x < 1 { 143 | return 0, nil 144 | } 145 | if x > len(g.nodes) { 146 | return 0, nil 147 | } 148 | if len(g.nodes) == 1 { 149 | return 10, []string{g.nodes[0].Name} 150 | } 151 | allNodes := []string{} 152 | 153 | res := 0 154 | resSet := []string{} 155 | for _, n := range g.nodes { 156 | allNodes = append(allNodes, n.Name) 157 | } 158 | for _, ss := range subset(allNodes, x) { 159 | num := g.bridgeVal(ss) 160 | if num > res { 161 | res = num 162 | resSet = ss 163 | } 164 | } 165 | return res, resSet 166 | } 167 | 168 | func subset(g []string, x int) [][]string { 169 | res := [][]string{} 170 | var dfs func(index int, list []string) 171 | 172 | dfs = func(index int, list []string) { 173 | if len(list) == x { 174 | tmp := make([]string, len(list)) 175 | copy(tmp, list) 176 | res = append(res, tmp) 177 | } 178 | for i := index; i < len(g); i++ { 179 | list = append(list, g[i]) 180 | dfs(i+1, list) 181 | list = list[:len(list)-1] 182 | } 183 | } 184 | dfs(0, []string{}) 185 | return res 186 | } 187 | 188 | func (g *Graph) bridgeVal(subset []string) int { 189 | if len(subset) <= 1 { 190 | return 1 191 | } 192 | val := 0 193 | for i := 0; i < 
len(subset); i++ { 194 | for _, n := range g.edges[subset[i]] { 195 | if exist(n.node.Name, subset) { 196 | if n.node.Name != subset[i] { 197 | val += n.val 198 | } 199 | } 200 | } 201 | } 202 | return val / 2 203 | } 204 | 205 | func exist(s string, ss []string) bool { 206 | for _, v := range ss { 207 | if v == s { 208 | return true 209 | } 210 | } 211 | return false 212 | } 213 | -------------------------------------------------------------------------------- /pkg/brgpu/runc.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Shanghai Biren Technology Co., Ltd. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | package brgpu 15 | 16 | import ( 17 | "errors" 18 | "fmt" 19 | "os" 20 | "strconv" 21 | "strings" 22 | "time" 23 | 24 | "github.com/BirenTechnology/go-brml/brml" 25 | "github.com/kubevirt/device-plugin-manager/pkg/dpm" 26 | log "github.com/sirupsen/logrus" 27 | ) 28 | 29 | type Instance struct { 30 | UUID string 31 | Memory int 32 | ResourceName string 33 | CardID string 34 | } 35 | 36 | type DevicesInfo struct { 37 | PhysicalNum int 38 | Instances []Instance 39 | SVICount int 40 | } 41 | 42 | type DevicesInfoList []DevicesInfo 43 | 44 | func (d DevicesInfoList) ResourceNames() []string { 45 | res := []string{} 46 | names := map[string]int{} 47 | for _, v := range d { 48 | names[v.Instances[0].ResourceName] += 1 49 | } 50 | 51 | for k, _ := range names { 52 | res = append(res, k) 53 | } 54 | return res 55 | } 56 | 57 | func (d DevicesInfoList) FilterByName(resourceName string) DevicesInfoList { 58 | res := DevicesInfoList{} 59 | for _, v := range d { 60 | if v.Instances[0].ResourceName == resourceName { 61 | res = append(res, v) 62 | } 63 | } 64 | return res 65 | } 66 | 67 | func (d DevicesInfoList) AllCardIDs() []string { 68 | res := []string{} 69 | for _, v := range d { 70 | for _, i := range v.Instances { 71 | res = append(res, i.CardID) 72 | } 73 | } 74 | return res 75 | } 76 | 77 | func (d DevicesInfoList) getResourceByCardId(cardId string) string { 78 | for _, vs := range d { 79 | for _, v := range vs.Instances { 80 | if v.CardID == cardId { 81 | return v.ResourceName 82 | } 83 | } 84 | } 85 | return "gpu" 86 | } 87 | 88 | func (bgm *brGPUManager) runcManager(pulse int, mountAllDev bool, mountDriDevice bool) { 89 | err := brml.Init() 90 | if err != nil { 91 | log.Errorf("brml init failed %v", err) 92 | bgm.Stop <- true 93 | } 94 | defer brml.Shutdown() 95 | 96 | info, err := DeviceDiscover() 97 | if err != nil { 98 | log.Errorf("runc device discover failed: %v", err) 99 | bgm.Stop <- true 100 | } 101 | l := Lister{ 102 | ResUpdateChan: make(chan 
dpm.PluginNameList), 103 | Heartbeat: make(chan bool), 104 | MountAllDevice: mountAllDev, 105 | MountDriDevice: mountDriDevice, 106 | DevicesInfoList: info, 107 | Runtime: string(RuntimeRunc), 108 | MountHostPath: MountHostPath, 109 | } 110 | 111 | manager := dpm.NewManager(&l) 112 | if pulse > 0 { 113 | go func() { 114 | for { 115 | time.Sleep(time.Second * time.Duration(pulse)) 116 | _, err = brml.DeviceCount() 117 | if err != nil { 118 | log.Errorf("Can't find device from host") 119 | bgm.Stop <- true 120 | } 121 | l.Heartbeat <- true 122 | } 123 | }() 124 | } 125 | 126 | go func() { 127 | var path = "/sys/class/biren" 128 | if _, err := os.Stat(path); err == nil { 129 | l.ResUpdateChan <- info.ResourceNames() 130 | } 131 | }() 132 | 133 | err = bgm.generateCdiConfigFile(RuntimeRunc) 134 | if err != nil { 135 | log.Errorf("runc generate cdi config failed %v", err) 136 | bgm.Stop <- true 137 | } 138 | 139 | manager.Run() 140 | } 141 | 142 | func DeviceDiscover() (DevicesInfoList, error) { 143 | dis := DevicesInfoList{} 144 | physicalNum, err := brml.DeviceCount() 145 | if err != nil { 146 | log.Errorf("brml device count err: %v", err) 147 | return nil, err 148 | } 149 | 150 | for i := 0; i < physicalNum; i++ { 151 | log.Infof("discovering device node id %v/%v", i, physicalNum) 152 | device, err := brml.HandleByIndex(i) 153 | if err != nil { 154 | log.Errorf("brml HandleByIndex %v err: %v", i, err) 155 | return nil, err 156 | } 157 | sviCount, err := brml.GetSviMode(device) 158 | if err != nil { 159 | log.Errorf("brml GetSviMode %v err: %v", device, err) 160 | return nil, err 161 | } 162 | 163 | phyUUID, err := brml.DeviceUUID(device) 164 | if err != nil { 165 | log.Errorf("brml DeviceUUID %v err: %v", device, err) 166 | return nil, err 167 | } 168 | 169 | phyUUID = strings.TrimSpace(phyUUID) 170 | 171 | switch sviCount { 172 | case 0, 1: 173 | memInfo, err := brml.MemoryInfo(device) 174 | if err != nil { 175 | log.Errorf("brml MemoryInfo %v err: %v", device, err) 
176 | return nil, err 177 | } 178 | 179 | id, err := brml.GetGPUNodeIds(device) 180 | if err != nil { 181 | log.Errorf("brml GetGPUNodeIds %v err: %v", device, err) 182 | return nil, err 183 | } 184 | 185 | dis = append(dis, DevicesInfo{ 186 | PhysicalNum: i, 187 | Instances: []Instance{{ 188 | UUID: phyUUID, 189 | Memory: int(memInfo.Total), 190 | ResourceName: "gpu", 191 | CardID: cardIDFormat(id), 192 | }}, 193 | SVICount: 1, 194 | }) 195 | case 2, 4: 196 | di := DevicesInfo{ 197 | PhysicalNum: i, 198 | Instances: []Instance{}, 199 | SVICount: sviCount, 200 | } 201 | for j := 0; j < sviCount; j++ { 202 | ins, err := brml.GetGPUInstanceByID(device, uint32(j)) 203 | if err != nil { 204 | log.Errorf("brml GetGPUInstanceByID %v/%v err: %v", device, j, err) 205 | return nil, err 206 | } 207 | 208 | mem, err := brml.MemoryInfo(ins) 209 | if err != nil { 210 | log.Errorf("brml MemoryInfo %v err: %v", ins, err) 211 | return nil, err 212 | } 213 | 214 | id, err := brml.GetGPUNodeIds(ins) 215 | if err != nil { 216 | log.Errorf("brml GetGPUNodeIds %v err: %v", ins, err) 217 | return nil, err 218 | } 219 | 220 | di.Instances = append(di.Instances, Instance{ 221 | UUID: fmt.Sprintf("%s-instance-%d", phyUUID, j), 222 | Memory: int(mem.Total), 223 | ResourceName: fmt.Sprintf("1-%d-gpu", sviCount), 224 | CardID: cardIDFormat(id), 225 | }) 226 | } 227 | dis = append(dis, di) 228 | } 229 | } 230 | return dis, nil 231 | } 232 | 233 | func cardIDFormat(i int) string { 234 | return fmt.Sprintf("card_%d", i) 235 | } 236 | 237 | func cardID2Index(s string) (int, error) { 238 | if !strings.HasPrefix(s, "card_") { 239 | return 0, errors.New(fmt.Sprintf("card handler %s is not valid format", s)) 240 | } 241 | s = strings.ReplaceAll(s, "card_", "") 242 | return strconv.Atoi(s) 243 | } 244 | -------------------------------------------------------------------------------- /pkg/brgpu/cdi.go: -------------------------------------------------------------------------------- 1 | // Copyright 
2024 Shanghai Biren Technology Co., Ltd. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | package brgpu 15 | 16 | import ( 17 | "encoding/json" 18 | "fmt" 19 | "os" 20 | "path" 21 | 22 | "github.com/BirenTechnology/go-brml/brml" 23 | log "github.com/sirupsen/logrus" 24 | "sigs.k8s.io/yaml" 25 | cdi "tags.cncf.io/container-device-interface/specs-go" 26 | ) 27 | 28 | const ( 29 | deviceBasePath = "/dev/biren" 30 | cdiConfigPath = "/etc/cdi/biren.yaml" 31 | // 最新版 0.6.0 containerd不兼容 32 | cdiVersion = "0.5.0" 33 | ) 34 | 35 | var ( 36 | CdiFeature bool 37 | OverwriteCdiConfig bool 38 | ) 39 | 40 | func cdiSPec(runtime ContainerRuntime) ([]*cdi.Spec, error) { 41 | switch runtime { 42 | case RuntimeRunc: 43 | return runcCDI() 44 | case RuntimeKata: 45 | return kataCDI() 46 | } 47 | 48 | return nil, nil 49 | } 50 | 51 | func runcCDI() ([]*cdi.Spec, error) { 52 | info, err := DeviceDiscover() 53 | if err != nil { 54 | log.Errorf("deviceDiscover error: %v", err) 55 | return nil, err 56 | } 57 | 58 | specs := make([]*cdi.Spec, 0) 59 | 60 | resourceInstances := make(map[string][]Instance) 61 | 62 | for _, v := range info { 63 | for _, ins := range v.Instances { 64 | if _, ok := resourceInstances[ins.ResourceName]; !ok { 65 | resourceInstances[ins.ResourceName] = []Instance{} 66 | } 67 | resourceInstances[ins.ResourceName] = append(resourceInstances[ins.ResourceName], ins) 68 | } 69 | } 70 | 71 | for k, vs := range 
resourceInstances { 72 | spec := genSpec(k, MountHostPath) 73 | for _, v := range vs { 74 | spec.Devices = append(spec.Devices, cdi.Device{ 75 | Name: v.CardID, 76 | Annotations: map[string]string{}, 77 | ContainerEdits: cdi.ContainerEdits{ 78 | Env: []string{}, 79 | DeviceNodes: []*cdi.DeviceNode{ 80 | { 81 | Path: path.Join(deviceBasePath, v.CardID), 82 | HostPath: path.Join(deviceBasePath, v.CardID), 83 | Type: "c", 84 | Permissions: "rw", 85 | }, 86 | }, 87 | Hooks: []*cdi.Hook{}, 88 | Mounts: []*cdi.Mount{}, 89 | }, 90 | }) 91 | } 92 | specs = append(specs, spec) 93 | } 94 | 95 | return specs, nil 96 | } 97 | 98 | func kataCDI() ([]*cdi.Spec, error) { 99 | info, err := vfDeviceDiscover() 100 | if err != nil { 101 | log.Errorf("vfDeviceDiscover error: %v", err) 102 | return nil, err 103 | } 104 | 105 | specs := make([]*cdi.Spec, 0) 106 | resourceVFDeviceInfos := make(map[string][]VFDeviceInfo) 107 | for _, v := range info { 108 | for _, vf := range v.VFs { 109 | if _, ok := resourceVFDeviceInfos[vf.ResourceName]; !ok { 110 | resourceVFDeviceInfos[vf.ResourceName] = []VFDeviceInfo{} 111 | } 112 | resourceVFDeviceInfos[vf.ResourceName] = append(resourceVFDeviceInfos[vf.ResourceName], vf) 113 | } 114 | } 115 | 116 | for k, vs := range resourceVFDeviceInfos { 117 | spec := genSpec(k, MountHostPath) 118 | for _, v := range vs { 119 | spec.Devices = append(spec.Devices, cdi.Device{ 120 | Name: v.deviceEndpoint(), 121 | Annotations: map[string]string{}, 122 | ContainerEdits: cdi.ContainerEdits{ 123 | Env: []string{}, 124 | DeviceNodes: []*cdi.DeviceNode{{ 125 | Path: path.Join(deviceBasePath, v.DeviceID), 126 | HostPath: path.Join(deviceBasePath, v.DeviceID), 127 | Type: "char", 128 | Permissions: "c", 129 | }}, 130 | Hooks: []*cdi.Hook{}, 131 | Mounts: []*cdi.Mount{}, 132 | }, 133 | }) 134 | } 135 | specs = append(specs, spec) 136 | } 137 | return specs, nil 138 | } 139 | 140 | func genSpec(resource string, mountHostPath bool) *cdi.Spec { 141 | spec := &cdi.Spec{ 142 
| Version: cdiVersion, 143 | Kind: fmt.Sprintf("%s/%s", vendor, resource), 144 | Annotations: map[string]string{}, 145 | Devices: []cdi.Device{}, 146 | ContainerEdits: cdi.ContainerEdits{ 147 | Env: []string{}, 148 | DeviceNodes: []*cdi.DeviceNode{}, 149 | Hooks: []*cdi.Hook{}, 150 | Mounts: []*cdi.Mount{}, 151 | }, 152 | } 153 | if mountHostPath { 154 | cdiMounts := []*cdi.Mount{} 155 | brmlVersion, _ := brml.BRMLVersion() 156 | mountPaths := map[string]func(string) string{ 157 | "/usr/lib/libbiren-ml.so": defaultMountPathFunc, 158 | "/usr/lib/libbiren-ml.so.1": defaultMountPathFunc, 159 | "/usr/bin/brsmi": defaultMountPathFunc, 160 | fmt.Sprintf("/usr/lib/libbiren-ml.so.%s", brmlVersion): defaultMountPathFunc, 161 | } 162 | 163 | for h, c := range mountPaths { 164 | if _, err := os.Stat(defaultMountPathFunc(h)); err == nil { 165 | m := &cdi.Mount{ 166 | HostPath: h, 167 | ContainerPath: c(h), 168 | Options: []string{"ro", "nosuid", "nodev", "bind"}, 169 | } 170 | cdiMounts = append(cdiMounts, m) 171 | } 172 | } 173 | spec.ContainerEdits.Mounts = append(spec.ContainerEdits.Mounts, cdiMounts...) 
174 | } 175 | return spec 176 | } 177 | 178 | func generateConfigCdiFile(runtime ContainerRuntime) error { 179 | if !CdiFeature { 180 | log.Info("cdi feature isn't open") 181 | return nil 182 | } 183 | exists, err := PathExists(cdiConfigPath) 184 | if err != nil { 185 | log.Errorf("cdiconfig path %v", err) 186 | return err 187 | } 188 | // 如果文件存在并且不需要覆盖写 直接返回 189 | if exists && !OverwriteCdiConfig { 190 | log.Infof("file already exists and no need to rewrite") 191 | return nil 192 | } 193 | 194 | specs, err := cdiSPec(runtime) 195 | if err != nil { 196 | return err 197 | } 198 | 199 | file, err := os.OpenFile(cdiConfigPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666) 200 | if err != nil { 201 | log.Errorf("open file failed:%v \n", err) 202 | return err 203 | } 204 | defer file.Close() 205 | 206 | for _, spec := range specs { 207 | bs, err := json.Marshal(spec) 208 | if err != nil { 209 | return err 210 | } 211 | 212 | bs, err = yaml.JSONToYAML(bs) 213 | if err != nil { 214 | return err 215 | } 216 | 217 | // 写入分隔符 218 | if _, err := file.WriteString("---\n"); err != nil { 219 | log.Errorf("Error writing separator: %v \n", err) 220 | return err 221 | } 222 | 223 | _, err = file.Write(bs) 224 | if err != nil { 225 | return err 226 | } 227 | } 228 | 229 | return nil 230 | } 231 | 232 | func PathExists(path string) (bool, error) { 233 | _, err := os.Stat(path) 234 | if err == nil { 235 | return true, nil 236 | } 237 | if os.IsNotExist(err) { 238 | return false, nil 239 | } 240 | return false, err 241 | } 242 | -------------------------------------------------------------------------------- /pkg/brgpu/kata.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Shanghai Biren Technology Co., Ltd. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | package brgpu 15 | 16 | import ( 17 | "fmt" 18 | "io/ioutil" 19 | "os" 20 | "path/filepath" 21 | "strconv" 22 | "strings" 23 | 24 | "github.com/kubevirt/device-plugin-manager/pkg/dpm" 25 | log "github.com/sirupsen/logrus" 26 | ) 27 | 28 | const ( 29 | BirenVendorID = "1ee0" 30 | basePath = "/sys/bus/pci/devices" 31 | ) 32 | 33 | type VFDeviceInfo struct { 34 | DeviceID string 35 | IOMMUGroup string 36 | Addr string 37 | ResourceName string 38 | } 39 | 40 | func (v VFDeviceInfo) deviceEndpoint() string { 41 | return "/dev/vfio/" + v.IOMMUGroup 42 | } 43 | 44 | type PFDeviceInfo struct { 45 | Addr string 46 | VFs []VFDeviceInfo 47 | VFCount int 48 | } 49 | 50 | type PFDeviceInfoList []PFDeviceInfo 51 | 52 | func (p PFDeviceInfoList) ResourceNames() []string { 53 | res := []string{} 54 | names := map[string]int{} 55 | for _, v := range p { 56 | names[v.VFs[0].ResourceName] += 1 57 | } 58 | 59 | for k, _ := range names { 60 | res = append(res, k) 61 | } 62 | return res 63 | 64 | } 65 | 66 | func (p PFDeviceInfoList) FilterByName(resourceName string) PFDeviceInfoList { 67 | res := PFDeviceInfoList{} 68 | for _, v := range p { 69 | if v.VFs[0].ResourceName == resourceName { 70 | res = append(res, v) 71 | } 72 | } 73 | return res 74 | } 75 | 76 | func (p PFDeviceInfoList) Contain(pciaddr string) bool { 77 | for _, v := range p { 78 | for _, val := range v.VFs { 79 | if val.Addr == pciaddr { 80 | return true 81 | } 82 | } 83 | } 84 | return false 85 | } 86 | 87 | func (p PFDeviceInfoList) getResourceByCardId(cardId 
string) string { 88 | for _, vs := range p { 89 | for _, v := range vs.VFs { 90 | if v.DeviceID == cardId { 91 | return v.ResourceName 92 | } 93 | } 94 | } 95 | return "gpu" 96 | } 97 | 98 | func (bgm *brGPUManager) kataManager() { 99 | info, err := vfDeviceDiscover() 100 | if err != nil { 101 | log.Errorf("kata device discover failed %v", err) 102 | bgm.Stop <- true 103 | } 104 | l := Lister{ 105 | ResUpdateChan: make(chan dpm.PluginNameList), 106 | Heartbeat: make(chan bool), 107 | PFDeviceInfoList: info, 108 | Runtime: string(RuntimeKata), 109 | } 110 | manager := dpm.NewManager(&l) 111 | go func() { 112 | l.ResUpdateChan <- info.ResourceNames() 113 | }() 114 | 115 | err = bgm.generateCdiConfigFile(RuntimeKata) 116 | if err != nil { 117 | log.Errorf("kata generate cdi config failed %v", err) 118 | bgm.Stop <- true 119 | } 120 | 121 | manager.Run() 122 | } 123 | 124 | func readIDFromFile(basePath string, deviceAddress string, property string) (string, error) { 125 | data, err := ioutil.ReadFile(filepath.Join(basePath, deviceAddress, property)) 126 | if err != nil { 127 | log.Errorf("Could not read %s for device %s: %s", property, deviceAddress, err) 128 | return "", err 129 | } 130 | id := strings.Trim(string(data[2:]), "\n") 131 | return id, nil 132 | } 133 | 134 | func readLink(basePath string, deviceAddress string, link string) (string, error) { 135 | path, err := os.Readlink(filepath.Join(basePath, deviceAddress, link)) 136 | if err != nil { 137 | log.Errorf("Could not read link %s for device %s: %s", link, deviceAddress, err) 138 | return "", err 139 | } 140 | _, file := filepath.Split(path) 141 | return file, nil 142 | } 143 | 144 | func readNumFromFile(basePath string, deviceAddress string, property string) (int, error) { 145 | data, err := ioutil.ReadFile(filepath.Join(basePath, deviceAddress, property)) 146 | if err != nil { 147 | log.Errorf("Could not read %s for device %s: %s", property, deviceAddress, err) 148 | return 0, err 149 | } 150 | 151 | s := 
strings.Trim(string(data), "\n") 152 | return strconv.Atoi(s) 153 | } 154 | 155 | func vfDeviceDiscover() (PFDeviceInfoList, error) { 156 | pdl := PFDeviceInfoList{} 157 | err := filepath.Walk(basePath, func(path string, info os.FileInfo, err error) error { 158 | if err != nil { 159 | return err 160 | } 161 | if info.IsDir() { 162 | return nil 163 | } 164 | 165 | vendorID, err := readIDFromFile(basePath, info.Name(), "vendor") 166 | if err != nil { 167 | log.Error("Could not get vendor ID for device ", info.Name()) 168 | return nil 169 | } 170 | 171 | if vendorID == BirenVendorID { 172 | log.Infof("Birentech device %s", info.Name()) 173 | driver, err := readLink(basePath, info.Name(), "driver") 174 | if err != nil { 175 | log.Error("Could not get driver for device, Skip it.", info.Name()) 176 | return nil 177 | } 178 | if driver == "BEV_HYPER_DRIVER" { 179 | _, err := readLink(basePath, info.Name(), "physfn") 180 | if os.IsNotExist(err) { 181 | vfNum, err := readNumFromFile(basePath, info.Name(), "sriov_numvfs") 182 | if err != nil { 183 | return err 184 | } 185 | vfs := []VFDeviceInfo{} 186 | for i := 0; i < vfNum; i++ { 187 | vfLink := fmt.Sprintf("virtfn%d", i) 188 | vfAddr, err := readLink(basePath, info.Name(), vfLink) 189 | if err != nil { 190 | return err 191 | } 192 | 193 | iommuGroup, err := readLink(basePath, vfAddr, "iommu_group") 194 | if err != nil { 195 | return err 196 | } 197 | 198 | deviceID, err := readIDFromFile(basePath, vfAddr, "device") 199 | if err != nil { 200 | return err 201 | } 202 | 203 | vfs = append(vfs, VFDeviceInfo{ 204 | DeviceID: deviceID, 205 | IOMMUGroup: iommuGroup, 206 | Addr: vfAddr, 207 | ResourceName: func() string { 208 | if vfNum == 1 { 209 | return "gpu" 210 | } 211 | return fmt.Sprintf("1-%d-gpu", vfNum) 212 | }(), 213 | }) 214 | } 215 | pdl = append(pdl, PFDeviceInfo{ 216 | Addr: info.Name(), 217 | VFCount: vfNum, 218 | VFs: vfs, 219 | }) 220 | } 221 | } 222 | if driver == "vfio-pci" { 223 | iommuGroup, err := 
readLink(basePath, info.Name(), "iommu_group") 224 | if err != nil { 225 | return err 226 | } 227 | deviceID, err := readIDFromFile(basePath, info.Name(), "device") 228 | if err != nil { 229 | return err 230 | } 231 | if !pdl.Contain(info.Name()) { 232 | pdl = append(pdl, PFDeviceInfo{ 233 | Addr: info.Name(), 234 | VFCount: 1, 235 | VFs: []VFDeviceInfo{ 236 | { 237 | DeviceID: deviceID, 238 | IOMMUGroup: iommuGroup, 239 | Addr: info.Name(), 240 | ResourceName: "gpu", 241 | }, 242 | }, 243 | }) 244 | } 245 | } 246 | } 247 | 248 | return nil 249 | }) 250 | return pdl, err 251 | } 252 | -------------------------------------------------------------------------------- /pkg/utils/topo_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Shanghai Biren Technology Co., Ltd. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | // 15 | package utils 16 | 17 | import ( 18 | "strings" 19 | "testing" 20 | 21 | "github.com/stretchr/testify/assert" 22 | ) 23 | 24 | func TestGrap(t *testing.T) { 25 | g := Graph{} 26 | a, b, c, d := Node{"a"}, Node{"b"}, Node{"c"}, Node{"d"} 27 | 28 | g.AddNode(&a) 29 | g.AddNode(&b) 30 | g.AddNode(&c) 31 | g.AddNode(&d) 32 | 33 | g.AddEdge(&a, &b, 1) 34 | g.AddEdge(&a, &c, 2) 35 | g.AddEdge(&a, &d, 2) 36 | g.AddEdge(&b, &c, 2) 37 | g.AddEdge(&b, &d, 1) 38 | g.AddEdge(&c, &d, 2) 39 | 40 | g.String() 41 | } 42 | 43 | func TestSubset(t *testing.T) { 44 | cases := []struct { 45 | s []string 46 | num int 47 | res [][]string 48 | }{ 49 | { 50 | []string{"a", "b", "c"}, 51 | 3, 52 | [][]string{ 53 | { 54 | "a", "b", "c", 55 | }, 56 | }, 57 | }, 58 | { 59 | []string{"a", "b", "c"}, 60 | 2, 61 | [][]string{ 62 | { 63 | "a", "b", 64 | }, 65 | { 66 | "a", "c", 67 | }, 68 | { 69 | "b", "c", 70 | }, 71 | }, 72 | }, 73 | } 74 | 75 | for _, v := range cases { 76 | assert.Equal(t, v.res, subset(v.s, v.num)) 77 | } 78 | } 79 | 80 | func TestBridgeVal(t *testing.T) { 81 | g1 := &Graph{ 82 | nodes: []*Node{ 83 | { 84 | "A", 85 | }, { 86 | "B", 87 | }, { 88 | "C", 89 | }, { 90 | "D", 91 | }, 92 | }, 93 | edges: map[string][]nodeWithVal{ 94 | "A": { 95 | { 96 | node: &Node{ 97 | Name: "A", 98 | }, 99 | val: 100, 100 | }, 101 | { 102 | node: &Node{ 103 | Name: "B", 104 | }, 105 | val: 10, 106 | }, 107 | { 108 | node: &Node{ 109 | Name: "C", 110 | }, 111 | val: 3, 112 | }, 113 | { 114 | node: &Node{ 115 | Name: "D", 116 | }, 117 | val: 11, 118 | }, 119 | }, 120 | "B": { 121 | { 122 | node: &Node{ 123 | Name: "A", 124 | }, 125 | val: 10, 126 | }, 127 | { 128 | node: &Node{ 129 | Name: "B", 130 | }, 131 | val: 100, 132 | }, 133 | { 134 | node: &Node{ 135 | Name: "C", 136 | }, 137 | val: 4, 138 | }, 139 | { 140 | node: &Node{ 141 | Name: "D", 142 | }, 143 | val: 11, 144 | }, 145 | }, 146 | "C": { 147 | { 148 | node: &Node{ 149 | Name: "A", 150 | }, 151 | val: 3, 152 | }, 153 | { 
154 | node: &Node{ 155 | Name: "B", 156 | }, 157 | val: 4, 158 | }, 159 | { 160 | node: &Node{ 161 | Name: "C", 162 | }, 163 | val: 100, 164 | }, 165 | { 166 | node: &Node{ 167 | Name: "D", 168 | }, 169 | val: 11, 170 | }, 171 | }, 172 | "D": { 173 | { 174 | node: &Node{ 175 | Name: "A", 176 | }, 177 | val: 11, 178 | }, 179 | { 180 | node: &Node{ 181 | Name: "B", 182 | }, 183 | val: 11, 184 | }, 185 | { 186 | node: &Node{ 187 | Name: "C", 188 | }, 189 | val: 11, 190 | }, 191 | { 192 | node: &Node{ 193 | Name: "D", 194 | }, 195 | val: 100, 196 | }, 197 | }, 198 | }, 199 | } 200 | 201 | assert.Equal(t, 17, g1.bridgeVal([]string{"A", "B", "C"})) 202 | assert.Equal(t, 11, g1.bridgeVal([]string{"A", "D"})) 203 | assert.Equal(t, 25, g1.bridgeVal([]string{"A", "C", "D"})) 204 | assert.Equal(t, 50, g1.bridgeVal([]string{"A", "B", "C", "D"})) 205 | 206 | g2 := &Graph{ 207 | nodes: []*Node{ 208 | { 209 | "pe-system-0", 210 | }, 211 | { 212 | "pe-system-1", 213 | }, 214 | }, 215 | edges: map[string][]nodeWithVal{ 216 | "pe-system-0": { 217 | { 218 | node: &Node{ 219 | Name: "pe-system-0", 220 | }, 221 | val: 100, 222 | }, 223 | { 224 | node: &Node{ 225 | Name: "pe-system-1", 226 | }, 227 | val: 5, 228 | }, 229 | }, 230 | "pe-system-1": { 231 | { 232 | node: &Node{ 233 | Name: "pe-system-0", 234 | }, 235 | val: 5, 236 | }, 237 | { 238 | node: &Node{ 239 | Name: "pe-system-1", 240 | }, 241 | val: 100, 242 | }, 243 | }, 244 | }, 245 | } 246 | 247 | assert.Equal(t, 1, g2.bridgeVal([]string{"pe-system-1"})) 248 | } 249 | 250 | func TestMaxValCount(t *testing.T) { 251 | a := ` 252 | A B C D 253 | A x single multiple multiple 254 | B single x multiple multiple 255 | C multiple multiple x multiple 256 | D multiple multiple multiple x 257 | ` 258 | var score int 259 | g1 := string2Graph(a) 260 | score, _ = g1.MaxValCount(2) 261 | assert.Equal(t, 10, score) 262 | 263 | score, _ = g1.MaxValCount(3) 264 | assert.Equal(t, 28, score) 265 | 266 | b := ` 267 | A B C D E F G H 268 | A x 
single multiple multiple node node node node 269 | B single x multiple multiple node node node node 270 | C multiple multiple x multiple node node node node 271 | D multiple multiple multiple x node node node node 272 | E node node node node x single multiple multiple 273 | F node node node node single x multiple multiple 274 | G node node node node multiple multiple x multiple 275 | H node node node node multiple multiple multiple x 276 | ` 277 | 278 | g2 := string2Graph(b) 279 | score, _ = g2.MaxValCount(3) 280 | assert.Equal(t, 28, score) 281 | 282 | score, _ = g2.MaxValCount(4) 283 | assert.Equal(t, 55, score) 284 | 285 | score, _ = g2.MaxValCount(5) 286 | assert.Equal(t, 79, score) 287 | 288 | score, _ = g2.MaxValCount(6) 289 | assert.Equal(t, 113, score) 290 | 291 | score, _ = g2.MaxValCount(7) 292 | assert.Equal(t, 155, score) 293 | 294 | score, _ = g2.MaxValCount(8) 295 | assert.Equal(t, 206, score) 296 | } 297 | 298 | func string2Graph(s string) *Graph { 299 | splitFn := func(c rune) bool { 300 | return c == ' ' 301 | } 302 | valMapping := map[string]int{ 303 | "x": 999, 304 | "multiple": 9, 305 | "single": 10, 306 | "node": 6, 307 | "n": 4, 308 | "p": 9, 309 | } 310 | g := &Graph{ 311 | nodes: []*Node{}, 312 | edges: map[string][]nodeWithVal{}, 313 | } 314 | lines := strings.Split(s, "\n") 315 | nodeNames := strings.FieldsFunc(lines[1], splitFn) 316 | for k, v := range lines { 317 | if len(v) == 0 { 318 | continue 319 | } 320 | if k == 1 { 321 | for _, name := range nodeNames { 322 | g.nodes = append(g.nodes, &Node{ 323 | Name: name, 324 | }) 325 | } 326 | continue 327 | } 328 | columes := strings.FieldsFunc(v, splitFn) 329 | var tempEdges = []nodeWithVal{} 330 | for k2, c := range columes { 331 | if k2 != 0 { 332 | tempEdges = append(tempEdges, nodeWithVal{ 333 | val: valMapping[c], 334 | node: &Node{ 335 | Name: nodeNames[k2-1], 336 | }, 337 | }) 338 | } 339 | } 340 | g.edges[nodeNames[k-2]] = tempEdges 341 | } 342 | return g 343 | } 344 | 345 | func 
TestScore(t *testing.T) { 346 | s := ` 347 | A B C D E F G H 348 | A x p p p n n p n 349 | B p x p p n n n n 350 | C p p x p p n n n 351 | D p p p x n p n n 352 | E n n p n x p p p 353 | F n n n p n x p p 354 | G p n n n p p x p 355 | H n p n n p p p x 356 | ` 357 | g1 := string2Graph(s) 358 | score, _ := g1.MaxValCount(4) 359 | assert.Equal(t, 54, score) 360 | 361 | num := g1.bridgeVal([]string{"A", "C", "D", "G"}) 362 | assert.Equal(t, 44, num) 363 | n2 := g1.bridgeVal([]string{"A", "B", "C", "D"}) 364 | assert.Equal(t, 54, n2) 365 | 366 | n3 := g1.bridgeVal([]string{"A", "B"}) 367 | assert.Equal(t, 9, n3) 368 | 369 | } 370 | 371 | func TestDeleteNodes(t *testing.T) { 372 | s := ` 373 | A B C D E F G H 374 | A x p p p n n p n 375 | B p x p p n n n n 376 | C p p x p p n n n 377 | D p p p x n p n n 378 | E n n p n x p p p 379 | F n n n p n x p p 380 | G p n n n p p x p 381 | H n p n n p p p x 382 | ` 383 | 384 | g1 := string2Graph(s) 385 | g2 := g1.DeleteNodes([]*Node{ 386 | {Name: "A"}, 387 | {Name: "F"}, 388 | {Name: "G"}, 389 | }) 390 | assert.Equal(t, 5, len(g2.nodes)) 391 | assert.Equal(t, 8, len(g1.nodes)) 392 | } 393 | 394 | func TestSelectNodes(t *testing.T) { 395 | s := ` 396 | A B C D E F G H 397 | A x p p p n n p n 398 | B p x p p n n n n 399 | C p p x p p n n n 400 | D p p p x n p n n 401 | E n n p n x p p p 402 | F n n n p n x p p 403 | G p n n n p p x p 404 | H n p n n p p p x 405 | ` 406 | g1 := string2Graph(s) 407 | g2 := g1.SelectNodes([]*Node{ 408 | {Name: "A"}, 409 | {Name: "B"}, 410 | {Name: "C"}, 411 | {Name: "D"}, 412 | }) 413 | assert.Equal(t, 4, len(g2.nodes)) 414 | 415 | g3 := g1.SelectNodes([]*Node{ 416 | {Name: "A"}, 417 | {Name: "E"}, 418 | {Name: "F"}, 419 | {Name: "D"}, 420 | }) 421 | 422 | assert.Equal(t, 4, len(g3.nodes)) 423 | } 424 | -------------------------------------------------------------------------------- /pkg/brgpu/plugin.go: -------------------------------------------------------------------------------- 1 | // Copyright 
2024 Shanghai Biren Technology Co., Ltd. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | package brgpu 15 | 16 | import ( 17 | "bytes" 18 | "context" 19 | "fmt" 20 | "os" 21 | "strconv" 22 | "strings" 23 | 24 | "github.com/BirenTechnology/go-brml/brml" 25 | "github.com/BirenTechnology/k8s-device-plugin/pkg/utils" 26 | 27 | log "github.com/sirupsen/logrus" 28 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 29 | ) 30 | 31 | var ( 32 | MountHostPath bool 33 | ) 34 | // allocatedDeviceEnv names the container environment variable that Allocate fills with the comma-joined IDs of the devices granted to the container. 35 | const ( 36 | allocatedDeviceEnv = "BR_PHY_CARDS" 37 | ) 38 | // healthCheck is a placeholder health probe: it performs no check and always returns true. 39 | func healthCheck() bool { 40 | return true 41 | } 42 | 43 | // int8Slice wraps an []int8 with more functions. 
44 | type int8Slice []int8 45 | 46 | // String turns a nil terminated int8Slice into a string 47 | func (s int8Slice) String() string { 48 | var b []byte 49 | for _, c := range s { 50 | if c == 0 { 51 | break 52 | } 53 | b = append(b, byte(c)) 54 | } 55 | return string(b) 56 | } 57 | 58 | // uintPtr returns a *uint from a uint32 59 | func uintPtr(c uint32) *uint { 60 | i := uint(c) 61 | return &i 62 | } 63 | 64 | type Plugin struct { 65 | PFDevices PFDeviceInfoList 66 | BRGPUs DevicesInfoList 67 | Runtime string 68 | Heartbeat chan bool 69 | resourceName string 70 | MountAllDevice bool 71 | MountDriDevice bool 72 | MountHostPath bool 73 | TopoGraph *utils.Graph 74 | } 75 | 76 | func (p *Plugin) gpuExist(id string) (bool, error) { 77 | for _, v := range p.BRGPUs.AllCardIDs() { 78 | if id == v { 79 | return true, nil 80 | } 81 | } 82 | return false, nil 83 | } 84 | 85 | func (p *Plugin) Start() error { 86 | return nil 87 | } 88 | 89 | func (p *Plugin) Stop() error { 90 | return nil 91 | } 92 | 93 | func (p *Plugin) GetDevicePluginOptions(ctx context.Context, e *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error) { 94 | options := &pluginapi.DevicePluginOptions{ 95 | GetPreferredAllocationAvailable: true, 96 | } 97 | log.Infof("Start Plugin With Options %v", options) 98 | return options, nil 99 | } 100 | 101 | func (p *Plugin) PreStartContainer(ctx context.Context, r *pluginapi.PreStartContainerRequest) (*pluginapi.PreStartContainerResponse, error) { 102 | return &pluginapi.PreStartContainerResponse{}, nil 103 | } 104 | 105 | func (p *Plugin) GetPreferredAllocation(ctx context.Context, r *pluginapi.PreferredAllocationRequest) (*pluginapi.PreferredAllocationResponse, error) { 106 | if p.TopoGraph == nil { 107 | return &pluginapi.PreferredAllocationResponse{}, nil 108 | } 109 | nodes := []*utils.Node{} 110 | for _, v := range r.ContainerRequests[0].AvailableDeviceIDs { 111 | nodes = append(nodes, &utils.Node{ 112 | Name: v, 113 | }) 114 | } 115 | 116 | newGraph := 
p.TopoGraph.SelectNodes(nodes) 117 | 118 | devices := Allocate(*newGraph, r.ContainerRequests[0].MustIncludeDeviceIDs, int(r.ContainerRequests[0].AllocationSize)) 119 | 120 | return &pluginapi.PreferredAllocationResponse{ 121 | ContainerResponses: []*pluginapi.ContainerPreferredAllocationResponse{ 122 | { 123 | DeviceIDs: devices, 124 | }, 125 | }, 126 | }, nil 127 | } 128 | 129 | func (d *Plugin) GetNumaNode(idx int) (bool, int, error) { 130 | dev, err := brml.HandleByIndex(idx) 131 | if err != nil { 132 | log.Errorf("parse device id index %v fail %v", idx, err) 133 | return false, 0, err 134 | } 135 | pcie, err := brml.DevicePciInfo(dev) 136 | if err != nil { 137 | log.Errorf("get device index %v %v pcie info err %v", idx, d, err) 138 | return false, 0, err 139 | } 140 | 141 | // Discard leading zeros. 142 | busID := strings.ToLower(strings.TrimPrefix(int8Slice(pcie.BusId[:]).String(), "0000")) 143 | b, err := os.ReadFile(fmt.Sprintf("/sys/bus/pci/devices/%s/numa_node", busID)) 144 | if err != nil { 145 | log.Errorf("read bus file id %v fail %v ", busID, err) 146 | return false, 0, nil 147 | } 148 | 149 | node, err := strconv.Atoi(string(bytes.TrimSpace(b))) 150 | if err != nil { 151 | return false, 0, fmt.Errorf("eror parsing value for NUMA node: %v", err) 152 | } 153 | 154 | if node < 0 { 155 | return false, 0, nil 156 | } 157 | 158 | return true, node, nil 159 | } 160 | 161 | func (p *Plugin) ListAndWatch(e *pluginapi.Empty, s pluginapi.DevicePlugin_ListAndWatchServer) error { 162 | devs := []*pluginapi.Device{} 163 | if p.Runtime == string(RuntimeRunc) { 164 | devIDs := []string{} 165 | for _, v := range p.BRGPUs { 166 | for _, ins := range v.Instances { 167 | dev := &pluginapi.Device{ 168 | ID: ins.CardID, 169 | Health: pluginapi.Healthy, 170 | } 171 | 172 | hasNum, numa, err := p.GetNumaNode(v.PhysicalNum) 173 | if err != nil { 174 | log.Errorf("get numa node %v err %v", v.PhysicalNum, err) 175 | } 176 | 177 | if hasNum { 178 | log.Infof("dev %v topology 
numa %v", v.PhysicalNum, numa) 179 | dev.Topology = &pluginapi.TopologyInfo{ 180 | Nodes: []*pluginapi.NUMANode{ 181 | { 182 | ID: int64(numa), 183 | }, 184 | }, 185 | } 186 | } 187 | devs = append(devs, dev) 188 | devIDs = append(devIDs, ins.CardID) 189 | } 190 | 191 | } 192 | tg, err := Device2Graph(devIDs) 193 | if err != nil { 194 | log.Errorf("Generate gpu %v topo error %v", devIDs, err) 195 | } 196 | p.TopoGraph = tg 197 | } 198 | if p.Runtime == string(RuntimeKata) { 199 | for _, v := range p.PFDevices { 200 | for _, vf := range v.VFs { 201 | dev := &pluginapi.Device{ 202 | ID: vf.deviceEndpoint(), 203 | Health: pluginapi.Healthy, 204 | } 205 | devs = append(devs, dev) 206 | } 207 | } 208 | } 209 | 210 | s.Send(&pluginapi.ListAndWatchResponse{Devices: devs}) 211 | 212 | select {} 213 | } 214 | 215 | func (p *Plugin) Allocate(ctx context.Context, r *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) { 216 | responses := pluginapi.AllocateResponse{} 217 | for _, req := range r.ContainerRequests { 218 | response := pluginapi.ContainerAllocateResponse{} 219 | if CdiFeature { 220 | for _, id := range req.DevicesIDs { 221 | response.CDIDevices = append(response.CDIDevices, &pluginapi.CDIDevice{ 222 | Name: fmt.Sprintf("%s/%s=%s", vendor, p.getResourceByCardId(ContainerRuntime(p.Runtime), id), id), 223 | }) 224 | } 225 | responses.ContainerResponses = append(responses.ContainerResponses, &response) 226 | continue 227 | } 228 | if p.MountHostPath { 229 | response.Mounts = append(response.Mounts, podMounts()...) 230 | } 231 | if p.Runtime == string(RuntimeRunc) { 232 | if p.MountDriDevice { 233 | driDevs, err := drmDevices() 234 | if err != nil { 235 | return nil, err 236 | } 237 | response.Devices = append(response.Devices, driDevs...) 
238 | } 239 | 240 | for _, id := range req.DevicesIDs { 241 | exist, err := p.gpuExist(id) 242 | if err != nil { 243 | return nil, err 244 | } 245 | if !exist { 246 | log.Errorf("Invalid allocation request for %s: unknown device %s", p.resourceName, id) 247 | return nil, fmt.Errorf("invalid allocation request for %s: unknown device %s", p.resourceName, id) 248 | } 249 | 250 | devpath := fmt.Sprintf("/dev/biren/%s", id) 251 | dev := pluginapi.DeviceSpec{ 252 | HostPath: devpath, 253 | ContainerPath: devpath, 254 | Permissions: "rw", 255 | } 256 | 257 | response.Devices = append(response.Devices, &dev) 258 | log.Infof("Allocate device %s successfully", id) 259 | } 260 | } 261 | if p.Runtime == string(RuntimeKata) { 262 | for _, id := range req.DevicesIDs { 263 | dev := pluginapi.DeviceSpec{ 264 | HostPath: id, 265 | ContainerPath: id, 266 | Permissions: "rw", 267 | } 268 | response.Devices = append(response.Devices, &dev) 269 | log.Infof("Allocate device %s successfully", id) 270 | } 271 | } 272 | response.Envs = map[string]string{ 273 | allocatedDeviceEnv: strings.Join(req.DevicesIDs, ","), 274 | } 275 | responses.ContainerResponses = append(responses.ContainerResponses, &response) 276 | } 277 | //log.Info(responses.ContainerResponses) 278 | return &responses, nil 279 | } 280 | 281 | func defaultMountPathFunc(h string) string { 282 | return strings.Replace(h, "/usr/", "/opt/birentech/", -1) 283 | } 284 | 285 | func podMounts() []*pluginapi.Mount { 286 | mounts := []*pluginapi.Mount{} 287 | brmlVersion, _ := brml.BRMLVersion() 288 | mountPaths := map[string]func(string) string{ 289 | "/usr/lib/libbiren-ml.so": defaultMountPathFunc, 290 | "/usr/lib/libbiren-ml.so.1": defaultMountPathFunc, 291 | "/usr/bin/brsmi": defaultMountPathFunc, 292 | fmt.Sprintf("/usr/lib/libbiren-ml.so.%s", brmlVersion): defaultMountPathFunc, 293 | } 294 | 295 | for h, c := range mountPaths { 296 | if _, err := os.Stat(defaultMountPathFunc(h)); err == nil { 297 | m := &pluginapi.Mount{ 298 | 
HostPath: h, 299 | ContainerPath: c(h), 300 | ReadOnly: true, 301 | } 302 | mounts = append(mounts, m) 303 | } 304 | } 305 | return mounts 306 | } 307 | 308 | func drmDevices() ([]*pluginapi.DeviceSpec, error) { 309 | res := []*pluginapi.DeviceSpec{} 310 | path := "/dev/dri/renderD128" 311 | res = append(res, &pluginapi.DeviceSpec{ 312 | ContainerPath: path, 313 | HostPath: path, 314 | Permissions: "rw", 315 | }) 316 | return res, nil 317 | } 318 | 319 | func allDevices() ([]*pluginapi.DeviceSpec, error) { 320 | res := []*pluginapi.DeviceSpec{} 321 | c, err := brml.DeviceCount() 322 | if err != nil { 323 | return nil, err 324 | } 325 | for i := 0; i < c; i++ { 326 | path := fmt.Sprintf("/dev/biren/card_%d", i) 327 | res = append(res, &pluginapi.DeviceSpec{ 328 | ContainerPath: path, 329 | HostPath: path, 330 | Permissions: "rw", 331 | }) 332 | } 333 | return res, nil 334 | } 335 | 336 | func (p *Plugin) getResourceByCardId(runtime ContainerRuntime, id string) string { 337 | switch runtime { 338 | case RuntimeRunc: 339 | return p.BRGPUs.getResourceByCardId(id) 340 | case RuntimeKata: 341 | return p.PFDevices.getResourceByCardId(id) 342 | } 343 | return "gpu" 344 | } 345 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 
15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. --------------------------------------------------------------------------------