├── example ├── dex登录界面.png ├── kubeflow-dashboardcenter.png └── kitab-random-example.yaml ├── manifest1.3 ├── 005-istio-1-9-0-istio-namespace-base.yaml ├── 014-kubeflow-namespace-kubeflow-namespace-base.yaml ├── 033-user-namespace-user-namespace-base.yaml ├── 016-istio-1-9-0-kubeflow-istio-resources-base.yaml ├── 001-cert-manager-cert-manager-kube-system-resources-base.yaml ├── 007-oidc-authservice-oidc-authservice-base.yaml ├── 008-dex-overlays-istio.yaml ├── 025-volumes-web-app-overlays-istio.yaml ├── 027-tensorboard-overlays-istio.yaml ├── 015-kubeflow-roles-kubeflow-roles-base.yaml ├── 031-mxnet-job-overlays-kubeflow.yaml ├── 029-pytorch-job-overlays-kubeflow.yaml ├── 028-tf-training-overlays-kubeflow.yaml ├── 021-admission-webhook-overlays-cert-manager.yaml ├── 026-tensorboard-overlays-kubeflow.yaml ├── 023-jupyter-overlays-kubeflow.yaml ├── 020-centraldashboard-overlays-istio.yaml ├── 013-istio-1-9-0-cluster-local-gateway-base.yaml ├── 030-mpi-job-overlays-kubeflow.yaml ├── 009-knative-knative-serving-crds-base.yaml └── 022-jupyter-overlays-istio.yaml ├── kind └── kind-config.yaml ├── pre-install.py ├── patch ├── volumes-web-app.yaml ├── tensorboard.yaml ├── envoy-filter.yaml ├── data.yaml ├── jupyter-web-app.yaml ├── auth.yaml ├── workflow-controller.yaml ├── kfserving.yaml ├── cluster-local-gateway.yaml └── istio-ingressgateway.yaml ├── database-patch └── mysql-persistent-storage.yaml ├── README-dev.md ├── docs ├── problems.md └── introduction.md ├── replaceVolumes.py ├── install.py ├── .gitignore ├── replace.py ├── local-path └── local-path-storage.yaml └── README.md /example/dex登录界面.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shikanon/kubeflow-manifests/HEAD/example/dex登录界面.png -------------------------------------------------------------------------------- /example/kubeflow-dashboardcenter.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/shikanon/kubeflow-manifests/HEAD/example/kubeflow-dashboardcenter.png -------------------------------------------------------------------------------- /manifest1.3/005-istio-1-9-0-istio-namespace-base.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | labels: 5 | istio-injection: disabled 6 | istio-operator-managed: Reconcile 7 | name: istio-system 8 | -------------------------------------------------------------------------------- /manifest1.3/014-kubeflow-namespace-kubeflow-namespace-base.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | labels: 5 | control-plane: kubeflow 6 | istio-injection: enabled 7 | katib-metricscollector-injection: enabled 8 | name: kubeflow 9 | -------------------------------------------------------------------------------- /manifest1.3/033-user-namespace-user-namespace-base.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | profile-name: kubeflow-user-example-com 4 | user: user@example.com 5 | kind: ConfigMap 6 | metadata: 7 | name: default-install-config-9h2h2b6hbk 8 | --- 9 | apiVersion: kubeflow.org/v1beta1 10 | kind: Profile 11 | metadata: 12 | name: kubeflow-user-example-com 13 | spec: 14 | owner: 15 | kind: User 16 | name: user@example.com 17 | -------------------------------------------------------------------------------- /kind/kind-config.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kind.x-k8s.io/v1alpha4 2 | kind: Cluster 3 | nodes: 4 | - role: control-plane 5 | extraPortMappings: 6 | - containerPort: 30000 7 | hostPort: 30000 8 | listenAddress: "0.0.0.0" # Optional, defaults to "0.0.0.0" 9 | protocol: tcp # Optional, defaults to tcp 10 | kubeadmConfigPatches: 11 | - | 12 | kind: 
# Build every kustomize resource listed in kustomization.yaml into one
# numbered manifest under file/, rewriting the image references of each
# manifest with replace.replaceImage before it is written.
mainfile = "kustomization.yaml"

# NOTE(review): CLoader is PyYAML's full loader and can construct arbitrary
# Python objects from tagged YAML. kustomization.yaml is a local project file
# here, but CSafeLoader would be the safer choice -- confirm before switching.
with open(mainfile, "r") as fr:
    kustomizefile = yaml.load(fr, Loader=CLoader)

# The output directory is written to below but never created elsewhere in
# this script, so make sure it exists before the first manifest is saved.
os.makedirs("file", exist_ok=True)

for n, path in enumerate(kustomizefile['resources'], start=1):
    segments = path.split("/")
    # File name = third path component + the last two path components.
    filename = "-".join([segments[2]] + segments[-2:])

    # Argument list instead of a shell string: no quoting problems if a
    # resource path ever contains spaces or shell metacharacters.
    cmd = ["kustomize", "build", "--load_restrictor=none", path]
    cmdline = " ".join(cmd)
    print(cmdline)
    result = subprocess.run(cmd, stdout=subprocess.PIPE)
    out = result.stdout

    # stdout is bytes, so the former `out == ""` check could never be true and
    # a failed build silently produced an empty manifest. Fail on a non-zero
    # exit status or on empty output instead (same exception type as before).
    if result.returncode != 0 or not out:
        raise ValueError(cmdline)

    # Zero-padded sequence number keeps the manifests in kustomization order
    # when they are later applied in sorted file-name order.
    filename = str(n).zfill(3) + "-" + filename + ".yaml"
    out = replaceImage(out.decode("utf-8"))
    with open("file/" + filename, "w", encoding="utf-8") as fw:
        fw.write(out)
| value: /volumes 28 | - name: USERID_HEADER 29 | value: kubeflow-userid 30 | - name: USERID_PREFIX 31 | value: "" 32 | - name: APP_SECURE_COOKIES 33 | value: "False" 34 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/notebooks-volumes-web-app:v1.3.0-rc.0-fe235 35 | name: volumes-web-app 36 | ports: 37 | - containerPort: 5000 38 | serviceAccountName: volumes-web-app-service-account 39 | -------------------------------------------------------------------------------- /patch/tensorboard.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | app: tensorboards-web-app 6 | kustomize.component: tensorboards-web-app 7 | name: tensorboards-web-app-deployment 8 | namespace: kubeflow 9 | spec: 10 | replicas: 1 11 | selector: 12 | matchLabels: 13 | app: tensorboards-web-app 14 | kustomize.component: tensorboards-web-app 15 | template: 16 | metadata: 17 | annotations: 18 | sidecar.istio.io/inject: "false" 19 | labels: 20 | app: tensorboards-web-app 21 | kustomize.component: tensorboards-web-app 22 | spec: 23 | containers: 24 | - env: 25 | - name: APP_PREFIX 26 | value: /tensorboards 27 | - name: USERID_HEADER 28 | value: kubeflow-userid 29 | - name: USERID_PREFIX 30 | value: "" 31 | - name: APP_SECURE_COOKIES 32 | value: "False" 33 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/notebooks-tensorboards-web-app:v1.3.0-rc.0-258dd 34 | name: tensorboards-web-app 35 | ports: 36 | - containerPort: 5000 37 | serviceAccountName: tensorboards-web-app-service-account -------------------------------------------------------------------------------- /database-patch/mysql-persistent-storage.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | app: mysql 6 | application-crd-id: kubeflow-pipelines 7 | name: mysql 8 | namespace: kubeflow 9 | spec: 10 | selector: 11 | 
matchLabels: 12 | app: mysql 13 | application-crd-id: kubeflow-pipelines 14 | strategy: 15 | type: Recreate 16 | template: 17 | metadata: 18 | labels: 19 | app: mysql 20 | application-crd-id: kubeflow-pipelines 21 | spec: 22 | containers: 23 | - args: 24 | - --ignore-db-dir=lost+found 25 | - --datadir 26 | - /var/lib/mysql 27 | env: 28 | - name: MYSQL_ALLOW_EMPTY_PASSWORD 29 | value: "true" 30 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/ml-pipeline-mysql:5.7-f8fcd 31 | name: mysql 32 | ports: 33 | - containerPort: 3306 34 | name: mysql 35 | resources: 36 | requests: 37 | cpu: 100m 38 | memory: 800Mi 39 | volumeMounts: 40 | - mountPath: /var/lib/mysql 41 | name: mysql-persistent-storage 42 | serviceAccountName: mysql 43 | volumes: 44 | - name: mysql-persistent-storage 45 | emptyDir: 46 | {} 47 | -------------------------------------------------------------------------------- /README-dev.md: -------------------------------------------------------------------------------- 1 | # 开发文档 2 | 3 | 这里主要介绍如何构建这个项目。主要分为几步:替换镜像,重新打标签上传到私有镜像仓库,生成安装文档。 4 | 5 | 运行命令: 6 | ```bash 7 | python pre-install.py 8 | python install.py 9 | ``` 10 | 11 | ## 实现原理 12 | 13 | ### 预处理 14 | 15 | 通过 `kustomize build --load_restrictor=none` 生成镜像目标yaml 文件 16 | 17 | ### 替换镜像 18 | 19 | 替换镜像主要是 `replace.py`实现,主要从 deployment, statefulset 找到镜像字段,重新打标签替换成新的镜像仓库地址,push上传到私有镜像仓库 20 | 21 | ### 安装文件 22 | 23 | 运行`python install.py` 安装文件。 24 | 25 | ## PATCH文件 26 | 27 | patch文件主要针对官方yaml安装使用过程中的一些问题打的补丁 28 | 29 | ### 鉴权问题 30 | `auth.yaml` 主要用于创建用户自己的账号,用户名`admin@example.com`,密码`password` 31 | 32 | ### istio报istio-token找不到 33 | 34 | 主要是由于istio的JWT策略用到第三方鉴权,有些k8s版本不支持,可以将istio中的 `third-party-jwt` 改成 `first-party-jwt`,详细见`cluster-local-gateway.yaml`,`istio-ingressgateway.yaml`,`istiod.yaml`。 35 | 36 | ### 创建jupyter的时候返回 Could not find CSRF cookie XSRF-TOKEN 错误 37 | 38 | 主要是由于jupyter-web-app的安全验证策略导致的,详细见https://github.com/kubeflow/kubeflow/issues/5803 39 | 
解决方案是在环境变量中加上`APP_SECURE_COOKIES=false`,修改见`jupyter-web-app.yaml` 40 | 41 | ### 解决docker.sock not found 问题 42 | 43 | 因为 kind 使用的 containerd 作为容器运行时,而 argo workflow 默认 Workflow Executors使用的是 docker ,它会尝试挂载宿主机的 `docker.sock`,如果不存在就会报错,这里尝试将`workflow-controller-configmap`的`containerRuntimeExecutor` 改为 `k8sapi` 更换 Workflow Executors 来解决。详细见:https://argoproj.github.io/argo-workflows/workflow-executors/ -------------------------------------------------------------------------------- /docs/problems.md: -------------------------------------------------------------------------------- 1 | # 问题汇总 2 | 3 | 1. 没有 namespace, Experiments 报错。 4 | 5 | 这种是 `profile` 设置问题。 6 | 7 | 由于官方使用的是`user@example.com`创建命名空间`kubeflow-user-example-com`,这里在`patch`改成了`admin@example.com` 8 | ,当命名空间已经创建后,就会报错,一般我们查看 profiles-deployment 日志,会看到: 9 | ```bash 10 | 2021-05-19T06:41:43.069Z INFO controllers.Profile namespace already exist, but not owned by profile creator admin@example.com {"profile": "/kubeflow-user-example-com"} 11 | 2021-05-19T06:41:43.077Z DEBUG controller Successfully Reconciled {"reconcilerGroup": "kubeflow.org", "reconcilerKind": "Profile", "controller": "profile", "name": "kubeflow-user-example-com", "namespace": ""} 12 | ``` 13 | 这时候只需要删除`profile`命名空间`kubeflow-user-example-com`,重新生成`profile`即可。 14 | ```bash 15 | kubectl delete -f patch/auth.yaml 16 | kubectl delete ns kubeflow-user-example-com 17 | kubectl apply -f patch/auth.yaml 18 | ``` 19 | 20 | 2. 
def findVolumeDeployment(content):
    """Yield emptyDir-backed copies of Deployments that mount PVC volumes.

    ``content`` is a multi-document YAML string whose documents are separated
    by ``---`` lines. For every Deployment that has at least one volume backed
    by a ``persistentVolumeClaim``, each such volume is switched to an empty
    ``emptyDir`` and one ``(volume_name, manifest_yaml)`` pair is yielded per
    converted volume. ``manifest_yaml`` is the re-serialised, *modified*
    Deployment (previously the untouched source text was yielded, so the
    rewrite never reached the patch file).
    """
    for cr in content.split("---\n"):
        # Skip blank documents (was `len(cr) < 0`, which can never be true).
        if not cr.strip():
            continue
        # NOTE(review): yaml.CLoader is the full loader and can construct
        # arbitrary Python objects from tagged YAML; the input is expected to
        # be kustomize output, consider yaml.CSafeLoader -- confirm.
        obj = yaml.load(cr, yaml.CLoader)
        if not isinstance(obj, dict) or obj.get("kind") != "Deployment":
            continue
        specs = obj["spec"]["template"]["spec"]
        converted = []
        for v in specs.get("volumes") or []:
            if "persistentVolumeClaim" in v:
                del v["persistentVolumeClaim"]
                v["emptyDir"] = dict()
                converted.append(v["name"])
        if not converted:
            continue
        # Serialise once, after every PVC volume of this Deployment has been
        # converted, so each yielded manifest is complete.
        patched = yaml.dump(obj, default_flow_style=False)
        for name in converted:
            yield name, patched


def savePatchPath(content, filename):
    """Write ``content`` to ``./patch/<filename>.yaml`` as UTF-8 text."""
    path = "./patch/" + filename + ".yaml"
    # Explicit encoding: the manifests are read as UTF-8, so write them the
    # same way instead of relying on the platform default.
    with open(path, "w", encoding="utf-8") as fw:
        fw.write(content)


if __name__ == "__main__":
    # Scan every generated manifest under ./file and save one patch per
    # PVC-backed volume, named after the volume.
    for root, _dirs, files in os.walk("./file"):
        for f in files:
            findfile = root + "/" + f
            with open(findfile, "r", encoding="utf-8") as fr:
                for name, cr in findVolumeDeployment(fr.read()):
                    print(name)
                    print(cr)
                    savePatchPath(cr, name)
istio-system 7 | spec: 8 | configPatches: 9 | - applyTo: HTTP_FILTER 10 | listener: 11 | filterChain: 12 | filter: 13 | name: envoy.http_connection_manager 14 | subFilter: 15 | name: "" 16 | match: 17 | context: GATEWAY 18 | patch: 19 | operation: INSERT_BEFORE 20 | value: 21 | name: envoy.filters.http.ext_authz 22 | typed_config: 23 | '@type': type.googleapis.com/envoy.extensions.filters.http.ext_authz.v3.ExtAuthz 24 | http_service: 25 | authorization_request: 26 | allowed_headers: 27 | patterns: 28 | - exact: authorization 29 | - exact: cookie 30 | - exact: x-auth-token 31 | authorization_response: 32 | allowed_upstream_headers: 33 | patterns: 34 | - exact: kubeflow-userid 35 | server_uri: 36 | cluster: outbound|8080||authservice.istio-system.svc.cluster.local 37 | timeout: 10s 38 | uri: http://authservice.istio-system.svc.cluster.local 39 | workloadSelector: 40 | labels: 41 | istio: ingressgateway -------------------------------------------------------------------------------- /patch/data.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | app: minio 6 | application-crd-id: kubeflow-pipelines 7 | name: minio 8 | namespace: kubeflow 9 | spec: 10 | selector: 11 | matchLabels: 12 | app: minio 13 | application-crd-id: kubeflow-pipelines 14 | strategy: 15 | type: Recreate 16 | template: 17 | metadata: 18 | labels: 19 | app: minio 20 | application-crd-id: kubeflow-pipelines 21 | spec: 22 | containers: 23 | - args: 24 | - server 25 | - /data 26 | env: 27 | - name: MINIO_ACCESS_KEY 28 | valueFrom: 29 | secretKeyRef: 30 | key: accesskey 31 | name: mlpipeline-minio-artifact 32 | - name: MINIO_SECRET_KEY 33 | valueFrom: 34 | secretKeyRef: 35 | key: secretkey 36 | name: mlpipeline-minio-artifact 37 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/ml-pipeline-minio:RELEASE.2019-08-14T20-37-41Z-license-compliance-290a7 38 | name: minio 39 | ports: 40 | - 
def _runKubectl(action, installfile):
    """Run ``kubectl <action> -f <installfile>``, echo its output, return the exit code."""
    # Argument list (no shell): file names with spaces or non-ASCII characters
    # are passed through unchanged and cannot be interpreted by a shell.
    cmd = ["kubectl", action, "-f", installfile]
    print(" ".join(cmd))
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    # Decode so the log shows readable text instead of a bytes repr.
    print(result.stdout.decode("utf-8", errors="replace"))
    if result.returncode != 0:
        # Best effort by design: report the failure and keep going.
        print("WARNING: '{cmd}' exited with code {code}".format(
            cmd=" ".join(cmd), code=result.returncode))
    return result.returncode


def install(path, interval=10):
    """Apply every manifest found under ``path`` with ``kubectl apply``.

    Files of each directory are applied in sorted file-name order.

    :param path: directory that is walked recursively for manifests.
    :param interval: seconds to sleep after each apply (default 10, the
        previously hard-coded value).
    """
    # `_dirs` instead of `path`: the loop no longer shadows the parameter.
    for root, _dirs, files in os.walk(path):
        for f in sorted(files):
            _runKubectl("apply", root + "/" + f)
            time.sleep(interval)


def patchInstall(path):
    """Delete and re-apply every manifest found under ``path``.

    Some patches only take effect after the pod restarts, so each manifest is
    deleted first and then applied again. A failing delete (for example the
    resource does not exist yet) is reported and does not stop the run.
    """
    print("start to patch...")
    for root, _dirs, files in os.walk(path):
        for f in sorted(files):
            installfile = root + "/" + f
            _runKubectl("delete", installfile)
            _runKubectl("apply", installfile)


if __name__ == "__main__":
    # Install the base manifests
    install("./manifest1.3")

    # Install the patches
    patchInstall("./patch")
| name: jupyter-web-app-deployment 9 | namespace: kubeflow 10 | spec: 11 | replicas: 1 12 | selector: 13 | matchLabels: 14 | app: jupyter-web-app 15 | kustomize.component: jupyter-web-app 16 | template: 17 | metadata: 18 | annotations: 19 | sidecar.istio.io/inject: "false" 20 | labels: 21 | app: jupyter-web-app 22 | kustomize.component: jupyter-web-app 23 | spec: 24 | containers: 25 | - env: 26 | - name: APP_PREFIX 27 | value: /jupyter 28 | - name: UI 29 | value: default 30 | - name: USERID_HEADER 31 | value: kubeflow-userid 32 | - name: USERID_PREFIX 33 | value: "" 34 | - name: APP_DISABLE_AUTH 35 | value: "True" 36 | # This gets rid of erro: Could not find CSRF cookie XSRF-TOKEN in the request 37 | - name: APP_SECURE_COOKIES 38 | value: "False" 39 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/notebooks-jupyter-web-app:v1.3.0-rc.0-70edb 40 | name: jupyter-web-app 41 | ports: 42 | - containerPort: 5000 43 | volumeMounts: 44 | - mountPath: /etc/config 45 | name: config-volume 46 | serviceAccountName: jupyter-web-app-service-account 47 | volumes: 48 | - configMap: 49 | name: jupyter-web-app-config-tkhtgh5mcm 50 | name: config-volume -------------------------------------------------------------------------------- /manifest1.3/016-istio-1-9-0-kubeflow-istio-resources-base.yaml: -------------------------------------------------------------------------------- 1 | aggregationRule: 2 | clusterRoleSelectors: 3 | - matchLabels: 4 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-istio-admin: "true" 5 | apiVersion: rbac.authorization.k8s.io/v1 6 | kind: ClusterRole 7 | metadata: 8 | labels: 9 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true" 10 | name: kubeflow-istio-admin 11 | rules: [] 12 | --- 13 | apiVersion: rbac.authorization.k8s.io/v1 14 | kind: ClusterRole 15 | metadata: 16 | labels: 17 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true" 18 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-istio-admin: "true" 
19 | name: kubeflow-istio-edit 20 | rules: 21 | - apiGroups: 22 | - istio.io 23 | - networking.istio.io 24 | resources: 25 | - '*' 26 | verbs: 27 | - get 28 | - list 29 | - watch 30 | - create 31 | - delete 32 | - deletecollection 33 | - patch 34 | - update 35 | --- 36 | apiVersion: rbac.authorization.k8s.io/v1 37 | kind: ClusterRole 38 | metadata: 39 | labels: 40 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true" 41 | name: kubeflow-istio-view 42 | rules: 43 | - apiGroups: 44 | - istio.io 45 | - networking.istio.io 46 | resources: 47 | - '*' 48 | verbs: 49 | - get 50 | - list 51 | - watch 52 | --- 53 | apiVersion: networking.istio.io/v1alpha3 54 | kind: Gateway 55 | metadata: 56 | name: kubeflow-gateway 57 | namespace: kubeflow 58 | spec: 59 | selector: 60 | istio: ingressgateway 61 | servers: 62 | - hosts: 63 | - '*' 64 | port: 65 | name: http 66 | number: 80 67 | protocol: HTTP 68 | -------------------------------------------------------------------------------- /example/kitab-random-example.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: "kubeflow.org/v1beta1" 2 | kind: Experiment 3 | metadata: 4 | namespace: kubeflow-user-example-com 5 | name: random-example 6 | spec: 7 | objective: 8 | type: maximize 9 | goal: 0.99 10 | objectiveMetricName: Validation-accuracy 11 | additionalMetricNames: 12 | - Train-accuracy 13 | algorithm: 14 | algorithmName: random 15 | parallelTrialCount: 3 16 | maxTrialCount: 12 17 | maxFailedTrialCount: 3 18 | parameters: 19 | - name: lr 20 | parameterType: double 21 | feasibleSpace: 22 | min: "0.01" 23 | max: "0.03" 24 | - name: num-layers 25 | parameterType: int 26 | feasibleSpace: 27 | min: "2" 28 | max: "5" 29 | - name: optimizer 30 | parameterType: categorical 31 | feasibleSpace: 32 | list: 33 | - sgd 34 | - adam 35 | - ftrl 36 | trialTemplate: 37 | primaryContainerName: training-container 38 | trialParameters: 39 | - name: learningRate 40 | description: 
Learning rate for the training model 41 | reference: lr 42 | - name: numberLayers 43 | description: Number of training model layers 44 | reference: num-layers 45 | - name: optimizer 46 | description: Training model optimizer (sdg, adam or ftrl) 47 | reference: optimizer 48 | trialSpec: 49 | apiVersion: batch/v1 50 | kind: Job 51 | spec: 52 | template: 53 | spec: 54 | containers: 55 | - name: training-container 56 | image: docker.io/kubeflowkatib/mxnet-mnist:v1beta1-45c5727 57 | command: 58 | - "python3" 59 | - "/opt/mxnet-mnist/mnist.py" 60 | - "--batch-size=64" 61 | - "--lr=${trialParameters.learningRate}" 62 | - "--num-layers=${trialParameters.numberLayers}" 63 | - "--optimizer=${trialParameters.optimizer}" 64 | restartPolicy: Never -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /patch/auth.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | config.yaml: | 4 | issuer: http://dex.auth.svc.cluster.local:5556/dex 5 | storage: 6 | type: kubernetes 7 | config: 8 | inCluster: true 9 | web: 10 | http: 0.0.0.0:5556 11 | logger: 12 | level: "debug" 13 | format: text 14 | oauth2: 15 | skipApprovalScreen: true 16 | enablePasswordDB: true 17 | staticPasswords: 18 | - email: "admin@example.com" 19 | # hash string is "password" 20 | hash: "$2y$12$X.oNHMsIfRSq35eRfiTYV.dPIYlWyPDRRc1.JVp0f3c.YqqJNW4uK" 21 | username: "admin" 22 | userID: "08a8684b-db88-4b73-90a9-3cd1661f5466" 23 | staticClients: 24 | # https://github.com/dexidp/dex/pull/1664 25 | - idEnv: OIDC_CLIENT_ID 26 | redirectURIs: ["/login/oidc"] 27 | name: 'Dex Login Application' 28 | secretEnv: OIDC_CLIENT_SECRET 29 | kind: ConfigMap 30 | metadata: 31 | name: dex 32 | namespace: auth 33 | --- 34 | apiVersion: apps/v1 35 | kind: Deployment 36 | metadata: 37 | labels: 38 | app: dex 39 | name: dex 40 | namespace: auth 41 | spec: 42 | replicas: 1 43 | selector: 44 | matchLabels: 45 | app: dex 46 | template: 47 | metadata: 48 | labels: 49 | app: dex 50 | spec: 51 | containers: 52 | - command: 53 | - dex 54 | - serve 55 | - /etc/dex/cfg/config.yaml 56 | envFrom: 57 | - 
secretRef: 58 | name: dex-oidc-client 59 | env: 60 | - name: KUBERNETES_POD_NAMESPACE 61 | valueFrom: 62 | fieldRef: 63 | fieldPath: metadata.namespace 64 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/dexidp-dex:v2.24.0-bb0b9 65 | name: dex 66 | ports: 67 | - containerPort: 5556 68 | name: http 69 | volumeMounts: 70 | - mountPath: /etc/dex/cfg 71 | name: config 72 | serviceAccountName: dex 73 | volumes: 74 | - configMap: 75 | items: 76 | - key: config.yaml 77 | path: config.yaml 78 | name: dex 79 | name: config 80 | --- 81 | apiVersion: v1 82 | data: 83 | profile-name: kubeflow-user-example-com 84 | user: admin@example.com 85 | kind: ConfigMap 86 | metadata: 87 | name: default-install-config-9h2h2b6hbk 88 | --- 89 | apiVersion: kubeflow.org/v1beta1 90 | kind: Profile 91 | metadata: 92 | name: kubeflow-user-example-com 93 | spec: 94 | owner: 95 | kind: User 96 | name: admin@example.com 97 | -------------------------------------------------------------------------------- /manifest1.3/001-cert-manager-cert-manager-kube-system-resources-base.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1beta1 2 | kind: Role 3 | metadata: 4 | labels: 5 | app: cainjector 6 | kustomize.component: cert-manager 7 | name: cert-manager-cainjector:leaderelection 8 | namespace: kube-system 9 | rules: 10 | - apiGroups: 11 | - "" 12 | resources: 13 | - configmaps 14 | verbs: 15 | - get 16 | - create 17 | - update 18 | - patch 19 | --- 20 | apiVersion: rbac.authorization.k8s.io/v1beta1 21 | kind: Role 22 | metadata: 23 | labels: 24 | app: cert-manager 25 | kustomize.component: cert-manager 26 | name: cert-manager:leaderelection 27 | namespace: kube-system 28 | rules: 29 | - apiGroups: 30 | - "" 31 | resources: 32 | - configmaps 33 | verbs: 34 | - get 35 | - create 36 | - update 37 | - patch 38 | --- 39 | apiVersion: rbac.authorization.k8s.io/v1beta1 40 | kind: RoleBinding 41 | metadata: 42 | labels: 43 | 
IMAGE_PREFIX = "registry.cn-shenzhen.aliyuncs.com/tensorbytes/"


def _buildImageName(image, imagehash, prefix):
    """Derive the mirrored image reference for ``image``.

    The result is ``<prefix><org>-<name>:<version>-<imagehash>`` where
    ``org`` is the path component in front of the image name (if any) and
    ``version`` is the tag, ``latest`` when no tag is given, or ``special``
    when the image is pinned by digest.
    """
    parts = image.split("/")
    name = parts[-1]
    # Tag / digest are parsed on the last path component only, so a registry
    # port such as "localhost:5000/app" is no longer mistaken for a tag.
    if "@" in name:
        # Digest reference; drop an optional tag that precedes the digest so
        # the new reference stays valid.
        name = name.split("@")[0].split(":")[0]
        version = "special"
    elif ":" in name:
        name, version = name.rsplit(":", 1)
    else:
        version = "latest"
    if len(parts) > 1:
        # ":" is not allowed in a repository name (registry host with port).
        name = parts[-2].replace(":", "-") + "-" + name
    return prefix + name + ":" + version + "-" + imagehash


def getNewImage(image, prefix):
    """Return the new reference for ``image`` under ``prefix``.

    The first five hex characters of the local image Id (from
    ``docker inspect``) are appended to the tag, so the image must already be
    present locally.

    :raises ValueError: if ``docker inspect`` fails or prints invalid JSON.
    """
    cmd = ["docker", "inspect", image]
    print(" ".join(cmd))
    result = subprocess.run(cmd, stdout=subprocess.PIPE)
    if result.returncode != 0:
        raise ValueError("docker inspect failed for image: " + image)
    info = json.loads(result.stdout)[0]
    imagehash = info["Id"].split(":")[-1][:5]
    return _buildImageName(image, imagehash, prefix)


def findDeploymentImage(content):
    """Map every container image of the Deployments/StatefulSets in ``content``
    to its mirrored name.

    ``content`` is a multi-document YAML string. Each image is pulled with
    ``docker pull`` so that ``getNewImage`` can inspect it.

    :return: dict ``{original_image: new_image}``.
    """
    images = dict()
    for cr in content.split("---\n"):
        # Skip blank documents (was `len(cr) < 0`, which can never be true).
        if not cr.strip():
            continue
        # NOTE(review): yaml.CLoader is the full loader and can construct
        # arbitrary Python objects from tagged YAML; the input is expected to
        # be kustomize output, consider yaml.CSafeLoader -- confirm.
        obj = yaml.load(cr, yaml.CLoader)
        if not isinstance(obj, dict) or obj.get("kind") not in ("Deployment", "StatefulSet"):
            continue
        for c in obj["spec"]["template"]["spec"]["containers"]:
            obj_image = c.get("image")
            # Nothing to do for containers without an image or for an image
            # that was already pulled and mapped.
            if not obj_image or obj_image in images:
                continue
            subprocess.run(["docker", "pull", obj_image])
            images[obj_image] = getNewImage(obj_image, IMAGE_PREFIX)
    return images


def replaceImage(content):
    """Rewrite all Deployment/StatefulSet images in ``content`` to the private
    registry, tag and push them, and return the rewritten manifest text."""
    import re

    imageMap = findDeploymentImage(content)
    if imageMap:
        # Single pass with the longest names first: an image that is a prefix
        # of another one (nginx:1 / nginx:1.2) can no longer corrupt the longer
        # reference, and already-replaced text is never rewritten again.
        ordered = sorted(imageMap, key=len, reverse=True)
        pattern = re.compile("|".join(re.escape(image) for image in ordered))
        content = pattern.sub(lambda m: imageMap[m.group(0)], content)
    logAndPushImage(imageMap)
    return content


def logAndPushImage(imageMap):
    """Tag and push every image of ``imageMap`` and append the mapping to
    ``images.log`` (one ``old<TAB>new`` line per image).

    The images are expected to be present locally already (they are pulled in
    ``findDeploymentImage``). A failing tag/push is reported and the run
    continues.
    """
    with open("images.log", "a", encoding="utf-8") as fw:
        for image in imageMap:
            newimage = imageMap[image]
            # tag image
            tagged = subprocess.run(["docker", "tag", image, newimage])
            # push new image
            print("docker push {image}".format(image=newimage))
            pushed = subprocess.run(["docker", "push", newimage])
            if tagged.returncode != 0 or pushed.returncode != 0:
                print("WARNING: failed to tag/push " + newimage)
            # log
            fw.write(image + "\t" + newimage + "\n")


if __name__ == "__main__":
    with open("./file/023-jupyter-overlays-kubeflow.yaml") as fr:
        images = replaceImage(fr.read())
        # print(images)
app: local-path-provisioner 57 | spec: 58 | serviceAccountName: local-path-provisioner-service-account 59 | containers: 60 | - name: local-path-provisioner 61 | image: rancher/local-path-provisioner:v0.0.11 62 | imagePullPolicy: IfNotPresent 63 | command: 64 | - local-path-provisioner 65 | - --debug 66 | - start 67 | - --config 68 | - /etc/config/config.json 69 | volumeMounts: 70 | - name: config-volume 71 | mountPath: /etc/config/ 72 | env: 73 | - name: POD_NAMESPACE 74 | valueFrom: 75 | fieldRef: 76 | fieldPath: metadata.namespace 77 | volumes: 78 | - name: config-volume 79 | configMap: 80 | name: local-path-config 81 | --- 82 | apiVersion: storage.k8s.io/v1 83 | kind: StorageClass 84 | metadata: 85 | name: local-path 86 | annotations: #添加为默认StorageClass 87 | storageclass.beta.kubernetes.io/is-default-class: "true" 88 | provisioner: rancher.io/local-path 89 | volumeBindingMode: WaitForFirstConsumer 90 | reclaimPolicy: Delete 91 | --- 92 | kind: ConfigMap 93 | apiVersion: v1 94 | metadata: 95 | name: local-path-config 96 | namespace: local-path-storage 97 | data: 98 | config.json: |- 99 | { 100 | "nodePathMap":[ 101 | { 102 | "node":"DEFAULT_PATH_FOR_NON_LISTED_NODES", 103 | "paths":["/opt/local-path-provisioner"] 104 | } 105 | ] 106 | } 107 | -------------------------------------------------------------------------------- /manifest1.3/007-oidc-authservice-oidc-authservice-base.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | OIDC_AUTH_URL: /dex/auth 4 | OIDC_PROVIDER: http://dex.auth.svc.cluster.local:5556/dex 5 | OIDC_SCOPES: profile email groups 6 | PORT: '"8080"' 7 | REDIRECT_URL: /login/oidc 8 | SKIP_AUTH_URI: /dex 9 | STORE_PATH: /var/lib/authservice/data.db 10 | USERID_CLAIM: email 11 | USERID_HEADER: kubeflow-userid 12 | USERID_PREFIX: "" 13 | kind: ConfigMap 14 | metadata: 15 | name: oidc-authservice-parameters 16 | namespace: istio-system 17 | --- 18 | apiVersion: v1 19 | data: 20 | 
CLIENT_ID: a3ViZWZsb3ctb2lkYy1hdXRoc2VydmljZQ== 21 | CLIENT_SECRET: cFVCbkJPWTgwU25YZ2ppYlRZTTlaV056WTJ4cmVOR1Fvaw== 22 | kind: Secret 23 | metadata: 24 | name: oidc-authservice-client 25 | namespace: istio-system 26 | type: Opaque 27 | --- 28 | apiVersion: v1 29 | kind: Service 30 | metadata: 31 | name: authservice 32 | namespace: istio-system 33 | spec: 34 | ports: 35 | - name: http-authservice 36 | port: 8080 37 | targetPort: http-api 38 | publishNotReadyAddresses: true 39 | selector: 40 | app: authservice 41 | type: ClusterIP 42 | --- 43 | apiVersion: v1 44 | kind: PersistentVolumeClaim 45 | metadata: 46 | name: authservice-pvc 47 | namespace: istio-system 48 | spec: 49 | accessModes: 50 | - ReadWriteOnce 51 | resources: 52 | requests: 53 | storage: 10Gi 54 | --- 55 | apiVersion: apps/v1 56 | kind: StatefulSet 57 | metadata: 58 | name: authservice 59 | namespace: istio-system 60 | spec: 61 | replicas: 1 62 | selector: 63 | matchLabels: 64 | app: authservice 65 | serviceName: authservice 66 | template: 67 | metadata: 68 | annotations: 69 | sidecar.istio.io/inject: "false" 70 | labels: 71 | app: authservice 72 | spec: 73 | containers: 74 | - envFrom: 75 | - secretRef: 76 | name: oidc-authservice-client 77 | - configMapRef: 78 | name: oidc-authservice-parameters 79 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/kubeflow-oidc-authservice:28c59ef-c8870 80 | imagePullPolicy: Always 81 | name: authservice 82 | ports: 83 | - containerPort: 8080 84 | name: http-api 85 | readinessProbe: 86 | httpGet: 87 | path: / 88 | port: 8081 89 | volumeMounts: 90 | - mountPath: /var/lib/authservice 91 | name: data 92 | securityContext: 93 | fsGroup: 111 94 | volumes: 95 | - name: data 96 | persistentVolumeClaim: 97 | claimName: authservice-pvc 98 | --- 99 | apiVersion: networking.istio.io/v1alpha3 100 | kind: EnvoyFilter 101 | metadata: 102 | name: authn-filter 103 | namespace: istio-system 104 | spec: 105 | configPatches: 106 | - applyTo: HTTP_FILTER 107 | listener: 108 | 
filterChain: 109 | filter: 110 | name: envoy.http_connection_manager 111 | subFilter: 112 | name: "" 113 | match: 114 | context: GATEWAY 115 | patch: 116 | operation: INSERT_BEFORE 117 | value: 118 | name: envoy.filters.http.ext_authz 119 | typed_config: 120 | '@type': type.googleapis.com/envoy.extensions.filters.http.ext_authz.v3.ExtAuthz 121 | http_service: 122 | authorization_request: 123 | allowed_headers: 124 | patterns: 125 | - exact: authorization 126 | - exact: cookie 127 | - exact: x-auth-token 128 | authorization_response: 129 | allowed_upstream_headers: 130 | patterns: 131 | - exact: kubeflow-userid 132 | server_uri: 133 | cluster: outbound|8080||authservice.istio-system.svc.cluster.local 134 | timeout: 10s 135 | uri: http://authservice.istio-system.svc.cluster.local 136 | workloadSelector: 137 | labels: 138 | istio: ingressgateway 139 | -------------------------------------------------------------------------------- /manifest1.3/008-dex-overlays-istio.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: auth 5 | --- 6 | apiVersion: apiextensions.k8s.io/v1beta1 7 | kind: CustomResourceDefinition 8 | metadata: 9 | name: authcodes.dex.coreos.com 10 | spec: 11 | group: dex.coreos.com 12 | names: 13 | kind: AuthCode 14 | listKind: AuthCodeList 15 | plural: authcodes 16 | singular: authcode 17 | scope: Namespaced 18 | version: v1 19 | --- 20 | apiVersion: v1 21 | kind: ServiceAccount 22 | metadata: 23 | name: dex 24 | namespace: auth 25 | --- 26 | apiVersion: rbac.authorization.k8s.io/v1beta1 27 | kind: ClusterRole 28 | metadata: 29 | name: dex 30 | rules: 31 | - apiGroups: 32 | - dex.coreos.com 33 | resources: 34 | - '*' 35 | verbs: 36 | - '*' 37 | - apiGroups: 38 | - apiextensions.k8s.io 39 | resources: 40 | - customresourcedefinitions 41 | verbs: 42 | - create 43 | --- 44 | apiVersion: rbac.authorization.k8s.io/v1beta1 45 | kind: ClusterRoleBinding 46 | 
metadata: 47 | name: dex 48 | roleRef: 49 | apiGroup: rbac.authorization.k8s.io 50 | kind: ClusterRole 51 | name: dex 52 | subjects: 53 | - kind: ServiceAccount 54 | name: dex 55 | namespace: auth 56 | --- 57 | apiVersion: v1 58 | data: 59 | config.yaml: | 60 | issuer: http://dex.auth.svc.cluster.local:5556/dex 61 | storage: 62 | type: kubernetes 63 | config: 64 | inCluster: true 65 | web: 66 | http: 0.0.0.0:5556 67 | logger: 68 | level: "debug" 69 | format: text 70 | oauth2: 71 | skipApprovalScreen: true 72 | enablePasswordDB: true 73 | staticPasswords: 74 | - email: user@example.com 75 | hash: $2y$12$4K/VkmDd1q1Orb3xAt82zu8gk7Ad6ReFR4LCP9UeYE90NLiN9Df72 76 | # https://github.com/dexidp/dex/pull/1601/commits 77 | # FIXME: Use hashFromEnv instead 78 | username: user 79 | userID: "15841185641784" 80 | staticClients: 81 | # https://github.com/dexidp/dex/pull/1664 82 | - idEnv: OIDC_CLIENT_ID 83 | redirectURIs: ["/login/oidc"] 84 | name: 'Dex Login Application' 85 | secretEnv: OIDC_CLIENT_SECRET 86 | kind: ConfigMap 87 | metadata: 88 | name: dex 89 | namespace: auth 90 | --- 91 | apiVersion: v1 92 | data: 93 | OIDC_CLIENT_ID: a3ViZWZsb3ctb2lkYy1hdXRoc2VydmljZQ== 94 | OIDC_CLIENT_SECRET: cFVCbkJPWTgwU25YZ2ppYlRZTTlaV056WTJ4cmVOR1Fvaw== 95 | kind: Secret 96 | metadata: 97 | name: dex-oidc-client 98 | namespace: auth 99 | type: Opaque 100 | --- 101 | apiVersion: v1 102 | kind: Service 103 | metadata: 104 | name: dex 105 | namespace: auth 106 | spec: 107 | ports: 108 | - name: dex 109 | nodePort: 32000 110 | port: 5556 111 | protocol: TCP 112 | targetPort: 5556 113 | selector: 114 | app: dex 115 | type: NodePort 116 | --- 117 | apiVersion: apps/v1 118 | kind: Deployment 119 | metadata: 120 | labels: 121 | app: dex 122 | name: dex 123 | namespace: auth 124 | spec: 125 | replicas: 1 126 | selector: 127 | matchLabels: 128 | app: dex 129 | template: 130 | metadata: 131 | labels: 132 | app: dex 133 | spec: 134 | containers: 135 | - command: 136 | - dex 137 | - serve 138 | - 
/etc/dex/cfg/config.yaml 139 | envFrom: 140 | - secretRef: 141 | name: dex-oidc-client 142 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/dexidp-dex:v2.24.0-bb0b9 143 | name: dex 144 | ports: 145 | - containerPort: 5556 146 | name: http 147 | volumeMounts: 148 | - mountPath: /etc/dex/cfg 149 | name: config 150 | serviceAccountName: dex 151 | volumes: 152 | - configMap: 153 | items: 154 | - key: config.yaml 155 | path: config.yaml 156 | name: dex 157 | name: config 158 | --- 159 | apiVersion: networking.istio.io/v1alpha3 160 | kind: VirtualService 161 | metadata: 162 | name: dex 163 | namespace: auth 164 | spec: 165 | gateways: 166 | - kubeflow/kubeflow-gateway 167 | hosts: 168 | - '*' 169 | http: 170 | - match: 171 | - uri: 172 | prefix: /dex/ 173 | route: 174 | - destination: 175 | host: dex.auth.svc.cluster.local 176 | port: 177 | number: 5556 178 | -------------------------------------------------------------------------------- /patch/workflow-controller.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | artifactRepository: | 4 | archiveLogs: true 5 | s3: 6 | endpoint: "minio-service.kubeflow:9000" 7 | bucket: "mlpipeline" 8 | keyFormat: "artifacts/{{workflow.name}}/{{pod.name}}" 9 | # insecure will disable TLS. 
Primarily used for minio installs not configured with TLS 10 | insecure: true 11 | accessKeySecret: 12 | name: mlpipeline-minio-artifact 13 | key: accesskey 14 | secretKeySecret: 15 | name: mlpipeline-minio-artifact 16 | key: secretkey 17 | containerRuntimeExecutor: k8sapi 18 | kind: ConfigMap 19 | metadata: 20 | labels: 21 | application-crd-id: kubeflow-pipelines 22 | name: workflow-controller-configmap 23 | namespace: kubeflow 24 | 25 | --- 26 | apiVersion: apps/v1 27 | kind: Deployment 28 | metadata: 29 | labels: 30 | application-crd-id: kubeflow-pipelines 31 | name: workflow-controller 32 | namespace: kubeflow 33 | spec: 34 | selector: 35 | matchLabels: 36 | app: workflow-controller 37 | application-crd-id: kubeflow-pipelines 38 | template: 39 | metadata: 40 | labels: 41 | app: workflow-controller 42 | application-crd-id: kubeflow-pipelines 43 | spec: 44 | containers: 45 | - args: 46 | - --configmap 47 | - workflow-controller-configmap 48 | - --executor-image 49 | - registry.cn-shenzhen.aliyuncs.com/tensorbytes/ml-pipeline-argoexec:v2.12.9-license-compliance 50 | command: 51 | - workflow-controller 52 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/ml-pipeline-workflow-controller:v2.12.9-license-compliance-2d9c1 53 | livenessProbe: 54 | httpGet: 55 | path: /metrics 56 | port: metrics 57 | initialDelaySeconds: 30 58 | periodSeconds: 30 59 | name: workflow-controller 60 | ports: 61 | - containerPort: 9090 62 | name: metrics 63 | resources: 64 | requests: 65 | cpu: 100m 66 | memory: 500Mi 67 | nodeSelector: 68 | kubernetes.io/os: linux 69 | securityContext: 70 | runAsNonRoot: true 71 | serviceAccountName: argo 72 | 73 | --- 74 | apiVersion: apps/v1 75 | kind: Deployment 76 | metadata: 77 | labels: 78 | app: cache-server 79 | app.kubernetes.io/component: ml-pipeline 80 | app.kubernetes.io/name: kubeflow-pipelines 81 | application-crd-id: kubeflow-pipelines 82 | name: cache-server 83 | namespace: kubeflow 84 | spec: 85 | replicas: 1 86 | selector: 87 | 
matchLabels: 88 | app: cache-server 89 | app.kubernetes.io/component: ml-pipeline 90 | app.kubernetes.io/name: kubeflow-pipelines 91 | application-crd-id: kubeflow-pipelines 92 | template: 93 | metadata: 94 | labels: 95 | app: cache-server 96 | app.kubernetes.io/component: ml-pipeline 97 | app.kubernetes.io/name: kubeflow-pipelines 98 | application-crd-id: kubeflow-pipelines 99 | spec: 100 | containers: 101 | - args: 102 | - --db_driver=$(DBCONFIG_DRIVER) 103 | - --db_host=$(DBCONFIG_HOST_NAME) 104 | - --db_port=$(DBCONFIG_PORT) 105 | - --db_name=$(DBCONFIG_DB_NAME) 106 | - --db_user=$(DBCONFIG_USER) 107 | - --db_password=$(DBCONFIG_PASSWORD) 108 | - --namespace_to_watch=$(NAMESPACE_TO_WATCH) 109 | env: 110 | - name: NAMESPACE_TO_WATCH 111 | value: "" 112 | - name: CACHE_IMAGE 113 | valueFrom: 114 | configMapKeyRef: 115 | key: cacheImage 116 | name: pipeline-install-config 117 | - name: DBCONFIG_DRIVER 118 | value: mysql 119 | - name: DBCONFIG_DB_NAME 120 | valueFrom: 121 | configMapKeyRef: 122 | key: cacheDb 123 | name: pipeline-install-config 124 | - name: DBCONFIG_HOST_NAME 125 | valueFrom: 126 | configMapKeyRef: 127 | key: dbHost 128 | name: pipeline-install-config 129 | - name: DBCONFIG_PORT 130 | valueFrom: 131 | configMapKeyRef: 132 | key: dbPort 133 | name: pipeline-install-config 134 | - name: DBCONFIG_USER 135 | valueFrom: 136 | secretKeyRef: 137 | key: username 138 | name: mysql-secret 139 | - name: DBCONFIG_PASSWORD 140 | valueFrom: 141 | secretKeyRef: 142 | key: password 143 | name: mysql-secret 144 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/ml-pipeline-cache-server:1.5.0-rc.2-a44df 145 | imagePullPolicy: Always 146 | name: server 147 | ports: 148 | - containerPort: 8443 149 | name: webhook-api 150 | volumeMounts: 151 | - mountPath: /etc/webhook/certs 152 | name: webhook-tls-certs 153 | readOnly: true 154 | serviceAccountName: kubeflow-pipelines-cache 155 | volumes: 156 | - name: webhook-tls-certs 157 | secret: 158 | secretName: 
webhook-server-tls -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Kubeflow安装及使用教程(中国版) 2 | 3 | 由于国内网络问题,Kubeflow 通常安装都是各种磕磕碰碰,以一颗为广大人民谋福利的心,这里提供中国的本地镜像版(阿里云镜像/dockerhub)的**安装**。 4 | 同时这里汇总了一些kubeflow的中文教程资料供大家参考。 5 | 6 | ## Kubeflow 使用教程 7 | - [kubeflow安装](/README.md) 8 | - [kubeflow各组件介绍](/docs/introduction.md) 9 | - [问题汇总](/docs/problems.md) 10 | 11 | ## 安装步骤 12 | 13 | ### 安装k8s 14 | 15 | 如果已经有k8s集群,这一步可以跳过,直接到[kubeflow安装](https://github.com/shikanon/kubeflow-manifests#%E5%AE%89%E8%A3%85kubeflow)。 16 | 17 | **kind安装k8s集群** 18 | 19 | 下载[kind工具](https://github.com/kubernetes-sigs/kind/tags) 20 | 21 | 使用kind安装k8s集群: 22 | 23 | ```bash 24 | $ kind create cluster --config=kind/kind-config.yaml --name=kubeflow --image=kindest/node:v1.16.15 25 | ``` 26 | 27 | 启动成功后可以看到开了一个30000端口: 28 | ```bash 29 | $ docker ps 30 | CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 31 | 5f67af713e28 kindest/node:v1.19.1 "/usr/local/bin/entr…" 3 minutes ago Up 3 minutes 0.0.0.0:30000->30000/tcp, 127.0.0.1:56682->6443/tcp kubeflow-control-plane 32 | ``` 33 | 34 | 由于 kubeflow 实验组件较多,建议机器配置不低于*CPU 8核、内存32G*。 35 | 36 | ### 安装kubeflow 37 | 38 | **1.启动** 39 | ```bash 40 | $ python install.py 41 | ``` 42 | 43 | 等待镜像拉取,由于涉及的镜像比较多,要20~30分钟左右,可以通过下面的命令查看是否就绪: 44 | 45 | **2.查看结果** 46 | ``` 47 | $ kubectl get pod -nkubeflow 48 | NAME READY STATUS RESTARTS AGE 49 | admission-webhook-deployment-6fb9d65887-pzvgc 1/1 Running 0 19h 50 | cache-deployer-deployment-7558d65bf4-jhgwg 2/2 Running 1 3h54m 51 | cache-server-c64c68ddf-lx7xq 2/2 Running 0 3h54m 52 | centraldashboard-7b7676d8bd-g2s8j 1/1 Running 0 4h46m 53 | jupyter-web-app-deployment-66f74586d9-scbsm 1/1 Running 0 3h4m 54 | katib-controller-77675c88df-mx4rh 1/1 Running 0 19h 55 | katib-db-manager-646695754f-z797r 1/1 Running 0 19h 56 | katib-mysql-5bb5bd9957-gbl5t 1/1 Running 0 19h 57 |
katib-ui-55fd4bd6f9-r98r2 1/1 Running 0 19h 58 | kfserving-controller-manager-0 2/2 Running 0 19h 59 | kubeflow-pipelines-profile-controller-5698bf57cf-dhtsj 1/1 Running 0 3h52m 60 | metacontroller-0 1/1 Running 0 4h52m 61 | metadata-envoy-deployment-76d65977f7-rmlzc 1/1 Running 0 4h52m 62 | metadata-grpc-deployment-697d9c6c67-j6dl2 2/2 Running 3 4h52m 63 | metadata-writer-58cdd57678-8t6gw 2/2 Running 1 4h52m 64 | minio-6d6784db95-tqs77 2/2 Running 0 4h45m 65 | ml-pipeline-85fc99f899-plsz2 2/2 Running 1 4h52m 66 | ml-pipeline-persistenceagent-65cb9594c7-xvn4j 2/2 Running 1 4h52m 67 | ml-pipeline-scheduledworkflow-7f8d8dfc69-7wfs4 2/2 Running 0 4h52m 68 | ml-pipeline-ui-5c765cc7bd-4r2j7 2/2 Running 0 4h52m 69 | ml-pipeline-viewer-crd-5b8df7f458-5b8qg 2/2 Running 1 4h52m 70 | ml-pipeline-visualizationserver-56c5ff68d5-92bkf 2/2 Running 0 4h52m 71 | mpi-operator-789f88879-n4xms 1/1 Running 0 19h 72 | mxnet-operator-7fff864957-vq2bg 1/1 Running 0 19h 73 | mysql-56b554ff66-kd7bd 2/2 Running 0 4h45m 74 | notebook-controller-deployment-74d9584477-qhpp8 1/1 Running 0 19h 75 | profiles-deployment-67b4666796-k7t2h 2/2 Running 0 19h 76 | pytorch-operator-fd86f7694-dxbgf 2/2 Running 0 19h 77 | tensorboard-controller-controller-manager-fd6bcffb4-k9qvx 3/3 Running 1 19h 78 | tensorboards-web-app-deployment-78d7b8b658-dktc6 1/1 Running 0 19h 79 | tf-job-operator-7bc5cf4cc7-gk8tz 1/1 Running 0 19h 80 | volumes-web-app-deployment-68fcfc9775-bz9gq 1/1 Running 0 19h 81 | workflow-controller-566998f76b-2v2kq 2/2 Running 1 4h52m 82 | xgboost-operator-deployment-5c7bfd57cc-9rtq6 2/2 Running 1 19h 83 | ``` 84 | 85 | 如果所有pod 都running了表示安装完了。 86 | 87 | *注:除了kubeflow命名空间,该一键安装工具也会安装istio,knative,因此也要保证这两个命名空间下的服务全部running* 88 | *如果你的mysql没启动成功,可以运行kubectl apply -f database-patch/mysql-persistent-storage.yaml* 89 | 90 | 全部pod running后,可以访问本地的30000端口(istio-ingressgateway设置了nodeport为30000端口),就可以看到登录界面了: 91 | ![](/example/dex登录界面.png) 92 | 93 | 输入账号密码即可登录,这里的账号密码可以通过`patch/auth.yaml`进行更改。 94 | 
默认的用户名是`admin@example.com`,密码是`password` 95 | 96 | 登录后进入kubeflow界面: 97 | ![](/example/kubeflow-dashboardcenter.png) 98 | 99 | ### 删除kubeflow资源 100 | 101 | ```bash 102 | kind delete cluster --name kubeflow 103 | ``` 104 | 105 | **如果不希望流量鉴权,可以把istio的authorizationpolicies全部删除** 106 | ```bash 107 | kubectl delete authorizationpolicies --all -A 108 | ``` -------------------------------------------------------------------------------- /manifest1.3/025-volumes-web-app-overlays-istio.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | labels: 5 | app: volumes-web-app 6 | kustomize.component: volumes-web-app 7 | name: volumes-web-app-service-account 8 | namespace: kubeflow 9 | --- 10 | apiVersion: rbac.authorization.k8s.io/v1 11 | kind: ClusterRole 12 | metadata: 13 | labels: 14 | app: volumes-web-app 15 | kustomize.component: volumes-web-app 16 | name: volumes-web-app-cluster-role 17 | rules: 18 | - apiGroups: 19 | - "" 20 | resources: 21 | - namespaces 22 | - pods 23 | verbs: 24 | - get 25 | - list 26 | - apiGroups: 27 | - authorization.k8s.io 28 | resources: 29 | - subjectaccessreviews 30 | verbs: 31 | - create 32 | - apiGroups: 33 | - "" 34 | resources: 35 | - persistentvolumeclaims 36 | verbs: 37 | - create 38 | - delete 39 | - get 40 | - list 41 | - watch 42 | - update 43 | - patch 44 | - apiGroups: 45 | - storage.k8s.io 46 | resources: 47 | - storageclasses 48 | verbs: 49 | - get 50 | - list 51 | - watch 52 | - apiGroups: 53 | - "" 54 | resources: 55 | - events 56 | verbs: 57 | - list 58 | --- 59 | apiVersion: rbac.authorization.k8s.io/v1 60 | kind: ClusterRole 61 | metadata: 62 | labels: 63 | app: volumes-web-app 64 | kustomize.component: volumes-web-app 65 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true" 66 | name: volumes-web-app-kubeflow-volume-ui-admin 67 | rules: [] 68 | --- 69 | apiVersion: rbac.authorization.k8s.io/v1 70 | kind: ClusterRole 71 
| metadata: 72 | labels: 73 | app: volumes-web-app 74 | kustomize.component: volumes-web-app 75 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true" 76 | name: volumes-web-app-kubeflow-volume-ui-edit 77 | rules: 78 | - apiGroups: 79 | - "" 80 | resources: 81 | - persistentvolumeclaims 82 | verbs: 83 | - create 84 | - delete 85 | - get 86 | - list 87 | - watch 88 | - update 89 | - patch 90 | --- 91 | apiVersion: rbac.authorization.k8s.io/v1 92 | kind: ClusterRole 93 | metadata: 94 | labels: 95 | app: volumes-web-app 96 | kustomize.component: volumes-web-app 97 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true" 98 | name: volumes-web-app-kubeflow-volume-ui-view 99 | rules: 100 | - apiGroups: 101 | - "" 102 | resources: 103 | - persistentvolumeclaims 104 | verbs: 105 | - get 106 | - list 107 | - watch 108 | - apiGroups: 109 | - storage.k8s.io 110 | resources: 111 | - storageclasses 112 | verbs: 113 | - get 114 | - list 115 | - watch 116 | --- 117 | apiVersion: rbac.authorization.k8s.io/v1 118 | kind: ClusterRoleBinding 119 | metadata: 120 | labels: 121 | app: volumes-web-app 122 | kustomize.component: volumes-web-app 123 | name: volumes-web-app-cluster-role-binding 124 | roleRef: 125 | apiGroup: rbac.authorization.k8s.io 126 | kind: ClusterRole 127 | name: volumes-web-app-cluster-role 128 | subjects: 129 | - kind: ServiceAccount 130 | name: volumes-web-app-service-account 131 | namespace: kubeflow 132 | --- 133 | apiVersion: v1 134 | data: 135 | VWA_CLUSTER_DOMAIN: cluster.local 136 | VWA_PREFIX: /volumes 137 | VWA_USERID_HEADER: kubeflow-userid 138 | VWA_USERID_PREFIX: "" 139 | kind: ConfigMap 140 | metadata: 141 | labels: 142 | app: volumes-web-app 143 | kustomize.component: volumes-web-app 144 | name: volumes-web-app-parameters-4gg8cm2gmk 145 | namespace: kubeflow 146 | --- 147 | apiVersion: v1 148 | kind: Service 149 | metadata: 150 | labels: 151 | app: volumes-web-app 152 | kustomize.component: volumes-web-app 153 | run: 
volumes-web-app 154 | name: volumes-web-app-service 155 | namespace: kubeflow 156 | spec: 157 | ports: 158 | - name: http 159 | port: 80 160 | protocol: TCP 161 | targetPort: 5000 162 | selector: 163 | app: volumes-web-app 164 | kustomize.component: volumes-web-app 165 | type: ClusterIP 166 | --- 167 | apiVersion: apps/v1 168 | kind: Deployment 169 | metadata: 170 | labels: 171 | app: volumes-web-app 172 | kustomize.component: volumes-web-app 173 | name: volumes-web-app-deployment 174 | namespace: kubeflow 175 | spec: 176 | replicas: 1 177 | selector: 178 | matchLabels: 179 | app: volumes-web-app 180 | kustomize.component: volumes-web-app 181 | template: 182 | metadata: 183 | annotations: 184 | sidecar.istio.io/inject: "false" 185 | labels: 186 | app: volumes-web-app 187 | kustomize.component: volumes-web-app 188 | spec: 189 | containers: 190 | - env: 191 | - name: APP_PREFIX 192 | value: /volumes 193 | - name: USERID_HEADER 194 | value: kubeflow-userid 195 | - name: USERID_PREFIX 196 | value: "" 197 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/notebooks-volumes-web-app:v1.3.0-rc.0-fe235 198 | name: volumes-web-app 199 | ports: 200 | - containerPort: 5000 201 | serviceAccountName: volumes-web-app-service-account 202 | --- 203 | apiVersion: networking.istio.io/v1alpha3 204 | kind: VirtualService 205 | metadata: 206 | labels: 207 | app: volumes-web-app 208 | kustomize.component: volumes-web-app 209 | name: volumes-web-app-volumes-web-app 210 | namespace: kubeflow 211 | spec: 212 | gateways: 213 | - kubeflow-gateway 214 | hosts: 215 | - '*' 216 | http: 217 | - headers: 218 | request: 219 | add: 220 | x-forwarded-prefix: /volumes 221 | match: 222 | - uri: 223 | prefix: /volumes/ 224 | rewrite: 225 | uri: / 226 | route: 227 | - destination: 228 | host: volumes-web-app-service.kubeflow.svc.cluster.local 229 | port: 230 | number: 80 231 | -------------------------------------------------------------------------------- 
/manifest1.3/027-tensorboard-overlays-istio.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | labels: 5 | app: tensorboards-web-app 6 | kustomize.component: tensorboards-web-app 7 | name: tensorboards-web-app-service-account 8 | namespace: kubeflow 9 | --- 10 | apiVersion: rbac.authorization.k8s.io/v1 11 | kind: ClusterRole 12 | metadata: 13 | labels: 14 | app: tensorboards-web-app 15 | kustomize.component: tensorboards-web-app 16 | name: tensorboards-web-app-cluster-role 17 | rules: 18 | - apiGroups: 19 | - "" 20 | resources: 21 | - namespaces 22 | verbs: 23 | - get 24 | - list 25 | - apiGroups: 26 | - authorization.k8s.io 27 | resources: 28 | - subjectaccessreviews 29 | verbs: 30 | - create 31 | - apiGroups: 32 | - tensorboard.kubeflow.org 33 | resources: 34 | - tensorboards 35 | - tensorboards/finalizers 36 | verbs: 37 | - get 38 | - list 39 | - create 40 | - delete 41 | - apiGroups: 42 | - "" 43 | resources: 44 | - persistentvolumeclaims 45 | verbs: 46 | - create 47 | - delete 48 | - get 49 | - list 50 | - apiGroups: 51 | - storage.k8s.io 52 | resources: 53 | - storageclasses 54 | verbs: 55 | - get 56 | - list 57 | - watch 58 | --- 59 | apiVersion: rbac.authorization.k8s.io/v1 60 | kind: ClusterRole 61 | metadata: 62 | labels: 63 | app: tensorboards-web-app 64 | kustomize.component: tensorboards-web-app 65 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true" 66 | name: tensorboards-web-app-kubeflow-tensorboard-ui-admin 67 | rules: [] 68 | --- 69 | apiVersion: rbac.authorization.k8s.io/v1 70 | kind: ClusterRole 71 | metadata: 72 | labels: 73 | app: tensorboards-web-app 74 | kustomize.component: tensorboards-web-app 75 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true" 76 | name: tensorboards-web-app-kubeflow-tensorboard-ui-edit 77 | rules: 78 | - apiGroups: 79 | - tensorboard.kubeflow.org 80 | resources: 81 | - tensorboards 82 | - 
tensorboards/finalizers 83 | verbs: 84 | - get 85 | - list 86 | - create 87 | - delete 88 | --- 89 | apiVersion: rbac.authorization.k8s.io/v1 90 | kind: ClusterRole 91 | metadata: 92 | labels: 93 | app: tensorboards-web-app 94 | kustomize.component: tensorboards-web-app 95 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true" 96 | name: tensorboards-web-app-kubeflow-tensorboard-ui-view 97 | rules: 98 | - apiGroups: 99 | - tensorboard.kubeflow.org 100 | resources: 101 | - tensorboards 102 | - tensorboards/finalizers 103 | verbs: 104 | - get 105 | - list 106 | - apiGroups: 107 | - storage.k8s.io 108 | resources: 109 | - storageclasses 110 | verbs: 111 | - get 112 | - list 113 | - watch 114 | --- 115 | apiVersion: rbac.authorization.k8s.io/v1 116 | kind: ClusterRoleBinding 117 | metadata: 118 | labels: 119 | app: tensorboards-web-app 120 | kustomize.component: tensorboards-web-app 121 | name: tensorboards-web-app-cluster-role-binding 122 | roleRef: 123 | apiGroup: rbac.authorization.k8s.io 124 | kind: ClusterRole 125 | name: tensorboards-web-app-cluster-role 126 | subjects: 127 | - kind: ServiceAccount 128 | name: tensorboards-web-app-service-account 129 | namespace: kubeflow 130 | --- 131 | apiVersion: v1 132 | data: 133 | TWA_CLUSTER_DOMAIN: cluster.local 134 | TWA_PREFIX: /tensorboards 135 | TWA_USERID_HEADER: kubeflow-userid 136 | TWA_USERID_PREFIX: "" 137 | kind: ConfigMap 138 | metadata: 139 | labels: 140 | app: tensorboards-web-app 141 | kustomize.component: tensorboards-web-app 142 | name: tensorboards-web-app-parameters-g28fbd6cch 143 | namespace: kubeflow 144 | --- 145 | apiVersion: v1 146 | kind: Service 147 | metadata: 148 | labels: 149 | app: tensorboards-web-app 150 | kustomize.component: tensorboards-web-app 151 | run: tensorboards-web-app 152 | name: tensorboards-web-app-service 153 | namespace: kubeflow 154 | spec: 155 | ports: 156 | - name: http 157 | port: 80 158 | protocol: TCP 159 | targetPort: 5000 160 | selector: 161 | app: 
tensorboards-web-app 162 | kustomize.component: tensorboards-web-app 163 | type: ClusterIP 164 | --- 165 | apiVersion: apps/v1 166 | kind: Deployment 167 | metadata: 168 | labels: 169 | app: tensorboards-web-app 170 | kustomize.component: tensorboards-web-app 171 | name: tensorboards-web-app-deployment 172 | namespace: kubeflow 173 | spec: 174 | replicas: 1 175 | selector: 176 | matchLabels: 177 | app: tensorboards-web-app 178 | kustomize.component: tensorboards-web-app 179 | template: 180 | metadata: 181 | annotations: 182 | sidecar.istio.io/inject: "false" 183 | labels: 184 | app: tensorboards-web-app 185 | kustomize.component: tensorboards-web-app 186 | spec: 187 | containers: 188 | - env: 189 | - name: APP_PREFIX 190 | value: /tensorboards 191 | - name: USERID_HEADER 192 | value: kubeflow-userid 193 | - name: USERID_PREFIX 194 | value: "" 195 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/notebooks-tensorboards-web-app:v1.3.0-rc.0-258dd 196 | name: tensorboards-web-app 197 | ports: 198 | - containerPort: 5000 199 | serviceAccountName: tensorboards-web-app-service-account 200 | --- 201 | apiVersion: networking.istio.io/v1alpha3 202 | kind: VirtualService 203 | metadata: 204 | labels: 205 | app: tensorboards-web-app 206 | kustomize.component: tensorboards-web-app 207 | name: tensorboards-web-app-tensorboards-web-app 208 | namespace: kubeflow 209 | spec: 210 | gateways: 211 | - kubeflow-gateway 212 | hosts: 213 | - '*' 214 | http: 215 | - headers: 216 | request: 217 | add: 218 | x-forwarded-prefix: /tensorboards 219 | match: 220 | - uri: 221 | prefix: /tensorboards/ 222 | rewrite: 223 | uri: / 224 | route: 225 | - destination: 226 | host: tensorboards-web-app-service.kubeflow.svc.cluster.local 227 | port: 228 | number: 80 229 | -------------------------------------------------------------------------------- /manifest1.3/015-kubeflow-roles-kubeflow-roles-base.yaml: -------------------------------------------------------------------------------- 1 | 
aggregationRule: 2 | clusterRoleSelectors: 3 | - matchLabels: 4 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true" 5 | apiVersion: rbac.authorization.k8s.io/v1 6 | kind: ClusterRole 7 | metadata: 8 | name: kubeflow-admin 9 | rules: [] 10 | --- 11 | aggregationRule: 12 | clusterRoleSelectors: 13 | - matchLabels: 14 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true" 15 | apiVersion: rbac.authorization.k8s.io/v1 16 | kind: ClusterRole 17 | metadata: 18 | labels: 19 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true" 20 | name: kubeflow-edit 21 | rules: [] 22 | --- 23 | apiVersion: rbac.authorization.k8s.io/v1 24 | kind: ClusterRole 25 | metadata: 26 | labels: 27 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true" 28 | name: kubeflow-kubernetes-admin 29 | rules: 30 | - apiGroups: 31 | - authorization.k8s.io 32 | resources: 33 | - localsubjectaccessreviews 34 | verbs: 35 | - create 36 | - apiGroups: 37 | - rbac.authorization.k8s.io 38 | resources: 39 | - rolebindings 40 | - roles 41 | verbs: 42 | - create 43 | - delete 44 | - deletecollection 45 | - get 46 | - list 47 | - patch 48 | - update 49 | - watch 50 | --- 51 | apiVersion: rbac.authorization.k8s.io/v1 52 | kind: ClusterRole 53 | metadata: 54 | labels: 55 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true" 56 | name: kubeflow-kubernetes-edit 57 | rules: 58 | - apiGroups: 59 | - "" 60 | resources: 61 | - pods/attach 62 | - pods/exec 63 | - pods/portforward 64 | - pods/proxy 65 | - secrets 66 | - services/proxy 67 | verbs: 68 | - get 69 | - list 70 | - watch 71 | - apiGroups: 72 | - "" 73 | resources: 74 | - serviceaccounts 75 | verbs: 76 | - impersonate 77 | - apiGroups: 78 | - "" 79 | resources: 80 | - pods 81 | - pods/attach 82 | - pods/exec 83 | - pods/portforward 84 | - pods/proxy 85 | verbs: 86 | - create 87 | - delete 88 | - deletecollection 89 | - patch 90 | - update 91 | - apiGroups: 92 | - "" 93 | resources: 94 | - 
configmaps 95 | - endpoints 96 | - persistentvolumeclaims 97 | - replicationcontrollers 98 | - replicationcontrollers/scale 99 | - secrets 100 | - serviceaccounts 101 | - services 102 | - services/proxy 103 | verbs: 104 | - create 105 | - delete 106 | - deletecollection 107 | - patch 108 | - update 109 | - apiGroups: 110 | - apps 111 | resources: 112 | - daemonsets 113 | - deployments 114 | - deployments/rollback 115 | - deployments/scale 116 | - replicasets 117 | - replicasets/scale 118 | - statefulsets 119 | - statefulsets/scale 120 | verbs: 121 | - create 122 | - delete 123 | - deletecollection 124 | - patch 125 | - update 126 | - apiGroups: 127 | - autoscaling 128 | resources: 129 | - horizontalpodautoscalers 130 | verbs: 131 | - create 132 | - delete 133 | - deletecollection 134 | - patch 135 | - update 136 | - apiGroups: 137 | - batch 138 | resources: 139 | - cronjobs 140 | - jobs 141 | verbs: 142 | - create 143 | - delete 144 | - deletecollection 145 | - patch 146 | - update 147 | - apiGroups: 148 | - extensions 149 | resources: 150 | - daemonsets 151 | - deployments 152 | - deployments/rollback 153 | - deployments/scale 154 | - ingresses 155 | - networkpolicies 156 | - replicasets 157 | - replicasets/scale 158 | - replicationcontrollers/scale 159 | verbs: 160 | - create 161 | - delete 162 | - deletecollection 163 | - patch 164 | - update 165 | - apiGroups: 166 | - policy 167 | resources: 168 | - poddisruptionbudgets 169 | verbs: 170 | - create 171 | - delete 172 | - deletecollection 173 | - patch 174 | - update 175 | - apiGroups: 176 | - networking.k8s.io 177 | resources: 178 | - ingresses 179 | - networkpolicies 180 | verbs: 181 | - create 182 | - delete 183 | - deletecollection 184 | - patch 185 | - update 186 | --- 187 | apiVersion: rbac.authorization.k8s.io/v1 188 | kind: ClusterRole 189 | metadata: 190 | labels: 191 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true" 192 | name: kubeflow-kubernetes-view 193 | rules: 194 | - apiGroups: 
195 | - "" 196 | resources: 197 | - configmaps 198 | - endpoints 199 | - persistentvolumeclaims 200 | - persistentvolumeclaims/status 201 | - pods 202 | - replicationcontrollers 203 | - replicationcontrollers/scale 204 | - serviceaccounts 205 | - services 206 | - services/status 207 | verbs: 208 | - get 209 | - list 210 | - watch 211 | - apiGroups: 212 | - "" 213 | resources: 214 | - bindings 215 | - events 216 | - limitranges 217 | - namespaces/status 218 | - pods/log 219 | - pods/status 220 | - replicationcontrollers/status 221 | - resourcequotas 222 | - resourcequotas/status 223 | verbs: 224 | - get 225 | - list 226 | - watch 227 | - apiGroups: 228 | - "" 229 | resources: 230 | - namespaces 231 | verbs: 232 | - get 233 | - list 234 | - watch 235 | - apiGroups: 236 | - apps 237 | resources: 238 | - controllerrevisions 239 | - daemonsets 240 | - daemonsets/status 241 | - deployments 242 | - deployments/scale 243 | - deployments/status 244 | - replicasets 245 | - replicasets/scale 246 | - replicasets/status 247 | - statefulsets 248 | - statefulsets/scale 249 | - statefulsets/status 250 | verbs: 251 | - get 252 | - list 253 | - watch 254 | - apiGroups: 255 | - autoscaling 256 | resources: 257 | - horizontalpodautoscalers 258 | - horizontalpodautoscalers/status 259 | verbs: 260 | - get 261 | - list 262 | - watch 263 | - apiGroups: 264 | - batch 265 | resources: 266 | - cronjobs 267 | - cronjobs/status 268 | - jobs 269 | - jobs/status 270 | verbs: 271 | - get 272 | - list 273 | - watch 274 | - apiGroups: 275 | - extensions 276 | resources: 277 | - daemonsets 278 | - daemonsets/status 279 | - deployments 280 | - deployments/scale 281 | - deployments/status 282 | - ingresses 283 | - ingresses/status 284 | - networkpolicies 285 | - replicasets 286 | - replicasets/scale 287 | - replicasets/status 288 | - replicationcontrollers/scale 289 | verbs: 290 | - get 291 | - list 292 | - watch 293 | - apiGroups: 294 | - policy 295 | resources: 296 | - poddisruptionbudgets 297 | - 
poddisruptionbudgets/status 298 | verbs: 299 | - get 300 | - list 301 | - watch 302 | - apiGroups: 303 | - networking.k8s.io 304 | resources: 305 | - ingresses 306 | - ingresses/status 307 | - networkpolicies 308 | verbs: 309 | - get 310 | - list 311 | - watch 312 | --- 313 | aggregationRule: 314 | clusterRoleSelectors: 315 | - matchLabels: 316 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true" 317 | apiVersion: rbac.authorization.k8s.io/v1 318 | kind: ClusterRole 319 | metadata: 320 | labels: 321 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true" 322 | name: kubeflow-view 323 | rules: [] 324 | -------------------------------------------------------------------------------- /manifest1.3/031-mxnet-job-overlays-kubeflow.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiextensions.k8s.io/v1beta1 2 | kind: CustomResourceDefinition 3 | metadata: 4 | labels: 5 | app: mxnet-operator 6 | app.kubernetes.io/component: mxnet 7 | app.kubernetes.io/name: mxnet-operator 8 | kustomize.component: mxnet-operator 9 | name: mxjobs.kubeflow.org 10 | spec: 11 | group: kubeflow.org 12 | names: 13 | kind: MXJob 14 | plural: mxjobs 15 | singular: mxjob 16 | scope: Namespaced 17 | subresources: 18 | status: {} 19 | validation: 20 | openAPIV3Schema: 21 | properties: 22 | spec: 23 | properties: 24 | mxReplicaSpecs: 25 | properties: 26 | Scheduler: 27 | properties: 28 | replicas: 29 | maximum: 1 30 | minimum: 1 31 | type: integer 32 | Server: 33 | properties: 34 | replicas: 35 | minimum: 1 36 | type: integer 37 | Tuner: 38 | properties: 39 | replicas: 40 | maximum: 1 41 | minimum: 1 42 | type: integer 43 | TunerServer: 44 | properties: 45 | replicas: 46 | minimum: 1 47 | type: integer 48 | TunerTracker: 49 | properties: 50 | replicas: 51 | maximum: 1 52 | minimum: 1 53 | type: integer 54 | Worker: 55 | properties: 56 | replicas: 57 | minimum: 1 58 | type: integer 59 | version: v1 60 | --- 61 | 
apiVersion: v1 62 | kind: ServiceAccount 63 | metadata: 64 | labels: 65 | app: mxnet-operator 66 | app.kubernetes.io/component: mxnet 67 | app.kubernetes.io/name: mxnet-operator 68 | kustomize.component: mxnet-operator 69 | name: mxnet-operator 70 | namespace: kubeflow 71 | --- 72 | aggregationRule: 73 | clusterRoleSelectors: 74 | - matchLabels: 75 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-mxjobs-admin: "true" 76 | apiVersion: rbac.authorization.k8s.io/v1 77 | kind: ClusterRole 78 | metadata: 79 | labels: 80 | app: mxnet-operator 81 | app.kubernetes.io/component: mxnet 82 | app.kubernetes.io/name: mxnet-operator 83 | kustomize.component: mxnet-operator 84 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true" 85 | name: kubeflow-mxjobs-admin 86 | rules: [] 87 | --- 88 | apiVersion: rbac.authorization.k8s.io/v1 89 | kind: ClusterRole 90 | metadata: 91 | labels: 92 | app: mxnet-operator 93 | app.kubernetes.io/component: mxnet 94 | app.kubernetes.io/name: mxnet-operator 95 | kustomize.component: mxnet-operator 96 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true" 97 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-mxjobs-admin: "true" 98 | name: kubeflow-mxjobs-edit 99 | rules: 100 | - apiGroups: 101 | - kubeflow.org 102 | resources: 103 | - mxjobs 104 | - mxjobs/status 105 | verbs: 106 | - get 107 | - list 108 | - watch 109 | - create 110 | - delete 111 | - deletecollection 112 | - patch 113 | - update 114 | --- 115 | apiVersion: rbac.authorization.k8s.io/v1 116 | kind: ClusterRole 117 | metadata: 118 | labels: 119 | app: mxnet-operator 120 | app.kubernetes.io/component: mxnet 121 | app.kubernetes.io/name: mxnet-operator 122 | kustomize.component: mxnet-operator 123 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true" 124 | name: kubeflow-mxjobs-view 125 | rules: 126 | - apiGroups: 127 | - kubeflow.org 128 | resources: 129 | - mxjobs 130 | - mxjobs/status 131 | verbs: 132 | - get 133 | - list 134 | 
- watch 135 | --- 136 | apiVersion: rbac.authorization.k8s.io/v1beta1 137 | kind: ClusterRole 138 | metadata: 139 | labels: 140 | app: mxnet-operator 141 | app.kubernetes.io/component: mxnet 142 | app.kubernetes.io/name: mxnet-operator 143 | kustomize.component: mxnet-operator 144 | name: mxnet-operator 145 | rules: 146 | - apiGroups: 147 | - kubeflow.org 148 | resources: 149 | - mxjobs 150 | verbs: 151 | - '*' 152 | - apiGroups: 153 | - apiextensions.k8s.io 154 | resources: 155 | - customresourcedefinitions 156 | verbs: 157 | - '*' 158 | - apiGroups: 159 | - storage.k8s.io 160 | resources: 161 | - storageclasses 162 | verbs: 163 | - '*' 164 | - apiGroups: 165 | - batch 166 | resources: 167 | - jobs 168 | verbs: 169 | - '*' 170 | - apiGroups: 171 | - "" 172 | resources: 173 | - configmaps 174 | - pods 175 | - services 176 | - endpoints 177 | - persistentvolumeclaims 178 | - events 179 | verbs: 180 | - '*' 181 | - apiGroups: 182 | - apps 183 | - extensions 184 | resources: 185 | - deployments 186 | verbs: 187 | - '*' 188 | --- 189 | apiVersion: rbac.authorization.k8s.io/v1beta1 190 | kind: ClusterRoleBinding 191 | metadata: 192 | labels: 193 | app: mxnet-operator 194 | app.kubernetes.io/component: mxnet 195 | app.kubernetes.io/name: mxnet-operator 196 | kustomize.component: mxnet-operator 197 | name: mxnet-operator 198 | roleRef: 199 | apiGroup: rbac.authorization.k8s.io 200 | kind: ClusterRole 201 | name: mxnet-operator 202 | subjects: 203 | - kind: ServiceAccount 204 | name: mxnet-operator 205 | namespace: kubeflow 206 | --- 207 | apiVersion: apps/v1 208 | kind: Deployment 209 | metadata: 210 | labels: 211 | app: mxnet-operator 212 | app.kubernetes.io/component: mxnet 213 | app.kubernetes.io/name: mxnet-operator 214 | kustomize.component: mxnet-operator 215 | name: mxnet-operator 216 | namespace: kubeflow 217 | spec: 218 | replicas: 1 219 | selector: 220 | matchLabels: 221 | app: mxnet-operator 222 | app.kubernetes.io/component: mxnet 223 | app.kubernetes.io/name: 
mxnet-operator 224 | kustomize.component: mxnet-operator 225 | template: 226 | metadata: 227 | annotations: 228 | sidecar.istio.io/inject: "false" 229 | labels: 230 | app: mxnet-operator 231 | app.kubernetes.io/component: mxnet 232 | app.kubernetes.io/name: mxnet-operator 233 | kustomize.component: mxnet-operator 234 | spec: 235 | containers: 236 | - command: 237 | - /opt/kubeflow/mxnet-operator.v1 238 | env: 239 | - name: MY_POD_NAMESPACE 240 | valueFrom: 241 | fieldRef: 242 | fieldPath: metadata.namespace 243 | - name: MY_POD_NAME 244 | valueFrom: 245 | fieldRef: 246 | fieldPath: metadata.name 247 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/kubeflow-mxnet-operator:v1.1.0-9863e 248 | imagePullPolicy: Always 249 | name: mxnet-operator 250 | serviceAccountName: mxnet-operator 251 | -------------------------------------------------------------------------------- /manifest1.3/029-pytorch-job-overlays-kubeflow.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiextensions.k8s.io/v1beta1 2 | kind: CustomResourceDefinition 3 | metadata: 4 | labels: 5 | app: pytorch-operator 6 | app.kubernetes.io/component: pytorch 7 | app.kubernetes.io/name: pytorch-operator 8 | kustomize.component: pytorch-operator 9 | name: pytorchjobs.kubeflow.org 10 | spec: 11 | additionalPrinterColumns: 12 | - JSONPath: .status.conditions[-1:].type 13 | name: State 14 | type: string 15 | - JSONPath: .metadata.creationTimestamp 16 | name: Age 17 | type: date 18 | group: kubeflow.org 19 | names: 20 | kind: PyTorchJob 21 | plural: pytorchjobs 22 | singular: pytorchjob 23 | scope: Namespaced 24 | subresources: 25 | status: {} 26 | validation: 27 | openAPIV3Schema: 28 | properties: 29 | spec: 30 | properties: 31 | pytorchReplicaSpecs: 32 | properties: 33 | Master: 34 | properties: 35 | replicas: 36 | maximum: 1 37 | minimum: 1 38 | type: integer 39 | Worker: 40 | properties: 41 | replicas: 42 | minimum: 1 43 | type: integer 44 | versions: 
45 | - name: v1 46 | served: true 47 | storage: true 48 | --- 49 | apiVersion: v1 50 | kind: ServiceAccount 51 | metadata: 52 | labels: 53 | app: pytorch-operator 54 | app.kubernetes.io/component: pytorch 55 | app.kubernetes.io/name: pytorch-operator 56 | kustomize.component: pytorch-operator 57 | name: pytorch-operator 58 | namespace: kubeflow 59 | --- 60 | aggregationRule: 61 | clusterRoleSelectors: 62 | - matchLabels: 63 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-pytorchjobs-admin: "true" 64 | apiVersion: rbac.authorization.k8s.io/v1 65 | kind: ClusterRole 66 | metadata: 67 | labels: 68 | app: pytorch-operator 69 | app.kubernetes.io/component: pytorch 70 | app.kubernetes.io/name: pytorch-operator 71 | kustomize.component: pytorch-operator 72 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true" 73 | name: kubeflow-pytorchjobs-admin 74 | rules: [] 75 | --- 76 | apiVersion: rbac.authorization.k8s.io/v1 77 | kind: ClusterRole 78 | metadata: 79 | labels: 80 | app: pytorch-operator 81 | app.kubernetes.io/component: pytorch 82 | app.kubernetes.io/name: pytorch-operator 83 | kustomize.component: pytorch-operator 84 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true" 85 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-pytorchjobs-admin: "true" 86 | name: kubeflow-pytorchjobs-edit 87 | rules: 88 | - apiGroups: 89 | - kubeflow.org 90 | resources: 91 | - pytorchjobs 92 | - pytorchjobs/status 93 | - pytorchjobs/finalizers 94 | verbs: 95 | - get 96 | - list 97 | - watch 98 | - create 99 | - delete 100 | - deletecollection 101 | - patch 102 | - update 103 | --- 104 | apiVersion: rbac.authorization.k8s.io/v1 105 | kind: ClusterRole 106 | metadata: 107 | labels: 108 | app: pytorch-operator 109 | app.kubernetes.io/component: pytorch 110 | app.kubernetes.io/name: pytorch-operator 111 | kustomize.component: pytorch-operator 112 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true" 113 | name: 
kubeflow-pytorchjobs-view 114 | rules: 115 | - apiGroups: 116 | - kubeflow.org 117 | resources: 118 | - pytorchjobs 119 | - pytorchjobs/status 120 | - pytorchjobs/finalizers 121 | verbs: 122 | - get 123 | - list 124 | - watch 125 | --- 126 | apiVersion: rbac.authorization.k8s.io/v1beta1 127 | kind: ClusterRole 128 | metadata: 129 | labels: 130 | app: pytorch-operator 131 | app.kubernetes.io/component: pytorch 132 | app.kubernetes.io/name: pytorch-operator 133 | kustomize.component: pytorch-operator 134 | name: pytorch-operator 135 | rules: 136 | - apiGroups: 137 | - kubeflow.org 138 | resources: 139 | - pytorchjobs 140 | - pytorchjobs/status 141 | - pytorchjobs/finalizers 142 | verbs: 143 | - '*' 144 | - apiGroups: 145 | - apiextensions.k8s.io 146 | resources: 147 | - customresourcedefinitions 148 | verbs: 149 | - '*' 150 | - apiGroups: 151 | - "" 152 | resources: 153 | - pods 154 | - services 155 | - endpoints 156 | - events 157 | verbs: 158 | - '*' 159 | --- 160 | apiVersion: rbac.authorization.k8s.io/v1beta1 161 | kind: ClusterRoleBinding 162 | metadata: 163 | labels: 164 | app: pytorch-operator 165 | app.kubernetes.io/component: pytorch 166 | app.kubernetes.io/name: pytorch-operator 167 | kustomize.component: pytorch-operator 168 | name: pytorch-operator 169 | roleRef: 170 | apiGroup: rbac.authorization.k8s.io 171 | kind: ClusterRole 172 | name: pytorch-operator 173 | subjects: 174 | - kind: ServiceAccount 175 | name: pytorch-operator 176 | namespace: kubeflow 177 | --- 178 | apiVersion: v1 179 | kind: Service 180 | metadata: 181 | annotations: 182 | prometheus.io/path: /metrics 183 | prometheus.io/port: "8443" 184 | prometheus.io/scrape: "true" 185 | labels: 186 | app: pytorch-operator 187 | app.kubernetes.io/component: pytorch 188 | app.kubernetes.io/name: pytorch-operator 189 | kustomize.component: pytorch-operator 190 | name: pytorch-operator 191 | namespace: kubeflow 192 | spec: 193 | ports: 194 | - name: monitoring-port 195 | port: 8443 196 | targetPort: 
8443 197 | selector: 198 | app: pytorch-operator 199 | app.kubernetes.io/component: pytorch 200 | app.kubernetes.io/name: pytorch-operator 201 | kustomize.component: pytorch-operator 202 | name: pytorch-operator 203 | type: ClusterIP 204 | --- 205 | apiVersion: apps/v1 206 | kind: Deployment 207 | metadata: 208 | labels: 209 | app: pytorch-operator 210 | app.kubernetes.io/component: pytorch 211 | app.kubernetes.io/name: pytorch-operator 212 | kustomize.component: pytorch-operator 213 | name: pytorch-operator 214 | namespace: kubeflow 215 | spec: 216 | replicas: 1 217 | selector: 218 | matchLabels: 219 | app: pytorch-operator 220 | app.kubernetes.io/component: pytorch 221 | app.kubernetes.io/name: pytorch-operator 222 | kustomize.component: pytorch-operator 223 | name: pytorch-operator 224 | template: 225 | metadata: 226 | labels: 227 | app: pytorch-operator 228 | app.kubernetes.io/component: pytorch 229 | app.kubernetes.io/name: pytorch-operator 230 | kustomize.component: pytorch-operator 231 | name: pytorch-operator 232 | spec: 233 | containers: 234 | - command: 235 | - /pytorch-operator.v1 236 | - --alsologtostderr 237 | - -v=1 238 | - --monitoring-port=8443 239 | env: 240 | - name: MY_POD_NAMESPACE 241 | valueFrom: 242 | fieldRef: 243 | fieldPath: metadata.namespace 244 | - name: MY_POD_NAME 245 | valueFrom: 246 | fieldRef: 247 | fieldPath: metadata.name 248 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/kubeflow-images-public-pytorch-operator:vmaster-g518f9c76-4fc09 249 | name: pytorch-operator 250 | serviceAccountName: pytorch-operator 251 | -------------------------------------------------------------------------------- /manifest1.3/028-tf-training-overlays-kubeflow.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiextensions.k8s.io/v1beta1 2 | kind: CustomResourceDefinition 3 | metadata: 4 | labels: 5 | app: tf-job-operator 6 | app.kubernetes.io/component: tfjob 7 | app.kubernetes.io/name: 
tf-job-operator 8 | kustomize.component: tf-job-operator 9 | name: tfjobs.kubeflow.org 10 | spec: 11 | additionalPrinterColumns: 12 | - JSONPath: .status.conditions[-1:].type 13 | name: State 14 | type: string 15 | - JSONPath: .metadata.creationTimestamp 16 | name: Age 17 | type: date 18 | group: kubeflow.org 19 | names: 20 | kind: TFJob 21 | plural: tfjobs 22 | singular: tfjob 23 | scope: Namespaced 24 | subresources: 25 | status: {} 26 | validation: 27 | openAPIV3Schema: 28 | properties: 29 | spec: 30 | properties: 31 | tfReplicaSpecs: 32 | properties: 33 | Chief: 34 | properties: 35 | replicas: 36 | maximum: 1 37 | minimum: 1 38 | type: integer 39 | Evaluator: 40 | properties: 41 | replicas: 42 | minimum: 0 43 | type: integer 44 | PS: 45 | properties: 46 | replicas: 47 | minimum: 1 48 | type: integer 49 | Worker: 50 | properties: 51 | replicas: 52 | minimum: 1 53 | type: integer 54 | versions: 55 | - name: v1 56 | served: true 57 | storage: true 58 | --- 59 | apiVersion: v1 60 | kind: ServiceAccount 61 | metadata: 62 | labels: 63 | app: tf-job-operator 64 | app.kubernetes.io/component: tfjob 65 | app.kubernetes.io/name: tf-job-operator 66 | kustomize.component: tf-job-operator 67 | name: tf-job-operator 68 | namespace: kubeflow 69 | --- 70 | aggregationRule: 71 | clusterRoleSelectors: 72 | - matchLabels: 73 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-tfjobs-admin: "true" 74 | apiVersion: rbac.authorization.k8s.io/v1 75 | kind: ClusterRole 76 | metadata: 77 | labels: 78 | app: tf-job-operator 79 | app.kubernetes.io/component: tfjob 80 | app.kubernetes.io/name: tf-job-operator 81 | kustomize.component: tf-job-operator 82 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true" 83 | name: kubeflow-tfjobs-admin 84 | rules: [] 85 | --- 86 | apiVersion: rbac.authorization.k8s.io/v1 87 | kind: ClusterRole 88 | metadata: 89 | labels: 90 | app: tf-job-operator 91 | app.kubernetes.io/component: tfjob 92 | app.kubernetes.io/name: tf-job-operator 93 
| kustomize.component: tf-job-operator 94 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true" 95 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-tfjobs-admin: "true" 96 | name: kubeflow-tfjobs-edit 97 | rules: 98 | - apiGroups: 99 | - kubeflow.org 100 | resources: 101 | - tfjobs 102 | - tfjobs/status 103 | verbs: 104 | - get 105 | - list 106 | - watch 107 | - create 108 | - delete 109 | - deletecollection 110 | - patch 111 | - update 112 | --- 113 | apiVersion: rbac.authorization.k8s.io/v1 114 | kind: ClusterRole 115 | metadata: 116 | labels: 117 | app: tf-job-operator 118 | app.kubernetes.io/component: tfjob 119 | app.kubernetes.io/name: tf-job-operator 120 | kustomize.component: tf-job-operator 121 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true" 122 | name: kubeflow-tfjobs-view 123 | rules: 124 | - apiGroups: 125 | - kubeflow.org 126 | resources: 127 | - tfjobs 128 | - tfjobs/status 129 | verbs: 130 | - get 131 | - list 132 | - watch 133 | --- 134 | apiVersion: rbac.authorization.k8s.io/v1beta1 135 | kind: ClusterRole 136 | metadata: 137 | labels: 138 | app: tf-job-operator 139 | app.kubernetes.io/component: tfjob 140 | app.kubernetes.io/name: tf-job-operator 141 | kustomize.component: tf-job-operator 142 | name: tf-job-operator 143 | rules: 144 | - apiGroups: 145 | - kubeflow.org 146 | resources: 147 | - tfjobs 148 | - tfjobs/status 149 | - tfjobs/finalizers 150 | verbs: 151 | - '*' 152 | - apiGroups: 153 | - apiextensions.k8s.io 154 | resources: 155 | - customresourcedefinitions 156 | verbs: 157 | - '*' 158 | - apiGroups: 159 | - "" 160 | resources: 161 | - pods 162 | - services 163 | - endpoints 164 | - events 165 | verbs: 166 | - '*' 167 | - apiGroups: 168 | - apps 169 | - extensions 170 | resources: 171 | - deployments 172 | verbs: 173 | - '*' 174 | --- 175 | apiVersion: rbac.authorization.k8s.io/v1beta1 176 | kind: ClusterRoleBinding 177 | metadata: 178 | labels: 179 | app: tf-job-operator 180 | 
app.kubernetes.io/component: tfjob 181 | app.kubernetes.io/name: tf-job-operator 182 | kustomize.component: tf-job-operator 183 | name: tf-job-operator 184 | roleRef: 185 | apiGroup: rbac.authorization.k8s.io 186 | kind: ClusterRole 187 | name: tf-job-operator 188 | subjects: 189 | - kind: ServiceAccount 190 | name: tf-job-operator 191 | namespace: kubeflow 192 | --- 193 | apiVersion: v1 194 | kind: Service 195 | metadata: 196 | annotations: 197 | prometheus.io/path: /metrics 198 | prometheus.io/port: "8443" 199 | prometheus.io/scrape: "true" 200 | labels: 201 | app: tf-job-operator 202 | app.kubernetes.io/component: tfjob 203 | app.kubernetes.io/name: tf-job-operator 204 | kustomize.component: tf-job-operator 205 | name: tf-job-operator 206 | namespace: kubeflow 207 | spec: 208 | ports: 209 | - name: monitoring-port 210 | port: 8443 211 | targetPort: 8443 212 | selector: 213 | app: tf-job-operator 214 | app.kubernetes.io/component: tfjob 215 | app.kubernetes.io/name: tf-job-operator 216 | kustomize.component: tf-job-operator 217 | name: tf-job-operator 218 | type: ClusterIP 219 | --- 220 | apiVersion: apps/v1 221 | kind: Deployment 222 | metadata: 223 | labels: 224 | app: tf-job-operator 225 | app.kubernetes.io/component: tfjob 226 | app.kubernetes.io/name: tf-job-operator 227 | kustomize.component: tf-job-operator 228 | name: tf-job-operator 229 | namespace: kubeflow 230 | spec: 231 | replicas: 1 232 | selector: 233 | matchLabels: 234 | app: tf-job-operator 235 | app.kubernetes.io/component: tfjob 236 | app.kubernetes.io/name: tf-job-operator 237 | kustomize.component: tf-job-operator 238 | template: 239 | metadata: 240 | annotations: 241 | sidecar.istio.io/inject: "false" 242 | labels: 243 | app: tf-job-operator 244 | app.kubernetes.io/component: tfjob 245 | app.kubernetes.io/name: tf-job-operator 246 | kustomize.component: tf-job-operator 247 | name: tf-job-operator 248 | spec: 249 | containers: 250 | - args: 251 | - -monitoring-port=8443 252 | env: 253 | - 
name: MY_POD_NAMESPACE 254 | valueFrom: 255 | fieldRef: 256 | fieldPath: metadata.namespace 257 | - name: MY_POD_NAME 258 | valueFrom: 259 | fieldRef: 260 | fieldPath: metadata.name 261 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/training-tf-operator:cd2fc1ff397b1f349f68524f4abd5013a32e3033-b54e1 262 | name: tf-job-operator 263 | serviceAccountName: tf-job-operator 264 | -------------------------------------------------------------------------------- /patch/kfserving.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: caching.internal.knative.dev/v1alpha1 2 | kind: Image 3 | metadata: 4 | labels: 5 | app.kubernetes.io/component: knative-serving-install 6 | app.kubernetes.io/name: knative-serving-install 7 | kustomize.component: knative 8 | serving.knative.dev/release: v0.14.3 9 | name: queue-proxy 10 | namespace: knative-serving 11 | spec: 12 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/knative-serving-queue:v0.14.3 13 | --- 14 | apiVersion: v1 15 | data: 16 | _example: | 17 | ################################ 18 | # # 19 | # EXAMPLE CONFIGURATION # 20 | # # 21 | ################################ 22 | 23 | # This block is not actually functional configuration, 24 | # but serves to illustrate the available configuration 25 | # options and document them in a way that is accessible 26 | # to users that `kubectl edit` this config map. 27 | # 28 | # These sample configuration options may be copied out of 29 | # this example block and unindented to be in the data block 30 | # to actually change the configuration. 
31 | 32 | # List of repositories for which tag to digest resolving should be skipped 33 | registriesSkippingTagResolving: "ko.local,dev.local" 34 | queueSidecarImage: registry.cn-shenzhen.aliyuncs.com/tensorbytes/knative-serving-queue:v0.14.3 35 | kind: ConfigMap 36 | metadata: 37 | labels: 38 | app.kubernetes.io/component: knative-serving-install 39 | app.kubernetes.io/name: knative-serving-install 40 | kustomize.component: knative 41 | serving.knative.dev/release: v0.14.3 42 | name: config-deployment 43 | namespace: knative-serving 44 | --- 45 | 46 | apiVersion: v1 47 | data: 48 | agent: |- 49 | { 50 | "image" : "kfserving/agent:v0.5.1", 51 | "memoryRequest": "100Mi", 52 | "memoryLimit": "1Gi", 53 | "cpuRequest": "100m", 54 | "cpuLimit": "1" 55 | } 56 | batcher: |- 57 | { 58 | "image" : "kfserving/agent:v0.5.1", 59 | "memoryRequest": "1Gi", 60 | "memoryLimit": "1Gi", 61 | "cpuRequest": "1", 62 | "cpuLimit": "1" 63 | } 64 | credentials: |- 65 | { 66 | "gcs": { 67 | "gcsCredentialFileName": "gcloud-application-credentials.json" 68 | }, 69 | "s3": { 70 | "s3AccessKeyIDName": "AWS_ACCESS_KEY_ID", 71 | "s3SecretAccessKeyName": "AWS_SECRET_ACCESS_KEY" 72 | } 73 | } 74 | explainers: |- 75 | { 76 | "alibi": { 77 | "image" : "kfserving/alibi-explainer", 78 | "defaultImageVersion": "v0.5.1" 79 | }, 80 | "aix": { 81 | "image" : "kfserving/aix-explainer", 82 | "defaultImageVersion": "v0.5.1" 83 | }, 84 | "art": { 85 | "image" : "kfserving/art-explainer", 86 | "defaultImageVersion": "v0.5.1" 87 | } 88 | } 89 | ingress: |- 90 | { 91 | "ingressGateway" : "kubeflow-gateway.kubeflow", 92 | "ingressService" : "istio-ingressgateway.istio-system.svc.cluster.local", 93 | "localGateway" : "cluster-local-gateway.knative-serving", 94 | "localGatewayService" : "cluster-local-gateway.istio-system.svc.cluster.local" 95 | } 96 | logger: |- 97 | { 98 | "image" : "kfserving/agent:v0.5.1", 99 | "memoryRequest": "100Mi", 100 | "memoryLimit": "1Gi", 101 | "cpuRequest": "100m", 102 | "cpuLimit": 
"1", 103 | "defaultUrl": "http://default-broker" 104 | } 105 | predictors: |- 106 | { 107 | "tensorflow": { 108 | "image": "tensorflow/serving", 109 | "defaultImageVersion": "1.14.0", 110 | "defaultGpuImageVersion": "1.14.0-gpu", 111 | "defaultTimeout": "60", 112 | "supportedFrameworks": [ 113 | "tensorflow" 114 | ], 115 | "multiModelServer": false 116 | }, 117 | "onnx": { 118 | "image": "mcr.microsoft.com/onnxruntime/server", 119 | "defaultImageVersion": "v1.0.0", 120 | "supportedFrameworks": [ 121 | "onnx" 122 | ], 123 | "multiModelServer": false 124 | }, 125 | "sklearn": { 126 | "v1": { 127 | "image": "registry.cn-shenzhen.aliyuncs.com/tensorbytes/sklearnserver", 128 | "defaultImageVersion": "v0.5.1", 129 | "supportedFrameworks": [ 130 | "sklearn" 131 | ], 132 | "multiModelServer": false 133 | }, 134 | "v2": { 135 | "image": "docker.io/seldonio/mlserver", 136 | "defaultImageVersion": "0.2.1", 137 | "supportedFrameworks": [ 138 | "sklearn" 139 | ], 140 | "multiModelServer": false 141 | } 142 | }, 143 | "xgboost": { 144 | "v1": { 145 | "image": "registry.cn-shenzhen.aliyuncs.com/tensorbytes/xgbserver", 146 | "defaultImageVersion": "v0.5.1", 147 | "supportedFrameworks": [ 148 | "xgboost" 149 | ], 150 | "multiModelServer": false 151 | }, 152 | "v2": { 153 | "image": "docker.io/seldonio/mlserver", 154 | "defaultImageVersion": "0.2.1", 155 | "supportedFrameworks": [ 156 | "xgboost" 157 | ], 158 | "multiModelServer": false 159 | } 160 | }, 161 | "pytorch": { 162 | "v1" : { 163 | "image": "registry.cn-shenzhen.aliyuncs.com/tensorbytes/pytorchserver", 164 | "defaultImageVersion": "v0.5.1", 165 | "defaultGpuImageVersion": "v0.5.1-gpu", 166 | "supportedFrameworks": [ 167 | "pytorch" 168 | ], 169 | "multiModelServer": false 170 | }, 171 | "v2" : { 172 | "image": "kfserving/torchserve-kfs", 173 | "defaultImageVersion": "0.3.0", 174 | "defaultGpuImageVersion": "0.3.0-gpu", 175 | "supportedFrameworks": [ 176 | "pytorch" 177 | ], 178 | "multiModelServer": false 179 | } 180 | }, 
181 | "triton": { 182 | "image": "nvcr.io/nvidia/tritonserver", 183 | "defaultImageVersion": "20.08-py3", 184 | "supportedFrameworks": [ 185 | "tensorrt", 186 | "tensorflow", 187 | "onnx", 188 | "pytorch", 189 | "caffe2" 190 | ], 191 | "multiModelServer": false 192 | }, 193 | "pmml": { 194 | "image": "kfserving/pmmlserver", 195 | "defaultImageVersion": "v0.5.1", 196 | "supportedFrameworks": [ 197 | "pmml" 198 | ], 199 | "multiModelServer": false 200 | }, 201 | "lightgbm": { 202 | "image": "kfserving/lgbserver", 203 | "defaultImageVersion": "v0.5.1", 204 | "supportedFrameworks": [ 205 | "lightgbm" 206 | ], 207 | "multiModelServer": false 208 | } 209 | } 210 | storageInitializer: |- 211 | { 212 | "image" : "registry.cn-shenzhen.aliyuncs.com/tensorbytes/storage-initializer:v0.5.1", 213 | "memoryRequest": "100Mi", 214 | "memoryLimit": "1Gi", 215 | "cpuRequest": "100m", 216 | "cpuLimit": "1" 217 | } 218 | transformers: |- 219 | { 220 | } 221 | kind: ConfigMap 222 | metadata: 223 | labels: 224 | app: kfserving 225 | app.kubernetes.io/component: kfserving 226 | app.kubernetes.io/name: kfserving 227 | kustomize.component: kfserving 228 | name: inferenceservice-config 229 | namespace: kubeflow -------------------------------------------------------------------------------- /patch/cluster-local-gateway.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | app: cluster-local-gateway 6 | install.operator.istio.io/owning-resource: unknown 7 | istio: cluster-local-gateway 8 | istio.io/rev: default 9 | operator.istio.io/component: IngressGateways 10 | release: istio 11 | name: cluster-local-gateway 12 | namespace: istio-system 13 | spec: 14 | selector: 15 | matchLabels: 16 | app: cluster-local-gateway 17 | istio: cluster-local-gateway 18 | strategy: 19 | rollingUpdate: 20 | maxSurge: 100% 21 | maxUnavailable: 25% 22 | template: 23 | metadata: 24 | annotations: 25 | 
prometheus.io/path: /stats/prometheus 26 | prometheus.io/port: "15020" 27 | prometheus.io/scrape: "true" 28 | sidecar.istio.io/inject: "false" 29 | labels: 30 | app: cluster-local-gateway 31 | chart: gateways 32 | heritage: Tiller 33 | install.operator.istio.io/owning-resource: unknown 34 | istio: cluster-local-gateway 35 | istio.io/rev: default 36 | operator.istio.io/component: IngressGateways 37 | release: istio 38 | service.istio.io/canonical-name: cluster-local-gateway 39 | service.istio.io/canonical-revision: latest 40 | sidecar.istio.io/inject: "false" 41 | spec: 42 | affinity: 43 | nodeAffinity: 44 | preferredDuringSchedulingIgnoredDuringExecution: 45 | - preference: 46 | matchExpressions: 47 | - key: kubernetes.io/arch 48 | operator: In 49 | values: 50 | - amd64 51 | weight: 2 52 | - preference: 53 | matchExpressions: 54 | - key: kubernetes.io/arch 55 | operator: In 56 | values: 57 | - ppc64le 58 | weight: 2 59 | - preference: 60 | matchExpressions: 61 | - key: kubernetes.io/arch 62 | operator: In 63 | values: 64 | - s390x 65 | weight: 2 66 | requiredDuringSchedulingIgnoredDuringExecution: 67 | nodeSelectorTerms: 68 | - matchExpressions: 69 | - key: kubernetes.io/arch 70 | operator: In 71 | values: 72 | - amd64 73 | - ppc64le 74 | - s390x 75 | containers: 76 | - args: 77 | - proxy 78 | - router 79 | - --domain 80 | - $(POD_NAMESPACE).svc.cluster.local 81 | - --proxyLogLevel=warning 82 | - --proxyComponentLogLevel=misc:error 83 | - --log_output_level=default:info 84 | - --serviceCluster 85 | - cluster-local-gateway 86 | env: 87 | - name: JWT_POLICY 88 | value: first-party-jwt 89 | - name: PILOT_CERT_PROVIDER 90 | value: istiod 91 | - name: CA_ADDR 92 | value: istiod.istio-system.svc:15012 93 | - name: NODE_NAME 94 | valueFrom: 95 | fieldRef: 96 | apiVersion: v1 97 | fieldPath: spec.nodeName 98 | - name: POD_NAME 99 | valueFrom: 100 | fieldRef: 101 | apiVersion: v1 102 | fieldPath: metadata.name 103 | - name: POD_NAMESPACE 104 | valueFrom: 105 | fieldRef: 106 
| apiVersion: v1 107 | fieldPath: metadata.namespace 108 | - name: INSTANCE_IP 109 | valueFrom: 110 | fieldRef: 111 | apiVersion: v1 112 | fieldPath: status.podIP 113 | - name: HOST_IP 114 | valueFrom: 115 | fieldRef: 116 | apiVersion: v1 117 | fieldPath: status.hostIP 118 | - name: SERVICE_ACCOUNT 119 | valueFrom: 120 | fieldRef: 121 | fieldPath: spec.serviceAccountName 122 | - name: CANONICAL_SERVICE 123 | valueFrom: 124 | fieldRef: 125 | fieldPath: metadata.labels['service.istio.io/canonical-name'] 126 | - name: CANONICAL_REVISION 127 | valueFrom: 128 | fieldRef: 129 | fieldPath: metadata.labels['service.istio.io/canonical-revision'] 130 | - name: ISTIO_META_WORKLOAD_NAME 131 | value: cluster-local-gateway 132 | - name: ISTIO_META_OWNER 133 | value: kubernetes://apis/apps/v1/namespaces/istio-system/deployments/cluster-local-gateway 134 | - name: ISTIO_META_UNPRIVILEGED_POD 135 | value: "true" 136 | - name: ISTIO_META_ROUTER_MODE 137 | value: sni-dnat 138 | - name: ISTIO_META_CLUSTER_ID 139 | value: Kubernetes 140 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/istio-proxyv2:1.9.0-e8a74 141 | name: istio-proxy 142 | ports: 143 | - containerPort: 15020 144 | protocol: TCP 145 | - containerPort: 8080 146 | protocol: TCP 147 | - containerPort: 15090 148 | name: http-envoy-prom 149 | protocol: TCP 150 | readinessProbe: 151 | failureThreshold: 30 152 | httpGet: 153 | path: /healthz/ready 154 | port: 15021 155 | scheme: HTTP 156 | initialDelaySeconds: 1 157 | periodSeconds: 2 158 | successThreshold: 1 159 | timeoutSeconds: 1 160 | resources: 161 | limits: 162 | cpu: 2000m 163 | memory: 1024Mi 164 | requests: 165 | cpu: 100m 166 | memory: 128Mi 167 | securityContext: 168 | allowPrivilegeEscalation: false 169 | capabilities: 170 | drop: 171 | - ALL 172 | privileged: false 173 | readOnlyRootFilesystem: true 174 | volumeMounts: 175 | - mountPath: /etc/istio/proxy 176 | name: istio-envoy 177 | - mountPath: /etc/istio/config 178 | name: config-volume 179 | - 
mountPath: /var/run/secrets/istio 180 | name: istiod-ca-cert 181 | - mountPath: /var/lib/istio/data 182 | name: istio-data 183 | - mountPath: /etc/istio/pod 184 | name: podinfo 185 | - mountPath: /etc/istio/ingressgateway-certs 186 | name: ingressgateway-certs 187 | readOnly: true 188 | - mountPath: /etc/istio/ingressgateway-ca-certs 189 | name: ingressgateway-ca-certs 190 | readOnly: true 191 | securityContext: 192 | fsGroup: 1337 193 | runAsGroup: 1337 194 | runAsNonRoot: true 195 | runAsUser: 1337 196 | serviceAccountName: cluster-local-gateway-service-account 197 | volumes: 198 | - configMap: 199 | name: istio-ca-root-cert 200 | name: istiod-ca-cert 201 | - downwardAPI: 202 | items: 203 | - fieldRef: 204 | fieldPath: metadata.labels 205 | path: labels 206 | - fieldRef: 207 | fieldPath: metadata.annotations 208 | path: annotations 209 | - path: cpu-limit 210 | resourceFieldRef: 211 | containerName: istio-proxy 212 | divisor: 1m 213 | resource: limits.cpu 214 | - path: cpu-request 215 | resourceFieldRef: 216 | containerName: istio-proxy 217 | divisor: 1m 218 | resource: requests.cpu 219 | name: podinfo 220 | - emptyDir: {} 221 | name: istio-envoy 222 | - emptyDir: {} 223 | name: istio-data 224 | - configMap: 225 | name: istio 226 | optional: true 227 | name: config-volume 228 | - name: ingressgateway-certs 229 | secret: 230 | optional: true 231 | secretName: istio-ingressgateway-certs 232 | - name: ingressgateway-ca-certs 233 | secret: 234 | optional: true 235 | secretName: istio-ingressgateway-ca-certs -------------------------------------------------------------------------------- /patch/istio-ingressgateway.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | app: istio-ingressgateway 6 | install.operator.istio.io/owning-resource: unknown 7 | istio: ingressgateway 8 | istio.io/rev: default 9 | operator.istio.io/component: IngressGateways 10 | release: 
istio 11 | name: istio-ingressgateway 12 | namespace: istio-system 13 | spec: 14 | selector: 15 | matchLabels: 16 | app: istio-ingressgateway 17 | istio: ingressgateway 18 | strategy: 19 | rollingUpdate: 20 | maxSurge: 100% 21 | maxUnavailable: 25% 22 | template: 23 | metadata: 24 | annotations: 25 | prometheus.io/path: /stats/prometheus 26 | prometheus.io/port: "15020" 27 | prometheus.io/scrape: "true" 28 | sidecar.istio.io/inject: "false" 29 | labels: 30 | app: istio-ingressgateway 31 | chart: gateways 32 | heritage: Tiller 33 | install.operator.istio.io/owning-resource: unknown 34 | istio: ingressgateway 35 | istio.io/rev: default 36 | operator.istio.io/component: IngressGateways 37 | release: istio 38 | service.istio.io/canonical-name: istio-ingressgateway 39 | service.istio.io/canonical-revision: latest 40 | sidecar.istio.io/inject: "false" 41 | spec: 42 | affinity: 43 | nodeAffinity: 44 | preferredDuringSchedulingIgnoredDuringExecution: 45 | - preference: 46 | matchExpressions: 47 | - key: kubernetes.io/arch 48 | operator: In 49 | values: 50 | - amd64 51 | weight: 2 52 | - preference: 53 | matchExpressions: 54 | - key: kubernetes.io/arch 55 | operator: In 56 | values: 57 | - ppc64le 58 | weight: 2 59 | - preference: 60 | matchExpressions: 61 | - key: kubernetes.io/arch 62 | operator: In 63 | values: 64 | - s390x 65 | weight: 2 66 | requiredDuringSchedulingIgnoredDuringExecution: 67 | nodeSelectorTerms: 68 | - matchExpressions: 69 | - key: kubernetes.io/arch 70 | operator: In 71 | values: 72 | - amd64 73 | - ppc64le 74 | - s390x 75 | containers: 76 | - args: 77 | - proxy 78 | - router 79 | - --domain 80 | - $(POD_NAMESPACE).svc.cluster.local 81 | - --proxyLogLevel=warning 82 | - --proxyComponentLogLevel=misc:error 83 | - --log_output_level=default:info 84 | - --serviceCluster 85 | - istio-ingressgateway 86 | env: 87 | - name: JWT_POLICY 88 | value: first-party-jwt 89 | - name: PILOT_CERT_PROVIDER 90 | value: istiod 91 | - name: CA_ADDR 92 | value: 
istiod.istio-system.svc:15012 93 | - name: NODE_NAME 94 | valueFrom: 95 | fieldRef: 96 | apiVersion: v1 97 | fieldPath: spec.nodeName 98 | - name: POD_NAME 99 | valueFrom: 100 | fieldRef: 101 | apiVersion: v1 102 | fieldPath: metadata.name 103 | - name: POD_NAMESPACE 104 | valueFrom: 105 | fieldRef: 106 | apiVersion: v1 107 | fieldPath: metadata.namespace 108 | - name: INSTANCE_IP 109 | valueFrom: 110 | fieldRef: 111 | apiVersion: v1 112 | fieldPath: status.podIP 113 | - name: HOST_IP 114 | valueFrom: 115 | fieldRef: 116 | apiVersion: v1 117 | fieldPath: status.hostIP 118 | - name: SERVICE_ACCOUNT 119 | valueFrom: 120 | fieldRef: 121 | fieldPath: spec.serviceAccountName 122 | - name: CANONICAL_SERVICE 123 | valueFrom: 124 | fieldRef: 125 | fieldPath: metadata.labels['service.istio.io/canonical-name'] 126 | - name: CANONICAL_REVISION 127 | valueFrom: 128 | fieldRef: 129 | fieldPath: metadata.labels['service.istio.io/canonical-revision'] 130 | - name: ISTIO_META_WORKLOAD_NAME 131 | value: istio-ingressgateway 132 | - name: ISTIO_META_OWNER 133 | value: kubernetes://apis/apps/v1/namespaces/istio-system/deployments/istio-ingressgateway 134 | - name: ISTIO_META_UNPRIVILEGED_POD 135 | value: "true" 136 | - name: ISTIO_META_ROUTER_MODE 137 | value: standard 138 | - name: ISTIO_META_CLUSTER_ID 139 | value: Kubernetes 140 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/istio-proxyv2:1.9.0-e8a74 141 | name: istio-proxy 142 | ports: 143 | - containerPort: 15021 144 | protocol: TCP 145 | - containerPort: 8080 146 | protocol: TCP 147 | - containerPort: 8443 148 | protocol: TCP 149 | - containerPort: 31400 150 | protocol: TCP 151 | - containerPort: 15443 152 | protocol: TCP 153 | - containerPort: 15090 154 | name: http-envoy-prom 155 | protocol: TCP 156 | readinessProbe: 157 | failureThreshold: 30 158 | httpGet: 159 | path: /healthz/ready 160 | port: 15021 161 | scheme: HTTP 162 | initialDelaySeconds: 1 163 | periodSeconds: 2 164 | successThreshold: 1 165 | 
timeoutSeconds: 1 166 | resources: 167 | limits: 168 | cpu: 2000m 169 | memory: 1024Mi 170 | requests: 171 | cpu: 10m 172 | memory: 40Mi 173 | securityContext: 174 | allowPrivilegeEscalation: false 175 | capabilities: 176 | drop: 177 | - ALL 178 | privileged: false 179 | readOnlyRootFilesystem: true 180 | volumeMounts: 181 | - mountPath: /etc/istio/proxy 182 | name: istio-envoy 183 | - mountPath: /etc/istio/config 184 | name: config-volume 185 | - mountPath: /var/run/secrets/istio 186 | name: istiod-ca-cert 187 | - mountPath: /var/lib/istio/data 188 | name: istio-data 189 | - mountPath: /etc/istio/pod 190 | name: podinfo 191 | - mountPath: /etc/istio/ingressgateway-certs 192 | name: ingressgateway-certs 193 | readOnly: true 194 | - mountPath: /etc/istio/ingressgateway-ca-certs 195 | name: ingressgateway-ca-certs 196 | readOnly: true 197 | securityContext: 198 | fsGroup: 1337 199 | runAsGroup: 1337 200 | runAsNonRoot: true 201 | runAsUser: 1337 202 | serviceAccountName: istio-ingressgateway-service-account 203 | volumes: 204 | - configMap: 205 | name: istio-ca-root-cert 206 | name: istiod-ca-cert 207 | - downwardAPI: 208 | items: 209 | - fieldRef: 210 | fieldPath: metadata.labels 211 | path: labels 212 | - fieldRef: 213 | fieldPath: metadata.annotations 214 | path: annotations 215 | - path: cpu-limit 216 | resourceFieldRef: 217 | containerName: istio-proxy 218 | divisor: 1m 219 | resource: limits.cpu 220 | - path: cpu-request 221 | resourceFieldRef: 222 | containerName: istio-proxy 223 | divisor: 1m 224 | resource: requests.cpu 225 | name: podinfo 226 | - emptyDir: {} 227 | name: istio-envoy 228 | - emptyDir: {} 229 | name: istio-data 230 | - configMap: 231 | name: istio 232 | optional: true 233 | name: config-volume 234 | - name: ingressgateway-certs 235 | secret: 236 | optional: true 237 | secretName: istio-ingressgateway-certs 238 | - name: ingressgateway-ca-certs 239 | secret: 240 | optional: true 241 | secretName: istio-ingressgateway-ca-certs 
-------------------------------------------------------------------------------- /manifest1.3/021-admission-webhook-overlays-cert-manager.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiextensions.k8s.io/v1beta1 2 | kind: CustomResourceDefinition 3 | metadata: 4 | labels: 5 | app: poddefaults 6 | app.kubernetes.io/component: poddefaults 7 | app.kubernetes.io/name: poddefaults 8 | kustomize.component: poddefaults 9 | name: poddefaults.kubeflow.org 10 | spec: 11 | group: kubeflow.org 12 | names: 13 | kind: PodDefault 14 | plural: poddefaults 15 | singular: poddefault 16 | scope: Namespaced 17 | validation: 18 | openAPIV3Schema: 19 | properties: 20 | apiVersion: 21 | type: string 22 | kind: 23 | type: string 24 | metadata: 25 | type: object 26 | spec: 27 | properties: 28 | desc: 29 | type: string 30 | env: 31 | items: 32 | type: object 33 | type: array 34 | envFrom: 35 | items: 36 | type: object 37 | type: array 38 | selector: 39 | type: object 40 | serviceAccountName: 41 | type: string 42 | volumeMounts: 43 | items: 44 | type: object 45 | type: array 46 | volumes: 47 | items: 48 | type: object 49 | type: array 50 | required: 51 | - selector 52 | type: object 53 | status: 54 | type: object 55 | type: object 56 | version: v1alpha1 57 | --- 58 | apiVersion: v1 59 | kind: ServiceAccount 60 | metadata: 61 | labels: 62 | app: poddefaults 63 | app.kubernetes.io/component: poddefaults 64 | app.kubernetes.io/name: poddefaults 65 | kustomize.component: poddefaults 66 | name: admission-webhook-service-account 67 | namespace: kubeflow 68 | --- 69 | apiVersion: rbac.authorization.k8s.io/v1 70 | kind: ClusterRole 71 | metadata: 72 | labels: 73 | app: poddefaults 74 | app.kubernetes.io/component: poddefaults 75 | app.kubernetes.io/name: poddefaults 76 | kustomize.component: poddefaults 77 | name: admission-webhook-cluster-role 78 | rules: 79 | - apiGroups: 80 | - kubeflow.org 81 | resources: 82 | - poddefaults 83 | verbs: 84 
| - get 85 | - watch 86 | - list 87 | - update 88 | - create 89 | - patch 90 | - delete 91 | --- 92 | aggregationRule: 93 | clusterRoleSelectors: 94 | - matchLabels: 95 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-poddefaults-admin: "true" 96 | apiVersion: rbac.authorization.k8s.io/v1 97 | kind: ClusterRole 98 | metadata: 99 | labels: 100 | app: poddefaults 101 | app.kubernetes.io/component: poddefaults 102 | app.kubernetes.io/name: poddefaults 103 | kustomize.component: poddefaults 104 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true" 105 | name: admission-webhook-kubeflow-poddefaults-admin 106 | rules: [] 107 | --- 108 | aggregationRule: 109 | clusterRoleSelectors: 110 | - matchLabels: 111 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-poddefaults-edit: "true" 112 | apiVersion: rbac.authorization.k8s.io/v1 113 | kind: ClusterRole 114 | metadata: 115 | labels: 116 | app: poddefaults 117 | app.kubernetes.io/component: poddefaults 118 | app.kubernetes.io/name: poddefaults 119 | kustomize.component: poddefaults 120 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true" 121 | name: admission-webhook-kubeflow-poddefaults-edit 122 | rules: [] 123 | --- 124 | apiVersion: rbac.authorization.k8s.io/v1 125 | kind: ClusterRole 126 | metadata: 127 | labels: 128 | app: poddefaults 129 | app.kubernetes.io/component: poddefaults 130 | app.kubernetes.io/name: poddefaults 131 | kustomize.component: poddefaults 132 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-poddefaults-admin: "true" 133 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-poddefaults-edit: "true" 134 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true" 135 | name: admission-webhook-kubeflow-poddefaults-view 136 | rules: 137 | - apiGroups: 138 | - kubeflow.org 139 | resources: 140 | - poddefaults 141 | verbs: 142 | - get 143 | - list 144 | - watch 145 | --- 146 | apiVersion: rbac.authorization.k8s.io/v1 147 | kind: 
ClusterRoleBinding 148 | metadata: 149 | labels: 150 | app: poddefaults 151 | app.kubernetes.io/component: poddefaults 152 | app.kubernetes.io/name: poddefaults 153 | kustomize.component: poddefaults 154 | name: admission-webhook-cluster-role-binding 155 | roleRef: 156 | apiGroup: rbac.authorization.k8s.io 157 | kind: ClusterRole 158 | name: admission-webhook-cluster-role 159 | subjects: 160 | - kind: ServiceAccount 161 | name: admission-webhook-service-account 162 | namespace: kubeflow 163 | --- 164 | apiVersion: v1 165 | kind: Service 166 | metadata: 167 | labels: 168 | app: poddefaults 169 | app.kubernetes.io/component: poddefaults 170 | app.kubernetes.io/name: poddefaults 171 | kustomize.component: poddefaults 172 | name: admission-webhook-service 173 | namespace: kubeflow 174 | spec: 175 | ports: 176 | - name: https-webhook 177 | port: 443 178 | targetPort: https-webhook 179 | selector: 180 | app: poddefaults 181 | app.kubernetes.io/component: poddefaults 182 | app.kubernetes.io/name: poddefaults 183 | kustomize.component: poddefaults 184 | --- 185 | apiVersion: apps/v1 186 | kind: Deployment 187 | metadata: 188 | labels: 189 | app: poddefaults 190 | app.kubernetes.io/component: poddefaults 191 | app.kubernetes.io/name: poddefaults 192 | kustomize.component: poddefaults 193 | name: admission-webhook-deployment 194 | namespace: kubeflow 195 | spec: 196 | selector: 197 | matchLabels: 198 | app: poddefaults 199 | app.kubernetes.io/component: poddefaults 200 | app.kubernetes.io/name: poddefaults 201 | kustomize.component: poddefaults 202 | template: 203 | metadata: 204 | annotations: 205 | sidecar.istio.io/inject: "false" 206 | labels: 207 | app: poddefaults 208 | app.kubernetes.io/component: poddefaults 209 | app.kubernetes.io/name: poddefaults 210 | kustomize.component: poddefaults 211 | spec: 212 | containers: 213 | - args: 214 | - --tlsCertFile=/etc/webhook/certs/tls.crt 215 | - --tlsKeyFile=/etc/webhook/certs/tls.key 216 | image: 
registry.cn-shenzhen.aliyuncs.com/tensorbytes/notebooks-admission-webhook:v1.3.0-rc.0-cc332 217 | name: admission-webhook 218 | ports: 219 | - containerPort: 4443 220 | name: https-webhook 221 | volumeMounts: 222 | - mountPath: /etc/webhook/certs 223 | name: webhook-cert 224 | readOnly: true 225 | serviceAccountName: admission-webhook-service-account 226 | volumes: 227 | - name: webhook-cert 228 | secret: 229 | secretName: webhook-certs 230 | --- 231 | apiVersion: cert-manager.io/v1alpha2 232 | kind: Certificate 233 | metadata: 234 | labels: 235 | app: poddefaults 236 | app.kubernetes.io/component: poddefaults 237 | app.kubernetes.io/name: poddefaults 238 | kustomize.component: poddefaults 239 | name: admission-webhook-cert 240 | namespace: kubeflow 241 | spec: 242 | commonName: admission-webhook-service.kubeflow.svc 243 | dnsNames: 244 | - admission-webhook-service.kubeflow.svc 245 | - admission-webhook-service.kubeflow.svc.cluster.local 246 | isCA: true 247 | issuerRef: 248 | kind: Issuer 249 | name: admission-webhook-selfsigned-issuer 250 | secretName: webhook-certs 251 | --- 252 | apiVersion: cert-manager.io/v1alpha2 253 | kind: Issuer 254 | metadata: 255 | labels: 256 | app: poddefaults 257 | app.kubernetes.io/component: poddefaults 258 | app.kubernetes.io/name: poddefaults 259 | kustomize.component: poddefaults 260 | name: admission-webhook-selfsigned-issuer 261 | namespace: kubeflow 262 | spec: 263 | selfSigned: {} 264 | --- 265 | apiVersion: admissionregistration.k8s.io/v1beta1 266 | kind: MutatingWebhookConfiguration 267 | metadata: 268 | annotations: 269 | cert-manager.io/inject-ca-from: kubeflow/admission-webhook-cert 270 | labels: 271 | app: poddefaults 272 | app.kubernetes.io/component: poddefaults 273 | app.kubernetes.io/name: poddefaults 274 | kustomize.component: poddefaults 275 | name: admission-webhook-mutating-webhook-configuration 276 | webhooks: 277 | - clientConfig: 278 | caBundle: "" 279 | service: 280 | name: admission-webhook-service 281 | 
namespace: kubeflow 282 | path: /apply-poddefault 283 | name: admission-webhook-deployment.kubeflow.org 284 | namespaceSelector: 285 | matchLabels: 286 | app.kubernetes.io/part-of: kubeflow-profile 287 | rules: 288 | - apiGroups: 289 | - "" 290 | apiVersions: 291 | - v1 292 | operations: 293 | - CREATE 294 | resources: 295 | - pods 296 | -------------------------------------------------------------------------------- /manifest1.3/026-tensorboard-overlays-kubeflow.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiextensions.k8s.io/v1beta1 2 | kind: CustomResourceDefinition 3 | metadata: 4 | creationTimestamp: null 5 | name: tensorboards.tensorboard.kubeflow.org 6 | spec: 7 | group: tensorboard.kubeflow.org 8 | names: 9 | kind: Tensorboard 10 | listKind: TensorboardList 11 | plural: tensorboards 12 | singular: tensorboard 13 | scope: "" 14 | subresources: 15 | status: {} 16 | validation: 17 | openAPIV3Schema: 18 | description: Tensorboard is the Schema for the tensorboards API 19 | properties: 20 | apiVersion: 21 | description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' 22 | type: string 23 | kind: 24 | description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' 25 | type: string 26 | metadata: 27 | type: object 28 | spec: 29 | description: TensorboardSpec defines the desired state of Tensorboard 30 | properties: 31 | logspath: 32 | description: 'INSERT ADDITIONAL SPEC FIELDS - desired state of cluster Important: Run "make" to regenerate code after modifying this file' 33 | type: string 34 | required: 35 | - logspath 36 | type: object 37 | status: 38 | description: TensorboardStatus defines the observed state of Tensorboard 39 | properties: 40 | conditions: 41 | description: Conditions is an array of current conditions 42 | items: 43 | description: TensorboardCondition defines the observed state of Tensorboard 44 | properties: 45 | deploymentState: 46 | description: Deployment status, 'Available', 'Progressing', 'ReplicaFailure' . 47 | type: string 48 | lastProbeTime: 49 | description: Last time we probed the condition. 50 | format: date-time 51 | type: string 52 | required: 53 | - deploymentState 54 | type: object 55 | type: array 56 | readyReplicas: 57 | description: ReadyReplicas defines the number of Tensorboard Servers that are available to connect. 
The value of ReadyReplicas can be either 0 or 1 58 | format: int32 59 | type: integer 60 | required: 61 | - conditions 62 | - readyReplicas 63 | type: object 64 | type: object 65 | version: v1alpha1 66 | versions: 67 | - name: v1alpha1 68 | served: true 69 | storage: true 70 | status: 71 | acceptedNames: 72 | kind: "" 73 | plural: "" 74 | conditions: [] 75 | storedVersions: [] 76 | --- 77 | apiVersion: v1 78 | kind: ServiceAccount 79 | metadata: 80 | name: tensorboard-controller 81 | namespace: kubeflow 82 | --- 83 | apiVersion: rbac.authorization.k8s.io/v1 84 | kind: Role 85 | metadata: 86 | name: tensorboard-controller-leader-election-role 87 | namespace: kubeflow 88 | rules: 89 | - apiGroups: 90 | - "" 91 | resources: 92 | - configmaps 93 | verbs: 94 | - get 95 | - list 96 | - watch 97 | - create 98 | - update 99 | - patch 100 | - delete 101 | - apiGroups: 102 | - "" 103 | resources: 104 | - configmaps/status 105 | verbs: 106 | - get 107 | - update 108 | - patch 109 | - apiGroups: 110 | - "" 111 | resources: 112 | - events 113 | verbs: 114 | - create 115 | --- 116 | apiVersion: rbac.authorization.k8s.io/v1 117 | kind: ClusterRole 118 | metadata: 119 | creationTimestamp: null 120 | name: tensorboard-controller-manager-role 121 | rules: 122 | - apiGroups: 123 | - apps 124 | resources: 125 | - deployments 126 | verbs: 127 | - create 128 | - get 129 | - list 130 | - update 131 | - watch 132 | - apiGroups: 133 | - "" 134 | resources: 135 | - persistentvolumeclaims 136 | verbs: 137 | - get 138 | - list 139 | - watch 140 | - apiGroups: 141 | - "" 142 | resources: 143 | - pods 144 | verbs: 145 | - get 146 | - list 147 | - watch 148 | - apiGroups: 149 | - "" 150 | resources: 151 | - services 152 | verbs: 153 | - create 154 | - get 155 | - list 156 | - update 157 | - watch 158 | - apiGroups: 159 | - networking.istio.io 160 | resources: 161 | - virtualservices 162 | verbs: 163 | - create 164 | - get 165 | - list 166 | - update 167 | - watch 168 | - apiGroups: 169 | - 
tensorboard.kubeflow.org 170 | resources: 171 | - tensorboards 172 | verbs: 173 | - create 174 | - delete 175 | - get 176 | - list 177 | - patch 178 | - update 179 | - watch 180 | - apiGroups: 181 | - tensorboard.kubeflow.org 182 | resources: 183 | - tensorboards/status 184 | verbs: 185 | - get 186 | - patch 187 | - update 188 | --- 189 | apiVersion: rbac.authorization.k8s.io/v1 190 | kind: ClusterRole 191 | metadata: 192 | name: tensorboard-controller-proxy-role 193 | rules: 194 | - apiGroups: 195 | - authentication.k8s.io 196 | resources: 197 | - tokenreviews 198 | verbs: 199 | - create 200 | - apiGroups: 201 | - authorization.k8s.io 202 | resources: 203 | - subjectaccessreviews 204 | verbs: 205 | - create 206 | --- 207 | apiVersion: rbac.authorization.k8s.io/v1 208 | kind: RoleBinding 209 | metadata: 210 | name: tensorboard-controller-leader-election-rolebinding 211 | namespace: kubeflow 212 | roleRef: 213 | apiGroup: rbac.authorization.k8s.io 214 | kind: Role 215 | name: tensorboard-controller-leader-election-role 216 | subjects: 217 | - kind: ServiceAccount 218 | name: tensorboard-controller 219 | namespace: kubeflow 220 | --- 221 | apiVersion: rbac.authorization.k8s.io/v1 222 | kind: ClusterRoleBinding 223 | metadata: 224 | name: tensorboard-controller-manager-rolebinding 225 | roleRef: 226 | apiGroup: rbac.authorization.k8s.io 227 | kind: ClusterRole 228 | name: tensorboard-controller-manager-role 229 | subjects: 230 | - kind: ServiceAccount 231 | name: tensorboard-controller 232 | namespace: kubeflow 233 | --- 234 | apiVersion: rbac.authorization.k8s.io/v1 235 | kind: ClusterRoleBinding 236 | metadata: 237 | name: tensorboard-controller-proxy-rolebinding 238 | roleRef: 239 | apiGroup: rbac.authorization.k8s.io 240 | kind: ClusterRole 241 | name: tensorboard-controller-proxy-role 242 | subjects: 243 | - kind: ServiceAccount 244 | name: tensorboard-controller 245 | namespace: kubeflow 246 | --- 247 | apiVersion: v1 248 | data: 249 | RWO_PVC_SCHEDULING: "True" 
250 | kind: ConfigMap 251 | metadata: 252 | name: tensorboard-controller-config-bf88mm96c8 253 | namespace: kubeflow 254 | --- 255 | apiVersion: v1 256 | kind: Service 257 | metadata: 258 | annotations: 259 | prometheus.io/port: "8443" 260 | prometheus.io/scheme: https 261 | prometheus.io/scrape: "true" 262 | labels: 263 | control-plane: controller-manager 264 | name: tensorboard-controller-controller-manager-metrics-service 265 | namespace: kubeflow 266 | spec: 267 | ports: 268 | - name: https 269 | port: 8443 270 | targetPort: https 271 | selector: 272 | control-plane: controller-manager 273 | --- 274 | apiVersion: apps/v1 275 | kind: Deployment 276 | metadata: 277 | labels: 278 | control-plane: controller-manager 279 | name: tensorboard-controller-controller-manager 280 | namespace: kubeflow 281 | spec: 282 | replicas: 1 283 | selector: 284 | matchLabels: 285 | control-plane: controller-manager 286 | template: 287 | metadata: 288 | labels: 289 | control-plane: controller-manager 290 | spec: 291 | containers: 292 | - args: 293 | - --metrics-addr=127.0.0.1:8080 294 | - --enable-leader-election 295 | command: 296 | - /manager 297 | envFrom: 298 | - configMapRef: 299 | name: tensorboard-controller-config-bf88mm96c8 300 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/notebooks-tensorboard-controller:v1.3.0-rc.0-31ba9 301 | name: manager 302 | resources: 303 | limits: 304 | cpu: 100m 305 | memory: 30Mi 306 | requests: 307 | cpu: 100m 308 | memory: 20Mi 309 | - args: 310 | - --secure-listen-address=0.0.0.0:8443 311 | - --upstream=http://127.0.0.1:8080/ 312 | - --logtostderr=true 313 | - --v=10 314 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/kubebuilder-kube-rbac-proxy:v0.4.0-83234 315 | name: kube-rbac-proxy 316 | ports: 317 | - containerPort: 8443 318 | name: https 319 | serviceAccountName: tensorboard-controller 320 | terminationGracePeriodSeconds: 10 321 | -------------------------------------------------------------------------------- 
/manifest1.3/023-jupyter-overlays-kubeflow.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiextensions.k8s.io/v1beta1 2 | kind: CustomResourceDefinition 3 | metadata: 4 | labels: 5 | app: notebook-controller 6 | kustomize.component: notebook-controller 7 | name: notebooks.kubeflow.org 8 | spec: 9 | group: kubeflow.org 10 | names: 11 | kind: Notebook 12 | plural: notebooks 13 | singular: notebook 14 | scope: Namespaced 15 | subresources: 16 | status: {} 17 | validation: 18 | openAPIV3Schema: 19 | properties: 20 | apiVersion: 21 | description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' 22 | type: string 23 | kind: 24 | description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' 25 | type: string 26 | metadata: 27 | type: object 28 | spec: 29 | properties: 30 | template: 31 | description: 'INSERT ADDITIONAL SPEC FIELDS - desired state of cluster Important: Run "make" to regenerate code after modifying this file' 32 | properties: 33 | spec: 34 | type: object 35 | type: object 36 | type: object 37 | status: 38 | properties: 39 | conditions: 40 | description: Conditions is an array of current conditions 41 | items: 42 | properties: 43 | type: 44 | description: Type of the confition/ 45 | type: string 46 | required: 47 | - type 48 | type: object 49 | type: array 50 | required: 51 | - conditions 52 | type: object 53 | versions: 54 | - name: v1alpha1 55 | served: true 56 | storage: false 57 | - name: v1beta1 58 | served: true 59 | storage: false 60 | - name: v1 61 | served: true 62 | storage: true 63 | status: 64 | acceptedNames: 65 | kind: "" 66 | plural: "" 67 | conditions: [] 68 | storedVersions: [] 69 | --- 70 | apiVersion: v1 71 | kind: ServiceAccount 72 | metadata: 73 | labels: 74 | app: notebook-controller 75 | kustomize.component: notebook-controller 76 | name: notebook-controller-service-account 77 | namespace: kubeflow 78 | --- 79 | apiVersion: rbac.authorization.k8s.io/v1 80 | kind: Role 81 | metadata: 82 | labels: 83 | app: notebook-controller 84 | kustomize.component: notebook-controller 85 | name: notebook-controller-leader-election-role 86 | namespace: kubeflow 87 | rules: 88 | - apiGroups: 89 | - "" 90 | resources: 91 | - configmaps 92 | verbs: 93 | - get 94 | - list 95 | - watch 96 | - create 97 | - update 98 | - patch 99 | - delete 100 | - apiGroups: 101 | - "" 102 | resources: 103 | - configmaps/status 104 | verbs: 105 | - get 106 | - update 107 | - patch 108 | - apiGroups: 109 | - "" 110 | resources: 111 | - events 112 | verbs: 113 | - create 114 | --- 115 | aggregationRule: 116 | clusterRoleSelectors: 117 | - matchLabels: 118 | 
rbac.authorization.kubeflow.org/aggregate-to-kubeflow-notebooks-admin: "true" 119 | apiVersion: rbac.authorization.k8s.io/v1 120 | kind: ClusterRole 121 | metadata: 122 | labels: 123 | app: notebook-controller 124 | kustomize.component: notebook-controller 125 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true" 126 | name: notebook-controller-kubeflow-notebooks-admin 127 | rules: [] 128 | --- 129 | apiVersion: rbac.authorization.k8s.io/v1 130 | kind: ClusterRole 131 | metadata: 132 | labels: 133 | app: notebook-controller 134 | kustomize.component: notebook-controller 135 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true" 136 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-notebooks-admin: "true" 137 | name: notebook-controller-kubeflow-notebooks-edit 138 | rules: 139 | - apiGroups: 140 | - kubeflow.org 141 | resources: 142 | - notebooks 143 | - notebooks/status 144 | verbs: 145 | - get 146 | - list 147 | - watch 148 | - create 149 | - delete 150 | - deletecollection 151 | - patch 152 | - update 153 | --- 154 | apiVersion: rbac.authorization.k8s.io/v1 155 | kind: ClusterRole 156 | metadata: 157 | labels: 158 | app: notebook-controller 159 | kustomize.component: notebook-controller 160 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true" 161 | name: notebook-controller-kubeflow-notebooks-view 162 | rules: 163 | - apiGroups: 164 | - kubeflow.org 165 | resources: 166 | - notebooks 167 | - notebooks/status 168 | verbs: 169 | - get 170 | - list 171 | - watch 172 | --- 173 | apiVersion: rbac.authorization.k8s.io/v1 174 | kind: ClusterRole 175 | metadata: 176 | creationTimestamp: null 177 | labels: 178 | app: notebook-controller 179 | kustomize.component: notebook-controller 180 | name: notebook-controller-role 181 | rules: 182 | - apiGroups: 183 | - apps 184 | resources: 185 | - statefulsets 186 | verbs: 187 | - '*' 188 | - apiGroups: 189 | - "" 190 | resources: 191 | - events 192 | verbs: 193 | - create 194 | - 
get 195 | - list 196 | - watch 197 | - apiGroups: 198 | - "" 199 | resources: 200 | - pods 201 | verbs: 202 | - get 203 | - list 204 | - watch 205 | - apiGroups: 206 | - "" 207 | resources: 208 | - services 209 | verbs: 210 | - '*' 211 | - apiGroups: 212 | - kubeflow.org 213 | resources: 214 | - notebooks 215 | - notebooks/finalizers 216 | - notebooks/status 217 | verbs: 218 | - '*' 219 | - apiGroups: 220 | - networking.istio.io 221 | resources: 222 | - virtualservices 223 | verbs: 224 | - '*' 225 | --- 226 | apiVersion: rbac.authorization.k8s.io/v1 227 | kind: RoleBinding 228 | metadata: 229 | labels: 230 | app: notebook-controller 231 | kustomize.component: notebook-controller 232 | name: notebook-controller-leader-election-rolebinding 233 | namespace: kubeflow 234 | roleRef: 235 | apiGroup: rbac.authorization.k8s.io 236 | kind: Role 237 | name: notebook-controller-leader-election-role 238 | subjects: 239 | - kind: ServiceAccount 240 | name: notebook-controller-service-account 241 | namespace: kubeflow 242 | --- 243 | apiVersion: rbac.authorization.k8s.io/v1 244 | kind: ClusterRoleBinding 245 | metadata: 246 | labels: 247 | app: notebook-controller 248 | kustomize.component: notebook-controller 249 | name: notebook-controller-role-binding 250 | roleRef: 251 | apiGroup: rbac.authorization.k8s.io 252 | kind: ClusterRole 253 | name: notebook-controller-role 254 | subjects: 255 | - kind: ServiceAccount 256 | name: notebook-controller-service-account 257 | namespace: kubeflow 258 | --- 259 | apiVersion: v1 260 | data: 261 | ISTIO_GATEWAY: kubeflow/kubeflow-gateway 262 | USE_ISTIO: "true" 263 | kind: ConfigMap 264 | metadata: 265 | annotations: {} 266 | labels: 267 | app: notebook-controller 268 | kustomize.component: notebook-controller 269 | name: notebook-controller-config-m44cmb547t 270 | namespace: kubeflow 271 | --- 272 | apiVersion: v1 273 | kind: Service 274 | metadata: 275 | labels: 276 | app: notebook-controller 277 | kustomize.component: notebook-controller 
278 | name: notebook-controller-service 279 | namespace: kubeflow 280 | spec: 281 | ports: 282 | - port: 443 283 | selector: 284 | app: notebook-controller 285 | kustomize.component: notebook-controller 286 | --- 287 | apiVersion: apps/v1 288 | kind: Deployment 289 | metadata: 290 | labels: 291 | app: notebook-controller 292 | kustomize.component: notebook-controller 293 | name: notebook-controller-deployment 294 | namespace: kubeflow 295 | spec: 296 | selector: 297 | matchLabels: 298 | app: notebook-controller 299 | kustomize.component: notebook-controller 300 | template: 301 | metadata: 302 | annotations: 303 | sidecar.istio.io/inject: "false" 304 | labels: 305 | app: notebook-controller 306 | kustomize.component: notebook-controller 307 | spec: 308 | containers: 309 | - command: 310 | - /manager 311 | env: 312 | - name: USE_ISTIO 313 | valueFrom: 314 | configMapKeyRef: 315 | key: USE_ISTIO 316 | name: notebook-controller-config-m44cmb547t 317 | - name: ISTIO_GATEWAY 318 | valueFrom: 319 | configMapKeyRef: 320 | key: ISTIO_GATEWAY 321 | name: notebook-controller-config-m44cmb547t 322 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/notebooks-notebook-controller:v1.3.0-rc.0-4c9fa 323 | imagePullPolicy: Always 324 | livenessProbe: 325 | httpGet: 326 | path: /metrics 327 | port: 8080 328 | initialDelaySeconds: 30 329 | periodSeconds: 30 330 | name: manager 331 | serviceAccountName: notebook-controller-service-account 332 | -------------------------------------------------------------------------------- /manifest1.3/020-centraldashboard-overlays-istio.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | labels: 5 | app: centraldashboard 6 | app.kubernetes.io/component: centraldashboard 7 | app.kubernetes.io/name: centraldashboard 8 | kustomize.component: centraldashboard 9 | name: centraldashboard 10 | namespace: kubeflow 11 | --- 12 | apiVersion: 
rbac.authorization.k8s.io/v1 13 | kind: Role 14 | metadata: 15 | labels: 16 | app: centraldashboard 17 | app.kubernetes.io/component: centraldashboard 18 | app.kubernetes.io/name: centraldashboard 19 | kustomize.component: centraldashboard 20 | name: centraldashboard 21 | namespace: kubeflow 22 | rules: 23 | - apiGroups: 24 | - "" 25 | - app.k8s.io 26 | resources: 27 | - applications 28 | - pods 29 | - pods/exec 30 | - pods/log 31 | verbs: 32 | - get 33 | - list 34 | - watch 35 | - apiGroups: 36 | - "" 37 | resources: 38 | - secrets 39 | - configmaps 40 | verbs: 41 | - get 42 | --- 43 | apiVersion: rbac.authorization.k8s.io/v1 44 | kind: ClusterRole 45 | metadata: 46 | labels: 47 | app: centraldashboard 48 | app.kubernetes.io/component: centraldashboard 49 | app.kubernetes.io/name: centraldashboard 50 | kustomize.component: centraldashboard 51 | name: centraldashboard 52 | rules: 53 | - apiGroups: 54 | - "" 55 | resources: 56 | - events 57 | - namespaces 58 | - nodes 59 | verbs: 60 | - get 61 | - list 62 | - watch 63 | --- 64 | apiVersion: rbac.authorization.k8s.io/v1 65 | kind: RoleBinding 66 | metadata: 67 | labels: 68 | app: centraldashboard 69 | app.kubernetes.io/component: centraldashboard 70 | app.kubernetes.io/name: centraldashboard 71 | kustomize.component: centraldashboard 72 | name: centraldashboard 73 | namespace: kubeflow 74 | roleRef: 75 | apiGroup: rbac.authorization.k8s.io 76 | kind: Role 77 | name: centraldashboard 78 | subjects: 79 | - kind: ServiceAccount 80 | name: centraldashboard 81 | namespace: kubeflow 82 | --- 83 | apiVersion: rbac.authorization.k8s.io/v1 84 | kind: ClusterRoleBinding 85 | metadata: 86 | labels: 87 | app: centraldashboard 88 | app.kubernetes.io/component: centraldashboard 89 | app.kubernetes.io/name: centraldashboard 90 | kustomize.component: centraldashboard 91 | name: centraldashboard 92 | roleRef: 93 | apiGroup: rbac.authorization.k8s.io 94 | kind: ClusterRole 95 | name: centraldashboard 96 | subjects: 97 | - kind: 
ServiceAccount 98 | name: centraldashboard 99 | namespace: kubeflow 100 | --- 101 | apiVersion: v1 102 | data: 103 | links: |- 104 | { 105 | "menuLinks": [ 106 | { 107 | "type": "item", 108 | "link": "/jupyter/", 109 | "text": "Notebooks", 110 | "icon": "book" 111 | }, 112 | { 113 | "type": "item", 114 | "link": "/tensorboards/", 115 | "text": "Tensorboards", 116 | "icon": "assessment" 117 | }, 118 | { 119 | "type": "item", 120 | "link": "/volumes/", 121 | "text": "Volumes", 122 | "icon": "device:storage" 123 | }, 124 | { 125 | "type": "item", 126 | "link": "/katib/", 127 | "text": "Experiments (AutoML)", 128 | "icon": "kubeflow:katib" 129 | }, 130 | { 131 | "type": "item", 132 | "text": "Experiments (KFP)", 133 | "link": "/pipeline/#/experiments", 134 | "icon": "done-all" 135 | }, 136 | { 137 | "type": "item", 138 | "link": "/pipeline/#/pipelines", 139 | "text": "Pipelines", 140 | "icon": "kubeflow:pipeline-centered" 141 | }, 142 | { 143 | "type": "item", 144 | "link": "/pipeline/#/runs", 145 | "text": "Runs", 146 | "icon": "maps:directions-run" 147 | }, 148 | { 149 | "type": "item", 150 | "link": "/pipeline/#/recurringruns", 151 | "text": "Recurring Runs", 152 | "icon": "device:access-alarm" 153 | }, 154 | { 155 | "type": "item", 156 | "link": "/pipeline/#/artifacts", 157 | "text": "Artifacts", 158 | "icon": "editor:bubble-chart" 159 | }, 160 | { 161 | "type": "item", 162 | "link": "/pipeline/#/executions", 163 | "text": "Executions", 164 | "icon": "av:play-arrow" 165 | } 166 | ], 167 | "externalLinks": [ ], 168 | "quickLinks": [ 169 | { 170 | "text": "Upload a pipeline", 171 | "desc": "Pipelines", 172 | "link": "/pipeline/" 173 | }, 174 | { 175 | "text": "View all pipeline runs", 176 | "desc": "Pipelines", 177 | "link": "/pipeline/#/runs" 178 | }, 179 | { 180 | "text": "Create a new Notebook server", 181 | "desc": "Notebook Servers", 182 | "link": "/jupyter/new?namespace=kubeflow" 183 | }, 184 | { 185 | "text": "View Katib Experiments", 186 | "desc": "Katib", 
187 | "link": "/katib/" 188 | } 189 | ], 190 | "documentationItems": [ 191 | { 192 | "text": "Getting Started with Kubeflow", 193 | "desc": "Get your machine-learning workflow up and running on Kubeflow", 194 | "link": "https://www.kubeflow.org/docs/started/getting-started/" 195 | }, 196 | { 197 | "text": "MiniKF", 198 | "desc": "A fast and easy way to deploy Kubeflow locally", 199 | "link": "https://www.kubeflow.org/docs/started/getting-started-minikf/" 200 | }, 201 | { 202 | "text": "Microk8s for Kubeflow", 203 | "desc": "Quickly get Kubeflow running locally on native hypervisors", 204 | "link": "https://www.kubeflow.org/docs/started/getting-started-multipass/" 205 | }, 206 | { 207 | "text": "Minikube for Kubeflow", 208 | "desc": "Quickly get Kubeflow running locally", 209 | "link": "https://www.kubeflow.org/docs/started/getting-started-minikube/" 210 | }, 211 | { 212 | "text": "Kubeflow on GCP", 213 | "desc": "Running Kubeflow on Kubernetes Engine and Google Cloud Platform", 214 | "link": "https://www.kubeflow.org/docs/gke/" 215 | }, 216 | { 217 | "text": "Kubeflow on AWS", 218 | "desc": "Running Kubeflow on Elastic Container Service and Amazon Web Services", 219 | "link": "https://www.kubeflow.org/docs/aws/" 220 | }, 221 | { 222 | "text": "Requirements for Kubeflow", 223 | "desc": "Get more detailed information about using Kubeflow and its components", 224 | "link": "https://www.kubeflow.org/docs/started/requirements/" 225 | } 226 | ] 227 | } 228 | settings: |- 229 | { 230 | "DASHBOARD_FORCE_IFRAME": true 231 | } 232 | kind: ConfigMap 233 | metadata: 234 | labels: 235 | app: centraldashboard 236 | app.kubernetes.io/component: centraldashboard 237 | app.kubernetes.io/name: centraldashboard 238 | kustomize.component: centraldashboard 239 | name: centraldashboard-config 240 | namespace: kubeflow 241 | --- 242 | apiVersion: v1 243 | data: 244 | CD_CLUSTER_DOMAIN: cluster.local 245 | CD_REGISTRATION_FLOW: "false" 246 | CD_USERID_HEADER: kubeflow-userid 247 | 
CD_USERID_PREFIX: "" 248 | kind: ConfigMap 249 | metadata: 250 | labels: 251 | app: centraldashboard 252 | app.kubernetes.io/component: centraldashboard 253 | app.kubernetes.io/name: centraldashboard 254 | kustomize.component: centraldashboard 255 | name: centraldashboard-parameters 256 | namespace: kubeflow 257 | --- 258 | apiVersion: v1 259 | kind: Service 260 | metadata: 261 | labels: 262 | app: centraldashboard 263 | app.kubernetes.io/component: centraldashboard 264 | app.kubernetes.io/name: centraldashboard 265 | kustomize.component: centraldashboard 266 | name: centraldashboard 267 | namespace: kubeflow 268 | spec: 269 | ports: 270 | - port: 80 271 | protocol: TCP 272 | targetPort: 8082 273 | selector: 274 | app: centraldashboard 275 | app.kubernetes.io/component: centraldashboard 276 | app.kubernetes.io/name: centraldashboard 277 | kustomize.component: centraldashboard 278 | sessionAffinity: None 279 | type: ClusterIP 280 | --- 281 | apiVersion: apps/v1 282 | kind: Deployment 283 | metadata: 284 | labels: 285 | app: centraldashboard 286 | app.kubernetes.io/component: centraldashboard 287 | app.kubernetes.io/name: centraldashboard 288 | kustomize.component: centraldashboard 289 | name: centraldashboard 290 | namespace: kubeflow 291 | spec: 292 | replicas: 1 293 | selector: 294 | matchLabels: 295 | app: centraldashboard 296 | app.kubernetes.io/component: centraldashboard 297 | app.kubernetes.io/name: centraldashboard 298 | kustomize.component: centraldashboard 299 | template: 300 | metadata: 301 | annotations: 302 | sidecar.istio.io/inject: "false" 303 | labels: 304 | app: centraldashboard 305 | app.kubernetes.io/component: centraldashboard 306 | app.kubernetes.io/name: centraldashboard 307 | kustomize.component: centraldashboard 308 | spec: 309 | containers: 310 | - env: 311 | - name: USERID_HEADER 312 | value: kubeflow-userid 313 | - name: USERID_PREFIX 314 | value: "" 315 | - name: PROFILES_KFAM_SERVICE_HOST 316 | value: profiles-kfam.kubeflow 317 | - name: 
REGISTRATION_FLOW 318 | value: "false" 319 | - name: DASHBOARD_LINKS_CONFIGMAP 320 | value: centraldashboard-config 321 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/notebooks-central-dashboard:v1.3.0-rc.0-a0ffd 322 | imagePullPolicy: IfNotPresent 323 | livenessProbe: 324 | httpGet: 325 | path: /healthz 326 | port: 8082 327 | initialDelaySeconds: 30 328 | periodSeconds: 30 329 | name: centraldashboard 330 | ports: 331 | - containerPort: 8082 332 | protocol: TCP 333 | serviceAccountName: centraldashboard 334 | --- 335 | apiVersion: networking.istio.io/v1alpha3 336 | kind: VirtualService 337 | metadata: 338 | labels: 339 | app: centraldashboard 340 | app.kubernetes.io/component: centraldashboard 341 | app.kubernetes.io/name: centraldashboard 342 | kustomize.component: centraldashboard 343 | name: centraldashboard 344 | namespace: kubeflow 345 | spec: 346 | gateways: 347 | - kubeflow-gateway 348 | hosts: 349 | - '*' 350 | http: 351 | - match: 352 | - uri: 353 | prefix: / 354 | rewrite: 355 | uri: / 356 | route: 357 | - destination: 358 | host: centraldashboard.kubeflow.svc.cluster.local 359 | port: 360 | number: 80 361 | -------------------------------------------------------------------------------- /docs/introduction.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | --- 4 | 5 | ![](https://shikanon.com/img/kubeflow/kubeflow-dashboardcenter.png) 6 | 7 | 可以看到新版的kubeflow多了很多功能。 8 | 9 | 这里按模块介绍下 Kubeflow 的几个核心组件。 10 | - Notebook Servers,作为一个管理线上交互实验的记录工具,可以帮助算法人员快速完成算法实验,同时notebook server 提供了统一的文档管理能力。 11 | - AutoML,提供自动化的服务,对特征处理、特征选择、模型选择、模型参数的配置、模型训练和评估等方面,实现了全自动建模,降低算法人员手动实验次数。 12 | - Pipeline,提供一个算法流水线的工程化工具,将算法各流程模块以拓扑图的形式组合起来,同时结合 argo 可以实现 MLOps。 13 | - Serverless,将模型直接发布成一个对外的服务,缩短从实验到生产的路径。 14 | 15 | ![](https://shikanon.com/img/kubeflow/kubeflow组件.png) 16 | 17 | ## Notebook Servers 18 | 19 | notebook 可以说是做机器学习最喜欢用到的工具了,完美的将动态语言的交互性发挥出来,kubeflow 提供了 jupyter notebook 
来快速构建云上的实验环境,这里以一个我们自定义的镜像为例: 20 | 21 | ![](https://shikanon.com/img/kubeflow/kubeflow-create-notebook.png) 22 | 23 | 我们创建了一个名为 `test-for-jupyter` 的 notebook server,并为其配置了一个 tensorflow 镜像,点击启动后,可以看到在 `kubeflow-user-example-com` 命名空间下已经创建了我们的应用: 24 | ```bash 25 | kubectl get po -nkubeflow-user-example-com 26 | NAME READY STATUS RESTARTS AGE 27 | ml-pipeline-ui-artifact-6d7ffcc4b6-9kxkk 2/2 Running 0 48m 28 | ml-pipeline-visualizationserver-84d577b989-5hl46 2/2 Running 0 48m 29 | test-for-jupyter-0 0/2 PodInitializing 0 44s 30 | ``` 31 | 32 | ![](https://shikanon.com/img/kubeflow/notebook-server-ui.png) 33 | 34 | 创建完成后点击 connect 就可以进入我们创建的应用界面中了 35 | 36 | ![](https://shikanon.com/img/kubeflow/jupterlab-webui.png) 37 | ![](https://shikanon.com/img/kubeflow/jupterlab-web-run-code.png) 38 | 39 | 在 jupyterlab 环境中开发人员可以很方便的进行算法实验,同时由于运行在云上利用 k8s api甚至可以很方便构建k8s资源,比如通过 kfserving 创建一个ML服务。 40 | 41 | ![](https://shikanon.com/img/kubeflow/jupyter-kfserving.png) 42 | 43 | 44 | ## AutoML 45 | 46 | AutoML 是机器学习比较热的领域,主要用来模型自动优化和超参数调整,这里其实是用的 Katib来实现的,一个基于k8s的 AutoML 项目,详细见https://github.com/kubeflow/katib。 47 | 48 | Katib 主要提供了 超参数调整(Hyperparameter Tuning),早停法(Early Stopping)和神经网络架构搜索(Neural Architecture Search) 49 | 50 | 这里以一个随机搜索算法为例: 51 | ```yaml 52 | apiVersion: "kubeflow.org/v1beta1" 53 | kind: Experiment 54 | metadata: 55 | namespace: kubeflow-user-example-com 56 | name: random-example 57 | spec: 58 | objective: 59 | type: maximize 60 | goal: 0.99 61 | objectiveMetricName: Validation-accuracy 62 | additionalMetricNames: 63 | - Train-accuracy 64 | algorithm: 65 | algorithmName: random 66 | parallelTrialCount: 3 67 | maxTrialCount: 12 68 | maxFailedTrialCount: 3 69 | parameters: 70 | - name: lr 71 | parameterType: double 72 | feasibleSpace: 73 | min: "0.01" 74 | max: "0.03" 75 | - name: num-layers 76 | parameterType: int 77 | feasibleSpace: 78 | min: "2" 79 | max: "5" 80 | - name: optimizer 81 | parameterType: categorical 82 | feasibleSpace: 83 | list: 84 | - sgd 85 | - adam 86 | - ftrl 87 | 
trialTemplate: 88 | primaryContainerName: training-container 89 | trialParameters: 90 | - name: learningRate 91 | description: Learning rate for the training model 92 | reference: lr 93 | - name: numberLayers 94 | description: Number of training model layers 95 | reference: num-layers 96 | - name: optimizer 97 | description: Training model optimizer (sdg, adam or ftrl) 98 | reference: optimizer 99 | trialSpec: 100 | apiVersion: batch/v1 101 | kind: Job 102 | spec: 103 | template: 104 | spec: 105 | containers: 106 | - name: training-container 107 | image: docker.io/kubeflowkatib/mxnet-mnist:v1beta1-45c5727 108 | command: 109 | - "python3" 110 | - "/opt/mxnet-mnist/mnist.py" 111 | - "--batch-size=64" 112 | - "--lr=${trialParameters.learningRate}" 113 | - "--num-layers=${trialParameters.numberLayers}" 114 | - "--optimizer=${trialParameters.optimizer}" 115 | restartPolicy: Never 116 | ``` 117 | 118 | 这里以一个简单的神经网络为例,该程序具有三个参数 lr, num-layers, optimizer,采用的算法是随机搜索,目标是最大化准确率(accuracy)。 119 | 120 | 可以直接在界面中填上yaml文件,然后提交,完成后会生成一张各参数和准确率的关系图和训练列表: 121 | ![](https://shikanon.com/img/kubeflow/katib-tune-hyperparameter.png) 122 | ![](https://shikanon.com/img/kubeflow/katib-tune-hyperparameter-training.png) 123 | 124 | ## Experiments and Pipelines 125 | 126 | experiments 为我们提供了一个可以创建实验空间功能, `pipeline` 定义了算法组合的模板,通过 `pipeline` 我们可以将算法中各处理模块按特定的拓扑图的方式组合起来。 127 | 128 | 这里可以看看官方提供的几个 pipeline 例子: 129 | ![](https://shikanon.com/img/kubeflow/kubeflow-pipeline-example.png) 130 | ![](https://shikanon.com/img/kubeflow/kubeflow-pipeline-example2.png) 131 | 132 | kubeflow `pipeline` 本质是基于 argo `workflow` 实现,**由于我们的 kubeflow 是基于 kind 构建的,容器运行时用的是 containerd,而 workflow 默认的 pipeline 执行器是 docker,因此有些特性不兼容**,这块可以参见 argo workflow 官方说明: https://argoproj.github.io/argo-workflows/workflow-executors/ 。 133 | 这里我把 workflow 的 `containerRuntimeExecutor` 改成了 `k8sapi`。但由于 `k8sapi` 在 workflow 中是二等公民,因此有些功能不能用,比如 kubeflow pipeline 在 input/output 的 artifacts 需要用到 `docker cp` 命令,可以参考这个issue: 
https://github.com/argoproj/argo-workflows/issues/2685#issuecomment-613632304 134 | 135 | 由于以上原因,kubeflow 默认给的几个案例没有使用 volumes,无法在 kind 中运行起来,这里我们基于 argo workflow 语法自己实现一个 `pipeline`。 136 | 137 | ### 基于 pipeline 构建一个工作流水线 138 | 139 | **第一步,构建一个 workflow pipeline 文件:** 140 | 141 | ```yaml 142 | apiVersion: argoproj.io/v1alpha1 143 | kind: Workflow 144 | metadata: 145 | generateName: kubeflow-test- 146 | spec: 147 | entrypoint: kubeflow-test 148 | templates: 149 | - name: kubeflow-test 150 | dag: 151 | tasks: 152 | - name: print-text 153 | template: print-text 154 | dependencies: [repeat-line] 155 | - {name: repeat-line, template: repeat-line} 156 | - name: repeat-line 157 | container: 158 | args: [--line, Hello, --count, '15', --output-text, /gotest/outputs/output_text/data] 159 | command: 160 | - sh 161 | - -ec 162 | - | 163 | program_path=$(mktemp) 164 | printf "%s" "$0" > "$program_path" 165 | python3 -u "$program_path" "$@" 166 | - | 167 | def _make_parent_dirs_and_return_path(file_path: str): 168 | import os 169 | os.makedirs(os.path.dirname(file_path), exist_ok=True) 170 | return file_path 171 | 172 | def repeat_line(line, output_text_path, count = 10): 173 | '''Repeat the line specified number of times''' 174 | with open(output_text_path, 'w') as writer: 175 | for i in range(count): 176 | writer.write(line + '\n') 177 | 178 | import argparse 179 | _parser = argparse.ArgumentParser(prog='Repeat line', description='Repeat the line specified number of times') 180 | _parser.add_argument("--line", dest="line", type=str, required=True, default=argparse.SUPPRESS) 181 | _parser.add_argument("--count", dest="count", type=int, required=False, default=argparse.SUPPRESS) 182 | _parser.add_argument("--output-text", dest="output_text_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) 183 | _parsed_args = vars(_parser.parse_args()) 184 | 185 | _outputs = repeat_line(**_parsed_args) 186 | image: python:3.7 187 | volumeMounts: 188 | - name: 
workdir 189 | mountPath: /gotest/outputs/output_text/ 190 | volumes: 191 | - name: workdir 192 | persistentVolumeClaim: 193 | claimName: kubeflow-test-pv 194 | metadata: 195 | annotations: 196 | - name: print-text 197 | container: 198 | args: [--text, /gotest/outputs/output_text/data] 199 | command: 200 | - sh 201 | - -ec 202 | - | 203 | program_path=$(mktemp) 204 | printf "%s" "$0" > "$program_path" 205 | python3 -u "$program_path" "$@" 206 | - | 207 | def print_text(text_path): # The "text" input is untyped so that any data can be printed 208 | '''Print text''' 209 | with open(text_path, 'r') as reader: 210 | for line in reader: 211 | print(line, end = '') 212 | 213 | import argparse 214 | _parser = argparse.ArgumentParser(prog='Print text', description='Print text') 215 | _parser.add_argument("--text", dest="text_path", type=str, required=True, default=argparse.SUPPRESS) 216 | _parsed_args = vars(_parser.parse_args()) 217 | 218 | _outputs = print_text(**_parsed_args) 219 | image: python:3.7 220 | volumeMounts: 221 | - name: workdir 222 | mountPath: /gotest/outputs/output_text/ 223 | volumes: 224 | - name: workdir 225 | persistentVolumeClaim: 226 | claimName: kubeflow-test-pv 227 | metadata: 228 | annotations: 229 | ``` 230 | 231 | argo workflow 的语法可以参考:https://argoproj.github.io/argo-workflows/variables/ 232 | 233 | 这里我们定义了两个任务 repeat-line 和 print-text:repeat-line 任务会将生成的结果写入 `kubeflow-test-pv` 这个 PVC 中,print-text 会从 PVC 中读取数据并输出到 stdout。 234 | 235 | 这里由于用到 PVC,我们需要先在集群中创建一个`kubeflow-test-pv`的PVC: 236 | ```yaml 237 | apiVersion: v1 238 | kind: PersistentVolumeClaim 239 | metadata: 240 | name: kubeflow-test-pv 241 | namespace: kubeflow-user-example-com 242 | spec: 243 | accessModes: 244 | - ReadWriteOnce 245 | resources: 246 | requests: 247 | storage: 128Mi 248 | ``` 249 | 250 | 251 | **第二步,定义好 pipeline 文件后可以创建pipeline:** 252 | 253 | ![](https://shikanon.com/img/kubeflow/kubeflow-upload-pipeline.png) 254 | 255 | **第三步,启动一个pipeline:** 256 | 257 | 
![](https://shikanon.com/img/kubeflow/kubeflow-crate-pipeline.png) 258 | 259 | 启动 pipeline 除了单次运行模式 one-off,也支持定时器循环模式 Recurring,这块可以根据自己的需求确定。 260 | 261 | **查看运行结果:** 262 | 263 | ![](https://shikanon.com/img/kubeflow/kbueflow-pipeline-result.png) 264 | 265 | 运行完后,可以将实验进行归档(Archived)。 266 | 267 | 268 | 269 | ## 关于 MLOps 的一点思考 270 | 271 | 我们来看一个简单的 ML 运作流程: 272 | ![](https://shikanon.com/img/kubeflow/google-mlops.svg) 273 | 274 | 这是一个 google 提供的 level 1 级别的机器学习流水线自动化,整个流水线包括以下几部分: 275 | - 构建快速算法实验的环境(experimentation),这里的步骤已经过编排,各个步骤之间的转换是自动执行的,这样可以快速迭代实验,并更好地准备将整个流水线移至生产环境,在这个环境中算法研究员只进行模块内部的工作。 276 | - 构建可复用的生产环境流水线,组件的源代码模块化,实验环境模块化流水线可以直接在 staging 环境和 production 环境中使用。 277 | - 持续交付模型,生产环境中的机器学习流水线会向使用新数据进行训练的新模型持续交付预测服务。 278 | 279 | 基于上述功能描述我们其实可以基于 kubeflow 的 `pipeline` 和 `kfserving` 功能轻松实现一个简单的 MLOps 流水线发布流程。不过,值得注意的是,DevOps 本身并不仅仅是一种技术,同时是一种工程文化,所以在实践落地中需要团队各方的协同分阶段的落地。这块可以参考[《MLOps: Continuous delivery and automation pipelines in machine learning》](https://cloud.google.com/architecture/mlops-continuous-delivery-and-automation-pipelines-in-machine-learning)和[《Hidden Technical Debt in Machine Learning Systems》](https://papers.nips.cc/paper/2015/file/86df7dcfd896fcaf2674f757a2463eba-Paper.pdf) 280 | 281 | 282 | # 参考文献 283 | - https://www.tensorflow.org/tutorials/quickstart/beginner 284 | - https://github.com/dexidp/dex 285 | - https://github.com/kubeflow/kfserving/tree/master/docs 286 | - https://argoproj.github.io/argo-workflows/workflow-executors/ 287 | - https://github.com/shikanon/kubeflow-manifests 288 | - https://argoproj.github.io/argo-workflows/variables/ 289 | - https://cloud.google.com/architecture/mlops-continuous-delivery-and-automation-pipelines-in-machine-learning -------------------------------------------------------------------------------- /manifest1.3/013-istio-1-9-0-cluster-local-gateway-base.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | 
labels: 5 | app: cluster-local-gateway 6 | install.operator.istio.io/owning-resource: unknown 7 | istio: cluster-local-gateway 8 | istio.io/rev: default 9 | operator.istio.io/component: IngressGateways 10 | release: istio 11 | name: cluster-local-gateway-service-account 12 | namespace: istio-system 13 | --- 14 | apiVersion: rbac.authorization.k8s.io/v1 15 | kind: Role 16 | metadata: 17 | labels: 18 | install.operator.istio.io/owning-resource: unknown 19 | istio.io/rev: default 20 | operator.istio.io/component: IngressGateways 21 | release: istio 22 | name: cluster-local-gateway-sds 23 | namespace: istio-system 24 | rules: 25 | - apiGroups: 26 | - "" 27 | resources: 28 | - secrets 29 | verbs: 30 | - get 31 | - watch 32 | - list 33 | --- 34 | apiVersion: rbac.authorization.k8s.io/v1 35 | kind: RoleBinding 36 | metadata: 37 | labels: 38 | install.operator.istio.io/owning-resource: unknown 39 | istio.io/rev: default 40 | operator.istio.io/component: IngressGateways 41 | release: istio 42 | name: cluster-local-gateway-sds 43 | namespace: istio-system 44 | roleRef: 45 | apiGroup: rbac.authorization.k8s.io 46 | kind: Role 47 | name: cluster-local-gateway-sds 48 | subjects: 49 | - kind: ServiceAccount 50 | name: cluster-local-gateway-service-account 51 | namespace: istio-system 52 | --- 53 | apiVersion: v1 54 | kind: Service 55 | metadata: 56 | labels: 57 | app: cluster-local-gateway 58 | install.operator.istio.io/owning-resource: unknown 59 | istio: cluster-local-gateway 60 | istio.io/rev: default 61 | operator.istio.io/component: IngressGateways 62 | release: istio 63 | name: cluster-local-gateway 64 | namespace: istio-system 65 | spec: 66 | ports: 67 | - name: status-port 68 | port: 15020 69 | protocol: TCP 70 | targetPort: 15020 71 | - name: http2 72 | port: 80 73 | protocol: TCP 74 | targetPort: 8080 75 | selector: 76 | app: cluster-local-gateway 77 | istio: cluster-local-gateway 78 | type: ClusterIP 79 | --- 80 | apiVersion: apps/v1 81 | kind: Deployment 82 | 
metadata: 83 | labels: 84 | app: cluster-local-gateway 85 | install.operator.istio.io/owning-resource: unknown 86 | istio: cluster-local-gateway 87 | istio.io/rev: default 88 | operator.istio.io/component: IngressGateways 89 | release: istio 90 | name: cluster-local-gateway 91 | namespace: istio-system 92 | spec: 93 | selector: 94 | matchLabels: 95 | app: cluster-local-gateway 96 | istio: cluster-local-gateway 97 | strategy: 98 | rollingUpdate: 99 | maxSurge: 100% 100 | maxUnavailable: 25% 101 | template: 102 | metadata: 103 | annotations: 104 | prometheus.io/path: /stats/prometheus 105 | prometheus.io/port: "15020" 106 | prometheus.io/scrape: "true" 107 | sidecar.istio.io/inject: "false" 108 | labels: 109 | app: cluster-local-gateway 110 | chart: gateways 111 | heritage: Tiller 112 | install.operator.istio.io/owning-resource: unknown 113 | istio: cluster-local-gateway 114 | istio.io/rev: default 115 | operator.istio.io/component: IngressGateways 116 | release: istio 117 | service.istio.io/canonical-name: cluster-local-gateway 118 | service.istio.io/canonical-revision: latest 119 | sidecar.istio.io/inject: "false" 120 | spec: 121 | affinity: 122 | nodeAffinity: 123 | preferredDuringSchedulingIgnoredDuringExecution: 124 | - preference: 125 | matchExpressions: 126 | - key: kubernetes.io/arch 127 | operator: In 128 | values: 129 | - amd64 130 | weight: 2 131 | - preference: 132 | matchExpressions: 133 | - key: kubernetes.io/arch 134 | operator: In 135 | values: 136 | - ppc64le 137 | weight: 2 138 | - preference: 139 | matchExpressions: 140 | - key: kubernetes.io/arch 141 | operator: In 142 | values: 143 | - s390x 144 | weight: 2 145 | requiredDuringSchedulingIgnoredDuringExecution: 146 | nodeSelectorTerms: 147 | - matchExpressions: 148 | - key: kubernetes.io/arch 149 | operator: In 150 | values: 151 | - amd64 152 | - ppc64le 153 | - s390x 154 | containers: 155 | - args: 156 | - proxy 157 | - router 158 | - --domain 159 | - $(POD_NAMESPACE).svc.cluster.local 160 | - 
--proxyLogLevel=warning 161 | - --proxyComponentLogLevel=misc:error 162 | - --log_output_level=default:info 163 | - --serviceCluster 164 | - cluster-local-gateway 165 | env: 166 | - name: JWT_POLICY 167 | value: third-party-jwt 168 | - name: PILOT_CERT_PROVIDER 169 | value: istiod 170 | - name: CA_ADDR 171 | value: istiod.istio-system.svc:15012 172 | - name: NODE_NAME 173 | valueFrom: 174 | fieldRef: 175 | apiVersion: v1 176 | fieldPath: spec.nodeName 177 | - name: POD_NAME 178 | valueFrom: 179 | fieldRef: 180 | apiVersion: v1 181 | fieldPath: metadata.name 182 | - name: POD_NAMESPACE 183 | valueFrom: 184 | fieldRef: 185 | apiVersion: v1 186 | fieldPath: metadata.namespace 187 | - name: INSTANCE_IP 188 | valueFrom: 189 | fieldRef: 190 | apiVersion: v1 191 | fieldPath: status.podIP 192 | - name: HOST_IP 193 | valueFrom: 194 | fieldRef: 195 | apiVersion: v1 196 | fieldPath: status.hostIP 197 | - name: SERVICE_ACCOUNT 198 | valueFrom: 199 | fieldRef: 200 | fieldPath: spec.serviceAccountName 201 | - name: CANONICAL_SERVICE 202 | valueFrom: 203 | fieldRef: 204 | fieldPath: metadata.labels['service.istio.io/canonical-name'] 205 | - name: CANONICAL_REVISION 206 | valueFrom: 207 | fieldRef: 208 | fieldPath: metadata.labels['service.istio.io/canonical-revision'] 209 | - name: ISTIO_META_WORKLOAD_NAME 210 | value: cluster-local-gateway 211 | - name: ISTIO_META_OWNER 212 | value: kubernetes://apis/apps/v1/namespaces/istio-system/deployments/cluster-local-gateway 213 | - name: ISTIO_META_UNPRIVILEGED_POD 214 | value: "true" 215 | - name: ISTIO_META_ROUTER_MODE 216 | value: sni-dnat 217 | - name: ISTIO_META_CLUSTER_ID 218 | value: Kubernetes 219 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/istio-proxyv2:1.9.0-e8a74 220 | name: istio-proxy 221 | ports: 222 | - containerPort: 15020 223 | protocol: TCP 224 | - containerPort: 8080 225 | protocol: TCP 226 | - containerPort: 15090 227 | name: http-envoy-prom 228 | protocol: TCP 229 | readinessProbe: 230 | failureThreshold: 
30 231 | httpGet: 232 | path: /healthz/ready 233 | port: 15021 234 | scheme: HTTP 235 | initialDelaySeconds: 1 236 | periodSeconds: 2 237 | successThreshold: 1 238 | timeoutSeconds: 1 239 | resources: 240 | limits: 241 | cpu: 2000m 242 | memory: 1024Mi 243 | requests: 244 | cpu: 100m 245 | memory: 128Mi 246 | securityContext: 247 | allowPrivilegeEscalation: false 248 | capabilities: 249 | drop: 250 | - ALL 251 | privileged: false 252 | readOnlyRootFilesystem: true 253 | volumeMounts: 254 | - mountPath: /etc/istio/proxy 255 | name: istio-envoy 256 | - mountPath: /etc/istio/config 257 | name: config-volume 258 | - mountPath: /var/run/secrets/istio 259 | name: istiod-ca-cert 260 | - mountPath: /var/run/secrets/tokens 261 | name: istio-token 262 | readOnly: true 263 | - mountPath: /var/lib/istio/data 264 | name: istio-data 265 | - mountPath: /etc/istio/pod 266 | name: podinfo 267 | - mountPath: /etc/istio/ingressgateway-certs 268 | name: ingressgateway-certs 269 | readOnly: true 270 | - mountPath: /etc/istio/ingressgateway-ca-certs 271 | name: ingressgateway-ca-certs 272 | readOnly: true 273 | securityContext: 274 | fsGroup: 1337 275 | runAsGroup: 1337 276 | runAsNonRoot: true 277 | runAsUser: 1337 278 | serviceAccountName: cluster-local-gateway-service-account 279 | volumes: 280 | - configMap: 281 | name: istio-ca-root-cert 282 | name: istiod-ca-cert 283 | - downwardAPI: 284 | items: 285 | - fieldRef: 286 | fieldPath: metadata.labels 287 | path: labels 288 | - fieldRef: 289 | fieldPath: metadata.annotations 290 | path: annotations 291 | - path: cpu-limit 292 | resourceFieldRef: 293 | containerName: istio-proxy 294 | divisor: 1m 295 | resource: limits.cpu 296 | - path: cpu-request 297 | resourceFieldRef: 298 | containerName: istio-proxy 299 | divisor: 1m 300 | resource: requests.cpu 301 | name: podinfo 302 | - emptyDir: {} 303 | name: istio-envoy 304 | - emptyDir: {} 305 | name: istio-data 306 | - name: istio-token 307 | projected: 308 | sources: 309 | - 
serviceAccountToken: 310 | audience: istio-ca 311 | expirationSeconds: 43200 312 | path: istio-token 313 | - configMap: 314 | name: istio 315 | optional: true 316 | name: config-volume 317 | - name: ingressgateway-certs 318 | secret: 319 | optional: true 320 | secretName: istio-ingressgateway-certs 321 | - name: ingressgateway-ca-certs 322 | secret: 323 | optional: true 324 | secretName: istio-ingressgateway-ca-certs 325 | --- 326 | apiVersion: networking.istio.io/v1alpha3 327 | kind: Gateway 328 | metadata: 329 | labels: 330 | release: istio 331 | name: cluster-local-gateway 332 | namespace: istio-system 333 | spec: 334 | selector: 335 | app: cluster-local-gateway 336 | istio: cluster-local-gateway 337 | servers: 338 | - hosts: 339 | - '*' 340 | port: 341 | name: http 342 | number: 80 343 | protocol: HTTP 344 | --- 345 | apiVersion: security.istio.io/v1beta1 346 | kind: AuthorizationPolicy 347 | metadata: 348 | name: cluster-local-gateway 349 | namespace: istio-system 350 | spec: 351 | action: ALLOW 352 | rules: 353 | - {} 354 | selector: 355 | matchLabels: 356 | app: cluster-local-gateway 357 | istio: cluster-local-gateway 358 | -------------------------------------------------------------------------------- /manifest1.3/030-mpi-job-overlays-kubeflow.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiextensions.k8s.io/v1beta1 2 | kind: CustomResourceDefinition 3 | metadata: 4 | labels: 5 | app: mpi-operator 6 | app.kubernetes.io/component: mpijob 7 | app.kubernetes.io/name: mpi-operator 8 | kustomize.component: mpi-operator 9 | name: mpijobs.kubeflow.org 10 | spec: 11 | group: kubeflow.org 12 | names: 13 | kind: MPIJob 14 | plural: mpijobs 15 | shortNames: 16 | - mj 17 | - mpij 18 | singular: mpijob 19 | scope: Namespaced 20 | versions: 21 | - name: v1alpha1 22 | schema: 23 | openAPIV3Schema: 24 | properties: 25 | spec: 26 | description: Only one of gpus, processingUnits, or replicas should be specified 27 | 
oneOf: 28 | - properties: 29 | gpus: 30 | description: Valid values are 1, 2, 4, or any multiple of 8 31 | oneOf: 32 | - enum: 33 | - 1 34 | - 2 35 | - 4 36 | type: integer 37 | - minimum: 8 38 | multipleOf: 8 39 | type: integer 40 | title: Total number of GPUs 41 | gpusPerNode: 42 | description: Defaults to the number of GPUs per worker 43 | minimum: 1 44 | title: The maximum number of GPUs available per node 45 | type: integer 46 | slotsPerWorker: 47 | description: Defaults to the number of processing units per worker 48 | minimum: 1 49 | title: The number of slots per worker used in hostfile 50 | type: integer 51 | required: 52 | - gpus 53 | - properties: 54 | processingResourceType: 55 | description: Defaults to 'nvidia.com/gpu' 56 | enum: 57 | - nvidia.com/gpu 58 | - cpu 59 | title: The processing resource type, e.g. 'nvidia.com/gpu' or 'cpu' 60 | type: string 61 | processingUnits: 62 | description: Valid values are 1, 2, 4, or any multiple of 8 63 | oneOf: 64 | - enum: 65 | - 1 66 | - 2 67 | - 4 68 | type: integer 69 | - minimum: 8 70 | multipleOf: 8 71 | type: integer 72 | title: Total number of processing units 73 | processingUnitsPerNode: 74 | description: Defaults to the number of processing units per worker 75 | minimum: 1 76 | title: The maximum number of processing units available per node 77 | type: integer 78 | slotsPerWorker: 79 | description: Defaults to the number of processing units per worker 80 | minimum: 1 81 | title: The number of slots per worker used in hostfile 82 | type: integer 83 | required: 84 | - processingUnits 85 | - properties: 86 | processingResourceType: 87 | description: Defaults to 'nvidia.com/gpu' 88 | enum: 89 | - nvidia.com/gpu 90 | - cpu 91 | title: The processing resource type, e.g. 
'nvidia.com/gpu' or 'cpu' 92 | type: string 93 | replicas: 94 | description: The processing resource limit should be specified for each replica 95 | minimum: 1 96 | title: Total number of replicas 97 | type: integer 98 | slotsPerWorker: 99 | description: Defaults to the number of processing units per worker 100 | minimum: 1 101 | title: The number of slots per worker used in hostfile 102 | type: integer 103 | required: 104 | - replicas 105 | title: The MPIJob spec 106 | served: false 107 | storage: false 108 | - name: v1alpha2 109 | schema: 110 | openAPIV3Schema: 111 | properties: 112 | spec: 113 | properties: 114 | mpiReplicaSpecs: 115 | properties: 116 | Launcher: 117 | properties: 118 | replicas: 119 | maximum: 1 120 | minimum: 1 121 | type: integer 122 | Worker: 123 | properties: 124 | replicas: 125 | minimum: 1 126 | type: integer 127 | slotsPerWorker: 128 | minimum: 1 129 | type: integer 130 | served: true 131 | storage: false 132 | - name: v1 133 | schema: 134 | openAPIV3Schema: 135 | properties: 136 | spec: 137 | properties: 138 | mpiReplicaSpecs: 139 | properties: 140 | Launcher: 141 | properties: 142 | replicas: 143 | maximum: 1 144 | minimum: 1 145 | type: integer 146 | Worker: 147 | properties: 148 | replicas: 149 | minimum: 1 150 | type: integer 151 | slotsPerWorker: 152 | minimum: 1 153 | type: integer 154 | served: true 155 | storage: true 156 | --- 157 | apiVersion: v1 158 | kind: ServiceAccount 159 | metadata: 160 | labels: 161 | app: mpi-operator 162 | app.kubernetes.io/component: mpijob 163 | app.kubernetes.io/name: mpi-operator 164 | kustomize.component: mpi-operator 165 | name: mpi-operator 166 | namespace: kubeflow 167 | --- 168 | aggregationRule: 169 | clusterRoleSelectors: 170 | - matchLabels: 171 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-mpijobs-admin: "true" 172 | apiVersion: rbac.authorization.k8s.io/v1 173 | kind: ClusterRole 174 | metadata: 175 | labels: 176 | app: mpi-operator 177 | app.kubernetes.io/component: mpijob 178 
| app.kubernetes.io/name: mpi-operator 179 | kustomize.component: mpi-operator 180 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true" 181 | name: kubeflow-mpijobs-admin 182 | rules: [] 183 | --- 184 | apiVersion: rbac.authorization.k8s.io/v1 185 | kind: ClusterRole 186 | metadata: 187 | labels: 188 | app: mpi-operator 189 | app.kubernetes.io/component: mpijob 190 | app.kubernetes.io/name: mpi-operator 191 | kustomize.component: mpi-operator 192 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true" 193 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-mpijobs-admin: "true" 194 | name: kubeflow-mpijobs-edit 195 | rules: 196 | - apiGroups: 197 | - kubeflow.org 198 | resources: 199 | - mpijobs 200 | - mpijobs/status 201 | verbs: 202 | - get 203 | - list 204 | - watch 205 | - create 206 | - delete 207 | - deletecollection 208 | - patch 209 | - update 210 | --- 211 | apiVersion: rbac.authorization.k8s.io/v1 212 | kind: ClusterRole 213 | metadata: 214 | labels: 215 | app: mpi-operator 216 | app.kubernetes.io/component: mpijob 217 | app.kubernetes.io/name: mpi-operator 218 | kustomize.component: mpi-operator 219 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true" 220 | name: kubeflow-mpijobs-view 221 | rules: 222 | - apiGroups: 223 | - kubeflow.org 224 | resources: 225 | - mpijobs 226 | - mpijobs/status 227 | verbs: 228 | - get 229 | - list 230 | - watch 231 | --- 232 | apiVersion: rbac.authorization.k8s.io/v1 233 | kind: ClusterRole 234 | metadata: 235 | labels: 236 | app: mpi-operator 237 | app.kubernetes.io/component: mpijob 238 | app.kubernetes.io/name: mpi-operator 239 | kustomize.component: mpi-operator 240 | name: mpi-operator 241 | rules: 242 | - apiGroups: 243 | - "" 244 | resources: 245 | - configmaps 246 | - serviceaccounts 247 | verbs: 248 | - create 249 | - list 250 | - watch 251 | - apiGroups: 252 | - "" 253 | resources: 254 | - pods 255 | verbs: 256 | - get 257 | - list 258 | - watch 259 | - apiGroups: 
260 | - "" 261 | resources: 262 | - pods/exec 263 | verbs: 264 | - create 265 | - apiGroups: 266 | - "" 267 | resources: 268 | - endpoints 269 | verbs: 270 | - create 271 | - get 272 | - update 273 | - apiGroups: 274 | - "" 275 | resources: 276 | - events 277 | verbs: 278 | - create 279 | - patch 280 | - apiGroups: 281 | - rbac.authorization.k8s.io 282 | resources: 283 | - roles 284 | - rolebindings 285 | verbs: 286 | - create 287 | - list 288 | - watch 289 | - apiGroups: 290 | - policy 291 | resources: 292 | - poddisruptionbudgets 293 | verbs: 294 | - create 295 | - list 296 | - update 297 | - watch 298 | - apiGroups: 299 | - apps 300 | resources: 301 | - statefulsets 302 | verbs: 303 | - create 304 | - list 305 | - update 306 | - watch 307 | - apiGroups: 308 | - batch 309 | resources: 310 | - jobs 311 | verbs: 312 | - create 313 | - list 314 | - update 315 | - watch 316 | - apiGroups: 317 | - apiextensions.k8s.io 318 | resources: 319 | - customresourcedefinitions 320 | verbs: 321 | - create 322 | - get 323 | - apiGroups: 324 | - kubeflow.org 325 | resources: 326 | - mpijobs 327 | - mpijobs/finalizers 328 | - mpijobs/status 329 | verbs: 330 | - '*' 331 | - apiGroups: 332 | - scheduling.incubator.k8s.io 333 | - scheduling.sigs.dev 334 | resources: 335 | - queues 336 | - podgroups 337 | verbs: 338 | - '*' 339 | --- 340 | apiVersion: rbac.authorization.k8s.io/v1 341 | kind: ClusterRoleBinding 342 | metadata: 343 | labels: 344 | app: mpi-operator 345 | app.kubernetes.io/component: mpijob 346 | app.kubernetes.io/name: mpi-operator 347 | kustomize.component: mpi-operator 348 | name: mpi-operator 349 | roleRef: 350 | apiGroup: rbac.authorization.k8s.io 351 | kind: ClusterRole 352 | name: mpi-operator 353 | subjects: 354 | - kind: ServiceAccount 355 | name: mpi-operator 356 | namespace: kubeflow 357 | --- 358 | apiVersion: v1 359 | data: 360 | kubectl-delivery-image: mpioperator/kubectl-delivery:latest 361 | lock-namespace: kubeflow 362 | kind: ConfigMap 363 | metadata: 
364 | labels: 365 | app: mpi-operator 366 | app.kubernetes.io/component: mpijob 367 | app.kubernetes.io/name: mpi-operator 368 | kustomize.component: mpi-operator 369 | name: mpi-operator-config 370 | namespace: kubeflow 371 | --- 372 | apiVersion: apps/v1 373 | kind: Deployment 374 | metadata: 375 | labels: 376 | app: mpi-operator 377 | app.kubernetes.io/component: mpijob 378 | app.kubernetes.io/name: mpi-operator 379 | kustomize.component: mpi-operator 380 | name: mpi-operator 381 | namespace: kubeflow 382 | spec: 383 | replicas: 1 384 | selector: 385 | matchLabels: 386 | app: mpi-operator 387 | app.kubernetes.io/component: mpijob 388 | app.kubernetes.io/name: mpi-operator 389 | kustomize.component: mpi-operator 390 | template: 391 | metadata: 392 | annotations: 393 | sidecar.istio.io/inject: "false" 394 | labels: 395 | app: mpi-operator 396 | app.kubernetes.io/component: mpijob 397 | app.kubernetes.io/name: mpi-operator 398 | kustomize.component: mpi-operator 399 | spec: 400 | containers: 401 | - args: 402 | - -alsologtostderr 403 | - --lock-namespace 404 | - kubeflow 405 | - --kubectl-delivery-image 406 | - mpioperator/kubectl-delivery:latest 407 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/mpioperator-mpi-operator:latest-d32b4 408 | imagePullPolicy: Always 409 | name: mpi-operator 410 | serviceAccountName: mpi-operator 411 | -------------------------------------------------------------------------------- /manifest1.3/009-knative-knative-serving-crds-base.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | labels: 5 | serving.knative.dev/release: v0.14.3 6 | name: knative-serving 7 | --- 8 | apiVersion: apiextensions.k8s.io/v1beta1 9 | kind: CustomResourceDefinition 10 | metadata: 11 | labels: 12 | knative.dev/crd-install: "true" 13 | serving.knative.dev/release: v0.14.3 14 | name: certificates.networking.internal.knative.dev 15 | spec: 16 | 
additionalPrinterColumns: 17 | - JSONPath: .status.conditions[?(@.type=="Ready")].status 18 | name: Ready 19 | type: string 20 | - JSONPath: .status.conditions[?(@.type=="Ready")].reason 21 | name: Reason 22 | type: string 23 | group: networking.internal.knative.dev 24 | names: 25 | categories: 26 | - knative-internal 27 | - networking 28 | kind: Certificate 29 | plural: certificates 30 | shortNames: 31 | - kcert 32 | singular: certificate 33 | scope: Namespaced 34 | subresources: 35 | status: {} 36 | version: v1alpha1 37 | --- 38 | apiVersion: apiextensions.k8s.io/v1beta1 39 | kind: CustomResourceDefinition 40 | metadata: 41 | labels: 42 | duck.knative.dev/podspecable: "true" 43 | knative.dev/crd-install: "true" 44 | serving.knative.dev/release: v0.14.3 45 | name: configurations.serving.knative.dev 46 | spec: 47 | additionalPrinterColumns: 48 | - JSONPath: .status.latestCreatedRevisionName 49 | name: LatestCreated 50 | type: string 51 | - JSONPath: .status.latestReadyRevisionName 52 | name: LatestReady 53 | type: string 54 | - JSONPath: .status.conditions[?(@.type=='Ready')].status 55 | name: Ready 56 | type: string 57 | - JSONPath: .status.conditions[?(@.type=='Ready')].reason 58 | name: Reason 59 | type: string 60 | conversion: 61 | strategy: Webhook 62 | webhookClientConfig: 63 | service: 64 | name: webhook 65 | namespace: knative-serving 66 | group: serving.knative.dev 67 | names: 68 | categories: 69 | - all 70 | - knative 71 | - serving 72 | kind: Configuration 73 | plural: configurations 74 | shortNames: 75 | - config 76 | - cfg 77 | singular: configuration 78 | preserveUnknownFields: false 79 | scope: Namespaced 80 | subresources: 81 | status: {} 82 | validation: 83 | openAPIV3Schema: 84 | type: object 85 | x-kubernetes-preserve-unknown-fields: true 86 | versions: 87 | - name: v1alpha1 88 | served: true 89 | storage: false 90 | - name: v1beta1 91 | served: true 92 | storage: false 93 | - name: v1 94 | served: true 95 | storage: true 96 | --- 97 | 
apiVersion: apiextensions.k8s.io/v1beta1 98 | kind: CustomResourceDefinition 99 | metadata: 100 | labels: 101 | knative.dev/crd-install: "true" 102 | name: images.caching.internal.knative.dev 103 | spec: 104 | group: caching.internal.knative.dev 105 | names: 106 | categories: 107 | - knative-internal 108 | - caching 109 | kind: Image 110 | plural: images 111 | shortNames: 112 | - img 113 | singular: image 114 | scope: Namespaced 115 | subresources: 116 | status: {} 117 | version: v1alpha1 118 | --- 119 | apiVersion: apiextensions.k8s.io/v1beta1 120 | kind: CustomResourceDefinition 121 | metadata: 122 | labels: 123 | knative.dev/crd-install: "true" 124 | serving.knative.dev/release: v0.14.3 125 | name: ingresses.networking.internal.knative.dev 126 | spec: 127 | additionalPrinterColumns: 128 | - JSONPath: .status.conditions[?(@.type=='Ready')].status 129 | name: Ready 130 | type: string 131 | - JSONPath: .status.conditions[?(@.type=='Ready')].reason 132 | name: Reason 133 | type: string 134 | group: networking.internal.knative.dev 135 | names: 136 | categories: 137 | - knative-internal 138 | - networking 139 | kind: Ingress 140 | plural: ingresses 141 | shortNames: 142 | - kingress 143 | - king 144 | singular: ingress 145 | scope: Namespaced 146 | subresources: 147 | status: {} 148 | versions: 149 | - name: v1alpha1 150 | served: true 151 | storage: true 152 | --- 153 | apiVersion: apiextensions.k8s.io/v1beta1 154 | kind: CustomResourceDefinition 155 | metadata: 156 | labels: 157 | knative.dev/crd-install: "true" 158 | serving.knative.dev/release: v0.14.3 159 | name: metrics.autoscaling.internal.knative.dev 160 | spec: 161 | additionalPrinterColumns: 162 | - JSONPath: .status.conditions[?(@.type=='Ready')].status 163 | name: Ready 164 | type: string 165 | - JSONPath: .status.conditions[?(@.type=='Ready')].reason 166 | name: Reason 167 | type: string 168 | group: autoscaling.internal.knative.dev 169 | names: 170 | categories: 171 | - knative-internal 172 | - 
autoscaling 173 | kind: Metric 174 | plural: metrics 175 | singular: metric 176 | scope: Namespaced 177 | subresources: 178 | status: {} 179 | version: v1alpha1 180 | --- 181 | apiVersion: apiextensions.k8s.io/v1beta1 182 | kind: CustomResourceDefinition 183 | metadata: 184 | labels: 185 | knative.dev/crd-install: "true" 186 | serving.knative.dev/release: v0.14.3 187 | name: podautoscalers.autoscaling.internal.knative.dev 188 | spec: 189 | additionalPrinterColumns: 190 | - JSONPath: .status.desiredScale 191 | name: DesiredScale 192 | type: integer 193 | - JSONPath: .status.actualScale 194 | name: ActualScale 195 | type: integer 196 | - JSONPath: .status.conditions[?(@.type=='Ready')].status 197 | name: Ready 198 | type: string 199 | - JSONPath: .status.conditions[?(@.type=='Ready')].reason 200 | name: Reason 201 | type: string 202 | group: autoscaling.internal.knative.dev 203 | names: 204 | categories: 205 | - knative-internal 206 | - autoscaling 207 | kind: PodAutoscaler 208 | plural: podautoscalers 209 | shortNames: 210 | - kpa 211 | - pa 212 | singular: podautoscaler 213 | scope: Namespaced 214 | subresources: 215 | status: {} 216 | versions: 217 | - name: v1alpha1 218 | served: true 219 | storage: true 220 | --- 221 | apiVersion: apiextensions.k8s.io/v1beta1 222 | kind: CustomResourceDefinition 223 | metadata: 224 | labels: 225 | knative.dev/crd-install: "true" 226 | serving.knative.dev/release: v0.14.3 227 | name: revisions.serving.knative.dev 228 | spec: 229 | additionalPrinterColumns: 230 | - JSONPath: .metadata.labels['serving\.knative\.dev/configuration'] 231 | name: Config Name 232 | type: string 233 | - JSONPath: .status.serviceName 234 | name: K8s Service Name 235 | type: string 236 | - JSONPath: .metadata.labels['serving\.knative\.dev/configurationGeneration'] 237 | name: Generation 238 | type: string 239 | - JSONPath: .status.conditions[?(@.type=='Ready')].status 240 | name: Ready 241 | type: string 242 | - JSONPath: 
.status.conditions[?(@.type=='Ready')].reason 243 | name: Reason 244 | type: string 245 | conversion: 246 | strategy: Webhook 247 | webhookClientConfig: 248 | service: 249 | name: webhook 250 | namespace: knative-serving 251 | group: serving.knative.dev 252 | names: 253 | categories: 254 | - all 255 | - knative 256 | - serving 257 | kind: Revision 258 | plural: revisions 259 | shortNames: 260 | - rev 261 | singular: revision 262 | preserveUnknownFields: false 263 | scope: Namespaced 264 | subresources: 265 | status: {} 266 | validation: 267 | openAPIV3Schema: 268 | type: object 269 | x-kubernetes-preserve-unknown-fields: true 270 | versions: 271 | - name: v1alpha1 272 | served: true 273 | storage: false 274 | - name: v1beta1 275 | served: true 276 | storage: false 277 | - name: v1 278 | served: true 279 | storage: true 280 | --- 281 | apiVersion: apiextensions.k8s.io/v1beta1 282 | kind: CustomResourceDefinition 283 | metadata: 284 | labels: 285 | duck.knative.dev/addressable: "true" 286 | knative.dev/crd-install: "true" 287 | serving.knative.dev/release: v0.14.3 288 | name: routes.serving.knative.dev 289 | spec: 290 | additionalPrinterColumns: 291 | - JSONPath: .status.url 292 | name: URL 293 | type: string 294 | - JSONPath: .status.conditions[?(@.type=='Ready')].status 295 | name: Ready 296 | type: string 297 | - JSONPath: .status.conditions[?(@.type=='Ready')].reason 298 | name: Reason 299 | type: string 300 | conversion: 301 | strategy: Webhook 302 | webhookClientConfig: 303 | service: 304 | name: webhook 305 | namespace: knative-serving 306 | group: serving.knative.dev 307 | names: 308 | categories: 309 | - all 310 | - knative 311 | - serving 312 | kind: Route 313 | plural: routes 314 | shortNames: 315 | - rt 316 | singular: route 317 | preserveUnknownFields: false 318 | scope: Namespaced 319 | subresources: 320 | status: {} 321 | validation: 322 | openAPIV3Schema: 323 | type: object 324 | x-kubernetes-preserve-unknown-fields: true 325 | versions: 326 | - name: 
v1alpha1 327 | served: true 328 | storage: false 329 | - name: v1beta1 330 | served: true 331 | storage: false 332 | - name: v1 333 | served: true 334 | storage: true 335 | --- 336 | apiVersion: apiextensions.k8s.io/v1beta1 337 | kind: CustomResourceDefinition 338 | metadata: 339 | labels: 340 | knative.dev/crd-install: "true" 341 | serving.knative.dev/release: v0.14.3 342 | name: serverlessservices.networking.internal.knative.dev 343 | spec: 344 | additionalPrinterColumns: 345 | - JSONPath: .spec.mode 346 | name: Mode 347 | type: string 348 | - JSONPath: .spec.numActivators 349 | name: Activators 350 | type: integer 351 | - JSONPath: .status.serviceName 352 | name: ServiceName 353 | type: string 354 | - JSONPath: .status.privateServiceName 355 | name: PrivateServiceName 356 | type: string 357 | - JSONPath: .status.conditions[?(@.type=='Ready')].status 358 | name: Ready 359 | type: string 360 | - JSONPath: .status.conditions[?(@.type=='Ready')].reason 361 | name: Reason 362 | type: string 363 | group: networking.internal.knative.dev 364 | names: 365 | categories: 366 | - knative-internal 367 | - networking 368 | kind: ServerlessService 369 | plural: serverlessservices 370 | shortNames: 371 | - sks 372 | singular: serverlessservice 373 | scope: Namespaced 374 | subresources: 375 | status: {} 376 | versions: 377 | - name: v1alpha1 378 | served: true 379 | storage: true 380 | --- 381 | apiVersion: apiextensions.k8s.io/v1beta1 382 | kind: CustomResourceDefinition 383 | metadata: 384 | labels: 385 | duck.knative.dev/addressable: "true" 386 | duck.knative.dev/podspecable: "true" 387 | knative.dev/crd-install: "true" 388 | serving.knative.dev/release: v0.14.3 389 | name: services.serving.knative.dev 390 | spec: 391 | additionalPrinterColumns: 392 | - JSONPath: .status.url 393 | name: URL 394 | type: string 395 | - JSONPath: .status.latestCreatedRevisionName 396 | name: LatestCreated 397 | type: string 398 | - JSONPath: .status.latestReadyRevisionName 399 | name: 
LatestReady 400 | type: string 401 | - JSONPath: .status.conditions[?(@.type=='Ready')].status 402 | name: Ready 403 | type: string 404 | - JSONPath: .status.conditions[?(@.type=='Ready')].reason 405 | name: Reason 406 | type: string 407 | conversion: 408 | strategy: Webhook 409 | webhookClientConfig: 410 | service: 411 | name: webhook 412 | namespace: knative-serving 413 | group: serving.knative.dev 414 | names: 415 | categories: 416 | - all 417 | - knative 418 | - serving 419 | kind: Service 420 | plural: services 421 | shortNames: 422 | - kservice 423 | - ksvc 424 | singular: service 425 | preserveUnknownFields: false 426 | scope: Namespaced 427 | subresources: 428 | status: {} 429 | validation: 430 | openAPIV3Schema: 431 | type: object 432 | x-kubernetes-preserve-unknown-fields: true 433 | versions: 434 | - name: v1alpha1 435 | served: true 436 | storage: false 437 | - name: v1beta1 438 | served: true 439 | storage: false 440 | - name: v1 441 | served: true 442 | storage: true 443 | -------------------------------------------------------------------------------- /manifest1.3/022-jupyter-overlays-istio.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | labels: 5 | app: jupyter-web-app 6 | kustomize.component: jupyter-web-app 7 | name: jupyter-web-app-service-account 8 | namespace: kubeflow 9 | --- 10 | apiVersion: rbac.authorization.k8s.io/v1beta1 11 | kind: Role 12 | metadata: 13 | labels: 14 | app: jupyter-web-app 15 | kustomize.component: jupyter-web-app 16 | name: jupyter-web-app-jupyter-notebook-role 17 | namespace: kubeflow 18 | rules: 19 | - apiGroups: 20 | - "" 21 | resources: 22 | - pods 23 | - pods/log 24 | - secrets 25 | - services 26 | verbs: 27 | - '*' 28 | - apiGroups: 29 | - "" 30 | - apps 31 | - extensions 32 | resources: 33 | - deployments 34 | - replicasets 35 | verbs: 36 | - '*' 37 | - apiGroups: 38 | - kubeflow.org 39 | resources: 40 | - '*' 41 | 
verbs: 42 | - '*' 43 | - apiGroups: 44 | - batch 45 | resources: 46 | - jobs 47 | verbs: 48 | - '*' 49 | --- 50 | apiVersion: rbac.authorization.k8s.io/v1 51 | kind: ClusterRole 52 | metadata: 53 | labels: 54 | app: jupyter-web-app 55 | kustomize.component: jupyter-web-app 56 | name: jupyter-web-app-cluster-role 57 | rules: 58 | - apiGroups: 59 | - "" 60 | resources: 61 | - namespaces 62 | verbs: 63 | - get 64 | - list 65 | - create 66 | - delete 67 | - apiGroups: 68 | - authorization.k8s.io 69 | resources: 70 | - subjectaccessreviews 71 | verbs: 72 | - create 73 | - apiGroups: 74 | - kubeflow.org 75 | resources: 76 | - notebooks 77 | - notebooks/finalizers 78 | - poddefaults 79 | verbs: 80 | - get 81 | - list 82 | - create 83 | - delete 84 | - patch 85 | - update 86 | - apiGroups: 87 | - "" 88 | resources: 89 | - persistentvolumeclaims 90 | verbs: 91 | - create 92 | - delete 93 | - get 94 | - list 95 | - apiGroups: 96 | - "" 97 | resources: 98 | - events 99 | - nodes 100 | verbs: 101 | - list 102 | - apiGroups: 103 | - storage.k8s.io 104 | resources: 105 | - storageclasses 106 | verbs: 107 | - get 108 | - list 109 | - watch 110 | --- 111 | apiVersion: rbac.authorization.k8s.io/v1 112 | kind: ClusterRole 113 | metadata: 114 | labels: 115 | app: jupyter-web-app 116 | kustomize.component: jupyter-web-app 117 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true" 118 | name: jupyter-web-app-kubeflow-notebook-ui-admin 119 | rules: [] 120 | --- 121 | apiVersion: rbac.authorization.k8s.io/v1 122 | kind: ClusterRole 123 | metadata: 124 | labels: 125 | app: jupyter-web-app 126 | kustomize.component: jupyter-web-app 127 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true" 128 | name: jupyter-web-app-kubeflow-notebook-ui-edit 129 | rules: 130 | - apiGroups: 131 | - kubeflow.org 132 | resources: 133 | - notebooks 134 | - notebooks/finalizers 135 | - poddefaults 136 | verbs: 137 | - get 138 | - list 139 | - create 140 | - delete 141 | --- 142 | 
apiVersion: rbac.authorization.k8s.io/v1 143 | kind: ClusterRole 144 | metadata: 145 | labels: 146 | app: jupyter-web-app 147 | kustomize.component: jupyter-web-app 148 | rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true" 149 | name: jupyter-web-app-kubeflow-notebook-ui-view 150 | rules: 151 | - apiGroups: 152 | - kubeflow.org 153 | resources: 154 | - notebooks 155 | - notebooks/finalizers 156 | - poddefaults 157 | verbs: 158 | - get 159 | - list 160 | - apiGroups: 161 | - storage.k8s.io 162 | resources: 163 | - storageclasses 164 | verbs: 165 | - get 166 | - list 167 | - watch 168 | --- 169 | apiVersion: rbac.authorization.k8s.io/v1beta1 170 | kind: RoleBinding 171 | metadata: 172 | labels: 173 | app: jupyter-web-app 174 | kustomize.component: jupyter-web-app 175 | name: jupyter-web-app-jupyter-notebook-role-binding 176 | namespace: kubeflow 177 | roleRef: 178 | apiGroup: rbac.authorization.k8s.io 179 | kind: Role 180 | name: jupyter-web-app-jupyter-notebook-role 181 | subjects: 182 | - kind: ServiceAccount 183 | name: jupyter-notebook 184 | --- 185 | apiVersion: rbac.authorization.k8s.io/v1 186 | kind: ClusterRoleBinding 187 | metadata: 188 | labels: 189 | app: jupyter-web-app 190 | kustomize.component: jupyter-web-app 191 | name: jupyter-web-app-cluster-role-binding 192 | roleRef: 193 | apiGroup: rbac.authorization.k8s.io 194 | kind: ClusterRole 195 | name: jupyter-web-app-cluster-role 196 | subjects: 197 | - kind: ServiceAccount 198 | name: jupyter-web-app-service-account 199 | namespace: kubeflow 200 | --- 201 | apiVersion: v1 202 | data: 203 | spawner_ui_config.yaml: | 204 | # Configuration file for the Jupyter UI. 
205 | # 206 | # Each Jupyter UI option is configured by two keys: 'value' and 'readOnly' 207 | # - The 'value' key contains the default value 208 | # - The 'readOnly' key determines if the option will be available to users 209 | # 210 | # If the 'readOnly' key is present and set to 'true', the respective option 211 | # will be disabled for users and only set by the admin. Also when a 212 | # Notebook is POSTED to the API if a necessary field is not present then 213 | # the value from the config will be used. 214 | # 215 | # If the 'readOnly' key is missing (defaults to 'false'), the respective option 216 | # will be available for users to edit. 217 | # 218 | # Note that some values can be templated. Such values are the names of the 219 | # Volumes as well as their StorageClass 220 | spawnerFormDefaults: 221 | image: 222 | # The container Image for the user's Jupyter Notebook 223 | value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:v1.3.0-rc.0 224 | # The list of available standard container Images 225 | options: 226 | - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-scipy:v1.3.0-rc.0 227 | - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-full:v1.3.0-rc.0 228 | - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-pytorch-cuda-full:v1.3.0-rc.0 229 | - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:v1.3.0-rc.0 230 | - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-cuda-full:v1.3.0-rc.0 231 | imageVSCode: 232 | # The container Image for the user's VS-Code Server 233 | value: public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:v1.3.0-rc.0 234 | # The list of available standard container Images 235 | options: 236 | - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/codeserver-python:v1.3.0-rc.0 237 | imageRStudio: 238 | # The container Image for the user's RStudio Server 239 | value: 
public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio-tidyverse:v1.3.0-rc.0 240 | # The list of available standard container Images 241 | options: 242 | - public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/rstudio-tidyverse:v1.3.0-rc.0 243 | allowCustomImage: true 244 | imagePullPolicy: 245 | value: IfNotPresent 246 | readOnly: false 247 | cpu: 248 | # CPU for user's Notebook 249 | value: '0.5' 250 | readOnly: false 251 | memory: 252 | # Memory for user's Notebook 253 | value: 1.0Gi 254 | readOnly: false 255 | workspaceVolume: 256 | # Workspace Volume to be attached to user's Notebook 257 | # Each Workspace Volume is declared with the following attributes: 258 | # Type, Name, Size, MountPath and Access Mode 259 | value: 260 | type: 261 | # The Type of the Workspace Volume 262 | # Supported values: 'New', 'Existing' 263 | value: New 264 | name: 265 | # The Name of the Workspace Volume 266 | # Note that this is a templated value. Special values: 267 | # {notebook-name}: Replaced with the name of the Notebook. The frontend 268 | # will replace this value as the user types the name 269 | value: 'workspace-{notebook-name}' 270 | size: 271 | # The Size of the Workspace Volume (in Gi) 272 | value: '10Gi' 273 | mountPath: 274 | # The Path that the Workspace Volume will be mounted 275 | value: /home/jovyan 276 | accessModes: 277 | # The Access Mode of the Workspace Volume 278 | # Supported values: 'ReadWriteOnce', 'ReadWriteMany', 'ReadOnlyMany' 279 | value: ReadWriteOnce 280 | class: 281 | # The StorageClass the PVC will use if type is New.
Special values are: 282 | # {none}: default StorageClass 283 | # {empty}: empty string "" 284 | value: '{none}' 285 | readOnly: false 286 | dataVolumes: 287 | # List of additional Data Volumes to be attached to the user's Notebook 288 | value: [] 289 | # Each Data Volume is declared with the following attributes: 290 | # Type, Name, Size, MountPath and Access Mode 291 | # 292 | # For example, a list with 2 Data Volumes: 293 | # value: 294 | # - value: 295 | # type: 296 | # value: New 297 | # name: 298 | # value: '{notebook-name}-vol-1' 299 | # size: 300 | # value: '10Gi' 301 | # class: 302 | # value: standard 303 | # mountPath: 304 | # value: /home/jovyan/vol-1 305 | # accessModes: 306 | # value: ReadWriteOnce 307 | # class: 308 | # value: {none} 309 | # - value: 310 | # type: 311 | # value: New 312 | # name: 313 | # value: '{notebook-name}-vol-2' 314 | # size: 315 | # value: '10Gi' 316 | # mountPath: 317 | # value: /home/jovyan/vol-2 318 | # accessModes: 319 | # value: ReadWriteMany 320 | # class: 321 | # value: {none} 322 | readOnly: false 323 | gpus: 324 | # Number of GPUs to be assigned to the Notebook Container 325 | value: 326 | # values: "none", "1", "2", "4", "8" 327 | num: "none" 328 | # Determines what the UI will show and send to the backend 329 | vendors: 330 | - limitsKey: "nvidia.com/gpu" 331 | uiName: "NVIDIA" 332 | - limitsKey: "amd.com/gpu" 333 | uiName: "AMD" 334 | # Values: "" or a `limits-key` from the vendors list 335 | vendor: "" 336 | readOnly: false 337 | shm: 338 | value: true 339 | readOnly: false 340 | configurations: 341 | # List of labels to be selected, these are the labels from PodDefaults 342 | # value: 343 | # - add-gcp-secret 344 | # - default-editor 345 | value: [] 346 | readOnly: false 347 | affinityConfig: 348 | # The default `configKey` from the options list 349 | # If readonly, the default value will be the only option 350 | value: "none" 351 | # The list of available affinity configs 352 | options: [] 353 | # # (DESC) Pod 
gets an exclusive "n1-standard-2" Node 354 | # # (TIP) set PreferNoSchedule taint on this node-pool 355 | # # (TIP) enable cluster-autoscaler on this node-pool 356 | # # (TIP) don't let users request more CPU/MEMORY than the size of this node 357 | # - configKey: "exclusive__n1-standard-2" 358 | # displayName: "Exclusive: n1-standard-2" 359 | # affinity: 360 | # # (Require) Node having label: `node_pool=notebook-n1-standard-2` 361 | # nodeAffinity: 362 | # requiredDuringSchedulingIgnoredDuringExecution: 363 | # nodeSelectorTerms: 364 | # - matchExpressions: 365 | # - key: "node_pool" 366 | # operator: "In" 367 | # values: 368 | # - "notebook-n1-standard-2" 369 | # # (Require) Node WITHOUT existing Pod having label: `notebook-name` 370 | # podAntiAffinity: 371 | # requiredDuringSchedulingIgnoredDuringExecution: 372 | # - labelSelector: 373 | # matchExpressions: 374 | # - key: "notebook-name" 375 | # operator: "Exists" 376 | # namespaces: [] 377 | # topologyKey: "kubernetes.io/hostname" 378 | readOnly: false 379 | tolerationGroup: 380 | # The default `groupKey` from the options list 381 | # If readonly, the default value will be the only option 382 | value: "none" 383 | # The list of available tolerationGroup configs 384 | options: [] 385 | # - groupKey: "group_1" 386 | # displayName: "Group 1: description" 387 | # tolerations: 388 | # - key: "key1" 389 | # operator: "Equal" 390 | # value: "value1" 391 | # effect: "NoSchedule" 392 | # - key: "key2" 393 | # operator: "Equal" 394 | # value: "value2" 395 | # effect: "NoSchedule" 396 | readOnly: false 397 | kind: ConfigMap 398 | metadata: 399 | labels: 400 | app: jupyter-web-app 401 | kustomize.component: jupyter-web-app 402 | name: jupyter-web-app-config-tkhtgh5mcm 403 | namespace: kubeflow 404 | --- 405 | apiVersion: v1 406 | data: 407 | JWA_CLUSTER_DOMAIN: cluster.local 408 | JWA_PREFIX: /jupyter 409 | JWA_UI: default 410 | JWA_USERID_HEADER: kubeflow-userid 411 | JWA_USERID_PREFIX: "" 412 | kind: ConfigMap 413 |
metadata: 414 | labels: 415 | app: jupyter-web-app 416 | kustomize.component: jupyter-web-app 417 | name: jupyter-web-app-parameters-chmg88cm48 418 | namespace: kubeflow 419 | --- 420 | apiVersion: v1 421 | kind: Service 422 | metadata: 423 | labels: 424 | app: jupyter-web-app 425 | kustomize.component: jupyter-web-app 426 | run: jupyter-web-app 427 | name: jupyter-web-app-service 428 | namespace: kubeflow 429 | spec: 430 | ports: 431 | - name: http 432 | port: 80 433 | protocol: TCP 434 | targetPort: 5000 435 | selector: 436 | app: jupyter-web-app 437 | kustomize.component: jupyter-web-app 438 | type: ClusterIP 439 | --- 440 | apiVersion: apps/v1 441 | kind: Deployment 442 | metadata: 443 | labels: 444 | app: jupyter-web-app 445 | kustomize.component: jupyter-web-app 446 | name: jupyter-web-app-deployment 447 | namespace: kubeflow 448 | spec: 449 | replicas: 1 450 | selector: 451 | matchLabels: 452 | app: jupyter-web-app 453 | kustomize.component: jupyter-web-app 454 | template: 455 | metadata: 456 | annotations: 457 | sidecar.istio.io/inject: "false" 458 | labels: 459 | app: jupyter-web-app 460 | kustomize.component: jupyter-web-app 461 | spec: 462 | containers: 463 | - env: 464 | - name: APP_PREFIX 465 | value: /jupyter 466 | - name: UI 467 | value: default 468 | - name: USERID_HEADER 469 | value: kubeflow-userid 470 | - name: USERID_PREFIX 471 | value: "" 472 | image: registry.cn-shenzhen.aliyuncs.com/tensorbytes/notebooks-jupyter-web-app:v1.3.0-rc.0-70edb 473 | name: jupyter-web-app 474 | ports: 475 | - containerPort: 5000 476 | volumeMounts: 477 | - mountPath: /etc/config 478 | name: config-volume 479 | serviceAccountName: jupyter-web-app-service-account 480 | volumes: 481 | - configMap: 482 | name: jupyter-web-app-config-tkhtgh5mcm 483 | name: config-volume 484 | --- 485 | apiVersion: networking.istio.io/v1alpha3 486 | kind: VirtualService 487 | metadata: 488 | labels: 489 | app: jupyter-web-app 490 | kustomize.component: jupyter-web-app 491 | name: 
jupyter-web-app-jupyter-web-app 492 | namespace: kubeflow 493 | spec: 494 | gateways: 495 | - kubeflow-gateway 496 | hosts: 497 | - '*' 498 | http: 499 | - headers: 500 | request: 501 | add: 502 | x-forwarded-prefix: /jupyter 503 | match: 504 | - uri: 505 | prefix: /jupyter/ 506 | rewrite: 507 | uri: / 508 | route: 509 | - destination: 510 | host: jupyter-web-app-service.kubeflow.svc.cluster.local 511 | port: 512 | number: 80 513 | --------------------------------------------------------------------------------