├── .github
├── ISSUE_TEMPLATE
│ ├── bug-report.md
│ └── feature-request.md
├── PROPOSAL_TEMPLATE.md
├── PULL_REQUEST_TEMPLATE.md
└── workflows
│ ├── ci.yaml
│ ├── codeql-analysis.yml
│ ├── image-ci.yaml
│ └── license.yml
├── .gitignore
├── .license
├── README.md
└── dependency_decisions.yml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Dockerfile
├── Dockerfile.dashboard
├── GOVERNANCE.md
├── LICENSE
├── Makefile
├── OWNERS
├── PROJECT
├── README.md
├── SECURITY.md
├── apis
├── addtoscheme_apps_v1alpha1.go
├── addtoscheme_cache_v1alpha1.go
├── addtoscheme_inference_v1alpha1.go
├── addtoscheme_model_v1alpha1.go
├── addtoscheme_notebook_v1alpha1.go
├── addtoscheme_serving_v1alpha1.go
├── addtoscheme_training_v1alpha1.go
├── apis.go
├── apps
│ └── v1alpha1
│ │ ├── cron_types.go
│ │ ├── groupversion_info.go
│ │ └── zz_generated.deepcopy.go
├── cache
│ └── v1alpha1
│ │ ├── cachebackend_types.go
│ │ ├── defaults.go
│ │ ├── groupversion_info.go
│ │ ├── zz_generated.deepcopy.go
│ │ └── zz_generated.defaults.go
├── inference
│ └── v1alpha1
│ │ ├── defaults.go
│ │ ├── defaults_test.go
│ │ ├── elasticbatchjob_constants.go
│ │ ├── elasticbatchjob_types.go
│ │ ├── elasticbatchjob_types_test.go
│ │ ├── groupversion_info.go
│ │ ├── v1alpha1_suite_test.go
│ │ ├── zz_generated.deepcopy.go
│ │ └── zz_generated.defaults.go
├── model
│ └── v1alpha1
│ │ ├── defaults.go
│ │ ├── groupversion_info.go
│ │ ├── model_types.go
│ │ ├── modelversion_types.go
│ │ ├── zz_generated.deepcopy.go
│ │ └── zz_generated.defaults.go
├── notebook
│ └── v1alpha1
│ │ ├── groupversion_info.go
│ │ ├── notebook_types.go
│ │ ├── zz_generated.deepcopy.go
│ │ └── zz_generated.defaults.go
├── serving
│ └── v1alpha1
│ │ ├── defaults.go
│ │ ├── groupversion_info.go
│ │ ├── inference_types.go
│ │ ├── zz_generated.deepcopy.go
│ │ └── zz_generated.defaults.go
└── training
│ └── v1alpha1
│ ├── common.go
│ ├── doc.go
│ ├── elasticdljob_constant.go
│ ├── elasticdljob_types.go
│ ├── groupversion_info.go
│ ├── marsjob_constant.go
│ ├── marsjob_defaults.go
│ ├── marsjob_types.go
│ ├── mpijob_constants.go
│ ├── mpijob_default.go
│ ├── mpijob_types.go
│ ├── pytorchjob_constants.go
│ ├── pytorchjob_defaults.go
│ ├── pytorchjob_types.go
│ ├── pytorchjob_types_test.go
│ ├── tfjob_constants.go
│ ├── tfjob_defaults.go
│ ├── tfjob_defaults_test.go
│ ├── tfjob_types.go
│ ├── tfjob_types_test.go
│ ├── tfjob_util.go
│ ├── tfjob_util_test.go
│ ├── v1alpha1_suite_test.go
│ ├── xdljob_constants.go
│ ├── xdljob_defaults.go
│ ├── xdljob_defaults_test.go
│ ├── xdljob_types.go
│ ├── xdljob_types_test.go
│ ├── xgboostjob_constants.go
│ ├── xgboostjob_defaults.go
│ ├── xgboostjob_types.go
│ ├── xgboostjob_types_test.go
│ ├── zz_generated.deepcopy.go
│ └── zz_generated.defaults.go
├── client
├── clientset
│ └── versioned
│ │ ├── clientset.go
│ │ ├── doc.go
│ │ ├── fake
│ │ ├── clientset_generated.go
│ │ ├── doc.go
│ │ └── register.go
│ │ ├── scheme
│ │ ├── doc.go
│ │ └── register.go
│ │ └── typed
│ │ └── training
│ │ └── v1alpha1
│ │ ├── doc.go
│ │ ├── elasticdljob.go
│ │ ├── fake
│ │ ├── doc.go
│ │ ├── fake_elasticdljob.go
│ │ ├── fake_marsjob.go
│ │ ├── fake_mpijob.go
│ │ ├── fake_pytorchjob.go
│ │ ├── fake_tfjob.go
│ │ ├── fake_training_client.go
│ │ ├── fake_xdljob.go
│ │ └── fake_xgboostjob.go
│ │ ├── generated_expansion.go
│ │ ├── marsjob.go
│ │ ├── mpijob.go
│ │ ├── pytorchjob.go
│ │ ├── tfjob.go
│ │ ├── training_client.go
│ │ ├── xdljob.go
│ │ └── xgboostjob.go
├── informers
│ └── externalversions
│ │ ├── factory.go
│ │ ├── generic.go
│ │ ├── internalinterfaces
│ │ └── factory_interfaces.go
│ │ └── training
│ │ ├── interface.go
│ │ └── v1alpha1
│ │ ├── elasticdljob.go
│ │ ├── interface.go
│ │ ├── marsjob.go
│ │ ├── mpijob.go
│ │ ├── pytorchjob.go
│ │ ├── tfjob.go
│ │ ├── xdljob.go
│ │ └── xgboostjob.go
└── listers
│ └── training
│ └── v1alpha1
│ ├── elasticdljob.go
│ ├── expansion_generated.go
│ ├── marsjob.go
│ ├── mpijob.go
│ ├── pytorchjob.go
│ ├── tfjob.go
│ ├── xdljob.go
│ └── xgboostjob.go
├── cmd
└── options
│ └── options.go
├── config
├── certmanager
│ ├── certificate.yaml
│ ├── kustomization.yaml
│ └── kustomizeconfig.yaml
├── crd
│ ├── bases
│ │ ├── apps.kubedl.io_crons.yaml
│ │ ├── cache.kubedl.io_cachebackends.yaml
│ │ ├── inference.kubedl.io_elasticbatchjobs.yaml
│ │ ├── model.kubedl.io_models.yaml
│ │ ├── model.kubedl.io_modelversions.yaml
│ │ ├── notebook.kubedl.io_notebooks.yaml
│ │ ├── serving.kubedl.io_inferences.yaml
│ │ ├── training.kubedl.io_elasticdljobs.yaml
│ │ ├── training.kubedl.io_marsjobs.yaml
│ │ ├── training.kubedl.io_mpijobs.yaml
│ │ ├── training.kubedl.io_pytorchjobs.yaml
│ │ ├── training.kubedl.io_tfjobs.yaml
│ │ ├── training.kubedl.io_xdljobs.yaml
│ │ └── training.kubedl.io_xgboostjobs.yaml
│ ├── kustomization.yaml
│ ├── kustomizeconfig.yaml
│ └── patches
│ │ ├── cainjection_in_cachebackends.yaml
│ │ ├── cainjection_in_crons.yaml
│ │ ├── cainjection_in_elasticbatchjobs.yaml
│ │ ├── cainjection_in_elasticdljobs.yaml
│ │ ├── cainjection_in_marsjobs.yaml
│ │ ├── cainjection_in_models.yaml
│ │ ├── cainjection_in_modelversions.yaml
│ │ ├── cainjection_in_mpijobs.yaml
│ │ ├── cainjection_in_notebooks.yaml
│ │ ├── cainjection_in_pytorchjobs.yaml
│ │ ├── cainjection_in_tfjobs.yaml
│ │ ├── cainjection_in_xdljobs.yaml
│ │ ├── cainjection_in_xgboostjobs.yaml
│ │ ├── webhook_in_cachebackends.yaml
│ │ ├── webhook_in_crons.yaml
│ │ ├── webhook_in_elasticbatchjobs.yaml
│ │ ├── webhook_in_elasticdljobs.yaml
│ │ ├── webhook_in_marsjobs.yaml
│ │ ├── webhook_in_models.yaml
│ │ ├── webhook_in_modelversions.yaml
│ │ ├── webhook_in_mpijobs.yaml
│ │ ├── webhook_in_notebooks.yaml
│ │ ├── webhook_in_pytorchjobs.yaml
│ │ ├── webhook_in_tfjobs.yaml
│ │ ├── webhook_in_xdljobs.yaml
│ │ └── webhook_in_xgboostjobs.yaml
├── default
│ ├── kustomization.yaml
│ ├── manager_auth_proxy_patch.yaml
│ ├── manager_prometheus_metrics_patch.yaml
│ ├── manager_webhook_patch.yaml
│ └── webhookcainjection_patch.yaml
├── manager
│ ├── all_in_one.yaml
│ ├── kustomization.yaml
│ └── manager.yaml
├── rbac
│ ├── auth_proxy_role.yaml
│ ├── auth_proxy_role_binding.yaml
│ ├── auth_proxy_service.yaml
│ ├── cachebackend_editor_role.yaml
│ ├── cachebackend_viewer_role.yaml
│ ├── cron_editor_role.yaml
│ ├── cron_viewer_role.yaml
│ ├── elasticbatchjob_editor_role.yaml
│ ├── elasticbatchjob_viewer_role.yaml
│ ├── elasticdljob_editor_role.yaml
│ ├── elasticdljob_viewer_role.yaml
│ ├── kustomization.yaml
│ ├── leader_election_role.yaml
│ ├── leader_election_role_binding.yaml
│ ├── marsjob_editor_role.yaml
│ ├── marsjob_viewer_role.yaml
│ ├── model_editor_role.yaml
│ ├── model_viewer_role.yaml
│ ├── modelversion_editor_role.yaml
│ ├── modelversion_viewer_role.yaml
│ ├── mpijob_editor_role.yaml
│ ├── mpijob_viewer_role.yaml
│ ├── notebook_editor_role.yaml
│ ├── notebook_viewer_role.yaml
│ ├── pytorchjob_editor_role.yaml
│ ├── pytorchjob_viewer_role.yaml
│ ├── role.yaml
│ ├── role_binding.yaml
│ ├── tfjob_editor_role.yaml
│ ├── tfjob_viewer_role.yaml
│ ├── xdljob_editor_role.yaml
│ ├── xdljob_viewer_role.yaml
│ ├── xgboostjob_editor_role.yaml
│ └── xgboostjob_viewer_role.yaml
├── samples
│ ├── apps_v1alpha1_cron.yaml
│ ├── cache_v1alpha1_cachebackend.yaml
│ ├── inference_v1alpha1_elasticbatchjob.yaml
│ ├── model
│ │ ├── dockerfile_configmap.yaml
│ │ ├── kanikopod_sample.yaml
│ │ ├── model_v1alpha1_model.yaml
│ │ ├── model_v1alpha1_modelversion.yaml
│ │ └── model_v1alpha1_modelversion_nas.yaml
│ ├── notebook_v1alpha1_notebook.yaml
│ ├── training_v1alpha1_elasticdljob.yaml
│ ├── training_v1alpha1_marsjob.yaml
│ ├── training_v1alpha1_mpijob.yaml
│ ├── training_v1alpha1_pytorchjob.yaml
│ ├── training_v1alpha1_tfjob.yaml
│ ├── training_v1alpha1_xdljob.yaml
│ └── training_v1alpha1_xgboostjob.yaml
└── webhook
│ ├── kustomization.yaml
│ ├── kustomizeconfig.yaml
│ ├── manifests.yaml
│ └── service.yaml
├── console
├── README.md
├── backend
│ ├── cmd
│ │ └── backend-server
│ │ │ └── main.go
│ └── pkg
│ │ ├── auth
│ │ ├── .gitkeep
│ │ ├── config_auth.go
│ │ ├── empty_auth.go
│ │ └── oauth.go
│ │ ├── client
│ │ └── client.go
│ │ ├── constants
│ │ └── const.go
│ │ ├── handlers
│ │ ├── .gitkeep
│ │ ├── code_source.go
│ │ ├── data.go
│ │ ├── data_source.go
│ │ ├── job.go
│ │ ├── job_presubmit_hooks.go
│ │ ├── kubedl.go
│ │ ├── log.go
│ │ ├── notebook.go
│ │ └── tensorboard.go
│ │ ├── middleware
│ │ ├── .gitkeep
│ │ └── auth.go
│ │ ├── model
│ │ ├── code_source.go
│ │ ├── converter.go
│ │ ├── data.go
│ │ ├── data_source.go
│ │ ├── job.go
│ │ ├── notebook.go
│ │ ├── user_info.go
│ │ └── workspace.go
│ │ ├── routers
│ │ ├── api
│ │ │ ├── auth.go
│ │ │ ├── code_source.go
│ │ │ ├── data.go
│ │ │ ├── data_source.go
│ │ │ ├── job.go
│ │ │ ├── kubedl.go
│ │ │ ├── log.go
│ │ │ ├── notebook.go
│ │ │ ├── tensorboard.go
│ │ │ └── workspace.go
│ │ └── router.go
│ │ ├── storage
│ │ ├── events
│ │ │ └── apiserver
│ │ │ │ └── apiserver.go
│ │ ├── objects
│ │ │ ├── apiserver
│ │ │ │ └── apiserver.go
│ │ │ └── proxy
│ │ │ │ └── proxy.go
│ │ └── registry.go
│ │ └── utils
│ │ ├── .gitkeep
│ │ ├── http.go
│ │ ├── job.go
│ │ ├── kubedl.go
│ │ ├── redirects.go
│ │ └── response.go
├── dashboard.yaml
└── frontend
│ ├── abc.json
│ ├── config
│ ├── config.default.js
│ ├── config.js
│ ├── defaultSettings.js
│ ├── plugin.config.js
│ └── themePluginConfig.js
│ ├── jest-puppeteer.config.js
│ ├── jest.config.js
│ ├── jsconfig.json
│ ├── mock
│ ├── clusterInfo.js
│ ├── route.js
│ └── user.js
│ ├── package.json
│ ├── public
│ ├── favicon.png
│ └── icons
│ │ ├── android-chrome-192x192.png
│ │ └── android-chrome-512x512.png
│ ├── src
│ ├── assets
│ │ └── logo.svg
│ ├── components
│ │ ├── Authorized
│ │ │ ├── Authorized.jsx
│ │ │ ├── AuthorizedRoute.jsx
│ │ │ ├── CheckPermissions.jsx
│ │ │ ├── PromiseRender.jsx
│ │ │ ├── Secured.jsx
│ │ │ ├── index.jsx
│ │ │ └── renderAuthorize.js
│ │ ├── Form
│ │ │ └── index.jsx
│ │ ├── GlobalHeader
│ │ │ ├── AvatarDropdown.jsx
│ │ │ ├── RightContent.jsx
│ │ │ └── index.less
│ │ ├── HeaderDropdown
│ │ │ ├── index.jsx
│ │ │ └── index.less
│ │ ├── HeaderSearch
│ │ │ ├── index.jsx
│ │ │ └── index.less
│ │ ├── JobStatus
│ │ │ └── index.js
│ │ ├── NoticeIcon
│ │ │ ├── NoticeList.jsx
│ │ │ ├── NoticeList.less
│ │ │ ├── index.jsx
│ │ │ └── index.less
│ │ ├── PageLoading
│ │ │ └── index.jsx
│ │ ├── PodStatus
│ │ │ └── index.js
│ │ └── SelectLang
│ │ │ ├── index.jsx
│ │ │ └── index.less
│ ├── e2e
│ │ ├── __mocks__
│ │ │ └── antd-pro-merge-less.js
│ │ ├── baseLayout.e2e.js
│ │ └── topMenu.e2e.js
│ ├── global.jsx
│ ├── global.less
│ ├── layouts
│ │ ├── BasicLayout.jsx
│ │ ├── BlankLayout.jsx
│ │ ├── SecurityLayout.jsx
│ │ └── index.less
│ ├── locales
│ │ ├── en-US.js
│ │ ├── en-US
│ │ │ ├── component.js
│ │ │ ├── globalHeader.js
│ │ │ ├── kubedl.js
│ │ │ ├── menu.js
│ │ │ ├── pwa.js
│ │ │ ├── settingDrawer.js
│ │ │ └── settings.js
│ │ ├── pt-BR.js
│ │ ├── pt-BR
│ │ │ ├── component.js
│ │ │ ├── globalHeader.js
│ │ │ ├── menu.js
│ │ │ ├── pwa.js
│ │ │ ├── settingDrawer.js
│ │ │ └── settings.js
│ │ ├── zh-CN.js
│ │ ├── zh-CN
│ │ │ ├── component.js
│ │ │ ├── globalHeader.js
│ │ │ ├── kubedl.js
│ │ │ ├── menu.js
│ │ │ ├── pwa.js
│ │ │ ├── settingDrawer.js
│ │ │ └── settings.js
│ │ ├── zh-TW.js
│ │ └── zh-TW
│ │ │ ├── component.js
│ │ │ ├── globalHeader.js
│ │ │ ├── menu.js
│ │ │ ├── pwa.js
│ │ │ ├── settingDrawer.js
│ │ │ └── settings.js
│ ├── manifest.json
│ ├── models
│ │ ├── global.js
│ │ ├── setting.js
│ │ └── user.js
│ ├── pages
│ │ ├── 403.jsx
│ │ ├── 404.jsx
│ │ ├── 500.jsx
│ │ ├── Admin.jsx
│ │ ├── Authorized.jsx
│ │ ├── ClusterInfo
│ │ │ ├── index.jsx
│ │ │ ├── service.js
│ │ │ └── style.less
│ │ ├── CodeConfig
│ │ │ └── index.jsx
│ │ ├── ConsoleInfo
│ │ │ ├── index.jsx
│ │ │ └── service.js
│ │ ├── DataConfig
│ │ │ ├── index.jsx
│ │ │ └── service.js
│ │ ├── DataSheets
│ │ │ ├── index.jsx
│ │ │ ├── index.less
│ │ │ └── service.js
│ │ ├── GitConfig
│ │ │ ├── index.jsx
│ │ │ └── service.js
│ │ ├── JobCreate
│ │ │ ├── index.jsx
│ │ │ ├── service.js
│ │ │ └── style.less
│ │ ├── JobDetail
│ │ │ ├── LogModal.jsx
│ │ │ ├── LogModal.less
│ │ │ ├── PodCharts.jsx
│ │ │ ├── index.jsx
│ │ │ ├── service.js
│ │ │ └── style.less
│ │ ├── JobSubmit
│ │ │ ├── components
│ │ │ │ ├── FooterToolbar
│ │ │ │ │ ├── index.jsx
│ │ │ │ │ └── index.less
│ │ │ │ └── SubmitModal.tsx
│ │ │ ├── index.jsx
│ │ │ └── service.js
│ │ ├── Jobs
│ │ │ ├── CreateTBModal.jsx
│ │ │ ├── index.jsx
│ │ │ └── service.js
│ │ ├── NotebookCreate
│ │ │ ├── index.jsx
│ │ │ ├── service.js
│ │ │ └── style.less
│ │ ├── Notebooks
│ │ │ ├── index.jsx
│ │ │ └── service.js
│ │ ├── Welcome.less
│ │ ├── WorkspaceCreate
│ │ │ ├── index.jsx
│ │ │ └── service.js
│ │ ├── WorkspaceDetail
│ │ │ ├── index.jsx
│ │ │ └── style.less
│ │ ├── Workspaces
│ │ │ ├── index.jsx
│ │ │ └── service.js
│ │ ├── document.ejs
│ │ ├── logIn
│ │ │ ├── index.jsx
│ │ │ └── index.less
│ │ └── user
│ │ │ └── login
│ │ │ ├── components
│ │ │ └── Login
│ │ │ │ ├── LoginContext.jsx
│ │ │ │ ├── LoginItem.jsx
│ │ │ │ ├── LoginSubmit.jsx
│ │ │ │ ├── LoginTab.jsx
│ │ │ │ ├── index.jsx
│ │ │ │ ├── index.less
│ │ │ │ └── map.jsx
│ │ │ ├── index.jsx
│ │ │ └── style.less
│ ├── service-worker.js
│ ├── services
│ │ ├── global.js
│ │ └── login.js
│ └── utils
│ │ ├── Authorized.js
│ │ ├── JobSubmit.js
│ │ ├── authority.js
│ │ ├── authority.test.js
│ │ ├── iconfont.js
│ │ ├── request.js
│ │ ├── utils.js
│ │ ├── utils.less
│ │ └── utils.test.js
│ ├── tests
│ ├── run-tests.js
│ └── setupTests.js
│ ├── yarn-error.log
│ └── yarn.lock
├── controllers
├── add_cachebackend.go
├── add_cron.go
├── add_elasticbatch.go
├── add_elasticdl.go
├── add_mars.go
├── add_modelversion.go
├── add_mpi.go
├── add_notebook.go
├── add_pytorch.go
├── add_serving.go
├── add_tensorflow.go
├── add_xdl.go
├── add_xgboostjob.go
├── apps
│ ├── cron_controller.go
│ ├── cron_event_filters.go
│ ├── cron_utils.go
│ └── suite_test.go
├── cache
│ ├── cachebackend_controller.go
│ ├── cachebackend_controller_test.go
│ └── utils.go
├── controllers.go
├── elasticbatch
│ ├── elasticbatch.go
│ ├── elasticbatchjob_controller.go
│ ├── elasticbatchjob_controller_test.go
│ ├── job.go
│ ├── pod.go
│ ├── service.go
│ └── status.go
├── elasticdl
│ ├── elasticdljob_controller.go
│ ├── job.go
│ ├── pod.go
│ ├── service.go
│ ├── status.go
│ └── util.go
├── mars
│ ├── ingress.go
│ ├── job.go
│ ├── mars.go
│ ├── marsjob_controller.go
│ ├── pod.go
│ ├── service.go
│ ├── status.go
│ └── suite_test.go
├── model
│ ├── modelversion_controller.go
│ ├── modelversion_controller_test.go
│ ├── storage
│ │ ├── aws_efs_provider.go
│ │ ├── local_storage_provider.go
│ │ ├── nfs_provider.go
│ │ └── storage_provider.go
│ └── utils.go
├── mpi
│ ├── job.go
│ ├── legacy.go
│ ├── mpi_config.go
│ ├── mpijob_controller.go
│ ├── pod.go
│ └── service.go
├── notebook
│ ├── notebook_controller.go
│ └── notebook_controller_test.go
├── persist
│ ├── event
│ │ ├── event_persist_controller.go
│ │ └── events_event_handler.go
│ ├── object
│ │ ├── job
│ │ │ ├── elasticbatchjob_persist_controller.go
│ │ │ ├── job_event_handler.go
│ │ │ ├── job_persist_controller.go
│ │ │ ├── marsjob_persist_controller.go
│ │ │ ├── pytorchjob_persist_controller.go
│ │ │ ├── tfjob_persist_controller.go
│ │ │ ├── xdljob_persist_controller.go
│ │ │ └── xgboostjob_persist_controller.go
│ │ └── pod
│ │ │ ├── pod_event_handler.go
│ │ │ └── pod_persist_controller.go
│ ├── persist_controller.go
│ └── util
│ │ ├── filter.go
│ │ └── request.go
├── pytorch
│ ├── elastic_scale.go
│ ├── elastic_scale_test.go
│ ├── job.go
│ ├── pod.go
│ ├── pytorchjob_controller.go
│ ├── service.go
│ ├── status.go
│ └── util.go
├── serving
│ ├── framework
│ │ ├── tfserving.go
│ │ └── types.go
│ ├── inference_controller.go
│ ├── labels.go
│ ├── model.go
│ ├── predictor.go
│ ├── suite_test.go
│ └── utils.go
├── suite_tests
│ ├── elasticbatchjob_controller_test.go
│ ├── pytorchjob_controller_test.go
│ ├── suite_test.go
│ ├── tfjob_controller_test.go
│ ├── xdljob_controller_test.go
│ └── xgboostjob_controller_test.go
├── tensorflow
│ ├── job.go
│ ├── pod.go
│ ├── service.go
│ ├── status.go
│ ├── tensorflow.go
│ ├── tfjob_controller.go
│ └── tfjob_controller_test.go
├── xdl
│ ├── job.go
│ ├── pod.go
│ ├── service.go
│ ├── status.go
│ ├── xdl.go
│ └── xdljob_controller.go
└── xgboost
│ ├── job.go
│ ├── pod.go
│ ├── pod_test.go
│ ├── service.go
│ └── xgboostjob_controller.go
├── docs
├── cache_backend.md
├── debug_guide.md
├── hostnetwork.md
├── how-to-add-a-custom-workload.md
├── img
│ ├── cache_lifetime.png
│ ├── cache_reuse.png
│ ├── data_cache_sequence.png
│ ├── inference.png
│ ├── kubedl-dingtalk.png
│ ├── kubedl-logo.svg
│ ├── kubedl.png
│ ├── kubedllogo.png
│ ├── mars-ingress.png
│ ├── stack.png
│ ├── tf_hostnetwork.png
│ └── ui_demo.png
├── metrics.md
├── proposals
│ ├── .gitkeep
│ ├── cn
│ │ └── .gitkeep
│ └── en
│ │ ├── cache-reuse.md
│ │ ├── data-cache.md
│ │ └── light-weighted-traffic-control-for-inference.md
├── startup_flags.md
├── sync_code.md
├── tensorboard.md
└── tutorial
│ ├── marsjob.md
│ ├── v1
│ ├── xdl-job.yaml
│ └── xdl-zk.yaml
│ └── xdljob.md
├── example
├── cachebackend
│ └── cache.yaml
├── elasticbatch
│ └── elasticbatch_job_sample.yaml
├── elasticdl
│ └── elasticdl_job_mnist.yaml
├── notebook
│ └── notebook-simple.yaml
├── pytorch
│ ├── example.go
│ ├── go.mod
│ ├── go.sum
│ └── pytorch_job_mnist_mpi.yaml
├── serving
│ └── tfserving.yaml
├── tf
│ ├── Dockerfile
│ ├── keras_model_to_estimator.py
│ ├── tf_job_mnist.yaml
│ ├── tf_job_mnist_cache.yaml
│ ├── tf_job_mnist_distributed_simple.yaml
│ ├── tf_job_mnist_modelversion.yaml
│ ├── tf_job_mnist_volume.yaml
│ └── tf_serving_modelversion.yaml
├── xdl
│ └── xdl_job_mnist.yaml
└── xgboost
│ └── xgboostjob_v1alpha1_iris_train.yaml
├── go.mod
├── go.sum
├── hack
├── boilerplate.go.txt
├── generate_client.sh
├── tool.go
└── update_codegen.sh
├── helm
└── kubedl
│ ├── .helmignore
│ ├── Chart.yaml
│ ├── crds
│ ├── apps.kubedl.io_crons.yaml
│ ├── cache.kubedl.io_cachebackends.yaml
│ ├── inference.kubedl.io_elasticbatchjobs.yaml
│ ├── model.kubedl.io_models.yaml
│ ├── model.kubedl.io_modelversions.yaml
│ ├── notebook.kubedl.io_notebooks.yaml
│ ├── serving.kubedl.io_inferences.yaml
│ ├── training.kubedl.io_elasticdljobs.yaml
│ ├── training.kubedl.io_marsjobs.yaml
│ ├── training.kubedl.io_mpijobs.yaml
│ ├── training.kubedl.io_pytorchjobs.yaml
│ ├── training.kubedl.io_tfjobs.yaml
│ ├── training.kubedl.io_xdljobs.yaml
│ └── training.kubedl.io_xgboostjobs.yaml
│ ├── templates
│ ├── NOTES.txt
│ ├── _helpers.tpl
│ ├── cluster-role-binding.yaml
│ ├── deployment.yaml
│ ├── hpa.yaml
│ ├── ingress.yaml
│ ├── role.yaml
│ ├── service.yaml
│ ├── serviceaccount.yaml
│ └── tests
│ │ └── test-connection.yaml
│ └── values.yaml
├── main.go
├── pkg
├── cache_backend
│ ├── fluid
│ │ ├── fluidcache.go
│ │ └── fluidcache_test.go
│ ├── interface.go
│ ├── registry
│ │ ├── add_fluid_cachebackend.go
│ │ └── registry.go
│ └── test
│ │ └── cachebackend.go
├── code_sync
│ ├── git_sync_handler.go
│ └── sync_handler.go
├── features
│ └── features.go
├── gang_schedule
│ ├── batch_scheduler
│ │ ├── scheduler.go
│ │ └── scheduler_test.go
│ ├── coscheduler
│ │ ├── scheduler.go
│ │ └── scheduler_test.go
│ ├── interface.go
│ ├── registry
│ │ ├── add_batch_scheduler.go
│ │ ├── add_coscheduler.go
│ │ ├── add_volcano_scheduler.go
│ │ └── registry.go
│ ├── utils.go
│ └── volcano_scheduler
│ │ ├── scheduler.go
│ │ └── scheduler_test.go
├── job_controller
│ ├── api
│ │ └── v1
│ │ │ ├── constants.go
│ │ │ ├── doc.go
│ │ │ ├── interface.go
│ │ │ ├── types.go
│ │ │ ├── zz_generated.deepcopy.go
│ │ │ └── zz_generated.defaults.go
│ ├── dag_sched.go
│ ├── dag_sched_test.go
│ ├── eventhandler_helpers.go
│ ├── expectations.go
│ ├── failover.go
│ ├── failover_test.go
│ ├── hostnetwork.go
│ ├── hostnetwork_test.go
│ ├── job.go
│ ├── job_controller.go
│ ├── job_test.go
│ ├── pod.go
│ ├── pod_control.go
│ ├── pod_control_test.go
│ ├── pod_test.go
│ ├── service.go
│ ├── service_control.go
│ ├── service_control_test.go
│ ├── service_ref_manager.go
│ ├── service_ref_manager_test.go
│ ├── status.go
│ ├── status_test.go
│ ├── suite_test.go
│ ├── test_job_controller.go
│ └── util.go
├── jobcoordinator
│ ├── core
│ │ ├── coordinator.go
│ │ ├── coordinator_test.go
│ │ ├── defaults.go
│ │ ├── metrics.go
│ │ ├── next_queue_selector.go
│ │ ├── next_queue_selector_test.go
│ │ ├── queue.go
│ │ └── queue_test.go
│ ├── eventhandler
│ │ └── eventhandler.go
│ ├── helper
│ │ └── helper.go
│ ├── plugins
│ │ ├── priority
│ │ │ ├── priority.go
│ │ │ └── priority_test.go
│ │ ├── quota
│ │ │ ├── quota.go
│ │ │ └── quota_test.go
│ │ └── registry.go
│ └── types.go
├── metrics
│ ├── job_metrics.go
│ ├── monitor.go
│ ├── status_counter.go
│ └── status_counter_test.go
├── storage
│ ├── backends
│ │ ├── events
│ │ │ └── aliyun_sls
│ │ │ │ ├── config.go
│ │ │ │ └── sls_logstore.go
│ │ ├── interface.go
│ │ ├── objects
│ │ │ └── mysql
│ │ │ │ ├── config.go
│ │ │ │ └── mysql.go
│ │ ├── query.go
│ │ ├── registry
│ │ │ ├── add_event_aliyun_sls.go
│ │ │ ├── add_object_mysql.go
│ │ │ └── registry.go
│ │ └── utils
│ │ │ ├── constants.go
│ │ │ └── env.go
│ └── dmo
│ │ ├── converters
│ │ ├── event.go
│ │ ├── event_test.go
│ │ ├── job.go
│ │ ├── job_test.go
│ │ ├── notebook.go
│ │ ├── pod.go
│ │ └── pod_test.go
│ │ └── types.go
├── tensorboard
│ └── tensorboard.go
├── test_job
│ └── v1
│ │ ├── constants.go
│ │ ├── defaults.go
│ │ ├── doc.go
│ │ ├── object_kind.go
│ │ ├── register.go
│ │ ├── test_job_controller.go
│ │ ├── types.go
│ │ ├── zz_generated.deepcopy.go
│ │ └── zz_generated.defaults.go
├── test_util
│ └── v1
│ │ ├── const.go
│ │ ├── pod.go
│ │ ├── service.go
│ │ ├── test_job_util.go
│ │ └── util.go
└── util
│ ├── concurrent
│ └── concurrent.go
│ ├── errors.go
│ ├── k8sutil
│ ├── client.go
│ ├── k8sutil.go
│ └── k8sutil.go.orig
│ ├── logger.go
│ ├── patch
│ ├── patch_utils.go
│ └── patch_utils_test.go
│ ├── pointer.go
│ ├── quota
│ └── resources.go
│ ├── recorder
│ ├── flowcontrolled_recorder.go
│ └── flowcontrolled_recorder_test.go
│ ├── resource_utils
│ ├── resources.go
│ └── resources_test.go
│ ├── runtime.go
│ ├── runtime
│ ├── empty_scale.go
│ ├── runtime.go
│ └── status_traitor.go
│ ├── signals
│ ├── signal.go
│ ├── signal_posix.go
│ └── signal_windows.go
│ ├── status.go
│ ├── status_test.go
│ ├── tenancy
│ └── tenancy.go
│ ├── train
│ ├── train_util.go
│ └── train_util_test.go
│ ├── util.go
│ ├── util_test.go
│ └── workloadgate
│ ├── workload_gate.go
│ └── workload_gate_test.go
└── scripts
├── deploy_kubedl.sh
├── docker_build_daily.sh
└── run_tf_test_job.sh
/.github/ISSUE_TEMPLATE/bug-report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug Report
3 | about: Create a report to help us improve
4 | title: "[BUG]"
5 | labels: ''
6 | assignees: SimonCqk
7 |
8 | ---
9 |
10 |
11 |
12 | **What happened**:
13 |
14 | **What you expected to happen**:
15 |
16 | **How to reproduce it**:
17 |
18 | **Anything else we need to know?**:
19 |
20 | **Environment**:
21 | - KubeDL version:
22 | - Kubernetes version (use `kubectl version`):
23 | - OS (e.g. `cat /etc/os-release`):
24 | - Kernel (e.g. `uname -a`):
25 | - Install tools:
26 | - Others:
27 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature-request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature Request
3 | about: Suggest an idea for this project
4 | title: "[feature request]"
5 | labels: ''
6 | assignees: SimonCqk
7 |
8 | ---
9 |
10 |
11 |
12 | **What would you like to be added**:
13 |
14 |
15 | **Why is this needed**:
16 |
--------------------------------------------------------------------------------
/.github/PROPOSAL_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | # Proposal Template
2 |
3 | ## Motivations
4 |
5 | ## Use case
6 |
7 | ## Proposal
8 |
9 | ## Implementation
10 |
11 | ## Alternatives considered
12 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |
4 |
5 | ### I. Describe what this PR does
6 |
7 |
8 | ### II. Does this pull request fix one issue?
9 |
10 |
11 |
12 | ### III. Special notes for reviewers if any.
13 |
14 |
15 |
--------------------------------------------------------------------------------
/.github/workflows/license.yml:
--------------------------------------------------------------------------------
1 | name: License
2 | on:
3 | push:
4 | branches:
5 | - master
6 | - release-*
7 | workflow_dispatch: {}
8 | pull_request:
9 | branches:
10 | - master
11 | - release-*
12 |
13 | jobs:
14 | license_check:
15 | runs-on: ubuntu-latest
16 | name: Check for unapproved licenses
17 | steps:
18 | - uses: actions/checkout@v2
19 | - name: Set up Ruby
20 | uses: ruby/setup-ruby@v1
21 | with:
22 | ruby-version: 2.6
23 | - name: Install dependencies
24 | run: gem install license_finder
25 | - name: Run tests
26 | run: license_finder --decisions_file .license/dependency_decisions.yml
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # console
2 | console/backend-server
3 | console/frontend/package-lock.json
4 | **/node_modules
5 | **/.umi/**
6 | **/dist/**
7 |
8 | # manager binary
9 | manager
10 |
11 | # Binaries for programs and plugins
12 | *.exe
13 | *.exe~
14 | *.dll
15 | *.so
16 | *.dylib
17 | bin
18 |
19 | # Test binary, build with `go test -c`
20 | *.test
21 |
22 | # Output of the go coverage tool, specifically when used with LiteIDE
23 | *.out
24 |
25 | # Kubernetes Generated files - skip generated files, except for vendored files
26 |
27 | !vendor/**/zz_generated.*
28 |
29 | # editor and IDE paraphernalia
30 | .idea
31 | *.swp
32 | *.swo
33 | *~
34 | .DS_Store
35 |
36 | .vscode
37 |
38 |
--------------------------------------------------------------------------------
/.license/README.md:
--------------------------------------------------------------------------------
1 | # License Checker
2 |
3 | Our license checker CI relies on [LicenseFinder](https://github.com/pivotal/LicenseFinder).
4 |
5 | ## How to add a new license
6 |
7 | LicenseFinder is a Ruby project, so make sure you have Ruby installed.
8 |
9 | ### Install the tool
10 |
11 | ```shell
12 | gem install license_finder
13 | ```
14 |
15 | ### Add a license
16 |
17 | ```shell
18 | license_finder permitted_licenses add MIT --decisions_file .license/dependency_decisions.yml
19 | ```
20 |
--------------------------------------------------------------------------------
/.license/dependency_decisions.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - - :permit
3 | - MIT
4 | - :who:
5 | :why:
6 | :versions: []
7 | :when: 2021-03-12 07:35:34.645031000 Z
8 | - - :permit
9 | - Apache 2.0
10 | - :who:
11 | :why:
12 | :versions: []
13 | :when: 2021-03-12 07:19:18.243194000 Z
14 | - - :permit
15 | - New BSD
16 | - :who:
17 | :why:
18 | :versions: []
19 | :when: 2021-03-12 07:19:28.540675000 Z
20 | - - :permit
21 | - Simplified BSD
22 | - :who:
23 | :why:
24 | :versions: []
25 | :when: 2021-03-12 07:20:01.774212000 Z
26 | - - :permit
27 | - Mozilla Public License 2.0
28 | - :who:
29 | :why:
30 | :versions: []
31 | :when: 2021-03-12 07:21:05.194536000 Z
32 | - - :permit
33 | - unknown
34 | - :who:
35 | :why:
36 | :versions: []
37 | :when: 2021-03-12 07:21:43.379269000 Z
38 | - - :permit
39 | - ISC
40 | - :who:
41 | :why:
42 | :versions: []
43 | :when: 2021-03-12 07:22:07.265966000 Z
44 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Build the manager binary
2 | FROM golang:1.19.10 as builder
3 |
4 | WORKDIR /workspace
5 | # Copy the Go Modules manifests
6 | COPY go.mod go.mod
7 | COPY go.sum go.sum
8 | # cache deps before building and copying source so that we don't need to re-download as much
9 | # and so that source changes don't invalidate our downloaded layer
10 | RUN go mod download
11 |
12 | # Copy the go source
13 | COPY . .
14 |
15 | # Build
16 | RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -a -o manager main.go
17 |
18 | # Use distroless as minimal base image to package the manager binary
19 | # Refer to https://github.com/GoogleContainerTools/distroless for more details
20 | FROM gcr.io/distroless/static:nonroot
21 | WORKDIR /
22 | COPY --from=builder /workspace/manager .
23 | USER nonroot:nonroot
24 |
25 | ENTRYPOINT ["/manager"]
26 |
--------------------------------------------------------------------------------
/Dockerfile.dashboard:
--------------------------------------------------------------------------------
1 | FROM node:13.12.0 as frontend-builder
2 |
3 | WORKDIR /workspace
4 |
5 | COPY console/frontend/ .
6 | RUN rm -rf ./dist && rm -rf ./node_modules && rm -f ./package-lock.json
7 | RUN npm install
8 | RUN npm run build
9 |
10 | FROM golang:1.13.6 as backend-builder
11 |
12 | WORKDIR /workspace
13 |
14 | COPY . .
15 |
16 | RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o backend-server console/backend/cmd/backend-server/main.go
17 |
18 | # Use distroless as minimal base image to package the backend-server binary and the built frontend assets
19 | # Refer to https://github.com/GoogleContainerTools/distroless for more details
20 | FROM gcr.io/distroless/static:nonroot
21 | WORKDIR /
22 | COPY --from=frontend-builder /workspace/dist ./dist
23 | COPY --from=backend-builder /workspace/backend-server ./backend-server
24 | USER nonroot:nonroot
25 |
26 | ENTRYPOINT ["/backend-server"]
--------------------------------------------------------------------------------
/OWNERS:
--------------------------------------------------------------------------------
1 | # This is a list of owners in alphabetical order.
2 |
3 | # Github Id Name Email
4 | @jian-he, Jian He, jianhe688@gmail.com
5 | @lwangbm, Luping Wang, lwangbm@cse.ust.hk
6 | @mental2008, Lingyun Yang, lyangbk@cse.ust.hk
7 | @SimonCqk, Qiukai Chen, qiukai.cqk@alibaba-inc.com
8 | @tzzcfrank, Cheng Zhang, zuofeng.zc@alibaba-inc.com
9 | @yhalpha, Yinghao Yu, yinghao.yyh@alibaba-inc.com
10 |
--------------------------------------------------------------------------------
/PROJECT:
--------------------------------------------------------------------------------
1 | domain: kubedl.io
2 | multigroup: true
3 | repo: github.com/alibaba/kubedl
4 | resources:
5 | - group: training
6 | kind: MarsJob
7 | version: v1alpha1
8 | - group: training
9 | kind: MPIJob
10 | version: v1alpha1
11 | - group: training
12 | kind: PyTorchJob
13 | version: v1alpha1
14 | - group: training
15 | kind: TFJob
16 | version: v1alpha1
17 | - group: training
18 | kind: XDLJob
19 | version: v1alpha1
20 | - group: training
21 | kind: XGBoostJob
22 | version: v1alpha1
23 | - group: training
24 | kind: ElasticDLJob
25 | version: v1alpha1
26 | - group: model
27 | kind: Model
28 | version: v1alpha1
29 | - group: model
30 | kind: ModelVersion
31 | version: v1alpha1
32 | - group: serving
33 | kind: Inference
34 | version: v1alpha1
35 | - group: apps
36 | kind: Cron
37 | version: v1alpha1
38 | - group: cache
39 | kind: CacheBackend
40 | version: v1alpha1
41 | - group: notebook
42 | kind: Notebook
43 | version: v1alpha1
44 | - group: inference
45 | kind: ElasticBatchJob
46 | version: v1alpha1
47 | version: "2"
48 |
--------------------------------------------------------------------------------
/apis/addtoscheme_apps_v1alpha1.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021 The Alibaba Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package apis
18 |
19 | import "github.com/alibaba/kubedl/apis/apps/v1alpha1"
20 |
21 | func init() { // queue the apps/v1alpha1 AddToScheme function on the package-level AddToSchemes builder
22 | 	AddToSchemes.Register(v1alpha1.AddToScheme)
23 | }
24 |
--------------------------------------------------------------------------------
/apis/addtoscheme_cache_v1alpha1.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2019 The Alibaba Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package apis
18 |
19 | import "github.com/alibaba/kubedl/apis/cache/v1alpha1"
20 |
21 | func init() { // queue the cache/v1alpha1 AddToScheme function on the package-level AddToSchemes builder
22 | 	AddToSchemes.Register(v1alpha1.AddToScheme)
23 | }
24 |
--------------------------------------------------------------------------------
/apis/addtoscheme_inference_v1alpha1.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2019 The Alibaba Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package apis
18 |
19 | import (
20 | "github.com/alibaba/kubedl/apis/inference/v1alpha1"
21 | )
22 |
23 | func init() { // queue the inference/v1alpha1 SchemeBuilder.AddToScheme function on the package-level AddToSchemes builder
24 | 	AddToSchemes.Register(v1alpha1.SchemeBuilder.AddToScheme)
25 | }
26 |
--------------------------------------------------------------------------------
/apis/addtoscheme_model_v1alpha1.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2019 The Alibaba Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package apis
18 |
19 | import "github.com/alibaba/kubedl/apis/model/v1alpha1"
20 |
21 | func init() { // queue the model/v1alpha1 AddToScheme function on the package-level AddToSchemes builder
22 | 	AddToSchemes.Register(v1alpha1.AddToScheme)
23 | }
24 |
--------------------------------------------------------------------------------
/apis/addtoscheme_notebook_v1alpha1.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021 The KubeDL Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package apis
18 |
19 | import (
20 | "github.com/alibaba/kubedl/apis/notebook/v1alpha1"
21 | )
22 |
23 | func init() { // queue the notebook/v1alpha1 AddToScheme function on the package-level AddToSchemes builder
24 | 	AddToSchemes.Register(v1alpha1.AddToScheme)
25 | }
26 |
--------------------------------------------------------------------------------
/apis/addtoscheme_serving_v1alpha1.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021 The Alibaba Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package apis
18 |
19 | import "github.com/alibaba/kubedl/apis/serving/v1alpha1"
20 |
21 | func init() { // queue the serving/v1alpha1 AddToScheme function on the package-level AddToSchemes builder
22 | 	AddToSchemes.Register(v1alpha1.AddToScheme)
23 | }
24 |
--------------------------------------------------------------------------------
/apis/addtoscheme_training_v1alpha1.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2019 The Alibaba Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package apis
18 |
19 | import (
20 | "github.com/alibaba/kubedl/apis/training/v1alpha1"
21 | )
22 |
23 | func init() { // queue the training/v1alpha1 SchemeBuilder.AddToScheme function on the package-level AddToSchemes builder
24 | 	AddToSchemes.Register(v1alpha1.SchemeBuilder.AddToScheme)
25 | }
26 |
--------------------------------------------------------------------------------
/apis/apis.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2019 The Alibaba Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package apis
18 |
19 | import (
20 | "k8s.io/apimachinery/pkg/runtime"
21 | )
22 |
23 | var AddToSchemes runtime.SchemeBuilder // registration functions appended by this package's init functions
24 |
25 | func AddToScheme(s *runtime.Scheme) error { // runs the functions held in AddToSchemes against s and returns the builder's error
26 | 	return (&AddToSchemes).AddToScheme(s)
27 | }
28 |
--------------------------------------------------------------------------------
/apis/cache/v1alpha1/defaults.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021 The Alibaba Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package v1alpha1
18 |
19 | import "k8s.io/apimachinery/pkg/runtime"
20 |
21 | func SetDefaults_CacheBackend(cacheBackend *CacheBackend) {
22 | // No-op: the body is empty, so no field of cacheBackend is modified.
23 | }
24 |
25 | func addDefaultingFuncs(scheme *runtime.Scheme) error { // thin wrapper: forwards scheme to RegisterDefaults and returns its error
26 | return RegisterDefaults(scheme)
27 | }
28 |
--------------------------------------------------------------------------------
/apis/cache/v1alpha1/zz_generated.defaults.go:
--------------------------------------------------------------------------------
1 | //go:build !ignore_autogenerated
2 | // +build !ignore_autogenerated
3 |
4 | /*
5 | Copyright 2021 The Alibaba Authors.
6 |
7 | Licensed under the Apache License, Version 2.0 (the "License");
8 | you may not use this file except in compliance with the License.
9 | You may obtain a copy of the License at
10 |
11 | http://www.apache.org/licenses/LICENSE-2.0
12 |
13 | Unless required by applicable law or agreed to in writing, software
14 | distributed under the License is distributed on an "AS IS" BASIS,
15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | See the License for the specific language governing permissions and
17 | limitations under the License.
18 | */
19 | // Code generated by defaulter-gen. DO NOT EDIT.
20 |
21 | package v1alpha1
22 |
23 | import (
24 | runtime "k8s.io/apimachinery/pkg/runtime"
25 | )
26 |
27 | // RegisterDefaults adds defaulters functions to the given scheme.
28 | // Public to allow building arbitrary schemes.
29 | // All generated defaulters are covering - they call all nested defaulters.
30 | func RegisterDefaults(scheme *runtime.Scheme) error {
31 | return nil // nothing is registered on scheme here; the call always succeeds
32 | }
33 |
--------------------------------------------------------------------------------
/apis/inference/v1alpha1/v1alpha1_suite_test.go:
--------------------------------------------------------------------------------
1 | /*
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 |
16 | package v1alpha1
17 |
18 | import (
19 | "log"
20 | "os"
21 | "testing"
22 |
23 | "k8s.io/client-go/kubernetes/scheme"
24 | "sigs.k8s.io/controller-runtime/pkg/client"
25 | "sigs.k8s.io/controller-runtime/pkg/client/fake"
26 | )
27 |
28 | var c client.Client
29 |
30 | func TestMain(m *testing.M) {
31 | 	// Add this package's types to the shared client-go scheme via SchemeBuilder; abort the run on failure.
32 | 	if regErr := SchemeBuilder.AddToScheme(scheme.Scheme); regErr != nil {
33 | 		log.Fatal(regErr)
34 | 	}
35 |
36 | 	// Build the fake client kept in the package-level variable c.
37 | 	c = fake.NewClientBuilder().WithScheme(scheme.Scheme).Build()
38 |
39 | 	os.Exit(m.Run())
40 | }
41 |
--------------------------------------------------------------------------------
/apis/model/v1alpha1/defaults.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021 The Alibaba Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package v1alpha1
18 |
19 | import "k8s.io/apimachinery/pkg/runtime"
20 |
21 | func addDefaultingFuncs(scheme *runtime.Scheme) error { // thin wrapper: forwards scheme to RegisterDefaults and returns its error
22 | return RegisterDefaults(scheme)
23 | }
24 |
--------------------------------------------------------------------------------
/apis/model/v1alpha1/zz_generated.defaults.go:
--------------------------------------------------------------------------------
1 | //go:build !ignore_autogenerated
2 | // +build !ignore_autogenerated
3 |
4 | /*
5 | Copyright 2021 The Alibaba Authors.
6 |
7 | Licensed under the Apache License, Version 2.0 (the "License");
8 | you may not use this file except in compliance with the License.
9 | You may obtain a copy of the License at
10 |
11 | http://www.apache.org/licenses/LICENSE-2.0
12 |
13 | Unless required by applicable law or agreed to in writing, software
14 | distributed under the License is distributed on an "AS IS" BASIS,
15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | See the License for the specific language governing permissions and
17 | limitations under the License.
18 | */
19 | // Code generated by defaulter-gen. DO NOT EDIT.
20 |
21 | package v1alpha1
22 |
23 | import (
24 | runtime "k8s.io/apimachinery/pkg/runtime"
25 | )
26 |
27 | // RegisterDefaults adds defaulters functions to the given scheme.
28 | // Public to allow building arbitrary schemes.
29 | // All generated defaulters are covering - they call all nested defaulters.
30 | func RegisterDefaults(scheme *runtime.Scheme) error {
31 | return nil // nothing is registered on scheme here; the call always succeeds
32 | }
33 |
--------------------------------------------------------------------------------
/apis/training/v1alpha1/doc.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021 The Alibaba Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | // Package v1alpha1 contains API Schema definitions for the training v1alpha1 API group
18 | // +k8s:defaulter-gen=TypeMeta
19 | // +groupName=training.kubedl.io
20 | package v1alpha1
21 |
--------------------------------------------------------------------------------
/apis/training/v1alpha1/elasticdljob_constant.go:
--------------------------------------------------------------------------------
1 | package v1alpha1
2 |
3 | const (
4 | ElasticDLJobKind = "ElasticDLJob" // kind name of the ElasticDLJob resource
5 | ElasticDLJobDefaultContainerName = "elasticdl" // default container name (consumers are not visible in this file)
6 | ElasticDLJobDefaultPortName = "elasticdl-port" // default port name
7 | ElasticDLJobDefaultPort = 11111 // default port number
8 | )
9 |
--------------------------------------------------------------------------------
/apis/training/v1alpha1/mpijob_constants.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021 The Alibaba Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package v1alpha1
18 |
19 | import v1 "github.com/alibaba/kubedl/pkg/job_controller/api/v1"
20 |
21 | const (
22 | MPIJobKind = "MPIJob" // kind name of the MPIJob resource
23 | // MPIJobDefaultRestartPolicy is the default RestartPolicy for ReplicaSpec.
24 | MPIJobDefaultRestartPolicy = v1.RestartPolicyNever
25 | MPIJobDefaultCleanPodPolicy = v1.CleanPodPolicyRunning // default CleanPodPolicy
26 |
27 | MPIJobDefaultContainerName = "mpi" // default container name
28 | MPIJobDefaultPortName = "mpi-port" // default port name
29 | MPIJobDefaultPort = 2222 // default port number
30 | )
31 |
--------------------------------------------------------------------------------
/apis/training/v1alpha1/v1alpha1_suite_test.go:
--------------------------------------------------------------------------------
1 | /*
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 |
16 | package v1alpha1
17 |
18 | import (
19 | "log"
20 | "os"
21 | "testing"
22 |
23 | "k8s.io/client-go/kubernetes/scheme"
24 | "sigs.k8s.io/controller-runtime/pkg/client"
25 | "sigs.k8s.io/controller-runtime/pkg/client/fake"
26 | )
27 |
28 | var c client.Client
29 |
30 | func TestMain(m *testing.M) {
31 | 	// Add this package's types to the shared client-go scheme via SchemeBuilder; abort the run on failure.
32 | 	if regErr := SchemeBuilder.AddToScheme(scheme.Scheme); regErr != nil {
33 | 		log.Fatal(regErr)
34 | 	}
35 |
36 | 	// Build the fake client kept in the package-level variable c.
37 | 	c = fake.NewClientBuilder().WithScheme(scheme.Scheme).Build()
38 |
39 | 	os.Exit(m.Run())
40 | }
41 |
--------------------------------------------------------------------------------
/apis/training/v1alpha1/xgboostjob_constants.go:
--------------------------------------------------------------------------------
1 | /*
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 |
16 | package v1alpha1
17 |
18 | import (
19 | v1 "github.com/alibaba/kubedl/pkg/job_controller/api/v1"
20 | )
21 |
22 | const (
23 | // XGBoostJobKind is the kind name.
24 | XGBoostJobKind = "XGBoostJob"
25 | XGBoostJobDefaultContainerName = "xgboostjob" // default container name
26 | XGBoostJobDefaultContainerPortName = "xgboostjob-port" // default container port name
27 | XGBoostJobDefaultPort = 9999 // default port number
28 | XGBoostJobDefaultTTLseconds = int32(100) // default TTL, in seconds per the name; the consuming field is not visible here
29 | XGBoostJobDefaultCleanPodPolicy = v1.CleanPodPolicyNone // default CleanPodPolicy
30 | )
31 |
--------------------------------------------------------------------------------
/client/clientset/versioned/doc.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021 The Alibaba Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | // Code generated by client-gen. DO NOT EDIT.
17 |
18 | // This package has the automatically generated clientset.
19 | package versioned
20 |
--------------------------------------------------------------------------------
/client/clientset/versioned/fake/doc.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021 The Alibaba Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | // Code generated by client-gen. DO NOT EDIT.
17 |
18 | // This package has the automatically generated fake clientset.
19 | package fake
20 |
--------------------------------------------------------------------------------
/client/clientset/versioned/scheme/doc.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021 The Alibaba Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | // Code generated by client-gen. DO NOT EDIT.
17 |
18 | // This package contains the scheme of the automatically generated clientset.
19 | package scheme
20 |
--------------------------------------------------------------------------------
/client/clientset/versioned/typed/training/v1alpha1/doc.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021 The Alibaba Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | // Code generated by client-gen. DO NOT EDIT.
17 |
18 | // This package has the automatically generated typed clients.
19 | package v1alpha1
20 |
--------------------------------------------------------------------------------
/client/clientset/versioned/typed/training/v1alpha1/fake/doc.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021 The Alibaba Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | // Code generated by client-gen. DO NOT EDIT.
17 |
18 | // Package fake has the automatically generated clients.
19 | package fake
20 |
--------------------------------------------------------------------------------
/client/clientset/versioned/typed/training/v1alpha1/generated_expansion.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021 The Alibaba Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | // Code generated by client-gen. DO NOT EDIT.
17 |
18 | package v1alpha1
19 |
20 | type ElasticDLJobExpansion interface{} // empty: declares no additional ElasticDLJob client methods
21 |
22 | type MPIJobExpansion interface{} // empty: declares no additional MPIJob client methods
23 |
24 | type MarsJobExpansion interface{} // empty: declares no additional MarsJob client methods
25 |
26 | type PyTorchJobExpansion interface{} // empty: declares no additional PyTorchJob client methods
27 |
28 | type TFJobExpansion interface{} // empty: declares no additional TFJob client methods
29 |
30 | type XDLJobExpansion interface{} // empty: declares no additional XDLJob client methods
31 |
32 | type XGBoostJobExpansion interface{} // empty: declares no additional XGBoostJob client methods
33 |
--------------------------------------------------------------------------------
/config/certmanager/certificate.yaml:
--------------------------------------------------------------------------------
1 | # The following manifests contain a self-signed issuer CR and a certificate CR.
2 | # More documentation can be found at https://docs.cert-manager.io
3 | apiVersion: certmanager.k8s.io/v1alpha1
4 | kind: Issuer
5 | metadata:
6 | name: selfsigned-issuer
7 | namespace: system
8 | spec:
9 | selfSigned: {}
10 | ---
11 | apiVersion: certmanager.k8s.io/v1alpha1
12 | kind: Certificate
13 | metadata:
14 | name: serving-cert # this name should match the one that appears in kustomizeconfig.yaml
15 | namespace: system
16 | spec:
17 | # $(SERVICE_NAME) and $(SERVICE_NAMESPACE) will be substituted by kustomize
18 | commonName: $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc
19 | dnsNames:
20 | - $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc.cluster.local
21 | issuerRef:
22 | kind: Issuer
23 | name: selfsigned-issuer
24 | secretName: webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize
25 |
--------------------------------------------------------------------------------
/config/certmanager/kustomization.yaml:
--------------------------------------------------------------------------------
1 | resources:
2 | - certificate.yaml
3 |
4 | configurations:
5 | - kustomizeconfig.yaml
6 |
--------------------------------------------------------------------------------
/config/certmanager/kustomizeconfig.yaml:
--------------------------------------------------------------------------------
1 | # This configuration teaches kustomize how to update name references and perform var substitution
2 | nameReference:
3 | - kind: Issuer
4 | group: certmanager.k8s.io
5 | fieldSpecs:
6 | - kind: Certificate
7 | group: certmanager.k8s.io
8 | path: spec/issuerRef/name
9 |
10 | varReference:
11 | - kind: Certificate
12 | group: certmanager.k8s.io
13 | path: spec/commonName
14 | - kind: Certificate
15 | group: certmanager.k8s.io
16 | path: spec/dnsNames
17 |
--------------------------------------------------------------------------------
/config/crd/kustomizeconfig.yaml:
--------------------------------------------------------------------------------
1 | # This file teaches kustomize how to substitute name and namespace references in CRDs
2 | nameReference:
3 | - kind: Service
4 | version: v1
5 | fieldSpecs:
6 | - kind: CustomResourceDefinition
7 | group: apiextensions.k8s.io
8 | path: spec/conversion/webhookClientConfig/service/name
9 |
10 | namespace:
11 | - kind: CustomResourceDefinition
12 | group: apiextensions.k8s.io
13 | path: spec/conversion/webhookClientConfig/service/namespace
14 | create: false
15 |
16 | varReference:
17 | - path: metadata/annotations
18 |
--------------------------------------------------------------------------------
/config/crd/patches/cainjection_in_cachebackends.yaml:
--------------------------------------------------------------------------------
1 | # The following patch adds a directive for certmanager to inject CA into the CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | annotations:
7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
8 | name: cachebackends.cache.kubedl.io
9 |
--------------------------------------------------------------------------------
/config/crd/patches/cainjection_in_crons.yaml:
--------------------------------------------------------------------------------
1 | # The following patch adds a directive for certmanager to inject CA into the CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | annotations:
7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
8 | name: crons.apps.kubedl.io
9 |
--------------------------------------------------------------------------------
/config/crd/patches/cainjection_in_elasticbatchjobs.yaml:
--------------------------------------------------------------------------------
1 | # The following patch adds a directive for certmanager to inject CA into the CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | annotations:
7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
8 | name: elasticbatchjobs.inference.kubedl.io
9 |
--------------------------------------------------------------------------------
/config/crd/patches/cainjection_in_elasticdljobs.yaml:
--------------------------------------------------------------------------------
1 | # The following patch adds a directive for certmanager to inject CA into the CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | annotations:
7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
8 | name: elasticdljobs.training.kubedl.io
9 |
--------------------------------------------------------------------------------
/config/crd/patches/cainjection_in_marsjobs.yaml:
--------------------------------------------------------------------------------
1 | # The following patch adds a directive for certmanager to inject CA into the CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | annotations:
7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
8 | name: marsjobs.training.kubedl.io
9 |
--------------------------------------------------------------------------------
/config/crd/patches/cainjection_in_models.yaml:
--------------------------------------------------------------------------------
1 | # The following patch adds a directive for certmanager to inject CA into the CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | annotations:
7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
8 | name: models.model.kubedl.io
9 |
--------------------------------------------------------------------------------
/config/crd/patches/cainjection_in_modelversions.yaml:
--------------------------------------------------------------------------------
1 | # The following patch adds a directive for certmanager to inject CA into the CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | annotations:
7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
8 | name: modelversions.model.kubedl.io
9 |
--------------------------------------------------------------------------------
/config/crd/patches/cainjection_in_mpijobs.yaml:
--------------------------------------------------------------------------------
1 | # The following patch adds a directive for certmanager to inject CA into the CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | annotations:
7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
8 | name: mpijobs.training.kubedl.io
9 |
--------------------------------------------------------------------------------
/config/crd/patches/cainjection_in_notebooks.yaml:
--------------------------------------------------------------------------------
1 | # The following patch adds a directive for certmanager to inject CA into the CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | annotations:
7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
8 | name: notebooks.notebook.kubedl.io
9 |
--------------------------------------------------------------------------------
/config/crd/patches/cainjection_in_pytorchjobs.yaml:
--------------------------------------------------------------------------------
1 | # The following patch adds a directive for certmanager to inject CA into the CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | annotations:
7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
8 | name: pytorchjobs.training.kubedl.io
9 |
--------------------------------------------------------------------------------
/config/crd/patches/cainjection_in_tfjobs.yaml:
--------------------------------------------------------------------------------
1 | # The following patch adds a directive for cert-manager (cert-manager.io annotation) to inject CA into the CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | annotations:
7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
8 | name: tfjobs.training.kubedl.io
9 |
--------------------------------------------------------------------------------
/config/crd/patches/cainjection_in_xdljobs.yaml:
--------------------------------------------------------------------------------
1 | # The following patch adds a directive for certmanager to inject CA into the CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | annotations:
7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
8 | name: xdljobs.training.kubedl.io
9 |
--------------------------------------------------------------------------------
/config/crd/patches/cainjection_in_xgboostjobs.yaml:
--------------------------------------------------------------------------------
1 | # The following patch adds a directive for certmanager to inject CA into the CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | annotations:
7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
8 | name: xgboostjobs.training.kubedl.io
9 |
--------------------------------------------------------------------------------
/config/crd/patches/webhook_in_cachebackends.yaml:
--------------------------------------------------------------------------------
1 | # The following patch enables conversion webhook for CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | name: cachebackends.cache.kubedl.io
7 | spec:
8 | conversion:
9 | strategy: Webhook
10 | webhookClientConfig:
11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank,
12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager)
13 | caBundle: Cg==
14 | service:
15 | namespace: system
16 | name: webhook-service
17 | path: /convert
18 |
--------------------------------------------------------------------------------
/config/crd/patches/webhook_in_crons.yaml:
--------------------------------------------------------------------------------
1 | # The following patch enables conversion webhook for CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | name: crons.apps.kubedl.io
7 | spec:
8 | conversion:
9 | strategy: Webhook
10 | webhookClientConfig:
11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank,
12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager)
13 | caBundle: Cg==
14 | service:
15 | namespace: system
16 | name: webhook-service
17 | path: /convert
18 |
--------------------------------------------------------------------------------
/config/crd/patches/webhook_in_elasticbatchjobs.yaml:
--------------------------------------------------------------------------------
1 | # The following patch enables conversion webhook for CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | name: elasticbatchjobs.inference.kubedl.io
7 | spec:
8 | conversion:
9 | strategy: Webhook
10 | webhookClientConfig:
11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank,
12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager)
13 | caBundle: Cg==
14 | service:
15 | namespace: system
16 | name: webhook-service
17 | path: /convert
18 |
--------------------------------------------------------------------------------
/config/crd/patches/webhook_in_elasticdljobs.yaml:
--------------------------------------------------------------------------------
1 | # The following patch enables conversion webhook for CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | name: elasticdljobs.training.kubedl.io
7 | spec:
8 | conversion:
9 | strategy: Webhook
10 | webhookClientConfig:
11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank,
12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager)
13 | caBundle: Cg==
14 | service:
15 | namespace: system
16 | name: webhook-service
17 | path: /convert
18 |
--------------------------------------------------------------------------------
/config/crd/patches/webhook_in_marsjobs.yaml:
--------------------------------------------------------------------------------
1 | # The following patch enables conversion webhook for CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | name: marsjobs.training.kubedl.io
7 | spec:
8 | conversion:
9 | strategy: Webhook
10 | webhookClientConfig:
11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank,
12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager)
13 | caBundle: Cg==
14 | service:
15 | namespace: system
16 | name: webhook-service
17 | path: /convert
18 |
--------------------------------------------------------------------------------
/config/crd/patches/webhook_in_models.yaml:
--------------------------------------------------------------------------------
1 | # The following patch enables conversion webhook for CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | name: models.model.kubedl.io
7 | spec:
8 | conversion:
9 | strategy: Webhook
10 | webhookClientConfig:
11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank,
12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager)
13 | caBundle: Cg==
14 | service:
15 | namespace: system
16 | name: webhook-service
17 | path: /convert
18 |
--------------------------------------------------------------------------------
/config/crd/patches/webhook_in_modelversions.yaml:
--------------------------------------------------------------------------------
1 | # The following patch enables conversion webhook for CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | name: modelversions.model.kubedl.io
7 | spec:
8 | conversion:
9 | strategy: Webhook
10 | webhookClientConfig:
11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank,
12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager)
13 | caBundle: Cg==
14 | service:
15 | namespace: system
16 | name: webhook-service
17 | path: /convert
18 |
--------------------------------------------------------------------------------
/config/crd/patches/webhook_in_mpijobs.yaml:
--------------------------------------------------------------------------------
1 | # The following patch enables conversion webhook for CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | name: mpijobs.training.kubedl.io
7 | spec:
8 | conversion:
9 | strategy: Webhook
10 | webhookClientConfig:
11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank,
12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager)
13 | caBundle: Cg==
14 | service:
15 | namespace: system
16 | name: webhook-service
17 | path: /convert
18 |
--------------------------------------------------------------------------------
/config/crd/patches/webhook_in_notebooks.yaml:
--------------------------------------------------------------------------------
1 | # The following patch enables conversion webhook for CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | name: notebooks.notebook.kubedl.io
7 | spec:
8 | conversion:
9 | strategy: Webhook
10 | webhookClientConfig:
11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank,
12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager)
13 | caBundle: Cg==
14 | service:
15 | namespace: system
16 | name: webhook-service
17 | path: /convert
18 |
--------------------------------------------------------------------------------
/config/crd/patches/webhook_in_pytorchjobs.yaml:
--------------------------------------------------------------------------------
1 | # The following patch enables conversion webhook for CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | name: pytorchjobs.training.kubedl.io
7 | spec:
8 | conversion:
9 | strategy: Webhook
10 | webhookClientConfig:
11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank,
12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager)
13 | caBundle: Cg==
14 | service:
15 | namespace: system
16 | name: webhook-service
17 | path: /convert
18 |
--------------------------------------------------------------------------------
/config/crd/patches/webhook_in_tfjobs.yaml:
--------------------------------------------------------------------------------
1 | # The following patch enables conversion webhook for CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | name: tfjobs.training.kubedl.io
7 | spec:
8 | conversion:
9 | strategy: Webhook
10 | webhookClientConfig:
11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank,
12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager)
13 | caBundle: Cg==
14 | service:
15 | namespace: system
16 | name: webhook-service
17 | path: /convert
18 |
--------------------------------------------------------------------------------
/config/crd/patches/webhook_in_xdljobs.yaml:
--------------------------------------------------------------------------------
1 | # The following patch enables conversion webhook for CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | name: xdljobs.training.kubedl.io
7 | spec:
8 | conversion:
9 | strategy: Webhook
10 | webhookClientConfig:
11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank,
12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager)
13 | caBundle: Cg==
14 | service:
15 | namespace: system
16 | name: webhook-service
17 | path: /convert
18 |
--------------------------------------------------------------------------------
/config/crd/patches/webhook_in_xgboostjobs.yaml:
--------------------------------------------------------------------------------
1 | # The following patch enables conversion webhook for CRD
2 | # CRD conversion requires k8s 1.13 or later.
3 | apiVersion: apiextensions.k8s.io/v1beta1
4 | kind: CustomResourceDefinition
5 | metadata:
6 | name: xgboostjobs.training.kubedl.io
7 | spec:
8 | conversion:
9 | strategy: Webhook
10 | webhookClientConfig:
11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank,
12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager)
13 | caBundle: Cg==
14 | service:
15 | namespace: system
16 | name: webhook-service
17 | path: /convert
18 |
--------------------------------------------------------------------------------
/config/default/manager_auth_proxy_patch.yaml:
--------------------------------------------------------------------------------
1 | # This patch injects a sidecar container, an HTTP proxy for the controller manager,
2 | # which performs RBAC authorization against the Kubernetes API using SubjectAccessReviews.
3 | apiVersion: apps/v1
4 | kind: Deployment
5 | metadata:
6 | name: controller-manager
7 | namespace: system
8 | spec:
9 | template:
10 | spec:
11 | containers:
12 | - name: kube-rbac-proxy
13 | image: gcr.io/kubebuilder/kube-rbac-proxy:v0.4.0
14 | args:
15 | - "--secure-listen-address=0.0.0.0:8443"
16 | - "--upstream=http://127.0.0.1:8080/"  # forwards to the manager's metrics address set below
17 | - "--logtostderr=true"
18 | - "--v=10"  # NOTE(review): very high log verbosity; confirm this is intended outside debugging
19 | ports:
20 | - containerPort: 8443
21 | name: https
22 | - name: manager
23 | args:
24 | - "--metrics-addr=127.0.0.1:8080"  # loopback only, so metrics are reachable solely through the proxy container
25 | - "--enable-leader-election"
26 |
--------------------------------------------------------------------------------
/config/default/manager_prometheus_metrics_patch.yaml:
--------------------------------------------------------------------------------
1 | # This patch enables Prometheus scraping for the manager pod.
2 | apiVersion: apps/v1
3 | kind: Deployment
4 | metadata:
5 | name: controller-manager
6 | namespace: system
7 | spec:
8 | template:
9 | metadata:
10 | annotations:
11 | prometheus.io/scrape: 'true'
12 | spec:
13 | containers:
14 | # Expose the prometheus metrics on default port
15 | - name: manager
16 | ports:
17 | - containerPort: 8080
18 | name: metrics
19 | protocol: TCP
20 |
--------------------------------------------------------------------------------
/config/default/manager_webhook_patch.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1  # patch: adds the webhook server port and its serving-cert volume to the manager container
2 | kind: Deployment
3 | metadata:
4 | name: controller-manager
5 | namespace: system
6 | spec:
7 | template:
8 | spec:
9 | containers:
10 | - name: manager
11 | ports:
12 | - containerPort: 9443
13 | name: webhook-server
14 | protocol: TCP
15 | volumeMounts:
16 | - mountPath: /tmp/k8s-webhook-server/serving-certs  # where the "cert" volume declared below is mounted
17 | name: cert
18 | readOnly: true
19 | volumes:
20 | - name: cert
21 | secret:
22 | defaultMode: 420  # decimal 420 == octal 0644
23 | secretName: webhook-server-cert  # NOTE(review): presumably issued by cert-manager; confirm against config/certmanager
24 |
--------------------------------------------------------------------------------
/config/default/webhookcainjection_patch.yaml:
--------------------------------------------------------------------------------
1 | # This patch adds the cert-manager CA-injection annotation to the admission webhook configs;
2 | # the variables $(CERTIFICATE_NAMESPACE) and $(CERTIFICATE_NAME) will be substituted by kustomize.
3 | apiVersion: admissionregistration.k8s.io/v1beta1
4 | kind: MutatingWebhookConfiguration
5 | metadata:
6 | name: mutating-webhook-configuration
7 | annotations:
8 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
9 | ---
10 | apiVersion: admissionregistration.k8s.io/v1beta1
11 | kind: ValidatingWebhookConfiguration
12 | metadata:
13 | name: validating-webhook-configuration
14 | annotations:
15 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME)
16 |
--------------------------------------------------------------------------------
/config/manager/kustomization.yaml:
--------------------------------------------------------------------------------
1 | resources:
2 | - manager.yaml
3 | apiVersion: kustomize.config.k8s.io/v1beta1
4 | kind: Kustomization
5 | images:
6 | - name: controller
7 | newName: kubedl/kubedl
8 | newTag: 0.4.1
9 |
--------------------------------------------------------------------------------
/config/manager/manager.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1  # Namespace for the controller manager, followed by the manager Deployment itself
2 | kind: Namespace
3 | metadata:
4 | labels:
5 | control-plane: controller-manager
6 | name: system
7 | ---
8 | apiVersion: apps/v1
9 | kind: Deployment
10 | metadata:
11 | name: controller-manager
12 | namespace: system
13 | labels:
14 | control-plane: controller-manager
15 | spec:
16 | selector:
17 | matchLabels:
18 | control-plane: controller-manager
19 | replicas: 1
20 | template:
21 | metadata:
22 | labels:
23 | control-plane: controller-manager  # must match spec.selector.matchLabels above
24 | spec:
25 | containers:
26 | - command:
27 | - /manager
28 | args:
29 | - --enable-leader-election
30 | image: controller:latest  # placeholder name; remapped by the images: entry in config/manager/kustomization.yaml
31 | name: manager
32 | resources:
33 | limits:
34 | cpu: 100m
35 | memory: 30Mi  # NOTE(review): small limit for a manager serving many CRDs; confirm it is sufficient
36 | requests:
37 | cpu: 100m
38 | memory: 20Mi
39 | terminationGracePeriodSeconds: 10
40 |
--------------------------------------------------------------------------------
/config/rbac/auth_proxy_role.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: rbac.authorization.k8s.io/v1
2 | kind: ClusterRole
3 | metadata:
4 | name: proxy-role
5 | rules:
6 | - apiGroups: ["authentication.k8s.io"]
7 | resources:
8 | - tokenreviews
9 | verbs: ["create"]
10 | - apiGroups: ["authorization.k8s.io"]
11 | resources:
12 | - subjectaccessreviews
13 | verbs: ["create"]
14 |
--------------------------------------------------------------------------------
/config/rbac/auth_proxy_role_binding.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: rbac.authorization.k8s.io/v1
2 | kind: ClusterRoleBinding
3 | metadata:
4 | name: proxy-rolebinding
5 | roleRef:
6 | apiGroup: rbac.authorization.k8s.io
7 | kind: ClusterRole
8 | name: proxy-role
9 | subjects:
10 | - kind: ServiceAccount
11 | name: default
12 | namespace: system
13 |
--------------------------------------------------------------------------------
/config/rbac/auth_proxy_service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 | annotations:
5 | prometheus.io/port: "8443"
6 | prometheus.io/scheme: https
7 | prometheus.io/scrape: "true"
8 | labels:
9 | control-plane: controller-manager
10 | name: controller-manager-metrics-service
11 | namespace: system
12 | spec:
13 | ports:
14 | - name: https
15 | port: 8443
16 | targetPort: https
17 | selector:
18 | control-plane: controller-manager
19 |
--------------------------------------------------------------------------------
/config/rbac/cachebackend_editor_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to edit cachebackends.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: cachebackend-editor-role
6 | rules:
7 | - apiGroups:
8 | - cache.kubedl.io
9 | resources:
10 | - cachebackends
11 | verbs:
12 | - create
13 | - delete
14 | - get
15 | - list
16 | - patch
17 | - update
18 | - watch
19 | - apiGroups:
20 | - cache.kubedl.io
21 | resources:
22 | - cachebackends/status
23 | verbs:
24 | - get
25 |
--------------------------------------------------------------------------------
/config/rbac/cachebackend_viewer_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to view cachebackends.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: cachebackend-viewer-role
6 | rules:
7 | - apiGroups:
8 | - cache.kubedl.io
9 | resources:
10 | - cachebackends
11 | verbs:
12 | - get
13 | - list
14 | - watch
15 | - apiGroups:
16 | - cache.kubedl.io
17 | resources:
18 | - cachebackends/status
19 | verbs:
20 | - get
21 |
--------------------------------------------------------------------------------
/config/rbac/cron_editor_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to edit crons.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: cron-editor-role
6 | rules:
7 | - apiGroups:
8 | - apps.kubedl.io
9 | resources:
10 | - crons
11 | verbs:
12 | - create
13 | - delete
14 | - get
15 | - list
16 | - patch
17 | - update
18 | - watch
19 | - apiGroups:
20 | - apps.kubedl.io
21 | resources:
22 | - crons/status
23 | verbs:
24 | - get
25 |
--------------------------------------------------------------------------------
/config/rbac/cron_viewer_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to view crons.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: cron-viewer-role
6 | rules:
7 | - apiGroups:
8 | - apps.kubedl.io
9 | resources:
10 | - crons
11 | verbs:
12 | - get
13 | - list
14 | - watch
15 | - apiGroups:
16 | - apps.kubedl.io
17 | resources:
18 | - crons/status
19 | verbs:
20 | - get
21 |
--------------------------------------------------------------------------------
/config/rbac/elasticbatchjob_editor_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to edit elasticbatchjobs.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: elasticbatchjob-editor-role
6 | rules:
7 | - apiGroups:
8 | - inference.kubedl.io
9 | resources:
10 | - elasticbatchjobs
11 | verbs:
12 | - create
13 | - delete
14 | - get
15 | - list
16 | - patch
17 | - update
18 | - watch
19 | - apiGroups:
20 | - inference.kubedl.io
21 | resources:
22 | - elasticbatchjobs/status
23 | verbs:
24 | - get
25 |
--------------------------------------------------------------------------------
/config/rbac/elasticbatchjob_viewer_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to view elasticbatchjobs.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: elasticbatchjob-viewer-role
6 | rules:
7 | - apiGroups:
8 | - inference.kubedl.io
9 | resources:
10 | - elasticbatchjobs
11 | verbs:
12 | - get
13 | - list
14 | - watch
15 | - apiGroups:
16 | - inference.kubedl.io
17 | resources:
18 | - elasticbatchjobs/status
19 | verbs:
20 | - get
21 |
--------------------------------------------------------------------------------
/config/rbac/elasticdljob_editor_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to edit elasticdljobs.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: elasticdljob-editor-role
6 | rules:
7 | - apiGroups:
8 | - training.kubedl.io
9 | resources:
10 | - elasticdljobs
11 | verbs:
12 | - create
13 | - delete
14 | - get
15 | - list
16 | - patch
17 | - update
18 | - watch
19 | - apiGroups:
20 | - training.kubedl.io
21 | resources:
22 | - elasticdljobs/status
23 | verbs:
24 | - get
25 |
--------------------------------------------------------------------------------
/config/rbac/elasticdljob_viewer_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to view elasticdljobs.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: elasticdljob-viewer-role
6 | rules:
7 | - apiGroups:
8 | - training.kubedl.io
9 | resources:
10 | - elasticdljobs
11 | verbs:
12 | - get
13 | - list
14 | - watch
15 | - apiGroups:
16 | - training.kubedl.io
17 | resources:
18 | - elasticdljobs/status
19 | verbs:
20 | - get
21 |
--------------------------------------------------------------------------------
/config/rbac/kustomization.yaml:
--------------------------------------------------------------------------------
1 | resources:
2 | - role.yaml
3 | - role_binding.yaml
4 | - leader_election_role.yaml
5 | - leader_election_role_binding.yaml
6 | # Comment out the following 3 lines if you want to disable
7 | # the auth proxy (https://github.com/brancz/kube-rbac-proxy)
8 | # which protects your /metrics endpoint.
9 | - auth_proxy_service.yaml
10 | - auth_proxy_role.yaml
11 | - auth_proxy_role_binding.yaml
12 |
--------------------------------------------------------------------------------
/config/rbac/leader_election_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions to do leader election.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: leader-election-role
6 | rules:
7 | - apiGroups:
8 | - ""
9 | resources:
10 | - configmaps
11 | verbs:
12 | - get
13 | - list
14 | - watch
15 | - create
16 | - update
17 | - patch
18 | - delete
19 | - apiGroups:
20 | - ""
21 | resources:
22 | - configmaps/status
23 | verbs:
24 | - get
25 | - update
26 | - patch
27 | - apiGroups:
28 | - ""
29 | resources:
30 | - events
31 | verbs:
32 | - create
33 |
--------------------------------------------------------------------------------
/config/rbac/leader_election_role_binding.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: rbac.authorization.k8s.io/v1
2 | kind: ClusterRoleBinding
3 | metadata:
4 | name: leader-election-rolebinding
5 | roleRef:
6 | apiGroup: rbac.authorization.k8s.io
7 | kind: ClusterRole
8 | name: leader-election-role
9 | subjects:
10 | - kind: ServiceAccount
11 | name: default
12 | namespace: system
13 |
--------------------------------------------------------------------------------
/config/rbac/marsjob_editor_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to edit marsjobs.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: marsjob-editor-role
6 | rules:
7 | - apiGroups:
8 | - training.kubedl.io
9 | resources:
10 | - marsjobs
11 | verbs:
12 | - create
13 | - delete
14 | - get
15 | - list
16 | - patch
17 | - update
18 | - watch
19 | - apiGroups:
20 | - training.kubedl.io
21 | resources:
22 | - marsjobs/status
23 | verbs:
24 | - get
25 |
--------------------------------------------------------------------------------
/config/rbac/marsjob_viewer_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to view marsjobs.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: marsjob-viewer-role
6 | rules:
7 | - apiGroups:
8 | - training.kubedl.io
9 | resources:
10 | - marsjobs
11 | verbs:
12 | - get
13 | - list
14 | - watch
15 | - apiGroups:
16 | - training.kubedl.io
17 | resources:
18 | - marsjobs/status
19 | verbs:
20 | - get
21 |
--------------------------------------------------------------------------------
/config/rbac/model_editor_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to edit models.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: model-editor-role
6 | rules:
7 | - apiGroups:
8 | - model.kubedl.io
9 | resources:
10 | - models
11 | verbs:
12 | - create
13 | - delete
14 | - get
15 | - list
16 | - patch
17 | - update
18 | - watch
19 | - apiGroups:
20 | - model.kubedl.io
21 | resources:
22 | - models/status
23 | verbs:
24 | - get
25 |
--------------------------------------------------------------------------------
/config/rbac/model_viewer_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to view models.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: model-viewer-role
6 | rules:
7 | - apiGroups:
8 | - model.kubedl.io
9 | resources:
10 | - models
11 | verbs:
12 | - get
13 | - list
14 | - watch
15 | - apiGroups:
16 | - model.kubedl.io
17 | resources:
18 | - models/status
19 | verbs:
20 | - get
21 |
--------------------------------------------------------------------------------
/config/rbac/modelversion_editor_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to edit modelversions.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: modelversion-editor-role
6 | rules:
7 | - apiGroups:
8 | - model.kubedl.io
9 | resources:
10 | - modelversions
11 | verbs:
12 | - create
13 | - delete
14 | - get
15 | - list
16 | - patch
17 | - update
18 | - watch
19 | - apiGroups:
20 | - model.kubedl.io
21 | resources:
22 | - modelversions/status
23 | verbs:
24 | - get
25 |
--------------------------------------------------------------------------------
/config/rbac/modelversion_viewer_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to view modelversions.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: modelversion-viewer-role
6 | rules:
7 | - apiGroups:
8 | - model.kubedl.io
9 | resources:
10 | - modelversions
11 | verbs:
12 | - get
13 | - list
14 | - watch
15 | - apiGroups:
16 | - model.kubedl.io
17 | resources:
18 | - modelversions/status
19 | verbs:
20 | - get
21 |
--------------------------------------------------------------------------------
/config/rbac/mpijob_editor_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to edit mpijobs.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: mpijob-editor-role
6 | rules:
7 | - apiGroups:
8 | - training.kubedl.io
9 | resources:
10 | - mpijobs
11 | verbs:
12 | - create
13 | - delete
14 | - get
15 | - list
16 | - patch
17 | - update
18 | - watch
19 | - apiGroups:
20 | - training.kubedl.io
21 | resources:
22 | - mpijobs/status
23 | verbs:
24 | - get
25 |
--------------------------------------------------------------------------------
/config/rbac/mpijob_viewer_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to view mpijobs.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: mpijob-viewer-role
6 | rules:
7 | - apiGroups:
8 | - training.kubedl.io
9 | resources:
10 | - mpijobs
11 | verbs:
12 | - get
13 | - list
14 | - watch
15 | - apiGroups:
16 | - training.kubedl.io
17 | resources:
18 | - mpijobs/status
19 | verbs:
20 | - get
21 |
--------------------------------------------------------------------------------
/config/rbac/notebook_editor_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to edit notebooks.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: notebook-editor-role
6 | rules:
7 | - apiGroups:
8 | - notebook.kubedl.io
9 | resources:
10 | - notebooks
11 | verbs:
12 | - create
13 | - delete
14 | - get
15 | - list
16 | - patch
17 | - update
18 | - watch
19 | - apiGroups:
20 | - notebook.kubedl.io
21 | resources:
22 | - notebooks/status
23 | verbs:
24 | - get
25 |
--------------------------------------------------------------------------------
/config/rbac/notebook_viewer_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to view notebooks.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: notebook-viewer-role
6 | rules:
7 | - apiGroups:
8 | - notebook.kubedl.io
9 | resources:
10 | - notebooks
11 | verbs:
12 | - get
13 | - list
14 | - watch
15 | - apiGroups:
16 | - notebook.kubedl.io
17 | resources:
18 | - notebooks/status
19 | verbs:
20 | - get
21 |
--------------------------------------------------------------------------------
/config/rbac/pytorchjob_editor_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to edit pytorchjobs.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: pytorchjob-editor-role
6 | rules:
7 | - apiGroups:
8 | - training.kubedl.io
9 | resources:
10 | - pytorchjobs
11 | verbs:
12 | - create
13 | - delete
14 | - get
15 | - list
16 | - patch
17 | - update
18 | - watch
19 | - apiGroups:
20 | - training.kubedl.io
21 | resources:
22 | - pytorchjobs/status
23 | verbs:
24 | - get
25 |
--------------------------------------------------------------------------------
/config/rbac/pytorchjob_viewer_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to view pytorchjobs.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: pytorchjob-viewer-role
6 | rules:
7 | - apiGroups:
8 | - training.kubedl.io
9 | resources:
10 | - pytorchjobs
11 | verbs:
12 | - get
13 | - list
14 | - watch
15 | - apiGroups:
16 | - training.kubedl.io
17 | resources:
18 | - pytorchjobs/status
19 | verbs:
20 | - get
21 |
--------------------------------------------------------------------------------
/config/rbac/role_binding.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: rbac.authorization.k8s.io/v1
2 | kind: ClusterRoleBinding
3 | metadata:
4 | name: manager-rolebinding
5 | roleRef:
6 | apiGroup: rbac.authorization.k8s.io
7 | kind: ClusterRole
8 | name: manager-role
9 | subjects:
10 | - kind: ServiceAccount
11 | name: default
12 | namespace: system
13 |
--------------------------------------------------------------------------------
/config/rbac/tfjob_editor_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to edit tfjobs.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: tfjob-editor-role
6 | rules:
7 | - apiGroups:
8 | - training.kubedl.io
9 | resources:
10 | - tfjobs
11 | verbs:
12 | - create
13 | - delete
14 | - get
15 | - list
16 | - patch
17 | - update
18 | - watch
19 | - apiGroups:
20 | - training.kubedl.io
21 | resources:
22 | - tfjobs/status
23 | verbs:
24 | - get
25 |
--------------------------------------------------------------------------------
/config/rbac/tfjob_viewer_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to view tfjobs.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: tfjob-viewer-role
6 | rules:
7 | - apiGroups:
8 | - training.kubedl.io
9 | resources:
10 | - tfjobs
11 | verbs:
12 | - get
13 | - list
14 | - watch
15 | - apiGroups:
16 | - training.kubedl.io
17 | resources:
18 | - tfjobs/status
19 | verbs:
20 | - get
21 |
--------------------------------------------------------------------------------
/config/rbac/xdljob_editor_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to edit xdljobs.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: xdljob-editor-role
6 | rules:
7 | - apiGroups:
8 | - training.kubedl.io
9 | resources:
10 | - xdljobs
11 | verbs:
12 | - create
13 | - delete
14 | - get
15 | - list
16 | - patch
17 | - update
18 | - watch
19 | - apiGroups:
20 | - training.kubedl.io
21 | resources:
22 | - xdljobs/status
23 | verbs:
24 | - get
25 |
--------------------------------------------------------------------------------
/config/rbac/xdljob_viewer_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to view xdljobs.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: xdljob-viewer-role
6 | rules:
7 | - apiGroups:
8 | - training.kubedl.io
9 | resources:
10 | - xdljobs
11 | verbs:
12 | - get
13 | - list
14 | - watch
15 | - apiGroups:
16 | - training.kubedl.io
17 | resources:
18 | - xdljobs/status
19 | verbs:
20 | - get
21 |
--------------------------------------------------------------------------------
/config/rbac/xgboostjob_editor_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to edit xgboostjobs.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: xgboostjob-editor-role
6 | rules:
7 | - apiGroups:
8 | - training.kubedl.io
9 | resources:
10 | - xgboostjobs
11 | verbs:
12 | - create
13 | - delete
14 | - get
15 | - list
16 | - patch
17 | - update
18 | - watch
19 | - apiGroups:
20 | - training.kubedl.io
21 | resources:
22 | - xgboostjobs/status
23 | verbs:
24 | - get
25 |
--------------------------------------------------------------------------------
/config/rbac/xgboostjob_viewer_role.yaml:
--------------------------------------------------------------------------------
1 | # permissions for end users to view xgboostjobs.
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | kind: ClusterRole
4 | metadata:
5 | name: xgboostjob-viewer-role
6 | rules:
7 | - apiGroups:
8 | - training.kubedl.io
9 | resources:
10 | - xgboostjobs
11 | verbs:
12 | - get
13 | - list
14 | - watch
15 | - apiGroups:
16 | - training.kubedl.io
17 | resources:
18 | - xgboostjobs/status
19 | verbs:
20 | - get
21 |
--------------------------------------------------------------------------------
/config/samples/apps_v1alpha1_cron.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps.kubedl.io/v1alpha1
2 | kind: Cron
3 | metadata:
4 | name: cron-sample
5 | spec:
6 | # Add fields here
7 | foo: bar
8 |
--------------------------------------------------------------------------------
/config/samples/cache_v1alpha1_cachebackend.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: cache.kubedl.io/v1alpha1
2 | kind: CacheBackend
3 | metadata:
4 | name: cachebackend-sample
5 | spec:
6 | # Add fields here
7 | foo: bar
8 |
--------------------------------------------------------------------------------
/config/samples/inference_v1alpha1_elasticbatchjob.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: inference.kubedl.io/v1alpha1
2 | kind: ElasticBatchJob
3 | metadata:
4 | name: elasticbatchjob-sample
5 | spec:
6 | # Add fields here
7 | foo: bar
8 |
--------------------------------------------------------------------------------
/config/samples/model/dockerfile_configmap.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | name: dockerfile
5 | namespace: default
6 | data:
7 | dockerfile: |
8 | FROM busybox
9 | COPY build/ /kubedl-model
--------------------------------------------------------------------------------
/config/samples/model/kanikopod_sample.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | name: kaniko
5 | spec:
6 | containers:
7 | - name: kaniko
8 | image: gcr.io/kaniko-project/executor:latest
9 | args:
10 | - "--dockerfile=/workspace/dockerfile"
11 | - "--context=dir:///workspace/"
12 | - "--destination=jianhe6/kaniko:0.1"
13 | volumeMounts:
14 | - name: kaniko-secret
15 | mountPath: /kaniko/.docker
16 | - name: dockerfile
17 | mountPath: /workspace/
18 | - name: build-source
19 | mountPath: /workspace/build
20 | restartPolicy: Never
21 | volumes:
22 | - name: kaniko-secret
23 | secret:
24 | secretName: regcred
25 | items:
26 | - key: .dockerconfigjson
27 | path: config.json
28 | - name: dockerfile
29 | configMap:
30 | name: dockerfile
31 | - name: build-source
32 | hostPath:
33 | # directory location on host
34 | path: /foo
--------------------------------------------------------------------------------
/config/samples/model/model_v1alpha1_model.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: model.kubedl.io/v1alpha1
2 | kind: Model
3 | metadata:
4 | name: model1
5 | spec:
6 | status:
7 |
8 |
--------------------------------------------------------------------------------
/config/samples/model/model_v1alpha1_modelversion.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: model.kubedl.io/v1alpha1
2 | kind: ModelVersion
3 | metadata:
4 | name: mv-4
5 | namespace: default
6 | spec:
7 | modelName: model1
8 | createdBy: user1
9 | imageRepo: jianhe6/kaniko
10 | storage:
11 | localStorage:
12 | path: /foo
13 | nodeName: kind-control-plane
--------------------------------------------------------------------------------
/config/samples/model/model_v1alpha1_modelversion_nas.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: model.kubedl.io/v1alpha1
2 | kind: ModelVersion
3 | metadata:
4 | name: mv-4
5 | namespace: default
6 | spec:
7 | modelName: model1
8 | createdBy: user1
9 | imageRepo: jianhe6/kaniko
10 | storage:
11 | nfs:
12 | path: /
13 | server:
--------------------------------------------------------------------------------
/config/samples/notebook_v1alpha1_notebook.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: notebook.kubedl.io/v1alpha1
2 | kind: Notebook
3 | metadata:
4 | name: notebook-sample
5 | spec:
6 | # Add fields here
7 | foo: bar
8 |
--------------------------------------------------------------------------------
/config/samples/training_v1alpha1_elasticdljob.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: training.kubedl.io/v1alpha1
2 | kind: ElasticDLJob
3 | metadata:
4 | name: elasticdljob-sample
5 | spec:
6 | # Add fields here
7 | foo: bar
8 |
--------------------------------------------------------------------------------
/config/samples/training_v1alpha1_marsjob.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: training.kubedl.io/v1alpha1
2 | kind: MarsJob
3 | metadata:
4 | name: marsjob-sample
5 | spec:
6 | # Add fields here
7 | foo: bar
8 |
--------------------------------------------------------------------------------
/config/samples/training_v1alpha1_mpijob.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: training.kubedl.io/v1alpha1
2 | kind: MPIJob
3 | metadata:
4 | name: mpijob-sample
5 | spec:
6 | # Add fields here
7 | foo: bar
8 |
--------------------------------------------------------------------------------
/config/samples/training_v1alpha1_pytorchjob.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: training.kubedl.io/v1alpha1
2 | kind: PyTorchJob
3 | metadata:
4 | name: pytorchjob-sample
5 | spec:
6 | # Add fields here
7 | foo: bar
8 |
--------------------------------------------------------------------------------
/config/samples/training_v1alpha1_tfjob.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: training.kubedl.io/v1alpha1
2 | kind: TFJob
3 | metadata:
4 | name: tfjob-sample
5 | spec:
6 | # Add fields here
7 | foo: bar
8 |
--------------------------------------------------------------------------------
/config/samples/training_v1alpha1_xdljob.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: training.kubedl.io/v1alpha1
2 | kind: XDLJob
3 | metadata:
4 | name: xdljob-sample
5 | spec:
6 | # Add fields here
7 | foo: bar
8 |
--------------------------------------------------------------------------------
/config/samples/training_v1alpha1_xgboostjob.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: training.kubedl.io/v1alpha1
2 | kind: XGBoostJob
3 | metadata:
4 | name: xgboostjob-sample
5 | spec:
6 | # Add fields here
7 | foo: bar
8 |
--------------------------------------------------------------------------------
/config/webhook/kustomization.yaml:
--------------------------------------------------------------------------------
1 | resources:
2 | - manifests.yaml
3 | - service.yaml
4 |
5 | configurations:
6 | - kustomizeconfig.yaml
7 |
--------------------------------------------------------------------------------
/config/webhook/kustomizeconfig.yaml:
--------------------------------------------------------------------------------
1 | # The following config teaches kustomize where to look when substituting vars.
2 | # It requires kustomize v2.1.0 or newer to work properly.
3 | nameReference:
4 | - kind: Service
5 | version: v1
6 | fieldSpecs:
7 | - kind: MutatingWebhookConfiguration
8 | group: admissionregistration.k8s.io
9 | path: webhooks/clientConfig/service/name
10 | - kind: ValidatingWebhookConfiguration
11 | group: admissionregistration.k8s.io
12 | path: webhooks/clientConfig/service/name
13 |
14 | namespace:
15 | - kind: MutatingWebhookConfiguration
16 | group: admissionregistration.k8s.io
17 | path: webhooks/clientConfig/service/namespace
18 | create: true
19 | - kind: ValidatingWebhookConfiguration
20 | group: admissionregistration.k8s.io
21 | path: webhooks/clientConfig/service/namespace
22 | create: true
23 |
24 | varReference:
25 | - path: metadata/annotations
26 |
--------------------------------------------------------------------------------
/config/webhook/manifests.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kubedl-io/kubedl/b93a2b4689ffb07e4f811690801dd69305174b2f/config/webhook/manifests.yaml
--------------------------------------------------------------------------------
/config/webhook/service.yaml:
--------------------------------------------------------------------------------
1 |
2 | apiVersion: v1
3 | kind: Service
4 | metadata:
5 | name: webhook-service
6 | namespace: system
7 | spec:
8 | ports:
9 | - port: 443
10 | targetPort: 9443
11 | selector:
12 | control-plane: controller-manager
13 |
--------------------------------------------------------------------------------
/console/backend/cmd/backend-server/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "flag"
6 |
7 | "github.com/alibaba/kubedl/console/backend/pkg/client"
8 | "github.com/alibaba/kubedl/console/backend/pkg/routers"
9 | "github.com/alibaba/kubedl/console/backend/pkg/storage"
10 | )
11 |
12 | func main() {
13 | flag.Parse()
14 | client.Init()
15 | storage.RegisterStorageBackends()
16 | r := routers.InitRouter()
17 |
18 | client.Start(context.Background())
19 | _ = r.Run(":9090")
20 | }
21 |
--------------------------------------------------------------------------------
/console/backend/pkg/auth/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kubedl-io/kubedl/b93a2b4689ffb07e4f811690801dd69305174b2f/console/backend/pkg/auth/.gitkeep
--------------------------------------------------------------------------------
/console/backend/pkg/auth/empty_auth.go:
--------------------------------------------------------------------------------
1 | package auth
2 |
3 | import (
4 | "github.com/gin-contrib/sessions"
5 | "github.com/gin-gonic/gin"
6 | "k8s.io/klog"
7 | )
8 |
// emptyAuth is the Auth implementation returned by NewEmptyAuth. It performs
// no credential check: Login always stores the same login ID in the session.
type emptyAuth struct {
	// defaultLoginID is the login ID written to the session by Login and
	// compared against the session value by Authorize.
	defaultLoginID string
}
12 |
13 | func NewEmptyAuth() Auth {
14 | return &emptyAuth{
15 | defaultLoginID: "Anonymous",
16 | }
17 | }
18 |
19 | func (auth *emptyAuth) Login(c *gin.Context) error {
20 | session := sessions.Default(c)
21 | session.Set(SessionKeyLoginID, auth.defaultLoginID)
22 | return session.Save()
23 | }
24 |
25 | func (auth *emptyAuth) Logout(c *gin.Context) error {
26 | session := sessions.Default(c)
27 | session.Delete(SessionKeyLoginID)
28 | return session.Save()
29 | }
30 |
31 | func (auth emptyAuth) Authorize(c *gin.Context) error {
32 | session := sessions.Default(c)
33 | v := session.Get(SessionKeyLoginID)
34 | if v == nil || v.(string) != auth.defaultLoginID {
35 | klog.Warningf("Authorize failed")
36 | return auth.Login(c)
37 | }
38 | return nil
39 | }
40 |
--------------------------------------------------------------------------------
/console/backend/pkg/auth/oauth.go:
--------------------------------------------------------------------------------
1 | package auth
2 |
3 | import (
4 | "errors"
5 | "flag"
6 |
7 | "github.com/gin-gonic/gin"
8 | )
9 |
const (
	// SessionKeyLoginID is the session key under which the login ID is stored.
	SessionKeyLoginID = "loginId"
)
13 |
14 | func init() {
15 | authTypes := map[string]AuthRegister{
16 | "none": NewEmptyAuth,
17 | "config": NewConfigAuth,
18 | }
19 |
20 | var authType string
21 | flag.StringVar(&authType, "authentication-mode", "none",
22 | "set authentication mode. By default, no authentication . Use --authentication-mode=none to explicitly disable authentication,"+
23 | " --authentication-mode=config to authenticate using configMap")
24 | flag.Parse()
25 | GetAuth = authTypes[authType]
26 | }
27 |
var (
	// GetAuth constructs the Auth implementation for the selected
	// authentication mode. It is assigned by this package's init function
	// based on the --authentication-mode flag.
	GetAuth AuthRegister

	// ErrLoginInvalid is the error value for an invalid login ID.
	ErrLoginInvalid = errors.New("login id is invalid")
	// ErrGetAuthError is the error value for a failure to obtain oauth info.
	ErrGetAuthError = errors.New("get oauthInfo error")
)
34 |
// AuthRegister is a constructor that returns an Auth implementation.
type AuthRegister func() Auth
36 |
// Auth is the set of operations an authentication mode provides. Every method
// receives the request's gin.Context and reports failure through its error
// result.
type Auth interface {
	// Login establishes a login for the request.
	Login(c *gin.Context) error
	// Logout ends the login associated with the request.
	Logout(c *gin.Context) error
	// Authorize checks whether the request is allowed to proceed.
	// NOTE(review): the emptyAuth implementation logs the session in on
	// failure instead of rejecting — confirm the intended contract.
	Authorize(c *gin.Context) error
}
42 |
--------------------------------------------------------------------------------
/console/backend/pkg/constants/const.go:
--------------------------------------------------------------------------------
1 | package constants
2 |
// Shared string constants for the console backend.
const (
	// KubeDLSystemNamespace is presumably the namespace KubeDL system
	// components run in — TODO confirm against callers.
	KubeDLSystemNamespace = "kubedl-system"
	// ApiV1Routes is the path prefix "/api/v1".
	ApiV1Routes = "/api/v1"
	// KubeDLConsoleConfig is presumably the name of the dashboard's config
	// object — TODO confirm against callers.
	KubeDLConsoleConfig = "kubedl-dashboard-config"
)
8 |
--------------------------------------------------------------------------------
/console/backend/pkg/handlers/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kubedl-io/kubedl/b93a2b4689ffb07e4f811690801dd69305174b2f/console/backend/pkg/handlers/.gitkeep
--------------------------------------------------------------------------------
/console/backend/pkg/middleware/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kubedl-io/kubedl/b93a2b4689ffb07e4f811690801dd69305174b2f/console/backend/pkg/middleware/.gitkeep
--------------------------------------------------------------------------------
/console/backend/pkg/model/code_source.go:
--------------------------------------------------------------------------------
1 | package model
2 |
// CodeSource describes a code source. Every field is a string and is
// serialised to JSON under the key given in its struct tag.
type CodeSource struct {
	UserId string `json:"userid"`

	Username string `json:"username"`

	Name string `json:"name"`

	Type string `json:"type"`

	CodePath string `json:"code_path"`

	DefaultBranch string `json:"default_branch"`

	LocalPath string `json:"local_path"`

	Description string `json:"description"`

	// CreateTime and UpdateTime are kept as strings; the time format is not
	// defined in this file.
	CreateTime string `json:"create_time"`

	UpdateTime string `json:"update_time"`
}
24 |
// CodeSourceMap maps a string key to a CodeSource.
// NOTE(review): the key is presumably the code source name — confirm against callers.
type CodeSourceMap map[string]CodeSource
26 |
--------------------------------------------------------------------------------
/console/backend/pkg/model/data.go:
--------------------------------------------------------------------------------
1 | package model
2 |
// ClusterTotalResource carries total CPU, memory and GPU figures as int64
// values. Units are not defined in this file.
type ClusterTotalResource struct {
	TotalCPU    int64 `json:"totalCPU"`
	TotalMemory int64 `json:"totalMemory"`
	TotalGPU    int64 `json:"totalGPU"`
}
8 |
// ClusterRequestResource carries requested CPU, memory and GPU figures as
// int64 values. Units are not defined in this file.
type ClusterRequestResource struct {
	RequestCPU    int64 `json:"requestCPU"`
	RequestMemory int64 `json:"requestMemory"`
	RequestGPU    int64 `json:"requestGPU"`
}
14 |
// ClusterNodeInfo describes one node: its name, instance type and GPU type,
// plus the same total/requested CPU, memory and GPU figures that
// ClusterTotalResource and ClusterRequestResource carry.
type ClusterNodeInfo struct {
	NodeName      string `json:"nodeName"`
	InstanceType  string `json:"instanceType"`
	GPUType       string `json:"gpuType"`
	TotalCPU      int64  `json:"totalCPU"`
	TotalMemory   int64  `json:"totalMemory"`
	TotalGPU      int64  `json:"totalGPU"`
	RequestCPU    int64  `json:"requestCPU"`
	RequestMemory int64  `json:"requestMemory"`
	RequestGPU    int64  `json:"requestGPU"`
}
26 |
// ClusterNodeInfoList wraps a slice of ClusterNodeInfo.
type ClusterNodeInfoList struct {
	// Items is omitted from the JSON output when empty.
	Items []ClusterNodeInfo `json:"items,omitempty"`
}
30 |
--------------------------------------------------------------------------------
/console/backend/pkg/model/data_source.go:
--------------------------------------------------------------------------------
1 | package model
2 |
// DataSource describes a data source. Every field is a string and is
// serialised to JSON under the key given in its struct tag.
type DataSource struct {
	UserId string `json:"userid"`

	Username string `json:"username"`

	Namespace string `json:"namespace"`

	Name string `json:"name"`

	Type string `json:"type"`

	PvcName string `json:"pvc_name"`

	LocalPath string `json:"local_path"`

	Description string `json:"description"`

	// CreateTime and UpdateTime are kept as strings; the time format is not
	// defined in this file.
	CreateTime string `json:"create_time"`

	UpdateTime string `json:"update_time"`
}
24 |
// DataSourceMap maps a string key to a DataSource.
// NOTE(review): the key is presumably the data source name — confirm against callers.
type DataSourceMap map[string]DataSource
26 |
--------------------------------------------------------------------------------
/console/backend/pkg/model/workspace.go:
--------------------------------------------------------------------------------
1 | package model
2 |
// WorkspacePrefix is the string prefix "workspace-".
// NOTE(review): presumably prepended to workspace-derived resource names — confirm against callers.
const WorkspacePrefix = "workspace-"

// WorkspaceKubeDLLabel is a label key; utils.IsKubedlManagedNamespace treats
// a Namespace carrying this label as managed by KubeDL.
const WorkspaceKubeDLLabel = "kubedl.io/workspace-name"
5 |
// WorkspaceInfo is the object returned in HTTP responses describing a
// workspace. Fields are serialised to JSON under the keys given in their
// struct tags.
type WorkspaceInfo struct {
	Username string `json:"username"`

	Namespace string `json:"namespace"`

	// CPU, Memory, GPU and Storage are int64 quantities; their units are not
	// defined in this file.
	CPU int64 `json:"cpu"`

	Memory int64 `json:"memory"`

	GPU int64 `json:"gpu"`

	Storage int64 `json:"storage"`

	Name string `json:"name"`

	Type string `json:"type"`

	PvcName string `json:"pvc_name"`

	// Status is one of:
	//   Created
	//   Ready: pvc bound
	Status string `json:"status"`

	LocalPath string `json:"local_path"`

	Description string `json:"description"`

	// The time fields below are kept as strings; their format is not defined
	// in this file.
	CreateTime string `json:"create_time"`

	UpdateTime string `json:"update_time"`

	DurationTime string `json:"duration_time"`
}
40 |
--------------------------------------------------------------------------------
/console/backend/pkg/utils/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kubedl-io/kubedl/b93a2b4689ffb07e4f811690801dd69305174b2f/console/backend/pkg/utils/.gitkeep
--------------------------------------------------------------------------------
/console/backend/pkg/utils/job.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import (
4 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
5 | "k8s.io/apimachinery/pkg/runtime"
6 | "sigs.k8s.io/controller-runtime/pkg/client"
7 |
8 | "github.com/alibaba/kubedl/apis/notebook/v1alpha1"
9 | v1 "github.com/alibaba/kubedl/apis/training/v1alpha1"
10 | )
11 |
12 | func InitJobRuntimeObjectByKind(kind string) client.Object {
13 | var (
14 | object client.Object
15 | )
16 |
17 | switch kind {
18 | case v1.TFJobKind:
19 | object = &v1.TFJob{}
20 | case v1.PyTorchJobKind:
21 | object = &v1.PyTorchJob{}
22 | case v1.XDLJobKind:
23 | object = &v1.XDLJob{}
24 | case v1.XGBoostJobKind:
25 | object = &v1.XGBoostJob{}
26 | case v1alpha1.NotebookKind:
27 | object = &v1alpha1.Notebook{}
28 | }
29 |
30 | return object
31 | }
32 |
33 | func RuntimeObjToMetaObj(obj runtime.Object) (metaObj metav1.Object, ok bool) {
34 | meta, ok := obj.(metav1.Object)
35 | return meta, ok
36 | }
37 |
--------------------------------------------------------------------------------
/console/backend/pkg/utils/kubedl.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import (
4 | "github.com/alibaba/kubedl/console/backend/pkg/model"
5 | corev1 "k8s.io/api/core/v1"
6 | )
7 |
8 | func IsKubedlManagedNamespace(namespace *corev1.Namespace) bool {
9 | if _, ok := namespace.Labels[model.WorkspaceKubeDLLabel]; ok {
10 | return true
11 | }
12 | return false
13 | }
14 |
--------------------------------------------------------------------------------
/console/backend/pkg/utils/redirects.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import (
4 | "github.com/gin-gonic/gin"
5 | )
6 |
7 | func Redirect403(c *gin.Context) {
8 | c.JSON(403, nil)
9 | }
10 |
11 | func Redirect404(c *gin.Context) {
12 | c.JSON(404, nil)
13 | }
14 |
15 | func Redirect500(c *gin.Context) {
16 | c.JSON(500, nil)
17 | }
18 |
--------------------------------------------------------------------------------
/console/frontend/abc.json:
--------------------------------------------------------------------------------
1 | {
2 | "assets": {
3 | "type": "command",
4 | "command": {
5 | "cmd": ["tnpm install", "tnpm run build", "mv ./dist $BUILD_DEST"]
6 | }
7 | }
8 | }
9 |
--------------------------------------------------------------------------------
/console/frontend/config/defaultSettings.js:
--------------------------------------------------------------------------------
// Default layout and theme settings exported for the console frontend.
export default {
  navTheme: "dark",
  // Daybreak blue
  primaryColor: "daybreak",
  layout: "sidemenu",
  contentWidth: "Fluid",
  fixedHeader: false,
  autoHideHeader: false,
  fixSiderbar: false,
  colorWeak: false,
  menu: {
    locale: true
  },
  pwa: false,
  iconfontUrl: ""
};
17 |
--------------------------------------------------------------------------------
/console/frontend/jest-puppeteer.config.js:
--------------------------------------------------------------------------------
// jest-puppeteer configuration: command-line arguments passed when the
// browser is launched.
// ps https://github.com/GoogleChrome/puppeteer/issues/3120
module.exports = {
  launch: {
    args: [
      "--disable-gpu",
      "--disable-dev-shm-usage",
      "--no-first-run",
      "--no-zygote",
      "--no-sandbox"
    ]
  }
};
13 |
--------------------------------------------------------------------------------
/console/frontend/jest.config.js:
--------------------------------------------------------------------------------
// Jest configuration for the frontend tests; uses the jest-puppeteer preset.
module.exports = {
  testURL: "http://localhost:8000",
  preset: "jest-puppeteer",
  // Extra setup file loaded for the tests.
  extraSetupFiles: ["./tests/setupTests.js"],
  // Globals made available to the test code.
  globals: {
    ANT_DESIGN_PRO_ONLY_DO_NOT_USE_IN_YOUR_PRODUCTION: false,
    localStorage: null
  }
};
10 |
--------------------------------------------------------------------------------
/console/frontend/jsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "emitDecoratorMetadata": true,
4 | "experimentalDecorators": true,
5 | "baseUrl": ".",
6 | "paths": {
7 | "@/*": ["./src/*"],
8 | "@@/*": ["./src/.umi/*"]
9 | }
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/console/frontend/mock/route.js:
--------------------------------------------------------------------------------
// Mock data keyed by the "/api/auth_routes" path: maps a route to the
// authorities listed for it.
export default {
  "/api/auth_routes": {
    "/form/advanced-form": {
      authority: ["admin", "user"]
    }
  }
};
8 |
--------------------------------------------------------------------------------
/console/frontend/public/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kubedl-io/kubedl/b93a2b4689ffb07e4f811690801dd69305174b2f/console/frontend/public/favicon.png
--------------------------------------------------------------------------------
/console/frontend/public/icons/android-chrome-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kubedl-io/kubedl/b93a2b4689ffb07e4f811690801dd69305174b2f/console/frontend/public/icons/android-chrome-192x192.png
--------------------------------------------------------------------------------
/console/frontend/public/icons/android-chrome-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kubedl-io/kubedl/b93a2b4689ffb07e4f811690801dd69305174b2f/console/frontend/public/icons/android-chrome-512x512.png
--------------------------------------------------------------------------------
/console/frontend/src/components/Authorized/Authorized.jsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import { Result } from 'antd';
3 | import check from './CheckPermissions';
4 |
5 | const Authorized = ({
6 | children,
7 | authority,
8 | noMatch = (
9 |
33 | Want to add more pages? Please refer to{' '} 34 | 35 | use block 36 | 37 | 。 38 |
39 |