├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
└── workflows
│ ├── codeql.yml
│ ├── integration_test_minio_gitops.yaml
│ ├── linter-py.yaml
│ └── main.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── charts
└── mlflow-controller
│ ├── .helmignore
│ ├── Chart.yaml
│ ├── README.md
│ ├── templates
│ ├── _helpers.tpl
│ ├── deployment-ui.yaml
│ ├── deployment.yaml
│ ├── gitops-cm.yaml
│ ├── ingress.yaml
│ ├── mlflow-cm.yaml
│ ├── service.yaml
│ └── serviceaccount.yaml
│ └── values.yaml
├── doc
├── Mlflow Deployment controller.drawio
├── doc.md
└── gitops.md
├── examples
├── argo-manifest
│ ├── mlflow-controller-production.yaml
│ ├── mlflow-controller.yaml
│ ├── mlflow.yaml
│ └── seldon-core.yaml
├── gitops
│ └── gitops.ipynb
├── notebook
│ ├── deploy.yaml
│ └── mlflow.ipynb
└── readme.md
├── main.py
├── mlflow_controller
├── __init__.py
├── controller.py
├── gitops.py
├── mlflow_direct.py
├── mlservers
│ ├── kserve.py
│ ├── rclone.py
│ ├── seldon.py
│ └── utils.py
├── registries
│ ├── mlflow.py
│ └── mlflow_backend.py
└── utils
│ └── var_extract.py
├── requirements.txt
├── test.py
├── tests
├── docker_build_push.sh
├── install_gitea.sh
├── install_istio.sh
├── install_kserve.sh
├── install_kserve_deployment_controller.sh
├── install_mlflow.sh
├── install_seldon_core.sh
├── install_seldon_deployment_controller.sh
├── kind-cluster-1-24.yaml
├── log_mlflow_model.sh
├── mlflow-cm.yaml
├── mlflow
│ ├── iris.py
│ ├── list_model.py
│ └── test_deploy.py
├── pf_mlflow.sh
├── repo-test
│ ├── production
│ │ ├── kserve-s3.yaml
│ │ └── seldon-s3.yaml
│ └── staging
│ │ ├── kserve-s3.yaml
│ │ ├── kserve-s3t.yaml
│ │ ├── kserve-sa.yaml
│ │ ├── seldon-s3.yaml
│ │ ├── seldon-secret.yaml
│ │ └── seldon-single-model.yaml
└── setup_git_repo.sh
├── tox.ini
└── ui
├── Dockerfile
├── app.py
├── pages
├── deployments.py
├── logs.py
├── not_found_404.py
└── seldon.py
├── requirements.txt
└── seldon_deployments
├── card.py
└── data.py
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 |
26 | **Desktop (please complete the following information):**
27 | - OS: [e.g. iOS]
28 | - Browser [e.g. chrome, safari]
29 | - Version [e.g. 22]
30 |
31 | **Smartphone (please complete the following information):**
32 | - Device: [e.g. iPhone6]
33 | - OS: [e.g. iOS8.1]
34 | - Browser [e.g. stock browser, safari]
35 | - Version [e.g. 22]
36 |
37 | **Additional context**
38 | Add any other context about the problem here.
39 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
1 | # For most projects, this workflow file will not need changing; you simply need
2 | # to commit it to your repository.
3 | #
4 | # You may wish to alter this file to override the set of languages analyzed,
5 | # or to provide custom queries or build logic.
6 | #
7 | # ******** NOTE ********
8 | # We have attempted to detect the languages in your repository. Please check
9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
11 | #
12 | name: "CodeQL"
13 |
14 | on:
15 | push:
16 | branches: [ "main", gh-pages ]
17 | pull_request:
18 | # The branches below must be a subset of the branches above
19 | branches: [ "main" ]
20 | schedule:
21 | - cron: '33 5 * * 0'
22 |
23 | jobs:
24 | analyze:
25 | name: Analyze
26 | runs-on: ubuntu-latest
27 | permissions:
28 | actions: read
29 | contents: read
30 | security-events: write
31 |
32 | strategy:
33 | fail-fast: false
34 | matrix:
35 | language: [ 'python' ]
36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
37 | # Use only 'java' to analyze code written in Java, Kotlin or both
38 | # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
39 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
40 |
41 | steps:
42 | - name: Checkout repository
43 | uses: actions/checkout@v3
44 |
45 | # Initializes the CodeQL tools for scanning.
46 | - name: Initialize CodeQL
47 | uses: github/codeql-action/init@v2
48 | with:
49 | languages: ${{ matrix.language }}
50 | # If you wish to specify custom queries, you can do so here or in a config file.
51 | # By default, queries listed here will override any specified in a config file.
52 | # Prefix the list here with "+" to use these queries and those in the config file.
53 |
54 | # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
55 | # queries: security-extended,security-and-quality
56 |
57 |
58 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java).
59 | # If this step fails, then you should remove it and run the build manually (see below)
60 | - name: Autobuild
61 | uses: github/codeql-action/autobuild@v2
62 |
63 | # ℹ️ Command-line programs to run using the OS shell.
64 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
65 |
66 | # If the Autobuild fails above, remove it and uncomment the following three lines.
67 | # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
68 |
69 | # - run: |
70 | # echo "Run, Build Application using script"
71 | # ./location_of_script_within_repo/buildscript.sh
72 |
73 | - name: Perform CodeQL Analysis
74 | uses: github/codeql-action/analyze@v2
75 | with:
76 | category: "/language:${{matrix.language}}"
77 |
--------------------------------------------------------------------------------
/.github/workflows/integration_test_minio_gitops.yaml:
--------------------------------------------------------------------------------
1 | name: Integration test gitops in KinD [minio]
2 | on:
3 | pull_request:
4 |
5 |
6 | jobs:
7 | build:
8 | runs-on: ubuntu-latest
9 | strategy:
10 | fail-fast: false
11 | matrix:
12 | mlserver: [seldon,kserve]
13 | steps:
14 | - name: Checkout
15 | uses: actions/checkout@v3
16 | with:
17 | ref: ${{ github.event.pull_request.head.sha }}
18 |
19 | - uses: engineerd/setup-kind@v0.5.0
20 | with:
21 | skipClusterCreation: "true"
22 | version: v0.17.0
23 |
24 | - name: Install Helm
25 | uses: azure/setup-helm@v1
26 | with:
27 | version: v3.8.1
28 |
29 |
30 | - name: Create KinD Cluster
31 | run: kind create cluster --config tests/kind-cluster-1-24.yaml
32 |
33 | - name: Testing
34 | run: |
35 | kubectl cluster-info
36 | kubectl get pods -n kube-system
37 | echo "current-context:" $(kubectl config current-context)
38 | echo "environment-kubeconfig:" ${KUBECONFIG}
39 | kubectl get nodes
40 | kubectl wait --for=condition=Ready nodes --all --timeout=600s
41 |
42 | - name: Install Gitea
43 | run: ./tests/install_gitea.sh
44 |
45 | - name: setup git repo
46 | run: ./tests/setup_git_repo.sh
47 |
48 | - name: Install mlflow
49 | run: ./tests/install_mlflow.sh
50 |
51 | - name: PF Mlflow
52 | run: ./tests/pf_mlflow.sh
53 | - uses: actions/setup-python@v4
54 | with:
55 | python-version: '3.7'
56 |
57 | - name: Log model Mlflow
58 | run: ./tests/log_mlflow_model.sh
59 |
60 | - name: Install Kserve
61 | run: ./tests/install_kserve.sh
62 |
63 | - name: Install Seldon Core
64 | run: ./tests/install_seldon_core.sh
65 |
66 | - name: Build and Push image
67 | run: ./tests/docker_build_push.sh
68 |
69 | - name: Install deployment controller ${{matrix.mlserver}}
70 | run: ./tests/install_${{matrix.mlserver}}_deployment_controller.sh
71 | env:
72 | mlserver: ${{matrix.mlserver}}
73 |
74 |
--------------------------------------------------------------------------------
/.github/workflows/linter-py.yaml:
--------------------------------------------------------------------------------
1 | name: linter
2 |
3 | on: [pull_request]
4 |
5 | jobs:
6 | lint-python:
7 | runs-on: ubuntu-latest
8 | env:
9 | PYTHON: 3.8
10 | steps:
11 | - uses: actions/checkout@v2
12 | - name: Setup Python
13 | id: setup-python
14 | uses: actions/setup-python@v2
15 | with:
16 | python-version: "3.8"
17 | architecture: x64
18 | - name: Upgrade pip version
19 | run: |
20 | pip install --upgrade "pip>=21.3.1,<22.1"
21 | - name: requirements.txt
22 | id: pip-requirements
23 | run: |
24 | pip install isort black flake8
25 |
26 | - name: Lint python
27 | run: make lint-python-check
--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
1 | name: Release Charts
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | paths:
8 | - 'charts/**'
9 | permissions: write-all
10 |
11 | jobs:
12 | release:
13 | runs-on: ubuntu-latest
14 | steps:
15 | - name: Checkout
16 | uses: actions/checkout@v2
17 | with:
18 | fetch-depth: 0
19 |
20 | - name: Configure Git
21 | run: |
22 | git config user.name "$GITHUB_ACTOR"
23 | git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
24 |
25 | - name: Install Helm
26 | uses: azure/setup-helm@v1
27 | with:
28 | version: v3.8.1
29 |
30 | - name: Run chart-releaser
31 | uses: helm/chart-releaser-action@v1.4.0
32 | env:
33 | CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
34 | docker:
35 | runs-on: ubuntu-latest
36 | needs: ["release"]
37 | steps:
38 | - name: Checkout
39 | uses: actions/checkout@v2
40 | with:
41 | fetch-depth: 0
42 | -
43 | name: Set up QEMU
44 | uses: docker/setup-qemu-action@v2
45 | -
46 | name: Set up Docker Buildx
47 | uses: docker/setup-buildx-action@v2
48 |
49 | - name: 'Get Previous tag'
50 | id: previoustag
51 | uses: "WyriHaximus/github-action-get-previous-tag@v1"
52 | with:
53 | fallback: 1.0.0
54 | -
55 | name: Login to DockerHub
56 | uses: docker/login-action@v2
57 | with:
58 | username: ${{ secrets.DOCKERHUB_USERNAME }}
59 | password: ${{ secrets.DOCKERHUB_TOKEN }}
60 | -
61 | name: Build and push
62 | uses: docker/build-push-action@v3
63 | with:
64 | push: true
65 | tags: tachyongroup/mlflow-deployment-controller:${{ steps.previoustag.outputs.tag }}
66 |
67 | -
68 | name: Build and push
69 | uses: docker/build-push-action@v3
70 | with:
71 | push: true
72 | context: ui/
73 | tags: tachyongroup/mlflow-deployment-controller-ui:${{ steps.previoustag.outputs.tag }}
74 |
75 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | mdc/*
6 | *.DS_Store
7 | tmp/*
8 | # C extensions
9 | *.so
10 | scripts/*
11 | env*
12 | mdc/*
13 | *DS_Store
14 | live.py
15 | # Distribution / packaging
16 | .Python
17 | build/
18 | develop-eggs/
19 | dist/
20 | downloads/
21 | eggs/
22 | .eggs/
23 | lib/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | wheels/
29 | share/python-wheels/
30 | *.egg-info/
31 | .installed.cfg
32 | *.egg
33 | MANIFEST
34 |
35 | # PyInstaller
36 | # Usually these files are written by a python script from a template
37 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
38 | *.manifest
39 | *.spec
40 |
41 | # Installer logs
42 | pip-log.txt
43 | pip-delete-this-directory.txt
44 |
45 | # Unit test / coverage reports
46 | htmlcov/
47 | .tox/
48 | .nox/
49 | .coverage
50 | .coverage.*
51 | .cache
52 | nosetests.xml
53 | coverage.xml
54 | *.cover
55 | *.py,cover
56 | .hypothesis/
57 | .pytest_cache/
58 | cover/
59 |
60 | # Translations
61 | *.mo
62 | *.pot
63 |
64 | # Django stuff:
65 | *.log
66 | local_settings.py
67 | db.sqlite3
68 | db.sqlite3-journal
69 |
70 | # Flask stuff:
71 | instance/
72 | .webassets-cache
73 |
74 | # Scrapy stuff:
75 | .scrapy
76 |
77 | # Sphinx documentation
78 | docs/_build/
79 |
80 | # PyBuilder
81 | .pybuilder/
82 | target/
83 |
84 | # Jupyter Notebook
85 | .ipynb_checkpoints
86 |
87 | # IPython
88 | profile_default/
89 | ipython_config.py
90 |
91 | # pyenv
92 | # For a library or package, you might want to ignore these files since the code is
93 | # intended to run in multiple environments; otherwise, check them in:
94 | # .python-version
95 |
96 | # pipenv
97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
100 | # install all needed dependencies.
101 | #Pipfile.lock
102 |
103 | # poetry
104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | # This is especially recommended for binary packages to ensure reproducibility, and is more
106 | # commonly ignored for libraries.
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 |
110 | # pdm
111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | # in version control.
115 | # https://pdm.fming.dev/#use-with-ide
116 | .pdm.toml
117 |
118 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
119 | __pypackages__/
120 |
121 | # Celery stuff
122 | celerybeat-schedule
123 | celerybeat.pid
124 |
125 | # SageMath parsed files
126 | *.sage.py
127 |
128 | # Environments
129 | .env
130 | .venv
131 | env/
132 | venv/
133 | ENV/
134 | env.bak/
135 | venv.bak/
136 |
137 | # Spyder project settings
138 | .spyderproject
139 | .spyproject
140 |
141 | # Rope project settings
142 | .ropeproject
143 |
144 | # mkdocs documentation
145 | /site
146 |
147 | # mypy
148 | .mypy_cache/
149 | .dmypy.json
150 | dmypy.json
151 |
152 | # Pyre type checker
153 | .pyre/
154 |
155 | # pytype static type analyzer
156 | .pytype/
157 |
158 | # Cython debug symbols
159 | cython_debug/
160 |
161 | # PyCharm
162 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
163 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
164 | # and can be added to the global gitignore or merged into this file. For a more nuclear
165 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
166 | #.idea/
167 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.8.16-slim-buster
2 | RUN apt-get -y update
3 | RUN apt-get -y install git
4 | COPY requirements.txt requirements.txt
5 | RUN pip install -r requirements.txt
6 | RUN pip install protobuf==3.20
7 | WORKDIR /app
8 | COPY . /app
9 | CMD ["python", "main.py"]
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 ROCKET9
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | ROOT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
2 |
3 | lint-python:
4 | cd ${ROOT_DIR}; python -m isort . --recursive --atomic
5 | cd ${ROOT_DIR}; python -m black .
6 | cd ${ROOT_DIR}; python -m flake8 mlflow_controller/
7 | cd ${ROOT_DIR}; python -m flake8 ui/
8 | # autoflake --remove-all-unused-imports -i -r .
9 |
10 |
11 | lint-python-check:
12 | # cd ${ROOT_DIR}; python -m isort mlflow_controller/ --check-only
13 | cd ${ROOT_DIR}; python -m flake8 mlflow_controller/
14 | cd ${ROOT_DIR}; python -m black --check mlflow_controller
15 | cd ${ROOT_DIR}; python -m black --check ui
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |

3 |
4 |
5 |
6 |
7 |
8 |
9 | Mlflow Deployment Controller
10 |
11 | [](https://artifacthub.io/packages/search?repo=mlflow-deployment-controller)
12 |
13 |
14 |
15 |
16 | ## :dart: About ##
17 |
18 | Mlflow does not have integration with model servers (e.g. Seldon-core) for automated deployment of models when they are registered or promoted to different stages; Mlflow deployment controller tries to solve this problem. Mlflow deployment controller is a Python-based controller which periodically checks the state between Mlflow and the model server's CRDs in k8s and acts accordingly. Every stage in Mlflow needs a separate controller, as in the real world we would have different clusters for each stage. You can configure the controller to manage the state for a certain stage based on the use case.
19 |
20 |
21 |
22 |
23 | ## :rocket: Technologies ##
24 |
25 | The following tools were used in this project:
26 |
27 | - [Seldon-Core](https://docs.seldon.io/projects/seldon-core/en/latest/index.html)
28 | - [Mlflow](https://www.mlflow.org/docs/latest/index.html)
29 |
30 | ## :white_check_mark: Requirements ##
31 |
32 | Before starting :checkered_flag:, you need to have [Helm](https://helm.sh/docs/helm/helm_install/)
33 |
34 | ## :checkered_flag: Starting ##
35 |
36 | ```bash
37 | $ helm repo add rocket9-code https://rocket9-code.github.io/helm-charts
38 |
39 | $ helm install mlflow-deployment-controller rocket9-code/mlflow-deployment-controller
40 |
41 | ```
42 |
43 | ## To Setup Deployment controller in different environments
44 |
45 | ### For Staging environment
46 |
47 | Deployment controller will look for models logged with deploy.yaml in the Mlflow Staging environment and deploy them in the staging namespace
48 |
49 | ```bash
50 | $ helm repo add rocket9-code https://rocket9-code.github.io/mlflow-deployment-controller/
51 |
52 | $ helm install mlflow-deployment-controller-staging rocket9-code/mlflow-deployment-controller --set mlflow.stage=Staging --set mlflow.namespace=staging
53 |
54 | ```
55 |
56 | ### For Production environment
57 |
58 | Deployment controller will look for models logged with deploy.yaml in the Mlflow Production environment and deploy them in the production namespace
59 |
60 | ```bash
61 | $ helm repo add rocket9-code https://rocket9-code.github.io/helm-charts
62 |
63 | $ helm install mlflow-deployment-controller-production rocket9-code/mlflow-deployment-controller --set mlflow.stage=Production --set mlflow.namespace=production
64 |
65 | ```
66 |
67 | Quick Start using argocd
68 | ---
69 |
70 | Setup Mlflow and Mlflow controllers for different stages using argocd
71 |
72 | ```
73 | kubectl apply -f examples/argo-manifest
74 | ```
75 |
76 | #### Log a Mlflow model with Seldon deployment configuration with the name deploy.yaml
77 |
78 | Model Uri parameter will be overwritten by controller so it can be left blank
79 |
80 |
81 |
82 | If any model in Mlflow is registered with deploy.yaml, the deployment controller will start deploying or managing the model server based on the config
83 |
84 |
85 |
86 |
87 |
88 | Once the model is logged with deploy.yaml, the deployment controller will deploy the model to the predefined namespace.
89 | Currently, the deployment controller does not have a UI (but it is in our roadmap), so you can check the logs of the Mlflow deployment controller to see the model deployment and any errors
90 |
91 |
92 |
93 | ```
94 | kubectl logs -f deployment/mlflow-deployment-controller
95 | ```
96 |
97 |
98 |
99 |
100 |
101 |
102 | https://user-images.githubusercontent.com/62284209/182024746-1fa281ac-a388-467e-98cd-98e9f40a0ed0.mp4
103 |
104 |
105 | ## Gitops based deployment controller
106 |
107 | The GitOps-based deployment controller helps to version control Seldon deployments as well as the models in ML registries in an automated way.
108 | The controller expects a templated variable in place of modelUri in the deployment files, which the controller replaces with the latest version
109 | available in the registry for a certain stage. For example, if a controller is configured for the prod namespace and the Production stage in Mlflow and is watching
110 | the production folder of the git repository, it will get the manifests from that folder and the latest model version from Mlflow, and deploy the model servers.
111 |
112 |
113 |
114 | Create a new repository for the deployment controller and create a Seldon manifest in it. In place of modelUri, use a template such as '{{ mlflow.blob["iris demo1"] }}'
115 | to specify the model metadata. The syntax of the template is {{ registry.backend["MODEL NAME IN REGISTRY"] }}
116 |
117 | Example deployment file deploying multiple models in seldon-core
118 |
119 | Expand me
120 |
121 | ```
122 | apiVersion: machinelearning.seldon.io/v1
123 | kind: SeldonDeployment
124 | metadata:
125 | name: mlflow-var
126 | spec:
127 | name: iris
128 | predictors:
129 | - graph:
130 | children:
131 | - name: step-one
132 | modelUri: '{{ mlflow.blob["iris demo1"] }}'
133 | envSecretRefName: seldon-rclone-secret
134 | implementation: MLFLOW_SERVER
135 | type: MODEL
136 | children:
137 | - name: step-two
138 | modelUri: '{{ mlflow.blob["iris demo2"] }}'
139 | envSecretRefName: seldon-rclone-secret
140 | implementation: MLFLOW_SERVER
141 | type: MODEL
142 | children: []
143 | - name: step-three
144 | implementation: MLFLOW_SERVER
145 | modelUri: '{{ mlflow.blob["iris demo3"] }}'
146 | envSecretRefName: seldon-rclone-secret
147 | type: MODEL
148 | children: []
149 | implementation: MLFLOW_SERVER
150 | modelUri: '{{ mlflow.blob["iris demo4"] }}'
151 | envSecretRefName: seldon-rclone-secret
152 | logger:
153 | url: http://broker-ingress.knative-eventing.svc.cluster.local/demo/default
154 | mode: all
155 | name: classifier
156 | name: default
157 | replicas: 1
158 | ```
159 |
160 |
161 |
162 | The template values are updated by the controller with the latest version from the registry, as shown below, and submitted to the Kubernetes API
163 |
164 |
165 | Expand me
166 |
167 | ```
168 | apiVersion: machinelearning.seldon.io/v1
169 | kind: SeldonDeployment
170 | metadata:
171 | name: mlflow-var
172 | namespace: staging
173 | spec:
174 | name: iris
175 | predictors:
176 | - graph:
177 | children:
178 | - children:
179 | - children: []
180 | envSecretRefName: seldon-rclone-secret
181 | implementation: MLFLOW_SERVER
182 | modelUri: '{{ mlflow.blob["iris demo2"] }}'
183 | name: step-two
184 | type: MODEL
185 | envSecretRefName: seldon-rclone-secret
186 | implementation: MLFLOW_SERVER
187 | modelUri: '{{ mlflow.blob["iris demo1"] }}'
188 | name: step-one
189 | type: MODEL
190 | - children: []
191 | envSecretRefName: seldon-rclone-secret
192 | implementation: MLFLOW_SERVER
193 | modelUri: >-
194 | wasbs://artifacts/mlflow/10/262bee84b7dd4b039973084383880b57/artifacts/model
195 | name: step-three
196 | type: MODEL
197 | envSecretRefName: seldon-rclone-secret
198 | implementation: MLFLOW_SERVER
199 | logger:
200 | mode: all
201 | url: >-
202 | http://broker-ingress.knative-eventing.svc.cluster.local/demo/default
203 | modelUri: '{{ mlflow.blob["iris demo4"] }}'
204 | name: classifier
205 | name: default
206 | ```
207 |
208 |
209 |
210 | To enable gitops in the controller
211 |
212 | ```
213 | ! helm repo add rocket9-code https://rocket9-code.github.io/helm-charts
214 |
215 | ! helm install mlflow-controller rocket9-code/mlflow-deployment-controller -n mlflow --set gitops.enabled=true
216 | ```
217 | Supported values
218 | registry: mlflow
219 | backend: blob, gcs, s3
220 |
221 | In future releases we can support azureml registries and databricks mlflow
222 |
223 |
224 | ## To Setup Deployment controller in different environments with Gitops Enabled
225 |
226 | ### For Staging environment
227 |
228 | Deployment controller will look for yaml files in the staging folder and models in the Mlflow Staging environment and deploy them in the staging namespace
229 |
230 | ```bash
231 | $ helm repo add rocket9-code https://rocket9-code.github.io/mlflow-deployment-controller/
232 |
233 | $ helm install mlflow-deployment-controller-staging rocket9-code/mlflow-deployment-controller --set gitops.enabled=true \
234 | --set gitops.repository=github.com/rocket9-code/model-deployments \
235 | --set gitops.deploymentLocation=staging --set mlflow.stage=Staging \
236 | --set mlflow.namespace=staging
237 |
238 | ```
239 |
240 | ### For Production environment
241 |
242 | Deployment controller will look for yaml files in the production folder and models in the Mlflow Production environment and deploy them in the production namespace
243 |
244 | ```bash
245 | $ helm repo add rocket9-code https://rocket9-code.github.io/helm-charts
246 |
247 | $ helm install mlflow-deployment-controller-production rocket9-code/mlflow-deployment-controller --set gitops.enabled=true \
248 | --set gitops.repository=github.com/rocket9-code/model-deployments \
249 | --set gitops.deploymentLocation=production --set mlflow.stage=Production \
250 | --set mlflow.namespace=production
251 |
252 | ```
253 |
254 | A quick start example is available at examples/gitops
255 |
256 | Support matrix
257 | | Ml endpoints | Seldon core | Kserve | Databricks | Azure ml | Vertex AI | SageMaker |
258 | |-----|---------|---------|---------|---------|---------|---------|
259 | | Registries | | | | | | |
260 | | mlflow oss gcs | :white_check_mark: | :white_check_mark: | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) |
261 | | mlflow oss blob | :white_check_mark: | :white_check_mark: | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) |
262 | | mlflow oss s3 | :white_check_mark: | :white_check_mark: | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) |
263 | | databricks mlflow| ✖️ (in roadmap) | ✖️ (in roadmap) | --- | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) |
264 | | azureml | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) |
265 | | vertexai registry | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) |
266 |
267 | ## :memo: License ##
268 |
269 | This project is licensed under the MIT License. For more details, see the [LICENSE](LICENSE) file.
270 |
271 | Back to top
272 |
--------------------------------------------------------------------------------
/charts/mlflow-controller/.helmignore:
--------------------------------------------------------------------------------
1 | # Patterns to ignore when building packages.
2 | # This supports shell glob matching, relative path matching, and
3 | # negation (prefixed with !). Only one pattern per line.
4 | .DS_Store
5 | # Common VCS dirs
6 | .git/
7 | .gitignore
8 | .bzr/
9 | .bzrignore
10 | .hg/
11 | .hgignore
12 | .svn/
13 | # Common backup files
14 | *.swp
15 | *.bak
16 | *.tmp
17 | *.orig
18 | *~
19 | # Various IDEs
20 | .project
21 | .idea/
22 | *.tmproj
23 | .vscode/
24 |
--------------------------------------------------------------------------------
/charts/mlflow-controller/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v2
2 | name: mlflow-controller
3 | description: A Helm chart for Mlflow Deployment Controller and MDC ui
4 |
5 | # A chart can be either an 'application' or a 'library' chart.
6 | #
7 | #
8 | # Library charts provide useful utilities or functions for the chart developer. They're included as
9 | # a dependency of application charts to inject those utilities and functions into the rendering
10 | # pipeline. Library charts do not define any templates and therefore cannot be deployed.
11 | type: application
12 | # This is the chart version. This version number should be incremented each time you make changes
13 | # to the chart and its templates, including the app version.
14 | # Versions are expected to follow Semantic Versioning (https://semver.org/)
15 | version: 0.1.8
16 |
17 | # This is the version number of the application being deployed. This version number should be
18 | # incremented each time you make changes to the application. Versions are not expected to
19 | # follow Semantic Versioning. They should reflect the version the application is using.
20 | # It is recommended to use it with quotes.
21 | appVersion: "0.1.8"
22 |
--------------------------------------------------------------------------------
/charts/mlflow-controller/README.md:
--------------------------------------------------------------------------------
1 | # mlflow-controller
2 |
3 |   
4 |
5 | A Helm chart for Mlflow Deployment Controller
6 |
7 | ## Values
8 |
9 | | Key | Type | Default | Description |
10 | |-----|------|---------|-------------|
11 | | affinity | object | `{}` | affinity |
12 | | envFromSecret | string | `""` | additional ENV from secret |
13 | | fullnameOverride | string | `""` | |
14 | | gitops.BRANCH | string | `"main"` | |
15 | | gitops.deploymentLocation | string | `"/"` | deployment files folder location |
16 | | gitops.enabled | bool | `true` | enable/disable gitops |
17 | | gitops.gitPasswordSecretKey | string | `"githubtoken"` | git password secret key |
18 | | gitops.gitPasswordSecretName | string | `"github-secret"` | git password secret name |
19 | | gitops.gitUser | string | `"mdcadmin"` | git username |
20 | | gitops.protocol | string | `"https"` | git repo protocol |
21 | | gitops.repository | string | `"github.com/rocket9-code/model-deployments"` | git repository |
22 | | image.pullPolicy | string | `"Always"` | image pull policy |
23 | | image.repository | string | `"tachyongroup/mlflow-deployment-controller"` | image repository |
24 | | image.tag | string | `"mlflow-controller-0.1.8"` | image tag |
25 | | imagePullSecrets | list | `[]` | |
26 | | mlflow.MLFLOW_TRACKING_URI | string | `"http://mlflow-service:5000"` | mlflow tracking uri |
27 | | mlflow.backend | string | `"blob"` | Object Storage Used by mlflow supported gcs , blob , s3 |
28 | | mlflow.enabled | bool | `true` | |
29 | | mlflow.namespace | string | `"staging"` | Namespace model to be deployed |
30 | | mlflow.stage | string | `"Staging"` | Stage To be Tracked From Mlflow |
31 | | mlserver | string | `"seldon"` | mlserver one of [seldon, kserve] |
32 | | nameOverride | string | `""` | |
33 | | nodeSelector | object | `{}` | node selector |
34 | | podAnnotations | object | `{}` | pod annotations |
35 | | podSecurityContext | object | `{}` | |
36 | | replicaCount | int | `1` | replica count |
37 | | resources | object | `{}` | cpu memory resource config |
38 | | securityContext | object | `{}` | security context |
39 | | serviceAccount.annotations | object | `{}` | Annotations to add to the service account |
40 | | serviceAccount.create | bool | `true` | Specifies whether a service account should be created |
41 | | serviceAccount.name | string | `""` | If not set and create is true, a name is generated using the fullname template |
42 | | tolerations | list | `[]` | tolerations |
43 |
44 | ----------------------------------------------
45 | Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0)
46 |
--------------------------------------------------------------------------------
/charts/mlflow-controller/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/*
2 | Expand the name of the chart.
3 | */}}
4 | {{- define "mlflow-controller.name" -}}
5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
6 | {{- end }}
7 |
8 | {{/*
9 | Create a default fully qualified app name.
10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
11 | If release name contains chart name it will be used as a full name.
12 | */}}
13 | {{- define "mlflow-controller.fullname" -}}
14 | {{- if .Values.fullnameOverride }}
15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
16 | {{- else }}
17 | {{- $name := default .Chart.Name .Values.nameOverride }}
18 | {{- if contains $name .Release.Name }}
19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }}
20 | {{- else }}
21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
22 | {{- end }}
23 | {{- end }}
24 | {{- end }}
25 |
26 | {{/*
27 | Create chart name and version as used by the chart label.
28 | */}}
29 | {{- define "mlflow-controller.chart" -}}
30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
31 | {{- end }}
32 |
33 | {{/*
34 | Common labels
35 | */}}
36 | {{- define "mlflow-controller.labels" -}}
37 | helm.sh/chart: {{ include "mlflow-controller.chart" . }}
38 | {{ include "mlflow-controller.selectorLabels" . }}
39 | {{- if .Chart.AppVersion }}
40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
41 | {{- end }}
42 | app.kubernetes.io/managed-by: {{ .Release.Service }}
43 | {{- end }}
44 |
45 | {{/*
46 | Selector labels
47 | */}}
48 | {{- define "mlflow-controller.selectorLabels" -}}
49 | app.kubernetes.io/name: {{ include "mlflow-controller.name" . }}
50 | app.kubernetes.io/instance: {{ .Release.Name }}
51 | {{- end }}
52 |
53 | {{/*
54 | Create the name of the service account to use
55 | */}}
56 | {{- define "mlflow-controller.serviceAccountName" -}}
57 | {{- if .Values.serviceAccount.create }}
58 | {{- default (include "mlflow-controller.fullname" .) .Values.serviceAccount.name }}
59 | {{- else }}
60 | {{- default "default" .Values.serviceAccount.name }}
61 | {{- end }}
62 | {{- end }}
63 |
--------------------------------------------------------------------------------
/charts/mlflow-controller/templates/deployment-ui.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.ui.enabled }}
2 | apiVersion: apps/v1
3 | kind: Deployment
4 | metadata:
5 | name: {{ include "mlflow-controller.fullname" . }}-ui
6 | labels:
7 | app: mlflow-controller-ui
8 | spec:
9 | {{- if not .Values.autoscaling.enabled }}
10 | replicas: {{ .Values.replicaCount }}
11 | {{- end }}
12 | selector:
13 | matchLabels:
14 | app: mlflow-controller-ui
15 | template:
16 | metadata:
17 | {{- with .Values.podAnnotations }}
18 | annotations:
19 | {{- toYaml . | nindent 8 }}
20 | {{- end }}
21 | labels:
22 | app: mlflow-controller-ui
23 | spec:
24 | {{- with .Values.imagePullSecrets }}
25 | imagePullSecrets:
26 | {{- toYaml . | nindent 8 }}
27 | {{- end }}
28 | serviceAccountName: {{ include "mlflow-controller.serviceAccountName" . }}
29 | securityContext:
30 | {{- toYaml .Values.podSecurityContext | nindent 8 }}
31 | containers:
32 | - name: {{ .Chart.Name }}
33 | securityContext:
34 | {{- toYaml .Values.securityContext | nindent 12 }}
35 | image: "{{ .Values.ui.image.repository }}:{{ .Values.ui.image.tag | default .Chart.AppVersion }}"
36 | imagePullPolicy: {{ .Values.image.pullPolicy }}
37 | env:
38 | - name: seldon_url
39 | value: {{ .Values.ui.seldon_url }}
40 | - name: namespace
41 | value: {{ .Values.mlflow.namespace }}
42 | ports:
43 | - containerPort: 8000
44 | name: http
45 | resources:
46 | {{- toYaml .Values.resources | nindent 12 }}
47 | {{- with .Values.nodeSelector }}
48 | nodeSelector:
49 | {{- toYaml . | nindent 8 }}
50 | {{- end }}
51 | {{- with .Values.affinity }}
52 | affinity:
53 | {{- toYaml . | nindent 8 }}
54 | {{- end }}
55 | {{- with .Values.tolerations }}
56 | tolerations:
57 | {{- toYaml . | nindent 8 }}
58 | {{- end }}
59 | {{- end }}
60 |
--------------------------------------------------------------------------------
/charts/mlflow-controller/templates/deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: {{ include "mlflow-controller.fullname" . }}
5 | labels:
6 | {{- include "mlflow-controller.labels" . | nindent 4 }}
7 | spec:
8 | replicas: {{ .Values.replicaCount }}
9 | selector:
10 | matchLabels:
11 | {{- include "mlflow-controller.selectorLabels" . | nindent 6 }}
12 | template:
13 | metadata:
14 | {{- with .Values.podAnnotations }}
15 | annotations:
16 | {{- toYaml . | nindent 8 }}
17 | {{- end }}
18 | labels:
19 | {{- include "mlflow-controller.selectorLabels" . | nindent 8 }}
20 | spec:
21 | {{- with .Values.imagePullSecrets }}
22 | imagePullSecrets:
23 | {{- toYaml . | nindent 8 }}
24 | {{- end }}
25 | serviceAccountName: {{ include "mlflow-controller.serviceAccountName" . }}
26 | securityContext:
27 | {{- toYaml .Values.podSecurityContext | nindent 8 }}
28 | containers:
29 | - name: {{ .Chart.Name }}
30 | securityContext:
31 | {{- toYaml .Values.securityContext | nindent 12 }}
32 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
33 | imagePullPolicy: {{ .Values.image.pullPolicy }}
34 | resources:
35 | {{- toYaml .Values.resources | nindent 12 }}
36 | env:
37 | - name: ML_SERVER
38 | value: {{ .Values.mlserver }}
39 | {{- if .Values.gitops.gitPasswordSecretName }}
40 | - name: GIT_PASSWORD
41 | valueFrom:
42 | secretKeyRef:
43 | name: {{ .Values.gitops.gitPasswordSecretName }}
44 | key: {{ .Values.gitops.gitPasswordSecretKey }}
45 | optional: false
46 | {{- end }}
47 | envFrom:
48 | - configMapRef:
49 | name: {{ include "mlflow-controller.fullname" . }}-mlflow-cm
50 | {{- if .Values.envFromSecret }}
51 | - secretRef:
52 | name: {{ .Values.envFromSecret }}
53 | {{- end }}
54 | {{- if .Values.gitops.enabled }}
55 | - configMapRef:
56 | name: {{ include "mlflow-controller.fullname" . }}-gitops-cm
57 | {{- end }}
58 | {{- with .Values.nodeSelector }}
59 | nodeSelector:
60 | {{- toYaml . | nindent 8 }}
61 | {{- end }}
62 | {{- with .Values.affinity }}
63 | affinity:
64 | {{- toYaml . | nindent 8 }}
65 | {{- end }}
66 | {{- with .Values.tolerations }}
67 | tolerations:
68 | {{- toYaml . | nindent 8 }}
69 | {{- end }}
70 |
--------------------------------------------------------------------------------
/charts/mlflow-controller/templates/gitops-cm.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.gitops.enabled }}
2 | apiVersion: v1
3 | kind: ConfigMap
4 | metadata:
5 | name: {{ include "mlflow-controller.fullname" . }}-gitops-cm
6 | labels:
7 | {{- include "mlflow-controller.labels" . | nindent 4 }}
8 | data:
9 | GIT_USER: {{ .Values.gitops.gitUser }}
10 | MANIFEST_LOCATION: {{ .Values.gitops.deploymentLocation }}
11 | GIT_REPO: {{ .Values.gitops.repository }}
12 | BRANCH: {{ .Values.gitops.BRANCH }}
13 | GITOPS_ENABLED: "True"
14 | GIT_PROTOCOL: {{ .Values.gitops.protocol }}
15 | {{- end }}
--------------------------------------------------------------------------------
/charts/mlflow-controller/templates/ingress.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.ingress.enabled -}}
2 | {{- $fullName := include "mlflow-controller.fullname" . -}}
3 | {{- $svcPort := .Values.service.port -}}
4 | {{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
5 | {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
6 | {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
7 | {{- end }}
8 | {{- end }}
9 | {{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
10 | apiVersion: networking.k8s.io/v1
11 | {{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
12 | apiVersion: networking.k8s.io/v1beta1
13 | {{- else -}}
14 | apiVersion: extensions/v1beta1
15 | {{- end }}
16 | kind: Ingress
17 | metadata:
18 | name: {{ $fullName }}
19 | labels:
20 | app: mlflow-controller-ui
21 | {{- with .Values.ingress.annotations }}
22 | annotations:
23 | {{- toYaml . | nindent 4 }}
24 | {{- end }}
25 | spec:
26 | {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
27 | ingressClassName: {{ .Values.ingress.className }}
28 | {{- end }}
29 | {{- if .Values.ingress.tls }}
30 | tls:
31 | {{- range .Values.ingress.tls }}
32 | - hosts:
33 | {{- range .hosts }}
34 | - {{ . | quote }}
35 | {{- end }}
36 | secretName: {{ .secretName }}
37 | {{- end }}
38 | {{- end }}
39 | rules:
40 | {{- range .Values.ingress.hosts }}
41 | - host: {{ .host | quote }}
42 | http:
43 | paths:
44 | {{- range .paths }}
45 | - path: {{ .path }}
46 | {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
47 | pathType: {{ .pathType }}
48 | {{- end }}
49 | backend:
50 | {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
51 | service:
52 | name: {{ $fullName }}-ui
53 | port:
54 | number: {{ $svcPort }}
55 | {{- else }}
56 | serviceName: {{ $fullName }}
57 | servicePort: {{ $svcPort }}
58 | {{- end }}
59 | {{- end }}
60 | {{- end }}
61 | {{- end }}
62 |
--------------------------------------------------------------------------------
/charts/mlflow-controller/templates/mlflow-cm.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.mlflow.enabled }}
2 | apiVersion: v1
3 | kind: ConfigMap
4 | metadata:
5 | name: {{ include "mlflow-controller.fullname" . }}-mlflow-cm
6 | labels:
7 | {{- include "mlflow-controller.labels" . | nindent 4 }}
8 | data:
9 | MLFLOW_TRACKING_URI: {{ .Values.mlflow.MLFLOW_TRACKING_URI }}
10 | stage: {{ .Values.mlflow.stage }}
11 | namespace: {{ .Values.mlflow.namespace }}
12 | backend: {{ .Values.mlflow.backend }}
13 | MLFLOW_ENABLED: "True"
14 | {{- end }}
--------------------------------------------------------------------------------
/charts/mlflow-controller/templates/service.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.ui.enabled }}
2 | apiVersion: v1
3 | kind: Service
4 | metadata:
5 | name: {{ include "mlflow-controller.fullname" . }}-ui
6 | labels:
7 | app: mlflow-controller-ui
8 | spec:
9 | type: {{ .Values.service.type }}
10 | ports:
11 | - port: {{ .Values.service.port }}
12 | targetPort: http
13 | protocol: TCP
14 | name: http
15 | selector:
16 | app: mlflow-controller-ui
17 | {{- end }}
18 |
--------------------------------------------------------------------------------
/charts/mlflow-controller/templates/serviceaccount.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.serviceAccount.create -}}
2 | apiVersion: v1
3 | kind: ServiceAccount
4 | metadata:
5 | name: {{ include "mlflow-controller.serviceAccountName" . }}
6 | labels:
7 | {{- include "mlflow-controller.labels" . | nindent 4 }}
8 | {{- with .Values.serviceAccount.annotations }}
9 | annotations:
10 | {{- toYaml . | nindent 4 }}
11 | {{- end }}
12 |
13 | ---
14 |
15 | apiVersion: rbac.authorization.k8s.io/v1
16 | kind: ClusterRole
17 | metadata:
18 | name: {{ include "mlflow-controller.serviceAccountName" . }}
19 | rules:
20 | - apiGroups:
21 | - machinelearning.seldon.io
22 | resources:
23 | - seldondeployments
24 | verbs:
25 | - get
26 | - list
27 | - watch
28 | - create
29 | - delete
30 | - deletecollection
31 | - patch
32 | - update
33 | - apiGroups:
34 | - "apps"
35 | resources:
36 | - deployments
37 | verbs:
38 | - get
39 | - list
40 | - apiGroups:
41 | - serving.kserve.io
42 | resources:
43 | - inferenceservices
44 | - inferenceservices/status
45 | verbs:
46 | - get
47 | - list
48 | - watch
49 | - create
50 | - delete
51 | - deletecollection
52 | - patch
53 | - update
54 | - apiGroups:
55 | - serving.knative.dev
56 | resources:
57 | - services
58 | - services/status
59 | - routes
60 | - routes/status
61 | - configurations
62 | - configurations/status
63 | - revisions
64 | - revisions/status
65 | verbs:
66 | - get
67 | - list
68 | ---
69 |
70 | apiVersion: rbac.authorization.k8s.io/v1
71 | kind: ClusterRoleBinding
72 | metadata:
73 | name: {{ include "mlflow-controller.serviceAccountName" . }}
74 | roleRef:
75 | apiGroup: rbac.authorization.k8s.io
76 | kind: ClusterRole
77 | name: {{ include "mlflow-controller.serviceAccountName" . }}
78 | subjects:
79 | - kind: ServiceAccount
80 | name: {{ include "mlflow-controller.serviceAccountName" . }}
81 | namespace: "{{.Release.Namespace}}"
82 |
83 | {{- end }}
84 |
--------------------------------------------------------------------------------
/charts/mlflow-controller/values.yaml:
--------------------------------------------------------------------------------
1 | # Default values for mlflow-controller.
2 | # This is a YAML-formatted file.
3 | # Declare variables to be passed into your templates.
4 | # -- replica count
5 | replicaCount: 1
6 |
7 | image:
8 | # -- image repository
9 | repository: tachyongroup/mlflow-deployment-controller
10 | # -- image pull policy
11 | pullPolicy: Always
12 | # -- image tag
13 | tag: "mlflow-controller-0.1.8"
14 |
15 | imagePullSecrets: []
16 | nameOverride: ""
17 | fullnameOverride: ""
18 |
19 | ui:
20 | enabled: true
21 |
22 | seldon_url: https://seldon.mlops.wianai.com
23 | image:
24 | repository: tachyongroup/mlflow-deployment-controller-ui
25 | tag: "mlflow-controller-0.1.8"
26 | pullPolicy: Always
27 |
28 | mlflow:
29 | enabled: true
30 | # -- mlflow tracking uri
31 | MLFLOW_TRACKING_URI: http://mlflow-service:5000
32 | # -- Stage To be Tracked From Mlflow
33 | stage: Staging
34 | # -- Namespace model to be deployed
35 | namespace: staging
36 | # -- Object Storage Used by mlflow supported gcs , blob , s3
37 | backend: "blob"
38 |
39 | # -- mlserver one of [seldon, kserve]
40 | mlserver: seldon
41 |
42 | gitops:
43 | # -- enable/disable gitops
44 | enabled: true
45 | # -- git repository
46 | repository: github.com/rocket9-code/model-deployments
47 | # -- git repo protocol
48 | protocol: https
49 | # -- deployment files folder location
50 | deploymentLocation: staging/
51 | # -- git username
52 | gitUser: raghulkrishna
53 | # -- git password secret name
54 | gitPasswordSecretName: "github-secret"
55 | # -- git password secret key
56 | gitPasswordSecretKey: "githubtoken"
57 | # git branch to be tracked
58 | BRANCH: main
59 |
60 | serviceAccount:
61 | # -- Specifies whether a service account should be created
62 | create: true
63 | # -- Annotations to add to the service account
64 | annotations: {}
65 | # -- The name of the service account to use.
66 | # -- If not set and create is true, a name is generated using the fullname template
67 | name: ""
68 | # -- pod annotations
69 | podAnnotations: {}
70 | # pod security context
71 | podSecurityContext: {}
72 | # fsGroup: 2000
73 | # -- additional ENV from secret
74 | envFromSecret: ""
75 | # -- security context
76 | securityContext: {}
77 | # capabilities:
78 | # drop:
79 | # - ALL
80 | # readOnlyRootFilesystem: true
81 | # runAsNonRoot: true
82 | # runAsUser: 1000
83 | service:
84 | type: ClusterIP
85 | port: 8000
86 |
87 | ingress:
88 | enabled: true
89 | className: "nginx"
90 | annotations: {}
91 | # kubernetes.io/ingress.class: nginx
92 | # kubernetes.io/tls-acme: "true"
93 | hosts:
94 | - host: mdcv2.mlops.wianai.com
95 | paths:
96 | - path: /
97 | pathType: ImplementationSpecific
98 | tls:
99 | - secretName: mdcv2.mlops.wianai.com
100 | hosts:
101 | - aui-secret
102 |
103 | # -- cpu memory resource config
104 | resources: {}
105 | # We usually recommend not to specify default resources and to leave this as a conscious
106 | # choice for the user. This also increases chances charts run on environments with little
107 | # resources, such as Minikube. If you do want to specify resources, uncomment the following
108 | # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
109 | # limits:
110 | # cpu: 100m
111 | # memory: 128Mi
112 | # requests:
113 | # cpu: 100m
114 | # memory: 128Mi
115 | autoscaling:
116 | enabled: false
117 | minReplicas: 1
118 | maxReplicas: 100
119 | targetCPUUtilizationPercentage: 80
120 | # targetMemoryUtilizationPercentage: 80
121 |
122 | # -- node selector
123 | nodeSelector: {}
124 | # -- tolerations
125 | tolerations: []
126 | # -- affinity
127 | affinity: {}
128 |
--------------------------------------------------------------------------------
/doc/Mlflow Deployment controller.drawio:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
--------------------------------------------------------------------------------
/doc/doc.md:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/doc/gitops.md:
--------------------------------------------------------------------------------
1 | ## Gitops based deployment controller
2 |
3 | The GitOps-based deployment controller helps to version control Seldon deployments, as well as the models in ML registries, in an automated way.
4 | The controller expects a templated variable in place of the modelUri in the deployment files, which the controller replaces with the latest version
5 | available in the registry for a given stage. For example, if a controller is configured for the production namespace and the Production stage in MLflow, and watches the git repository
6 | under the folder production, it will get the manifests from that folder of the git repo and the latest model version from MLflow, and deploy the model servers.
7 |
8 |
9 |
10 | Create a new repository for the deployment controller and add a Seldon manifest to it. In place of the modelUri, use a template such as '{{ mlflow.blob["iris demo1"] }}'
11 | to specify the model metadata. The syntax of the template is {{ registry.backend["MODEL NAME IN REGISTRY"] }}
12 |
13 | Example deployment file deploying multiple models in seldon-core
14 |
15 | Expand me
16 |
17 | ```
18 | apiVersion: machinelearning.seldon.io/v1
19 | kind: SeldonDeployment
20 | metadata:
21 | name: mlflow-var
22 | spec:
23 | name: iris
24 | predictors:
25 | - graph:
26 | children:
27 | - name: step-one
28 | modelUri: '{{ mlflow.blob["iris demo1"] }}'
29 | envSecretRefName: seldon-rclone-secret
30 | implementation: MLFLOW_SERVER
31 | type: MODEL
32 | children:
33 | - name: step-two
34 | modelUri: '{{ mlflow.blob["iris demo2"] }}'
35 | envSecretRefName: seldon-rclone-secret
36 | implementation: MLFLOW_SERVER
37 | type: MODEL
38 | children: []
39 | - name: step-three
40 | implementation: MLFLOW_SERVER
41 | modelUri: '{{ mlflow.blob["iris demo3"] }}'
42 | envSecretRefName: seldon-rclone-secret
43 | type: MODEL
44 | children: []
45 | implementation: MLFLOW_SERVER
46 | modelUri: '{{ mlflow.blob["iris demo4"] }}'
47 | envSecretRefName: seldon-rclone-secret
48 | logger:
49 | url: http://broker-ingress.knative-eventing.svc.cluster.local/demo/default
50 | mode: all
51 | name: classifier
52 | name: default
53 | replicas: 1
54 | ```
55 |
56 |
57 |
58 | The controller replaces the template values with the latest version from the registry, as shown below, and submits the manifest to the Kubernetes API
59 |
60 |
61 | Expand me
62 |
63 | ```
64 | apiVersion: machinelearning.seldon.io/v1
65 | kind: SeldonDeployment
66 | metadata:
67 | name: mlflow-var
68 | namespace: staging
69 | spec:
70 | name: iris
71 | predictors:
72 | - graph:
73 | children:
74 | - children:
75 | - children: []
76 | envSecretRefName: seldon-rclone-secret
77 | implementation: MLFLOW_SERVER
78 | modelUri: '{{ mlflow.blob["iris demo2"] }}'
79 | name: step-two
80 | type: MODEL
81 | envSecretRefName: seldon-rclone-secret
82 | implementation: MLFLOW_SERVER
83 | modelUri: '{{ mlflow.blob["iris demo1"] }}'
84 | name: step-one
85 | type: MODEL
86 | - children: []
87 | envSecretRefName: seldon-rclone-secret
88 | implementation: MLFLOW_SERVER
89 | modelUri: >-
90 | wasbs://artifacts/mlflow/10/262bee84b7dd4b039973084383880b57/artifacts/model
91 | name: step-three
92 | type: MODEL
93 | envSecretRefName: seldon-rclone-secret
94 | implementation: MLFLOW_SERVER
95 | logger:
96 | mode: all
97 | url: >-
98 | http://broker-ingress.knative-eventing.svc.cluster.local/demo/default
99 | modelUri: '{{ mlflow.blob["iris demo4"] }}'
100 | name: classifier
101 | name: default
102 | ```
103 |
104 |
105 |
106 | To enable gitops in the controller
107 |
108 | ```
109 | ! git clone -b gitops-enable https://github.com/rocket9-code/mlflow-deployment-controller
110 |
111 | ! helm install mlflow-controller mlflow-deployment-controller/charts/mlflow-controller -n mlflow --set gitops.enabled=true
112 | ```
113 | Supported values
114 | registries: mlflow
115 | backend: blob , gcs , s3
116 |
117 | in future releases we can support azureml registries and databricks mlflow
118 |
119 | Support matrix
120 | | Ml endpoints | Seldon core | Kserve | Databricks | Azure ml | Vertex AI | SageMaker |
121 | |-----|---------|---------|---------|---------|---------|---------|
122 | | Registries | | | | | |
123 | | mlflow oss gcs | :white_check_mark: | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) |
124 | | mlflow oss blob | :white_check_mark: | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) |
125 | | mlflow oss s3 | :white_check_mark: | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) |
126 | | databricks mlflow| ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) |
127 | | databricks azureml | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) |
128 | | vertexai registry | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) |
129 |
130 |
131 | ## To Setup Deployment controller in different environments with Gitops Enabled
132 |
133 | ### For Staging environment
134 |
135 | The deployment controller looks for YAML files in the staging folder and for models in the MLflow Staging stage, and deploys the models in the staging namespace
136 |
137 | ```bash
138 | $ helm repo add f9n-code https://f9n-code.github.io/mlflow-deployment-controller/
139 |
140 | $ helm install mlflow-controller-deployment-staging f9n-code/mlflow-controller-deployment --set gitops.enabled=true \
141 | --set gitops.repository=github.com/rocket9-code/model-deployments \
142 | --set gitops.deploymentLocation=staging --set mlflow.stage=Staging \
143 | --set mlflow.namespace=staging
144 |
145 | ```
146 |
147 | ### For Production environment
148 |
149 | The deployment controller looks for YAML files in the production folder and for models in the MLflow Production stage, and deploys the models in the production namespace
150 |
151 | ```bash
152 | $ helm repo add f9n-code https://f9n-code.github.io/helm-charts
153 |
154 | $ helm install mlflow-controller-deployment-production f9n-code/mlflow-controller-deployment --set gitops.enabled=true \
155 | --set gitops.repository=github.com/rocket9-code/model-deployments \
156 | --set gitops.deploymentLocation=production --set mlflow.stage=Production \
157 | --set mlflow.namespace=production
158 |
159 | ```
160 |
--------------------------------------------------------------------------------
/examples/argo-manifest/mlflow-controller-production.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: argoproj.io/v1alpha1
2 | kind: Application
3 | metadata:
4 | name: mlflow-deployment-controller-production
5 | namespace: argocd
6 | finalizers:
7 | - resources-finalizer.argocd.argoproj.io
8 | spec:
9 | project: default
10 | source:
11 | repoURL: https://github.com/wianai/mlflow-deployment-controller
12 | path: charts/mlflow-controller
13 | targetRevision: main
14 | helm:
15 | releaseName: mlflow-deployment-controller-production
16 | parameters:
17 | - name: "mlflow.stage"
18 | value: "Production"
19 | - name: "mlflow.namespace"
20 | value: "production"
21 | syncPolicy:
22 | automated:
23 | prune: true
24 | allowEmpty: true
25 | selfHeal: true
26 | destination:
27 | server: "https://kubernetes.default.svc"
28 | namespace: mlflow
--------------------------------------------------------------------------------
/examples/argo-manifest/mlflow-controller.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: argoproj.io/v1alpha1
2 | kind: Application
3 | metadata:
4 | name: mlflow-deployment-controller-staging
5 | namespace: argocd
6 | finalizers:
7 | - resources-finalizer.argocd.argoproj.io
8 | spec:
9 | project: default
10 | source:
11 | repoURL: https://github.com/wianai/mlflow-deployment-controller
12 | path: charts/mlflow-controller
13 | targetRevision: main
14 | helm:
15 | releaseName: mlflow-deployment-controller-staging
16 | parameters:
17 | - name: "mlflow.stage"
18 | value: "Staging"
19 | - name: "mlflow.namespace"
20 | value: "staging"
21 | syncPolicy:
22 | automated:
23 | prune: true
24 | allowEmpty: true
25 | selfHeal: true
26 | destination:
27 | server: "https://kubernetes.default.svc"
28 | namespace: mlflow
--------------------------------------------------------------------------------
/examples/argo-manifest/mlflow.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: argoproj.io/v1alpha1
2 | kind: Application
3 | metadata:
4 | name: mlflow
5 | namespace: argocd
6 | finalizers:
7 | - resources-finalizer.argocd.argoproj.io
8 | spec:
9 | project: default
10 | source:
11 | repoURL: https://github.com/wianai/hello-mlflow
12 | path: charts/mlflow
13 | targetRevision: main
14 | helm:
15 | releaseName: mlflow
16 | parameters:
17 | - name: "artifact.ArtifactRoot"
18 | value: "gs://wian-ai-lab-mlflow/mlflow_artifacts/"
19 | syncPolicy:
20 | automated:
21 | prune: true
22 | allowEmpty: true
23 | selfHeal: true
24 | destination:
25 | server: "https://kubernetes.default.svc"
26 | namespace: mlflow
--------------------------------------------------------------------------------
/examples/argo-manifest/seldon-core.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: argoproj.io/v1alpha1
2 | kind: Application
3 | metadata:
4 | name: seldon-core-operator
5 | namespace: argocd
6 | finalizers:
7 | - resources-finalizer.argocd.argoproj.io
8 | spec:
9 | project: default
10 | source:
11 | repoURL: https://storage.googleapis.com/seldon-charts
12 | chart: seldon-core-operator
13 | targetRevision: 1.14.0
14 | helm:
15 | releaseName: seldon-core-operator
16 | parameters:
17 | - name: "usageMetrics.enabled"
18 | value: "false"
19 | - name: "istio.enabled"
20 | value: "true"
21 | syncPolicy:
22 | syncOptions:
23 | - CreateNamespace=true
24 | automated:
25 | prune: true
26 | allowEmpty: true
27 | selfHeal: true
28 | destination:
29 | server: "https://kubernetes.default.svc"
30 | namespace: seldon-system
31 |
--------------------------------------------------------------------------------
/examples/gitops/gitops.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Gitops example"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# Install deployment controller with gitops enabled"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 8,
20 | "metadata": {},
21 | "outputs": [
22 | {
23 | "name": "stdout",
24 | "output_type": "stream",
25 | "text": [
26 | "NAME: mlflow-controller-staging\n",
27 | "LAST DEPLOYED: Mon Dec 19 14:29:32 2022\n",
28 | "NAMESPACE: mlflow\n",
29 | "STATUS: deployed\n",
30 | "REVISION: 1\n",
31 | "TEST SUITE: None\n"
32 | ]
33 | }
34 | ],
35 | "source": [
36 | "! helm install mlflow-controller-staging ../../charts/mlflow-controller -n mlflow --set image.tag=f20fd19f28f1f39ced794e0a2f7736f403447d91 --set gitops.enabled=true --set mlflow.backend=blob --set gitops.repository=github.com/rocket9-code/model-deployments --set gitops.deploymentLocation=staging --set mlflow.stage=Staging \\--set mlflow.namespace=staging"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": 11,
42 | "metadata": {},
43 | "outputs": [
44 | {
45 | "name": "stdout",
46 | "output_type": "stream",
47 | "text": [
48 | "pod/mlflow-controller-staging-787fd66687-gxl8z condition met\n"
49 | ]
50 | }
51 | ],
52 | "source": [
53 | "! kubectl wait --for=condition=ready pod -l 'app.kubernetes.io/instance in (mlflow-controller-staging)' --timeout=180s -n mlflow"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "metadata": {},
60 | "outputs": [],
61 | "source": [
62 | "! kubectl port-forward -n mlflow svc/mlflow-service 5000:5000 "
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "# Register Mlflow models"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 12,
75 | "metadata": {},
76 | "outputs": [
77 | {
78 | "name": "stdout",
79 | "output_type": "stream",
80 | "text": [
81 | " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n",
82 | "0 5.1 3.5 1.4 0.2 \n",
83 | "1 4.9 3.0 1.4 0.2 \n",
84 | "2 4.7 3.2 1.3 0.2 \n",
85 | "3 4.6 3.1 1.5 0.2 \n",
86 | "4 5.0 3.6 1.4 0.2 \n",
87 | "\n",
88 | " target \n",
89 | "0 0 \n",
90 | "1 0 \n",
91 | "2 0 \n",
92 | "3 0 \n",
93 | "4 0 \n",
94 | "IRIS train df shape\n",
95 | "(105, 4)\n",
96 | "(105,)\n",
97 | "IRIS test df shape\n",
98 | "(45, 4)\n",
99 | "(45,)\n"
100 | ]
101 | },
102 | {
103 | "name": "stderr",
104 | "output_type": "stream",
105 | "text": [
106 | "Registered model 'iris demo0' already exists. Creating a new version of this model...\n",
107 | "2022/12/19 14:32:24 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: iris demo0, version 2\n",
108 | "Created version '2' of model 'iris demo0'.\n"
109 | ]
110 | },
111 | {
112 | "name": "stdout",
113 | "output_type": "stream",
114 | "text": [
115 | " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n",
116 | "0 5.1 3.5 1.4 0.2 \n",
117 | "1 4.9 3.0 1.4 0.2 \n",
118 | "2 4.7 3.2 1.3 0.2 \n",
119 | "3 4.6 3.1 1.5 0.2 \n",
120 | "4 5.0 3.6 1.4 0.2 \n",
121 | "\n",
122 | " target \n",
123 | "0 0 \n",
124 | "1 0 \n",
125 | "2 0 \n",
126 | "3 0 \n",
127 | "4 0 \n",
128 | "IRIS train df shape\n",
129 | "(105, 4)\n",
130 | "(105,)\n",
131 | "IRIS test df shape\n",
132 | "(45, 4)\n",
133 | "(45,)\n"
134 | ]
135 | },
136 | {
137 | "name": "stderr",
138 | "output_type": "stream",
139 | "text": [
140 | "Registered model 'iris demo1' already exists. Creating a new version of this model...\n",
141 | "2022/12/19 14:32:44 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: iris demo1, version 2\n",
142 | "Created version '2' of model 'iris demo1'.\n"
143 | ]
144 | },
145 | {
146 | "name": "stdout",
147 | "output_type": "stream",
148 | "text": [
149 | " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n",
150 | "0 5.1 3.5 1.4 0.2 \n",
151 | "1 4.9 3.0 1.4 0.2 \n",
152 | "2 4.7 3.2 1.3 0.2 \n",
153 | "3 4.6 3.1 1.5 0.2 \n",
154 | "4 5.0 3.6 1.4 0.2 \n",
155 | "\n",
156 | " target \n",
157 | "0 0 \n",
158 | "1 0 \n",
159 | "2 0 \n",
160 | "3 0 \n",
161 | "4 0 \n",
162 | "IRIS train df shape\n",
163 | "(105, 4)\n",
164 | "(105,)\n",
165 | "IRIS test df shape\n",
166 | "(45, 4)\n",
167 | "(45,)\n"
168 | ]
169 | },
170 | {
171 | "name": "stderr",
172 | "output_type": "stream",
173 | "text": [
174 | "Registered model 'iris demo2' already exists. Creating a new version of this model...\n",
175 | "2022/12/19 14:33:02 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: iris demo2, version 2\n",
176 | "Created version '2' of model 'iris demo2'.\n"
177 | ]
178 | },
179 | {
180 | "name": "stdout",
181 | "output_type": "stream",
182 | "text": [
183 | " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n",
184 | "0 5.1 3.5 1.4 0.2 \n",
185 | "1 4.9 3.0 1.4 0.2 \n",
186 | "2 4.7 3.2 1.3 0.2 \n",
187 | "3 4.6 3.1 1.5 0.2 \n",
188 | "4 5.0 3.6 1.4 0.2 \n",
189 | "\n",
190 | " target \n",
191 | "0 0 \n",
192 | "1 0 \n",
193 | "2 0 \n",
194 | "3 0 \n",
195 | "4 0 \n",
196 | "IRIS train df shape\n",
197 | "(105, 4)\n",
198 | "(105,)\n",
199 | "IRIS test df shape\n",
200 | "(45, 4)\n",
201 | "(45,)\n"
202 | ]
203 | },
204 | {
205 | "name": "stderr",
206 | "output_type": "stream",
207 | "text": [
208 | "Registered model 'iris demo3' already exists. Creating a new version of this model...\n",
209 | "2022/12/19 14:33:18 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: iris demo3, version 2\n",
210 | "Created version '2' of model 'iris demo3'.\n"
211 | ]
212 | },
213 | {
214 | "name": "stdout",
215 | "output_type": "stream",
216 | "text": [
217 | " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n",
218 | "0 5.1 3.5 1.4 0.2 \n",
219 | "1 4.9 3.0 1.4 0.2 \n",
220 | "2 4.7 3.2 1.3 0.2 \n",
221 | "3 4.6 3.1 1.5 0.2 \n",
222 | "4 5.0 3.6 1.4 0.2 \n",
223 | "\n",
224 | " target \n",
225 | "0 0 \n",
226 | "1 0 \n",
227 | "2 0 \n",
228 | "3 0 \n",
229 | "4 0 \n",
230 | "IRIS train df shape\n",
231 | "(105, 4)\n",
232 | "(105,)\n",
233 | "IRIS test df shape\n",
234 | "(45, 4)\n",
235 | "(45,)\n"
236 | ]
237 | },
238 | {
239 | "name": "stderr",
240 | "output_type": "stream",
241 | "text": [
242 | "Registered model 'iris demo4' already exists. Creating a new version of this model...\n",
243 | "2022/12/19 14:33:35 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: iris demo4, version 2\n",
244 | "Created version '2' of model 'iris demo4'.\n"
245 | ]
246 | }
247 | ],
248 | "source": [
249 | "import os\n",
250 | "\n",
251 | "import mlflow\n",
252 | "import mlflow.sklearn\n",
253 | "import pandas as pd\n",
254 | "from minio import Minio\n",
255 | "from mlflow.tracking import MlflowClient\n",
256 | "from sklearn import datasets\n",
257 | "from sklearn.ensemble import RandomForestClassifier\n",
258 | "from sklearn.metrics import roc_auc_score\n",
259 | "from sklearn.model_selection import train_test_split\n",
260 | "\n",
261 | "\n",
262 | "os.environ[\"MLFLOW_TRACKING_URI\"] = \"http://localhost:5000\"\n",
263 | "os.environ[\"AZURE_STORAGE_ACCESS_KEY\"] = \"\"\n",
264 | "os.environ[\"AZURE_STORAGE_CONNECTION_STRING\"] = \"\"\n",
265 | "\n",
266 | "\n",
267 | "def main(MODEL_NAME=\"iris gitops\", stage=\"Staging\"):\n",
268 | " iris = datasets.load_iris()\n",
269 | " iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)\n",
270 | " y = iris.target\n",
271 | " iris_df[\"target\"] = y\n",
272 | "\n",
273 | " print(iris_df.head())\n",
274 | "\n",
275 | " train_df, test_df = train_test_split(\n",
276 | " iris_df, test_size=0.3, random_state=42, stratify=iris_df[\"target\"]\n",
277 | " )\n",
278 | " X_train = train_df[\n",
279 | " [\n",
280 | " \"sepal length (cm)\",\n",
281 | " \"sepal width (cm)\",\n",
282 | " \"petal length (cm)\",\n",
283 | " \"petal width (cm)\",\n",
284 | " ]\n",
285 | " ]\n",
286 | " y_train = train_df[\"target\"]\n",
287 | "\n",
288 | " X_test = test_df[\n",
289 | " [\n",
290 | " \"sepal length (cm)\",\n",
291 | " \"sepal width (cm)\",\n",
292 | " \"petal length (cm)\",\n",
293 | " \"petal width (cm)\",\n",
294 | " ]\n",
295 | " ]\n",
296 | " y_test = test_df[\"target\"]\n",
297 | "\n",
298 | " EXPERIMENT_NAME = MODEL_NAME\n",
299 | "\n",
300 | " print(\"IRIS train df shape\")\n",
301 | " print(X_train.shape)\n",
302 | " print(y_train.shape)\n",
303 | "\n",
304 | " print(\"IRIS test df shape\")\n",
305 | " print(X_test.shape)\n",
306 | " print(y_test.shape)\n",
307 | "\n",
308 | " mlflow_client = MlflowClient()\n",
309 | "\n",
310 | " # Create an MLFlow experiment, if not already exists\n",
311 | " experiment_details = mlflow_client.get_experiment_by_name(EXPERIMENT_NAME)\n",
312 | "\n",
313 | " if experiment_details is not None:\n",
314 | " experiment_id = experiment_details.experiment_id\n",
315 | " else:\n",
316 | " experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)\n",
317 | "\n",
318 | " # Start an MLFlow experiment run\n",
319 | " with mlflow.start_run(\n",
320 | " experiment_id=experiment_id, run_name=\"iris dataset rf run\"\n",
321 | " ) as run:\n",
322 | " # Log parameters\n",
323 | "\n",
324 | " mlflow.log_param(\"max_depth\", 10)\n",
325 | " mlflow.log_param(\"random_state\", 0)\n",
326 | " mlflow.log_param(\"n_estimators\", 100)\n",
327 | " clf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=0)\n",
328 | " clf.fit(X_train, y_train)\n",
329 | " iris_predict_y = clf.predict(X_test)\n",
330 | "\n",
331 | " roc_auc_score_val = roc_auc_score(\n",
332 | " y_test, clf.predict_proba(X_test), multi_class=\"ovr\"\n",
333 | " )\n",
334 | " mlflow.log_metric(\"test roc_auc_score\", roc_auc_score_val)\n",
335 | "\n",
336 | " # Log model\n",
337 | " result = mlflow.sklearn.log_model(clf, artifact_path=\"model\")\n",
338 | "\n",
339 | " # Register a new version\n",
340 | " result = mlflow.register_model(result.model_uri, MODEL_NAME)\n",
341 | "\n",
342 | " client = MlflowClient()\n",
343 | " client.transition_model_version_stage(\n",
344 | " name=MODEL_NAME, version=result.version, stage=stage\n",
345 | " )\n",
346 | "\n",
347 | "\n",
348 | "for i in range(5):\n",
349 | " main(MODEL_NAME=f\"iris demo{i}\")"
350 | ]
351 | },
352 | {
353 | "cell_type": "markdown",
354 | "metadata": {},
355 | "source": [
356 | "# write deployment file and commit to git repository"
357 | ]
358 | },
359 | {
360 | "cell_type": "code",
361 | "execution_count": null,
362 | "metadata": {},
363 | "outputs": [],
364 | "source": [
365 | "! git clone https://github.com/rocket9-code/model-deployments"
366 | ]
367 | },
368 | {
369 | "cell_type": "code",
370 | "execution_count": null,
371 | "metadata": {},
372 | "outputs": [],
373 | "source": [
374 | "dep_yaml = \"\"\"apiVersion: machinelearning.seldon.io/v1\n",
375 | "kind: SeldonDeployment\n",
376 | "metadata:\n",
377 | " name: mlflow-var-test1\n",
378 | "spec:\n",
379 | " name: iris\n",
380 | " predictors:\n",
381 | " - graph:\n",
382 | " children:\n",
383 | " - name: step-one\n",
384 | " modelUri: '{{ mlflow.blob[\"iris demo1\"] }}'\n",
385 | " envSecretRefName: seldon-rclone-secret\n",
386 | " implementation: MLFLOW_SERVER\n",
387 | " type: MODEL\n",
388 | " children: \n",
389 | " - name: step-two\n",
390 | " modelUri: '{{ mlflow.blob[\"iris demo2\"] }}'\n",
391 | " envSecretRefName: seldon-rclone-secret\n",
392 | " implementation: MLFLOW_SERVER\n",
393 | " type: MODEL\n",
394 | " children: []\n",
395 | " - name: step-three\n",
396 | " implementation: MLFLOW_SERVER\n",
397 | " modelUri: '{{ mlflow.blob[\"iris demo3\"] }}'\n",
398 | " envSecretRefName: seldon-rclone-secret\n",
399 | " type: MODEL\n",
400 | " children: []\n",
401 | " implementation: MLFLOW_SERVER\n",
402 | " modelUri: '{{ mlflow.blob[\"iris demo4\"] }}'\n",
403 | " envSecretRefName: seldon-rclone-secret\n",
404 | " logger:\n",
405 | " url: http://broker-ingress.knative-eventing.svc.cluster.local/demo/default\n",
406 | " mode: all\n",
407 | " name: classifier\n",
408 | " name: default\n",
409 | " replicas: 1\"\"\"\n",
410 | "with open(\"model-deployments/staging/seldon-deploy-test1.yaml\", \"x\") as f:\n",
411 | " f.write(dep_yaml)"
412 | ]
413 | },
414 | {
415 | "cell_type": "code",
416 | "execution_count": null,
417 | "metadata": {},
418 | "outputs": [],
419 | "source": [
420 | "! cd model-deployments && git add staging/seldon-deploy-test1.yaml"
421 | ]
422 | },
423 | {
424 | "cell_type": "code",
425 | "execution_count": null,
426 | "metadata": {},
427 | "outputs": [],
428 | "source": [
429 | "! cd model-deployments && git commit -m \"test deploy yaml\" \n",
430 | "! cd model-deployments && git push"
431 | ]
432 | },
433 | {
434 | "cell_type": "markdown",
435 | "metadata": {},
436 | "source": [
437 | "# Wait for the controller to pick up the changes and create a new deployment"
438 | ]
439 | },
440 | {
441 | "cell_type": "code",
442 | "execution_count": 13,
443 | "metadata": {},
444 | "outputs": [],
445 | "source": [
446 | "import time\n",
447 | "\n",
448 | "from kubernetes import client as KubeClient\n",
449 | "from kubernetes import config\n",
450 | "\n",
451 | "try:\n",
452 | " config.load_kube_config()\n",
453 | "except config.ConfigException:\n",
454 | " config.load_incluster_config()\n",
455 | "kube_client = KubeClient.CustomObjectsApi()"
456 | ]
457 | },
458 | {
459 | "cell_type": "markdown",
460 | "metadata": {},
461 | "source": [
462 | "You can see that the controller updated the model URIs with the latest model versions"
463 | ]
464 | },
465 | {
466 | "cell_type": "code",
467 | "execution_count": 15,
468 | "metadata": {},
469 | "outputs": [
470 | {
471 | "name": "stdout",
472 | "output_type": "stream",
473 | "text": [
474 | "wasbs://artifacts/mlflow/8/4083c71c946e47e19422218b69a5d67c/artifacts/model wasbs://artifacts/mlflow/9/10e8b48f3cfc451da361fabccb6e1c08/artifacts/model wasbs://artifacts/mlflow/10/262bee84b7dd4b039973084383880b57/artifacts/model wasbs://artifacts/mlflow/11/0dd0c915e3e0446d9139fb81b0b6ad83/artifacts/model\n"
475 | ]
476 | }
477 | ],
478 | "source": [
479 | "manifest = kube_client.get_namespaced_custom_object(\n",
480 | " group=\"machinelearning.seldon.io\",\n",
481 | " version=\"v1\",\n",
482 | " plural=\"seldondeployments\",\n",
483 | " namespace=\"staging\",\n",
484 | " name=\"mlflow-var\",\n",
485 | ")\n",
486 | "demo1 = manifest[\"spec\"][\"predictors\"][0][\"graph\"][\"children\"][0][\"modelUri\"]\n",
487 | "demo2 = manifest[\"spec\"][\"predictors\"][0][\"graph\"][\"children\"][0][\"children\"][0][\n",
488 | " \"modelUri\"\n",
489 | "]\n",
490 | "demo3 = manifest[\"spec\"][\"predictors\"][0][\"graph\"][\"children\"][1][\"modelUri\"]\n",
491 | "demo4 = manifest[\"spec\"][\"predictors\"][0][\"graph\"][\"modelUri\"]\n",
492 | "\n",
493 | "print(demo1, demo2, demo3, demo4)"
494 | ]
495 | },
496 | {
497 | "cell_type": "code",
498 | "execution_count": 16,
499 | "metadata": {},
500 | "outputs": [
501 | {
502 | "name": "stdout",
503 | "output_type": "stream",
504 | "text": [
505 | "release \"mlflow-controller-staging\" uninstalled\n"
506 | ]
507 | }
508 | ],
509 | "source": [
510 | "! helm delete mlflow-controller-staging -n mlflow"
511 | ]
512 | },
513 | {
514 | "cell_type": "code",
515 | "execution_count": null,
516 | "metadata": {},
517 | "outputs": [],
518 | "source": []
519 | }
520 | ],
521 | "metadata": {
522 | "kernelspec": {
523 | "display_name": "Python 3",
524 | "language": "python",
525 | "name": "python3"
526 | },
527 | "language_info": {
528 | "codemirror_mode": {
529 | "name": "ipython",
530 | "version": 3
531 | },
532 | "file_extension": ".py",
533 | "mimetype": "text/x-python",
534 | "name": "python",
535 | "nbconvert_exporter": "python",
536 | "pygments_lexer": "ipython3",
537 | "version": "3.7.7"
538 | }
539 | },
540 | "nbformat": 4,
541 | "nbformat_minor": 4
542 | }
543 |
--------------------------------------------------------------------------------
/examples/notebook/deploy.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: machinelearning.seldon.io/v1
2 | kind: SeldonDeployment
3 | metadata:
4 | name: mlflow
5 | labels:
6 | app.kubernetes.io/managed-by: mlflow-seldon
7 | app.kubernetes.io/name: mlflow
8 | spec:
9 | name: iris
10 | predictors:
11 | - componentSpecs:
12 | - spec:
13 | # We are setting high failureThreshold as installing conda dependencies
14 | # can take long time and we want to avoid k8s killing the container prematurely
15 | containers:
16 | # - image: seldonio/mlflowserver:1.14.0-dev
17 | # imagePullPolicy: IfNotPresent
18 | # name: classifier
19 | - name: classifier
20 | livenessProbe:
21 | initialDelaySeconds: 800
22 | failureThreshold: 20000000
23 | periodSeconds: 25
24 | successThreshold: 1
25 | httpGet:
26 | path: /health/ping
27 | port: http
28 | scheme: HTTP
29 | readinessProbe:
30 | initialDelaySeconds: 800
31 | failureThreshold: 2000000
32 | periodSeconds: 25
33 | successThreshold: 1
34 | httpGet:
35 | path: /health/ping
36 | port: http
37 | scheme: HTTP
38 |
39 | graph:
40 | children: []
41 | implementation: MLFLOW_SERVER
42 | modelUri: gs://hellomlops-mlflow/mlflow_artifacts/1/6887f98225b9419f9681d68e7cdd9335/artifacts/random-forest-model
43 | logger:
44 | url: http://broker-ingress.knative-eventing.svc.cluster.local/demo/default
45 | mode: all
46 | name: classifier
47 | name: default
48 | replicas: 1
--------------------------------------------------------------------------------
/examples/notebook/mlflow.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "id": "3bab55b3-a167-48c1-b3b2-0ca66f4c7c21",
7 | "metadata": {},
8 | "outputs": [
9 | {
10 | "name": "stdout",
11 | "output_type": "stream",
12 | "text": [
13 | " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n",
14 | "0 5.1 3.5 1.4 0.2 \n",
15 | "1 4.9 3.0 1.4 0.2 \n",
16 | "2 4.7 3.2 1.3 0.2 \n",
17 | "3 4.6 3.1 1.5 0.2 \n",
18 | "4 5.0 3.6 1.4 0.2 \n",
19 | "\n",
20 | " target \n",
21 | "0 0 \n",
22 | "1 0 \n",
23 | "2 0 \n",
24 | "3 0 \n",
25 | "4 0 \n",
26 | "IRIS train df shape\n",
27 | "(105, 4)\n",
28 | "(105,)\n",
29 | "IRIS test df shape\n",
30 | "(45, 4)\n",
31 | "(45,)\n"
32 | ]
33 | }
34 | ],
35 | "source": [
36 | "import pandas as pd\n",
37 | "from sklearn import datasets\n",
38 | "from sklearn.ensemble import RandomForestClassifier\n",
39 | "import mlflow, os\n",
40 | "import mlflow.sklearn\n",
41 | "from mlflow.tracking import MlflowClient\n",
42 | "from sklearn.metrics import roc_auc_score, accuracy_score\n",
43 | "from sklearn.model_selection import train_test_split\n",
44 | "\n",
45 | "os.environ[\"MLFLOW_TRACKING_URI\"] = \"http://localhost:5000\"\n",
46 | "iris = datasets.load_iris()\n",
47 | "iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)\n",
48 | "y = iris.target\n",
49 | "iris_df[\"target\"] = y\n",
50 | "\n",
51 | "print(iris_df.head())\n",
52 | "\n",
53 | "\n",
54 | "train_df, test_df = train_test_split(\n",
55 | " iris_df, test_size=0.3, random_state=42, stratify=iris_df[\"target\"]\n",
56 | ")\n",
57 | "X_train = train_df[\n",
58 | " [\"sepal length (cm)\", \"sepal width (cm)\", \"petal length (cm)\", \"petal width (cm)\"]\n",
59 | "]\n",
60 | "y_train = train_df[\"target\"]\n",
61 | "\n",
62 | "X_test = test_df[\n",
63 | " [\"sepal length (cm)\", \"sepal width (cm)\", \"petal length (cm)\", \"petal width (cm)\"]\n",
64 | "]\n",
65 | "y_test = test_df[\"target\"]\n",
66 | "# print(iris)\n",
67 | "# print(iris_df.head())\n",
68 | "\n",
69 | "\n",
70 | "EXPERIMENT_NAME = \"IRIS dataset classification\"\n",
71 | "\n",
72 | "\n",
73 | "print(\"IRIS train df shape\")\n",
74 | "print(X_train.shape)\n",
75 | "print(y_train.shape)\n",
76 | "\n",
77 | "print(\"IRIS test df shape\")\n",
78 | "print(X_test.shape)\n",
79 | "print(y_test.shape)\n",
80 | "\n",
81 | "mlflow_client = MlflowClient()\n",
82 | "\n",
83 | "# Create an MLFlow experiment, if not already exists\n",
84 | "experiment_details = mlflow_client.get_experiment_by_name(EXPERIMENT_NAME)\n",
85 | "\n",
86 | "if experiment_details is not None:\n",
87 | " experiment_id = experiment_details.experiment_id\n",
88 | "else:\n",
89 | " experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)\n",
90 | "\n",
91 | "# Start an MLFlow experiment run\n",
92 | "with mlflow.start_run(\n",
93 | " experiment_id=experiment_id, run_name=\"iris dataset rf run\"\n",
94 | ") as run:\n",
95 | " # Log parameters\n",
96 | "\n",
97 | " mlflow.log_param(\"max_depth\", 10)\n",
98 | " mlflow.log_param(\"random_state\", 0)\n",
99 | " mlflow.log_param(\"n_estimators\", 100)\n",
100 | " clf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=0)\n",
101 | " clf.fit(X_train, y_train)\n",
102 | " iris_predict_y = clf.predict(X_test)\n",
103 | "\n",
104 | " roc_auc_score_val = roc_auc_score(\n",
105 | " y_test, clf.predict_proba(X_test), multi_class=\"ovr\"\n",
106 | " )\n",
107 | " mlflow.log_metric(\"test roc_auc_score\", roc_auc_score_val)\n",
108 | "\n",
109 | " accuracy_score = accuracy_score(y_test, iris_predict_y)\n",
110 | " mlflow.log_metric(\"test accuracy_score\", accuracy_score)\n",
111 | " mlflow.log_artifact(\"deploy.yaml\")\n",
112 | "\n",
113 | " # Log model\n",
114 | " mlflow.sklearn.log_model(clf, artifact_path=\"model\")"
115 | ]
116 | }
117 | ],
118 | "metadata": {
119 | "interpreter": {
120 | "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
121 | },
122 | "kernelspec": {
123 | "display_name": "Python 3",
124 | "language": "python",
125 | "name": "python3"
126 | },
127 | "language_info": {
128 | "codemirror_mode": {
129 | "name": "ipython",
130 | "version": 3
131 | },
132 | "file_extension": ".py",
133 | "mimetype": "text/x-python",
134 | "name": "python",
135 | "nbconvert_exporter": "python",
136 | "pygments_lexer": "ipython3",
137 | "version": "3.9.12"
138 | }
139 | },
140 | "nbformat": 4,
141 | "nbformat_minor": 5
142 | }
143 |
--------------------------------------------------------------------------------
/examples/readme.md:
--------------------------------------------------------------------------------
1 | Example deployment using Argo CD
2 | ---
3 |
4 | Set up MLflow and the MLflow controllers for the different stages using Argo CD
5 |
6 | ```
7 | kubectl apply -f argo-manifest
8 | ```
9 |
10 | Log an MLflow model together with its Seldon deployment configuration, stored as an artifact named deploy.yaml
11 |
12 |
13 |
14 |
15 |
16 |
17 | The MLflow controllers will deploy the models to the appropriate namespaces based on that configuration
18 |
19 |
20 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | """
2 | __author__ = "Raghul Krishna"
3 | __copyright__ = ""
4 | __credits__ = ""
5 | __license__ = ""
6 | __version__ = ""
7 | __maintainer__ = "raghul Krishna"
8 | __email__ = "rrkraghulkrishna@gmail.com"
9 |
10 | """
11 | import logging
12 | import os
13 | from time import sleep
14 |
15 | from apscheduler.schedulers.background import BackgroundScheduler
16 |
17 | from mlflow_controller.gitops import GitopsMDC
18 | from mlflow_controller.mlflow_direct import DeployConroller
19 |
20 | logging.getLogger("apscheduler").setLevel(logging.ERROR)
21 |
def env_flag(name, default=False):
    """Return True when environment variable *name* holds a truthy value.

    ``os.getenv`` always returns a string, and every non-empty string --
    including ``"False"`` -- is truthy, so the raw value must be parsed
    before it is used as a condition.

    Parameters
    ----------
    name : str
        Name of the environment variable to read.
    default : bool
        Value returned when the variable is not set at all.

    Returns
    -------
    bool
        True for ``1``, ``true``, ``yes`` or ``on`` (case-insensitive,
        surrounding whitespace ignored); False for any other value.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() in ("1", "true", "yes", "on")


if __name__ == "__main__":
    scheduler = BackgroundScheduler()
    controller = DeployConroller()
    giopsmdc = GitopsMDC()
    # The direct (non-gitops) controller jobs are currently disabled.
    # scheduler.add_job(
    #     controller.deploy_controller, CronTrigger.from_crontab("* * * * *")
    # )
    # scheduler.add_job(
    #     id="controller",
    #     func=controller.deploy_controller,
    #     trigger="interval",
    #     seconds=15,
    # )
    # Only schedule the gitops sync when it was explicitly switched on.
    if env_flag("GITOPS_ENABLED"):
        scheduler.add_job(
            id="gitopsmdc",
            func=giopsmdc.gitops_mlflow_controller,
            trigger="interval",
            seconds=15,
        )
    scheduler.start()
    # BackgroundScheduler runs its jobs in a background thread; keep the main
    # thread alive so the process does not exit.
    while True:
        sleep(1)
45 |
--------------------------------------------------------------------------------
/mlflow_controller/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rocket9-code/mlflow-deployment-controller/9bd5eefa87f8351bfe6837754f373fcab0ac86c0/mlflow_controller/__init__.py
--------------------------------------------------------------------------------
/mlflow_controller/controller.py:
--------------------------------------------------------------------------------
1 | """
2 | __author__ = "Raghul Krishna"
3 | __copyright__ = ""
4 | __credits__ = ""
5 | __license__ = ""
6 | __version__ = ""
7 | __maintainer__ = "raghul Krishna"
8 | __email__ = "rrkraghulkrishna@gmail.com"
9 |
10 | """
11 | import logging
12 | import os
13 | import re
14 |
15 | from kubernetes import client as KubeClient
16 | from kubernetes import config
17 | from mlflow.tracking import MlflowClient
18 |
19 | import mlflow_controller.storage
20 |
# Module logger: every record goes to the console, and records of level
# ERROR and above are additionally written to ``log.log`` in the working
# directory.
formatter = logging.Formatter("%(asctime)s:%(name)s:%(message)s")

file_handler = logging.FileHandler("log.log")
file_handler.setLevel(logging.ERROR)
stream_handler = logging.StreamHandler()

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
for _handler in (file_handler, stream_handler):
    _handler.setFormatter(formatter)
    logger.addHandler(_handler)

# The root logger level is taken from LOG_LEVEL (INFO when unset).
logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO"))

# os.environ["MLFLOW_TRACKING_URI"] = "http://localhost:5000"
39 |
40 |
class DeployConroller:
    """
    A class to maintain the controller
    ...

    Methods
    -------
    state_manager():
        Deletes managed Seldon deployments that have no matching Mlflow model
    deploy_controller():
        Manages the deployments from Mlflow
    """

    def __init__(self):
        """Create the Mlflow and Kubernetes clients and read the settings.

        Raises
        ------
        KeyError
            If one of the ``stage``, ``namespace`` or ``cloud`` environment
            variables is not set.
        """
        self.mlflow_client = MlflowClient()
        logger.info("Mlflow client initialized")
        self.object_init = mlflow_controller.storage.Artifact()
        # Prefer a local kubeconfig; fall back to the in-cluster configuration.
        try:
            config.load_kube_config()
        except config.ConfigException:
            config.load_incluster_config()
        self.kube_client = KubeClient.CustomObjectsApi()
        logger.info("KubeClient initialized")
        # Name of the run artifact that holds the deployment manifest.
        self.mlflow_deploy_config = "deploy.yaml"
        self.stage = os.environ["stage"]
        # Deployments applied during the current pass; consumed and reset by
        # state_manager().
        self.model_details = []
        self.Namespace = os.environ["namespace"]
        self.cloud = os.environ["cloud"]
        # Label selector matching the label that deploy_controller() sets.
        self.label = "app.kubernetes.io/managed-by=mdc-mlflow"

    def __str__(self):
        return self.__class__.__name__

    def state_manager(self):
        """To delete resources deleted in Mlflow

        Every SeldonDeployment in the namespace that carries the controller
        label is compared with the deployments recorded in
        ``self.model_details``; those without a matching entry are deleted.
        ``self.model_details`` is emptied afterwards.
        """
        manifests = self.kube_client.list_namespaced_custom_object(
            group="machinelearning.seldon.io",
            version="v1",
            plural="seldondeployments",
            namespace=self.Namespace,
            label_selector=self.label,
        )
        known_models = self.model_details
        for manifest in manifests["items"]:
            manifest_name = manifest["metadata"]["name"]
            manifest_namespace = manifest["metadata"]["namespace"]
            logger.debug(
                "%s %s %s", known_models, manifest_name, manifest_namespace
            )
            in_sync = any(
                item["deploy_name"] == manifest_name
                and item["Namespace"] == manifest_namespace
                for item in known_models
            )
            if in_sync:
                logger.info(
                    "Model %s Namespace %s in Sync ",
                    manifest_name,
                    manifest_namespace,
                )
                continue
            logger.info(
                "Deleting a Deployment %s Namespace %s",
                manifest_name,
                manifest_namespace,
            )
            self.kube_client.delete_namespaced_custom_object(
                group="machinelearning.seldon.io",
                version="v1",
                plural="seldondeployments",
                name=manifest_name,
                namespace=manifest_namespace,
            )
        self.model_details = []

    def _load_source_and_manifest(self, version, artifact_uri):
        """Return ``(model_source, deploy_yaml)`` for the configured cloud.

        Raises
        ------
        ValueError
            If ``self.cloud`` is not one of ``gcp``, ``azure_blob``, ``aws_s3``.
        """
        if self.cloud == "gcp":
            return version.source, self.object_init.gcp_bucket(artifact_uri)
        if self.cloud in ("azure_blob", "aws_s3"):
            # Drop everything from "@" up to the next "/" in the source URI.
            model_source = re.sub(r"(?=\@)(.*?)(?=\/)", "", version.source)
            # NOTE(review): "aws_s3" is served by the azure_blob loader, exactly
            # as before -- looks like a copy-paste slip; confirm against
            # mlflow_controller.storage.Artifact.
            return model_source, self.object_init.azure_blob(artifact_uri)
        raise ValueError(f"unsupported Object Storage: {self.cloud!r}")

    def _apply_deployment(self, model_name, deploy_yaml):
        """Create the SeldonDeployment, patching it when creation fails."""
        try:
            self.kube_client.create_namespaced_custom_object(
                group="machinelearning.seldon.io",
                version="v1",
                plural="seldondeployments",
                body=deploy_yaml,
                namespace=self.Namespace,
            )
            logger.info(
                "Created a Deployment %s Namespace %s",
                model_name,
                self.Namespace,
            )
        except KubeClient.rest.ApiException:
            # Presumably the object already exists; update it in place.
            self.kube_client.patch_namespaced_custom_object(
                group="machinelearning.seldon.io",
                version="v1",
                plural="seldondeployments",
                body=deploy_yaml,
                name=deploy_yaml["metadata"]["name"],
                namespace=self.Namespace,
            )
            logger.info(
                "Patched a Deployment %s Namespace %s",
                model_name,
                self.Namespace,
            )

    def deploy_controller(self):
        """
        Manages the deployments from Mlflow

        For every latest model version in the configured stage whose run has
        a ``deploy.yaml`` artifact, the manifest is loaded, pointed at the
        model source, labelled as managed by this controller and applied to
        the namespace. ``state_manager()`` then removes stale deployments.

        Raises
        ------
        ValueError
            If the configured cloud is not supported.
        """
        model_versions = [
            version
            for registered_model in self.mlflow_client.list_registered_models()
            for version in registered_model.latest_versions
        ]
        for version in model_versions:
            if version.current_stage != self.stage:
                continue
            has_deploy_config = any(
                file.path == self.mlflow_deploy_config
                for file in self.mlflow_client.list_artifacts(version.run_id)
            )
            if not has_deploy_config:
                continue

            model_name = version.name.lower()
            model_run_id = version.run_id
            run_details = self.mlflow_client.get_run(version.run_id)
            model_source, deploy_yaml = self._load_source_and_manifest(
                version, run_details.info.artifact_uri
            )

            predictor = deploy_yaml["spec"]["predictors"][0]
            predictor["graph"]["modelUri"] = model_source
            # The manifest may omit these mappings; create them on demand.
            if not predictor.get("annotations"):
                predictor["annotations"] = {}
            predictor["annotations"]["predictor_version"] = version.version

            metadata = deploy_yaml["metadata"]
            # Deployment name: lower-cased model name without spaces and with
            # "-" in place of "_".
            metadata["name"] = model_name.replace(" ", "").replace("_", "-")
            metadata.setdefault("annotations", {})
            if not metadata.get("labels"):
                metadata["labels"] = {}
            metadata["labels"]["app.kubernetes.io/managed-by"] = "mdc-mlflow"

            logger.info(
                "Model Name: %s, Model Run Id: %s",
                model_name,
                model_run_id,
            )
            self.model_details.append(
                {
                    "name": model_name,
                    "deploy_name": metadata["name"],
                    "Namespace": self.Namespace,
                }
            )
            self._apply_deployment(model_name, deploy_yaml)
        self.state_manager()
207 |
--------------------------------------------------------------------------------
/mlflow_controller/gitops.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import logging
3 | import os
4 | import shutil
5 | import uuid
6 |
7 | import yaml
8 | from git import Repo
9 | from kubernetes import config
10 |
11 | from mlflow_controller.mlservers import kserve, seldon
12 | from mlflow_controller.registries.mlflow import MLflowMetadata
13 |
# Module logger: every record goes to the console, and records of level
# ERROR and above are additionally written to ``log.log`` in the working
# directory.
formatter = logging.Formatter("%(asctime)s:%(name)s:%(message)s")

file_handler = logging.FileHandler("log.log")
file_handler.setLevel(logging.ERROR)
stream_handler = logging.StreamHandler()

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
for _handler in (file_handler, stream_handler):
    _handler.setFormatter(formatter)
    logger.addHandler(_handler)

# The root logger level is taken from LOG_LEVEL (INFO when unset).
logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO"))

# Settings below are read from the environment once, at import time.
TRACKING_URI = os.environ.get("MLFLOW_TRACKING_URI", "http://localhost:9000")
GIT_USER = os.environ.get("GIT_USER", "")
GIT_PASSWORD = os.environ.get("GIT_PASSWORD", "")
GIT_PROTOCOL = os.environ.get("GIT_PROTOCOL", "https")
GIT_REPO = os.environ.get("GIT_REPO", "github.com/rocket9-code/model-deployments")
# Credentials are embedded in the clone URL only when a password is set.
_git_credentials = f"{GIT_USER}:{GIT_PASSWORD}@" if GIT_PASSWORD else ""
GIT_URL = f"{GIT_PROTOCOL}://{_git_credentials}{GIT_REPO}"

MANIFEST_LOCATION = os.environ.get("MANIFEST_LOCATION", "staging")
GLOBAL_NAMESPACE = os.environ.get("namespace", "staging")
MLFLOW_STAGE = os.environ.get("stage", "Staging")
backend = os.environ.get("backend", "")
BRANCH = os.environ.get("BRANCH", "main")
ML_SERVER = os.environ.get("ML_SERVER", "kserve")
47 |
48 |
class GitopsMDC:
    """Syncs model-server manifests kept in a Git repository with Mlflow."""

    def _read_manifests(self, manifest_path):
        """Load the manifests under *manifest_path* that target ML_SERVER.

        ``*.yaml`` and ``*.yml`` files are parsed; a manifest is kept when the
        group part of its ``apiVersion`` matches the configured model server.
        Files that fail to parse, are not a mapping, or have no ``apiVersion``
        are logged and skipped so one bad file cannot abort the whole run.
        """
        if ML_SERVER == "seldon":
            wanted_group = "machinelearning.seldon.io"
        elif ML_SERVER == "kserve":
            wanted_group = "serving.kserve.io"
        else:
            wanted_group = None

        manifests = []
        manifest_files = glob.glob(f"{manifest_path}/*.yaml") + glob.glob(
            f"{manifest_path}/*.yml"
        )
        for manifest_file in manifest_files:
            with open(manifest_file, "r") as stream:
                try:
                    deploy_yaml = yaml.safe_load(stream)
                except yaml.YAMLError as exc:
                    logger.error(exc)
                    continue
            if not isinstance(deploy_yaml, dict) or "apiVersion" not in deploy_yaml:
                logger.warning(
                    "Skipping %s: not a manifest with an apiVersion", manifest_file
                )
                continue
            resource_group = str(deploy_yaml["apiVersion"]).split("/")[0]
            if wanted_group is not None and resource_group == wanted_group:
                manifests.append(deploy_yaml)
        return manifests

    def gitops_mlflow_controller(self):
        """Clone the manifest repository and sync its manifests with Mlflow.

        The configured branch is cloned into a fresh ``./tmp/<uuid>``
        directory, the manifests in ``MANIFEST_LOCATION`` are read, and, when
        Mlflow returned any model metadata, they are handed to the ``sync``
        function of the configured model server (seldon or kserve). The clone
        directory is removed afterwards, also when a step fails.
        """
        folder_name = str(uuid.uuid4())
        path = "./tmp/" + folder_name
        os.makedirs(path, exist_ok=True)
        try:
            logger.info("Cloning repo %s with branch %s", GIT_REPO, BRANCH)
            Repo.clone_from(GIT_URL, path, single_branch=True, branch=BRANCH)
            # Prefer a local kubeconfig; fall back to the in-cluster one.
            try:
                config.load_kube_config()
            except config.ConfigException:
                config.load_incluster_config()

            mlflowcontroller = MLflowMetadata(
                tracking_uri=TRACKING_URI, stage=MLFLOW_STAGE
            )
            logger.info("Mlflow tracking uri %s", TRACKING_URI)
            logger.info("Mlflow Stage %s", MLFLOW_STAGE)
            logger.info("backend %s", backend)
            mlflow_models_metadata, _ = mlflowcontroller.get_model_metadata(
                check_deploy=False, backend=backend
            )

            manifests = self._read_manifests(path + "/" + MANIFEST_LOCATION)

            # Nothing is synced when Mlflow returned no model metadata.
            if len(mlflow_models_metadata.keys()) > 0:
                if ML_SERVER == "seldon":
                    seldon.sync(
                        manifests,
                        mlflow_models_metadata,
                        MLFLOW_STAGE,
                        GLOBAL_NAMESPACE,
                        f"mdc-gitops-{backend}-mlflow-seldon",
                        "mlflow",
                        backend,
                    )
                elif ML_SERVER == "kserve":
                    kserve.sync(
                        manifests,
                        mlflow_models_metadata,
                        MLFLOW_STAGE,
                        GLOBAL_NAMESPACE,
                        f"mdc-gitops-{backend}-mlflow-kserve",
                        "mlflow",
                        backend,
                    )
                else:
                    logger.warning("Unsupported ML_SERVER %s", ML_SERVER)
        finally:
            # This method runs on a short interval; always remove the clone so
            # failed runs do not pile up directories under ./tmp.
            shutil.rmtree(path, ignore_errors=True)
108 |
--------------------------------------------------------------------------------
/mlflow_controller/mlflow_direct.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 |
4 | from mlflow_controller.mlservers import kserve, seldon
5 | from mlflow_controller.registries.mlflow import MLflowMetadata
6 |
# Shared record layout for both handlers of this module's logger.
formatter = logging.Formatter("%(asctime)s:%(name)s:%(message)s")

# ERROR and above are also written to log.log in the working directory;
# the stream handler has no level of its own.
file_handler = logging.FileHandler("log.log")
file_handler.setLevel(logging.ERROR)
stream_handler = logging.StreamHandler()

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
for _handler in (file_handler, stream_handler):
    _handler.setFormatter(formatter)
    logger.addHandler(_handler)

logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))

# Runtime configuration, read once from the environment at import time.
TRACKING_URI = os.getenv("MLFLOW_TRACKING_URI", "http://localhost:5000")
GLOBAL_NAMESPACE = os.getenv("namespace", "staging")
MLFLOW_STAGE = os.getenv("stage", "Staging")
backend = os.getenv("backend", "")
ML_SERVER = os.getenv("ML_SERVER", "kserve")
28 |
29 |
class DeployConroller:
    """
    Controller that deploys models straight from the MLflow registry.

    ...

    Methods
    -------
    deploy_controller():
        Reads the models and their deploy manifests from MLflow and syncs
        them to the configured model server.
    """

    def __init__(self):
        # Prefix of the label value passed to the model server's sync.
        self.managed_label = "mdc-direct"

    def __str__(self):
        return type(self).__name__

    def deploy_controller(self):
        """
        Fetch model metadata and deploy manifests from MLflow, then hand
        them to ``seldon.sync`` or ``kserve.sync`` depending on ML_SERVER.
        Any other ML_SERVER value results in no sync call.
        """
        registry = MLflowMetadata(tracking_uri=TRACKING_URI, stage=MLFLOW_STAGE)
        logger.info(f"Mlflow tracking uri {TRACKING_URI}")
        logger.info(f"Mlflow Stage {MLFLOW_STAGE}")
        logger.info(f"backend {backend}")
        models_metadata, deploy_manifests = registry.get_model_metadata(
            check_deploy=True,
            backend=backend,
            manager_label=self.managed_label,
            mlflow_deploy_config="deploy.yaml",
        )
        if not models_metadata:
            return
        if ML_SERVER == "seldon":
            run_sync = seldon.sync
        elif ML_SERVER == "kserve":
            run_sync = kserve.sync
        else:
            return
        # ML_SERVER is "seldon" or "kserve" here, so the label suffix is
        # the server name itself.
        run_sync(
            deploy_manifests,
            models_metadata,
            MLFLOW_STAGE,
            GLOBAL_NAMESPACE,
            f"{self.managed_label}-mlflow-{backend}-{ML_SERVER}",
            "mlflow",
            backend,
        )
83 |
--------------------------------------------------------------------------------
/mlflow_controller/mlservers/kserve.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import re
3 |
4 | from kubernetes import client as KubeClient
5 | from kubernetes import config
6 |
7 | from mlflow_controller.mlservers.utils import mlflow_model_search, update_modeluris
8 | from mlflow_controller.utils.var_extract import var_parser
9 |
# Shared record layout for both handlers of this module's logger.
formatter = logging.Formatter("%(asctime)s:%(name)s:%(message)s")

# ERROR and above are also written to log.log in the working directory;
# the stream handler has no level of its own.
file_handler = logging.FileHandler("log.log")
file_handler.setLevel(logging.ERROR)
stream_handler = logging.StreamHandler()

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
for _handler in (file_handler, stream_handler):
    _handler.setFormatter(formatter)
    logger.addHandler(_handler)

# Kubernetes access is configured at import time: a local kubeconfig is
# tried first, then the in-cluster configuration.
try:
    config.load_kube_config()
except config.ConfigException:
    config.load_incluster_config()
kube_client = KubeClient.CustomObjectsApi()
30 |
31 |
class InvalidVariable(Exception):
    """Raised when a model template names a different backend or registry."""
34 |
35 |
def _apply_inference_service(body, resource_group, namespace):
    """Create the InferenceService, or replace it when the live object differs.

    Only a 404 from the lookup leads to a create; every other API error is
    re-raised instead of being retried as a create.
    """
    name = body["metadata"]["name"]
    try:
        live = kube_client.get_namespaced_custom_object(
            group=resource_group,
            version="v1beta1",
            plural="inferenceservices",
            namespace=namespace,
            name=name,
        )
    except KubeClient.rest.ApiException as exc:
        if exc.status != 404:
            raise
        kube_client.create_namespaced_custom_object(
            group=resource_group,
            version="v1beta1",
            plural="inferenceservices",
            body=body,
            namespace=namespace,
        )
        return
    resource_version = live["metadata"]["resourceVersion"]
    # Drop server-populated fields before comparing with the desired state;
    # any of them may be absent, hence the default.
    for server_field in (
        "creationTimestamp",
        "generation",
        "managedFields",
        "resourceVersion",
        "uid",
        "namespace",
    ):
        live["metadata"].pop(server_field, None)
    live.pop("status", None)
    if body == live:
        logger.info(f"Kserve deployment {name} in sync")
        return
    # replace needs the resourceVersion of the object being overwritten.
    body["metadata"]["resourceVersion"] = resource_version
    kube_client.replace_namespaced_custom_object(
        group=resource_group,
        version="v1beta1",
        plural="inferenceservices",
        body=body,
        name=name,
        namespace=namespace,
    )


def sync(
    deploy_yamls,
    model_metadata,
    stage,
    GLOBAL_NAMESPACE,
    controller_label_value,
    registry_name,
    backend,
):
    """Reconcile KServe InferenceServices with the given manifests.

    For every ``serving.kserve.io`` manifest, each ``storageUri`` template of
    the form ``{{ <registry>.<backend>["<model>"] }}`` is replaced by the
    model source found in ``model_metadata``; the manifest is annotated and
    labelled, then created or replaced in ``GLOBAL_NAMESPACE``. Afterwards
    every InferenceService carrying this controller's label that was not
    applied in this pass is deleted.

    A manifest is applied only when it has at least one ``storageUri`` and
    all of them resolved; failures are logged and the manifest is skipped.

    Args:
        deploy_yamls: parsed manifests (dicts); they are modified in place.
        model_metadata: ``{registry: {backend: {model_name: details}}}``.
        stage: value stored in the ``mdc/mlflow-stage`` annotation.
        GLOBAL_NAMESPACE: namespace to deploy into and prune from.
        controller_label_value: value of ``app.kubernetes.io/mdc-type``.
        registry_name: registry expected in the templates.
        backend: storage backend expected in the templates.
    """
    template_pattern = r"\{\{\s(.*)\s\}\}"
    git_models = []
    for deploy_yaml in deploy_yamls:
        logger.info(deploy_yaml)
        resource_group = deploy_yaml["apiVersion"].split("/")[0]
        logger.info(resource_group)
        if resource_group != "serving.kserve.io":
            continue
        models = list(
            set(mlflow_model_search("storageUri", deploy_yaml, search_result=[]))
        )
        logger.info(f"models {models}")
        rep_deploy_yaml = deploy_yaml
        metadata = rep_deploy_yaml["metadata"]
        # An `annotations:` / `labels:` key left empty in YAML loads as None.
        if not metadata.get("annotations"):
            metadata["annotations"] = {}
        if not metadata.get("labels"):
            metadata["labels"] = {}
        # Resolved up front so the error messages below can always name the
        # manifest they refer to.
        name = metadata.get("name")
        if name is None:
            logger.error("Skipping kserve manifest without metadata.name")
            continue
        # Every model must resolve; one failure vetoes the deployment.
        deploy = bool(models)
        for m in models:
            try:
                model_jinja = re.findall(template_pattern, m)[0]
                model_name, bk_name, rg_name = var_parser(model_jinja)
                if (bk_name != backend) or (rg_name != registry_name):
                    raise InvalidVariable
                model = model_metadata[registry_name][backend][model_name]
                run_id = model["run_id"]
                if backend == "blob":
                    model_source = model["source"].replace("wasbs", "https")
                else:
                    model_source = model["source"]
                rep_deploy_yaml = update_modeluris(
                    rep_deploy_yaml,
                    f'{{{{ {registry_name}.{backend}["{model_name}"] }}}}',
                    model_source,
                )
                rep_deploy_yaml["metadata"]["annotations"][
                    f"mdc/mlflow-{run_id}"
                ] = str(model)
                rep_deploy_yaml["metadata"]["annotations"][
                    "mdc/mlflow-stage"
                ] = stage
                rep_deploy_yaml["metadata"]["labels"][
                    "app.kubernetes.io/mdc-type"
                ] = controller_label_value
                rep_deploy_yaml["metadata"]["labels"][
                    "app.kubernetes.io/managed-by"
                ] = "mdc"
            except InvalidVariable:
                deploy = False
                logger.error(
                    f"Error in variable for model {m} backend {bk_name} registry {rg_name}"
                )
            except Exception as e:
                deploy = False
                logger.error(
                    f"Error deploying {name} Model {m} not found in mlflow {e}"
                )
        if not deploy:
            continue
        logger.info(
            f"deploying kserve deployment {name} in namespace {GLOBAL_NAMESPACE}"
        )
        _apply_inference_service(rep_deploy_yaml, resource_group, GLOBAL_NAMESPACE)
        git_models.append(name)
    # Prune: anything labelled as ours that was not applied above is removed.
    manifests = kube_client.list_namespaced_custom_object(
        group="serving.kserve.io",
        version="v1beta1",
        plural="inferenceservices",
        namespace=GLOBAL_NAMESPACE,
        label_selector=f"app.kubernetes.io/mdc-type={controller_label_value}",
    )
    for i in manifests["items"]:
        model_name = i["metadata"]["name"]
        if model_name in git_models:
            logger.info(f"kserve dpeloyment in sync {model_name}")
        else:
            kube_client.delete_namespaced_custom_object(
                group="serving.kserve.io",
                version="v1beta1",
                plural="inferenceservices",
                name=model_name,
                namespace=GLOBAL_NAMESPACE,
            )
170 |
--------------------------------------------------------------------------------
/mlflow_controller/mlservers/rclone.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 |
def rclone_source(source, backend):
    """Return the model URI in the form handed to the Seldon deployment.

    For the ``blob`` backend a source such as
    ``wasbs://<container>@<account>.blob.core.windows.net/<path>`` becomes
    ``wasbs://<container>/<path>``. Any other backend is returned unchanged.

    Raises:
        ValueError: if a ``blob`` source does not contain ``net/`` followed
            by a path, or has no container segment.
    """
    if backend != "blob":
        return source
    path_match = re.search(r"(?<=net/).*", source)
    # Azure container names may contain hyphens, which \w alone does not match.
    container_match = re.search(r"(?<=/)[\w-]+", source)
    if path_match is None or container_match is None:
        raise ValueError(f"Unrecognised Azure blob source: {source}")
    return "wasbs://" + container_match.group() + "/" + path_match.group()
13 |
--------------------------------------------------------------------------------
/mlflow_controller/mlservers/seldon.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import re
3 |
4 | from kubernetes import client as KubeClient
5 | from kubernetes import config
6 |
7 | from mlflow_controller.mlservers.rclone import rclone_source
8 | from mlflow_controller.mlservers.utils import mlflow_model_search, update_modeluris
9 | from mlflow_controller.utils.var_extract import var_parser
10 |
# Shared record layout for both handlers of this module's logger.
formatter = logging.Formatter("%(asctime)s:%(name)s:%(message)s")

# ERROR and above are also written to log.log in the working directory;
# the stream handler has no level of its own.
file_handler = logging.FileHandler("log.log")
file_handler.setLevel(logging.ERROR)
stream_handler = logging.StreamHandler()

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
for _handler in (file_handler, stream_handler):
    _handler.setFormatter(formatter)
    logger.addHandler(_handler)

# Kubernetes access is configured at import time: a local kubeconfig is
# tried first, then the in-cluster configuration.
try:
    config.load_kube_config()
except config.ConfigException:
    config.load_incluster_config()
kube_client = KubeClient.CustomObjectsApi()
31 |
32 |
class InvalidVariable(Exception):
    """Raised when a model template names a different backend or registry."""
35 |
36 |
def _apply_seldon_deployment(body, resource_group, namespace):
    """Create the SeldonDeployment, or replace it when the live object differs.

    Only a 404 from the lookup leads to a create; every other API error is
    re-raised instead of being retried as a create.
    """
    name = body["metadata"]["name"]
    try:
        live = kube_client.get_namespaced_custom_object(
            group=resource_group,
            version="v1",
            plural="seldondeployments",
            namespace=namespace,
            name=name,
        )
    except KubeClient.rest.ApiException as exc:
        if exc.status != 404:
            raise
        kube_client.create_namespaced_custom_object(
            group=resource_group,
            version="v1",
            plural="seldondeployments",
            body=body,
            namespace=namespace,
        )
        return
    resource_version = live["metadata"]["resourceVersion"]
    # Drop server-populated fields before comparing with the desired state;
    # any of them may be absent, hence the default.
    for server_field in (
        "creationTimestamp",
        "generation",
        "managedFields",
        "resourceVersion",
        "uid",
        "namespace",
    ):
        live["metadata"].pop(server_field, None)
    live.pop("status", None)
    if body == live:
        logger.info(f"seldon deployment {name} in sync")
        return
    # replace needs the resourceVersion of the object being overwritten.
    body["metadata"]["resourceVersion"] = resource_version
    kube_client.replace_namespaced_custom_object(
        group=resource_group,
        version="v1",
        plural="seldondeployments",
        body=body,
        name=name,
        namespace=namespace,
    )


def sync(
    deploy_yamls,
    model_metadata,
    stage,
    GLOBAL_NAMESPACE,
    controller_label_value,
    registry_name,
    backend,
):
    """Reconcile SeldonDeployments with the given manifests.

    For every ``machinelearning.seldon.io`` manifest, each ``modelUri``
    template of the form ``{{ <registry>.<backend>["<model>"] }}`` is
    replaced by the model source found in ``model_metadata`` (passed through
    ``rclone_source``); the manifest is annotated and labelled, then created
    or replaced in ``GLOBAL_NAMESPACE``. Afterwards every SeldonDeployment
    carrying this controller's label that was not applied in this pass is
    deleted.

    A manifest is applied only when it has at least one ``modelUri`` and all
    of them resolved; failures are logged and the manifest is skipped.

    Args:
        deploy_yamls: parsed manifests (dicts); they are modified in place.
        model_metadata: ``{registry: {backend: {model_name: details}}}``.
        stage: value stored in the ``mdc/mlflow-stage`` annotation.
        GLOBAL_NAMESPACE: namespace to deploy into and prune from.
        controller_label_value: value of ``app.kubernetes.io/mdc-type``.
        registry_name: registry expected in the templates.
        backend: storage backend expected in the templates.
    """
    template_pattern = r"\{\{\s(.*)\s\}\}"
    git_models = []
    for deploy_yaml in deploy_yamls:
        resource_group = deploy_yaml["apiVersion"].split("/")[0]
        if resource_group != "machinelearning.seldon.io":
            continue
        models = list(
            set(mlflow_model_search("modelUri", deploy_yaml, search_result=[]))
        )
        logger.info(f"models {models}")
        rep_deploy_yaml = deploy_yaml
        metadata = rep_deploy_yaml["metadata"]
        # An `annotations:` / `labels:` key left empty in YAML loads as None.
        if not metadata.get("annotations"):
            metadata["annotations"] = {}
        if not metadata.get("labels"):
            metadata["labels"] = {}
        # Resolved up front so the error messages below can always name the
        # manifest they refer to.
        name = metadata.get("name")
        if name is None:
            logger.error("Skipping seldon manifest without metadata.name")
            continue
        # Every model must resolve; one failure vetoes the deployment.
        deploy = bool(models)
        for m in models:
            try:
                model_jinja = re.findall(template_pattern, m)[0]
                model_name, bk_name, rg_name = var_parser(model_jinja)
                if (bk_name != backend) or (rg_name != registry_name):
                    raise InvalidVariable
                model = model_metadata[registry_name][backend][model_name]
                run_id = model["run_id"]
                rep_deploy_yaml = update_modeluris(
                    rep_deploy_yaml,
                    f'{{{{ {registry_name}.{backend}["{model_name}"] }}}}',
                    rclone_source(model["source"], backend),
                )
                rep_deploy_yaml["metadata"]["annotations"][
                    f"mdc/mlflow-{run_id}"
                ] = str(model)
                rep_deploy_yaml["metadata"]["annotations"][
                    "mdc/mlflow-stage"
                ] = stage
                rep_deploy_yaml["metadata"]["labels"][
                    "app.kubernetes.io/mdc-type"
                ] = controller_label_value
                rep_deploy_yaml["metadata"]["labels"][
                    "app.kubernetes.io/managed-by"
                ] = "mdc"
            except InvalidVariable:
                deploy = False
                logger.error(
                    f"Error in variable for model {m} backend {bk_name} registry {rg_name}"
                )
            except Exception as e:
                deploy = False
                logger.error(
                    f"Error deploying {name} Model {m} not found in mlflow {e}"
                )
        if not deploy:
            continue
        logger.info(
            f"deploying seldon deployment {name} in namespace {GLOBAL_NAMESPACE}"
        )
        _apply_seldon_deployment(rep_deploy_yaml, resource_group, GLOBAL_NAMESPACE)
        git_models.append(name)
    # Prune: anything labelled as ours that was not applied above is removed.
    manifests = kube_client.list_namespaced_custom_object(
        group="machinelearning.seldon.io",
        version="v1",
        plural="seldondeployments",
        namespace=GLOBAL_NAMESPACE,
        label_selector=f"app.kubernetes.io/mdc-type={controller_label_value}",
    )
    for i in manifests["items"]:
        model_name = i["metadata"]["name"]
        if model_name in git_models:
            logger.info(f"seldon deployment in sync {model_name}")
        else:
            kube_client.delete_namespaced_custom_object(
                group="machinelearning.seldon.io",
                version="v1",
                plural="seldondeployments",
                name=model_name,
                namespace=GLOBAL_NAMESPACE,
            )
166 |
--------------------------------------------------------------------------------
/mlflow_controller/mlservers/utils.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 |
def mlflow_model_search(lookup_key, json_dict, search_result=None):
    """Collect every value stored under ``lookup_key`` in a nested structure.

    Dicts and lists are walked recursively, in iteration order. A matching
    value is recorded and then searched itself as well.

    Args:
        lookup_key: dictionary key to look for.
        json_dict: dict, list or scalar to search; scalars yield nothing.
        search_result: optional list to append matches to. A new list is
            created when omitted, so results never leak between calls.

    Returns:
        The list of matches (``search_result`` itself when one was given).
    """
    if search_result is None:
        search_result = []
    if isinstance(json_dict, dict):
        for key, value in json_dict.items():
            if key == lookup_key:
                search_result.append(value)
            mlflow_model_search(lookup_key, value, search_result)
    elif isinstance(json_dict, list):
        for element in json_dict:
            mlflow_model_search(lookup_key, element, search_result)
    return search_result
14 |
15 |
def update_modeluris(json_para, search_para, replace_para):
    """Return a copy of ``json_para`` with matching dict values replaced.

    The structure is round-tripped through JSON; while decoding, every dict
    value equal to ``search_para`` becomes ``replace_para``. List elements
    that are not dicts are left alone, and the input is not modified.
    """

    def _swap_values(mapping):
        # Called by json.loads for every decoded object, innermost first.
        return {
            key: replace_para if value == search_para else value
            for key, value in mapping.items()
        }

    serialized = json.dumps(json_para)
    return json.loads(serialized, object_hook=_swap_values)
25 |
--------------------------------------------------------------------------------
/mlflow_controller/registries/mlflow.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 |
4 | from mlflow.tracking import MlflowClient
5 |
6 | from mlflow_controller.registries import mlflow_backend
7 |
# Shared record layout for both handlers of this module's logger.
formatter = logging.Formatter("%(asctime)s:%(name)s:%(message)s")

# ERROR and above are also written to log.log in the working directory;
# the stream handler has no level of its own.
file_handler = logging.FileHandler("log.log")
file_handler.setLevel(logging.ERROR)
stream_handler = logging.StreamHandler()

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
for _handler in (file_handler, stream_handler):
    _handler.setFormatter(formatter)
    logger.addHandler(_handler)

logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))
24 |
25 |
class MLflowMetadata:
    """Reads registered-model metadata (and deploy manifests) from MLflow."""

    def __init__(self, tracking_uri, stage):
        """Create the MLflow client and remember the stage to select.

        Args:
            tracking_uri: MLflow tracking server URI.
            stage: only model versions whose current stage equals this
                value are reported.
        """
        self.mlflow_client = MlflowClient(tracking_uri=tracking_uri)
        logger.debug("Mlflow client initialized")
        self.object_init = mlflow_backend.Artifact()
        self.stage = stage

    def __str__(self):
        return self.__class__.__name__

    def get_model_metadata(
        self,
        check_deploy=False,
        manager_label="mdc-mlflow-direct",
        backend="",
        mlflow_deploy_config="deploy.yaml",
    ):
        """Collect metadata for every model version in the configured stage.

        Args:
            check_deploy: when true, also fetch the deploy manifest stored
                next to each model's run artifacts.
            manager_label: value written to the manifest's
                ``app.kubernetes.io/mdc-type`` label.
            backend: object-store backend (``gcs``, ``blob`` or ``s3``);
                also the key the metadata is nested under.
            mlflow_deploy_config: artifact path identifying the manifest.

        Returns:
            ``({"mlflow": {backend: {model_name: details}}}, manifests)``
            where ``manifests`` is empty unless ``check_deploy`` is true.

        Raises:
            ValueError: a manifest has to be fetched but ``backend`` is not
                one of the supported object stores.
        """
        mlflow_models_metadata = {}
        read_deploy_yaml = []
        for registered_model in self.mlflow_client.list_registered_models():
            for version in registered_model.latest_versions:
                if version.current_stage != self.stage:
                    continue
                model_details = dict(version)
                model_run_id = model_details["run_id"]
                run_details = dict(self.mlflow_client.get_run(model_run_id).info)
                name = model_details["name"]
                model_template = f'{{{{ mlflow.{backend}["{name}"] }}}}'
                artifact_uri = run_details["artifact_uri"]
                mlflow_models_metadata[name] = {
                    "name": name,
                    "run_id": model_details["run_id"],
                    "source": model_details["source"],
                    "status": model_details["status"],
                    "version": model_details["version"],
                    "artifact_uri": artifact_uri,
                }
                logger.debug(artifact_uri)
                if not check_deploy:
                    continue
                for file in self.mlflow_client.list_artifacts(model_run_id):
                    if file.path != mlflow_deploy_config:
                        continue
                    deploy_yaml = self._download_deploy_yaml(backend, artifact_uri)
                    # The model URI is left as a template; the model-server
                    # sync step substitutes the real source.
                    predictor = deploy_yaml["spec"]["predictors"][0]
                    predictor["graph"]["modelUri"] = model_template
                    if not predictor.get("annotations"):
                        predictor["annotations"] = {}
                    predictor["annotations"]["predictor_version"] = model_details[
                        "version"
                    ]
                    metadata = deploy_yaml["metadata"]
                    # Manifests may omit either mapping (or leave it empty).
                    if not metadata.get("annotations"):
                        metadata["annotations"] = {}
                    if not metadata.get("labels"):
                        metadata["labels"] = {}
                    metadata["labels"]["app.kubernetes.io/mdc-type"] = manager_label
                    read_deploy_yaml.append(deploy_yaml)
        ml_metadata = {"mlflow": {f"{backend}": mlflow_models_metadata}}
        return ml_metadata, read_deploy_yaml

    def _download_deploy_yaml(self, backend, artifact_uri):
        """Fetch and parse the deploy manifest from the given object store."""
        if backend == "gcs":
            return self.object_init.gcp_bucket(artifact_uri)
        if backend == "blob":
            return self.object_init.azure_blob(artifact_uri)
        if backend == "s3":
            return self.object_init.aws_s3(artifact_uri)
        raise ValueError(f"unsupported Object Storage {backend!r}")
95 |
--------------------------------------------------------------------------------
/mlflow_controller/registries/mlflow_backend.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import re
4 | from io import BytesIO
5 |
6 | import boto3
7 | import yaml
8 | from azure.identity import DefaultAzureCredential
9 | from azure.storage.blob import BlobServiceClient
10 | from google.cloud.storage import Client as GoogleClient
11 |
# Shared record layout for both handlers of this module's logger.
formatter = logging.Formatter("%(asctime)s:%(name)s:%(message)s")

# ERROR and above are also written to log.log in the working directory;
# the stream handler has no level of its own.
file_handler = logging.FileHandler("log.log")
file_handler.setLevel(logging.ERROR)
stream_handler = logging.StreamHandler()

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
for _handler in (file_handler, stream_handler):
    _handler.setFormatter(formatter)
    logger.addHandler(_handler)

logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))
28 |
29 |
class Artifact:
    """Downloads and parses the deploy manifest stored with a model's artifacts.

    Each method takes the run's artifact URI and returns the parsed YAML of
    ``self.mlflow_deploy_config`` from the corresponding object store.
    """

    def __init__(self):
        logger.debug("Class Artifact initalized")
        self.mlflow_deploy_config = "deploy.yaml"

    def gcp_bucket(self, artifact_uri):
        """Read the manifest from Google Cloud Storage.

        The third ``/``-separated part of ``artifact_uri`` is used as the
        bucket name and the remainder as the object prefix.

        Raises:
            FileNotFoundError: the manifest object does not exist.
        """
        google_client = GoogleClient()
        uri_parts = artifact_uri.split("/")
        object_name = "/".join(uri_parts[3:]) + f"/{self.mlflow_deploy_config}"
        bucket = google_client.get_bucket(uri_parts[2])
        blob = bucket.get_blob(object_name)
        if blob is None:
            # get_blob returns None rather than raising for a missing object.
            raise FileNotFoundError(f"{object_name} not found in bucket {uri_parts[2]}")
        downloaded_file = blob.download_as_text(encoding="utf-8")
        return yaml.safe_load(downloaded_file)

    def azure_blob(self, artifact_uri):
        """Read the manifest from Azure Blob Storage.

        The container is the text between ``//`` and ``@`` in
        ``artifact_uri``; the account is the first dot-separated label after
        ``@``. The last path segment of the URI is replaced by
        ``artifacts/<manifest>``.
        """
        container_re = r"(?<=\/\/)(.*)(?=\@)"
        account_re = r"(?<=\@)(.*)(?=[\.])"
        container = re.search(container_re, artifact_uri).group(1)
        acc_name = re.search(account_re, artifact_uri).group(1).split(".")[0]
        STORAGEACCOUNTURL = f"https://{acc_name}.blob.core.windows.net"
        default_credential = DefaultAzureCredential()
        blob_service_client_instance = BlobServiceClient(
            account_url=STORAGEACCOUNTURL, credential=default_credential
        )
        blob_location = (
            "/".join(artifact_uri.split("blob.core.windows.net")[1].split("/")[1:-1])
            + f"/artifacts/{self.mlflow_deploy_config}"
        )
        blob_client_instance = blob_service_client_instance.get_blob_client(
            container, blob_location, snapshot=None
        )
        blob_data = blob_client_instance.download_blob()
        bl = blob_data.readall()
        # safe_load instead of yaml.load(FullLoader): the manifest is plain
        # data fetched from external storage, and gcp_bucket already parses
        # it this way.
        return yaml.safe_load(bl)

    def aws_s3(self, artifact_uri):
        """Read the manifest from S3 (or an S3-compatible store).

        When ``MLFLOW_S3_ENDPOINT_URL`` is set it is used as the client
        endpoint; otherwise the boto3 default endpoint applies.
        """
        session = boto3.Session()
        s3_client = session.client(
            "s3", endpoint_url=os.getenv("MLFLOW_S3_ENDPOINT_URL") or None
        )
        bucket, _, prefix = artifact_uri.replace("s3://", "").partition("/")
        key = f"{prefix}/{self.mlflow_deploy_config}"
        f = BytesIO()
        s3_client.download_fileobj(bucket, key, f)
        # safe_load for the same reason as in azure_blob.
        return yaml.safe_load(f.getvalue())
80 |
--------------------------------------------------------------------------------
/mlflow_controller/utils/var_extract.py:
--------------------------------------------------------------------------------
1 | import ast
2 | import re
3 |
4 |
def var_parser(placeholder):
    """Split a ``registry.backend["model"]`` placeholder into its parts.

    Returns:
        ``(model_name, vendor_name, registry_name)``: the first element of
        the bracketed literal, the text between the first ``.`` and the last
        ``[`` (with ``.`` and ``[`` removed), and the leading run of
        letters, digits and underscores.
    """
    # The bracketed part is evaluated as a Python literal, e.g. ["iris"].
    subscript = re.search(r"\[.*\]", placeholder).group()
    model_name = ast.literal_eval(subscript)[0]

    vendor_span = re.search(r"\..*\[", placeholder).group()
    vendor_name = vendor_span.translate(str.maketrans("", "", ".["))

    registry_name = re.search(r"^[a-zA-Z0-9_]*", placeholder).group()
    return model_name, vendor_name, registry_name
16 |
17 |
def validate_variable(placeholder):
    """Match ``placeholder`` against the ``word.word["..."]`` template shape.

    Returns the match object when the start of ``placeholder`` has that
    shape, otherwise ``None``.
    """
    return re.match(r"\w+\.\w+\[\".+\"\]", placeholder, re.IGNORECASE)
21 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | mlflow==1.25
2 | kubernetes==22.6.0
3 | google==3.0.0
4 | gcloud==0.18.3
5 | apscheduler
6 | azure-storage-blob==12.14.1
7 | azure-identity==1.12.0
8 | boto3==1.26.25
9 | GitPython>=3.1.30
10 | google-apitools==0.5.32
11 | google-auth==2.1.0
12 | google-auth-oauthlib==0.4.6
13 | google-cloud==0.34.0
14 | google-cloud-core==2.0.0
15 | google-cloud-storage==1.42.2
16 | google-crc32c==1.2.0
17 | google-pasta==0.2.0
18 | google-reauth==0.1.1
19 | google-resumable-media==2.0.3
20 | googleapis-common-protos==1.52.0
21 |
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
from mlflow_controller.gitops import GitopsMDC


def main():
    """Run a single GitOps reconciliation pass."""
    controller = GitopsMDC()
    controller.gitops_mlflow_controller()
    # For the MLflow-direct mode, use DeployConroller().deploy_controller()
    # from mlflow_controller.mlflow_direct instead.


# Guarded so that importing this module does not start a clone and sync.
if __name__ == "__main__":
    main()
8 |
--------------------------------------------------------------------------------
/tests/docker_build_push.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | echo "Installing build test image and push ..."
4 | docker build -t tachyongroup/mdc-test:$GITHUB_SHA .
5 | # docker push tachyongroup/mdc-test:$GITHUB_SHA
6 | kind load docker-image tachyongroup/mdc-test:$GITHUB_SHA
7 |
--------------------------------------------------------------------------------
/tests/install_gitea.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -euo pipefail
3 |
4 | helm repo add gitea-charts https://dl.gitea.io/charts/
5 | helm install gitea gitea-charts/gitea --set "gitea.admin.username=mdcadmin" --set "gitea.admin.password=password" --set "gitea.admin.email=mdcadmin@local.domain"
6 | sleep 30
7 | kubectl wait --for=condition=ready pod -l 'app.kubernetes.io/name in (gitea)' --timeout=180s
8 |
9 | kubectl --namespace default port-forward svc/gitea-http 3000:3000 &
10 | GITEA_PID=$!
11 |
--------------------------------------------------------------------------------
/tests/install_istio.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | echo "Installing Istio service mesh ..."
4 | helm repo add istio https://istio-release.storage.googleapis.com/charts
5 | helm repo update
6 | kubectl create namespace istio-system
7 | helm install istio-base istio/base -n istio-system
8 | helm install istiod istio/istiod -n istio-system --wait
9 | helm status istiod -n istio-system
10 |
11 | echo "Waiting for Istio service mesh to be ready ..."
12 | kubectl wait --for=condition=ready pod -l 'app in (istiod)' --timeout=180s -n istio-system
--------------------------------------------------------------------------------
/tests/install_kserve.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | echo "Installing Kserve ..."
4 | curl -s "https://raw.githubusercontent.com/kserve/kserve/release-0.9/hack/quick_install.sh" | bash
5 |
--------------------------------------------------------------------------------
/tests/install_kserve_deployment_controller.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | echo "Installing Kserve Deployment Controller ..."
4 | kubectl create ns staging
5 | kubectl create ns production
6 | kubectl create secret generic github-secret -n mlflow --from-literal=githubtoken=password
7 | kubectl apply -f tests/repo-test/staging/kserve-sa.yaml -n staging
8 |
9 | helm install mdc-staging charts/mlflow-controller -n mlflow --set image.tag=$GITHUB_SHA --set image.pullPolicy=Never --set image.repository=docker.io/tachyongroup/mdc-test --set mlflow.backend=s3 --set gitops.deploymentLocation=staging/ --set mlserver=kserve --set gitops.repository=gitea-http.default.svc.cluster.local:3000/mdcadmin/repo-test --set gitops.protocol=http
10 |
11 | kubectl get deployment -n mlflow
12 | kubectl get cm -n mlflow
13 | kubectl get po -n mlflow
14 | echo "Waiting for Deployment Controller to be ready ..."
15 | export POD_NAME=$(kubectl get pods --namespace mlflow -l "app.kubernetes.io/instance=mdc-staging" -o jsonpath="{.items[0].metadata.name}")
16 |
17 | kubectl describe po $POD_NAME -n mlflow
18 | sleep 180
19 | kubectl logs deployment/mdc-staging-mlflow-controller -n mlflow
20 | #kubectl get inferenceservice --all-namespaces
21 | kubectl get inferenceservice sklearn-iris-minio -n staging -o yaml
22 |
23 | export MLFLOW_S3_ENDPOINT_URL=http://localhost:9000
24 | export AWS_ACCESS_KEY_ID=minioadmin
25 | export AWS_SECRET_ACCESS_KEY=minioadmin
26 | export MLFLOW_TRACKING_URI=http://localhost:5000
27 | python ./tests/mlflow/list_model.py $mlserver
28 |
29 | kubectl wait --for=condition=ready inferenceservice sklearn-iris-miniot -n staging --timeout=380s
30 | kubectl describe inferenceservice sklearn-iris-miniot -n staging
--------------------------------------------------------------------------------
/tests/install_mlflow.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | echo "Installing Mlflow ..."
4 | kubectl create ns mlflow
5 | helm repo add minio https://charts.bitnami.com/bitnami
6 | helm install minio minio/minio -n mlflow --set auth.rootUser=minioadmin --set auth.rootPassword=minioadmin --set livenessProbe.enabled=false --set readinessProbe.enabled=false #--set mode=distributed
7 |
8 | export ROOT_USER=$(kubectl get secret --namespace mlflow minio -o jsonpath="{.data.root-user}" | base64 -d)
9 | export ROOT_PASSWORD=$(kubectl get secret --namespace mlflow minio -o jsonpath="{.data.root-password}" | base64 -d)
10 |
11 | kubectl wait --for=condition=ready pod -l 'app.kubernetes.io/name in (minio)' --timeout=380s -n mlflow
12 | kubectl apply -f tests/mlflow-cm.yaml -n mlflow
13 | helm repo add rocket9-code https://rocket9-code.github.io/hello-mlflow
14 | helm install mlflow rocket9-code/mlflow -n mlflow --set artifact.ArtifactRoot=s3://artifacts --set envFromconfigMap=minio-cm --set image.pullPolicy=Always
15 | kubectl get po -n mlflow
16 | export POD_NAME=$(kubectl get pods --namespace mlflow -l "app.kubernetes.io/name=mlflow,app.kubernetes.io/instance=mlflow" -o jsonpath="{.items[0].metadata.name}")
17 | kubectl describe po $POD_NAME -n mlflow
18 | kubectl wait --for=condition=ready pod -l 'app.kubernetes.io/name in (mlflow)' --timeout=380s -n mlflow
19 |
--------------------------------------------------------------------------------
/tests/install_seldon_core.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | echo "Installing Seldon Core ..."
4 | kubectl create namespace seldon-system
5 | helm install seldon-core seldon-core-operator \
6 | --repo https://storage.googleapis.com/seldon-charts \
7 | --set usageMetrics.enabled=true \
8 | --set istio.enabled=true \
9 | --namespace seldon-system
10 | echo "Waiting for Seldon Core to be ready ..."
11 | kubectl wait --for=condition=ready pod -l 'app in (seldon)' --timeout=180s -n seldon-system
--------------------------------------------------------------------------------
/tests/install_seldon_deployment_controller.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Install the MLflow deployment controller chart (release `mdc-staging`)
# configured for Seldon, then run the Python end-to-end checks against it.
set -e
echo "Installing Seldon Deployment Controller ..."
kubectl create ns staging
kubectl create secret generic github-secret -n mlflow --from-literal=githubtoken=password

helm install mdc-staging charts/mlflow-controller -n mlflow \
    --set image.tag="$GITHUB_SHA" \
    --set image.pullPolicy=Never \
    --set image.repository=docker.io/tachyongroup/mdc-test \
    --set mlflow.backend=s3 \
    --set gitops.deploymentLocation=staging/ \
    --set mlserver=seldon \
    --set gitops.repository=gitea-http.default.svc.cluster.local:3000/mdcadmin/repo-test \
    --set gitops.protocol=http
kubectl apply -f tests/repo-test/staging/seldon-secret.yaml -n staging
kubectl get deployment -n mlflow
kubectl get cm -n mlflow
kubectl get po -n mlflow

echo "Waiting for Deployment Controller to be ready ..."
export POD_NAME=$(kubectl get pods --namespace mlflow -l "app.kubernetes.io/instance=mdc-staging" -o jsonpath="{.items[0].metadata.name}")
sleep 180
kubectl describe po "$POD_NAME" -n mlflow
kubectl wait --for=condition=ready pod -l 'app.kubernetes.io/instance in (mdc-staging)' --timeout=380s -n mlflow


kubectl describe po "$POD_NAME" -n mlflow
sleep 180
kubectl logs deployment/mdc-staging-mlflow-controller -n mlflow
kubectl get seldondeployment --all-namespaces
kubectl get seldondeployment mlflow-var-minio -n staging -o yaml

export MLFLOW_S3_ENDPOINT_URL=http://localhost:9000
export AWS_ACCESS_KEY_ID=minioadmin
export AWS_SECRET_ACCESS_KEY=minioadmin
export MLFLOW_TRACKING_URI=http://localhost:5000
# list_model.py reads the model server from its first argument. This script
# never sets $mlserver itself, so fall back to the server it just installed
# (seldon) when the caller's environment does not provide one.
python ./tests/mlflow/list_model.py "${mlserver:-seldon}"


python ./tests/mlflow/test_deploy.py
--------------------------------------------------------------------------------
/tests/kind-cluster-1-24.yaml:
--------------------------------------------------------------------------------
1 | # This testing option is available for testing projects that don't yet support k8s 1.25
2 | apiVersion: kind.x-k8s.io/v1alpha4
3 | kind: Cluster
4 | # Configure registry for KinD.
5 | containerdConfigPatches:
6 | - |-
7 | [plugins."io.containerd.grpc.v1.cri".registry.mirrors."$REGISTRY_NAME:$REGISTRY_PORT"]
8 | endpoint = ["http://$REGISTRY_NAME:$REGISTRY_PORT"]
9 | # This is needed in order to support projected volumes with service account tokens.
10 | # See: https://kubernetes.slack.com/archives/CEKK1KTN2/p1600268272383600
11 | kubeadmConfigPatches:
12 | - |
13 | apiVersion: kubeadm.k8s.io/v1beta2
14 | kind: ClusterConfiguration
15 | metadata:
16 | name: config
17 | apiServer:
18 | extraArgs:
19 | "service-account-issuer": "kubernetes.default.svc"
20 | "service-account-signing-key-file": "/etc/kubernetes/pki/sa.key"
21 | nodes:
22 | - role: control-plane
23 | image: kindest/node:v1.22.15@sha256:7d9708c4b0873f0fe2e171e2b1b7f45ae89482617778c1c875f1053d4cef2e41
24 | - role: worker
25 | image: kindest/node:v1.22.15@sha256:7d9708c4b0873f0fe2e171e2b1b7f45ae89482617778c1c875f1053d4cef2e41
26 | - role: worker
27 | image: kindest/node:v1.22.15@sha256:7d9708c4b0873f0fe2e171e2b1b7f45ae89482617778c1c875f1053d4cef2e41
28 |
29 | - role: worker
30 | image: kindest/node:v1.22.15@sha256:7d9708c4b0873f0fe2e171e2b1b7f45ae89482617778c1c875f1053d4cef2e41
--------------------------------------------------------------------------------
/tests/log_mlflow_model.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Install the Python dependencies used by the test scripts, then train and
# register the iris demo models through the port-forwarded MLflow/MinIO.
set -e
echo "Installing Mlflow ..."
pip install mlflow==1.25.1
# Quoted so the shell cannot glob-expand the `*` against files in the
# current directory before pip sees the requirement specifier.
pip install "protobuf==3.20.*"
pip install scikit-learn==0.23.2
pip install pandas==0.23.4
pip install boto3==1.22.9
pip install minio
pip install kubernetes
pip install termcolor
export MLFLOW_S3_ENDPOINT_URL=http://localhost:9000
export AWS_ACCESS_KEY_ID=minioadmin
export AWS_SECRET_ACCESS_KEY=minioadmin
export MLFLOW_TRACKING_URI=http://localhost:5000
# Arguments: model version to transition, target stage.
python ./tests/mlflow/iris.py 1 staging
17 |
18 |
--------------------------------------------------------------------------------
/tests/mlflow-cm.yaml:
--------------------------------------------------------------------------------
1 | kind: ConfigMap
2 | apiVersion: v1
3 | metadata:
4 | name: minio-cm
5 | namespace: mlflow
6 | data:
7 | MLFLOW_S3_ENDPOINT_URL: 'http://minio.mlflow.svc.cluster.local'
8 | AWS_ACCESS_KEY_ID: minioadmin
9 | AWS_SECRET_ACCESS_KEY: minioadmin
10 |
--------------------------------------------------------------------------------
/tests/mlflow/iris.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | import mlflow
4 | import mlflow.sklearn
5 | import pandas as pd
6 | from minio import Minio
7 | from mlflow.tracking import MlflowClient
8 | from sklearn import datasets
9 | from sklearn.ensemble import RandomForestClassifier
10 | from sklearn.metrics import roc_auc_score
11 | from sklearn.model_selection import train_test_split
12 |
# Best-effort bootstrap of the MinIO bucket, executed at import time.
# Any failure is only printed so that importing this module (or running it
# as a script) carries on regardless.
_ARTIFACT_BUCKET = "artifacts"

try:
    client = Minio(
        "localhost:9000",
        access_key="minioadmin",
        secret_key="minioadmin",
        secure=False,
    )

    # Create the bucket, then attach the access policy to it.
    client.make_bucket(_ARTIFACT_BUCKET)
    policy = '{"Version":"2012-10-17","Statement":[{"Action":["s3:GetBucketLocation","s3:ListBucket","s3:ListBucketMultipartUploads"],"Effect":"Allow","Principal":{"AWS":["*"]},"Resource":["arn:aws:s3:::artifacts"],"Sid":""},{"Action":["s3:AbortMultipartUpload","s3:DeleteObject","s3:GetObject","s3:ListMultipartUploadParts","s3:PutObject"],"Effect":"Allow","Principal":{"AWS":["*"]},"Resource":["arn:aws:s3:::artifacts/*"],"Sid":""}]}'
    client.set_bucket_policy(bucket_name=_ARTIFACT_BUCKET, policy=policy)
except Exception as exc:
    print(exc)
24 |
25 |
def main(version, stage, MODEL_NAME):
    """Train a random forest on iris, log it to MLflow and transition a version.

    Args:
        version: Registered-model version passed to
            ``transition_model_version_stage``.
        stage: Target stage for that version.
        MODEL_NAME: Used both as the experiment name and as the
            registered-model name.
    """
    iris = datasets.load_iris()
    # The dataset's own feature names are the four measurement columns, so
    # one list serves the frame construction and both feature selections.
    feature_columns = list(iris.feature_names)
    iris_df = pd.DataFrame(iris.data, columns=feature_columns)
    iris_df["target"] = iris.target

    train_df, test_df = train_test_split(
        iris_df, test_size=0.3, random_state=42, stratify=iris_df["target"]
    )
    X_train, y_train = train_df[feature_columns], train_df["target"]
    X_test, y_test = test_df[feature_columns], test_df["target"]

    EXPERIMENT_NAME = MODEL_NAME
    mlflow_client = MlflowClient()

    # Reuse the experiment when it already exists, otherwise create it.
    experiment_details = mlflow_client.get_experiment_by_name(EXPERIMENT_NAME)
    if experiment_details is not None:
        experiment_id = experiment_details.experiment_id
    else:
        experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)

    with mlflow.start_run(experiment_id=experiment_id, run_name="iris dataset rf run"):
        mlflow.log_param("max_depth", 10)
        mlflow.log_param("random_state", 0)
        mlflow.log_param("n_estimators", 100)
        clf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=0)
        clf.fit(X_train, y_train)

        roc_auc_score_val = roc_auc_score(
            y_test, clf.predict_proba(X_test), multi_class="ovr"
        )
        mlflow.log_metric("test roc_auc_score", roc_auc_score_val)

        # Log the fitted model as a run artifact, then register it as a new
        # version of MODEL_NAME.
        logged_model = mlflow.sklearn.log_model(clf, artifact_path="model")
        mlflow.register_model(logged_model.model_uri, MODEL_NAME)

        # NOTE(review): the transition targets the caller-supplied `version`,
        # not the version returned by register_model above - confirm intended.
        mlflow_client.transition_model_version_stage(
            name=MODEL_NAME, version=version, stage=stage
        )
103 |
104 |
if __name__ == "__main__":
    # Usage: iris.py <version> <stage>; registers five models, "iris demo0"
    # through "iris demo4".
    for idx in range(5):
        model_name = f"iris demo{idx}"
        print(model_name)
        main(version=sys.argv[1], stage=sys.argv[2], MODEL_NAME=model_name)
111 |
--------------------------------------------------------------------------------
/tests/mlflow/list_model.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 |
4 | from git import Repo
5 | from iris import main
6 | from kubernetes import client as KubeClient
7 | from kubernetes import config
8 | from mlflow.tracking import MlflowClient
9 | from termcolor import colored
10 |
11 | try:
12 | config.load_kube_config()
13 | except config.ConfigException:
14 | config.load_incluster_config()
15 | kube_client = KubeClient.CustomObjectsApi()
16 |
17 | timeout = time.time() + 60 * 2
18 |
19 |
20 | print(colored("Test", "red"), colored("no1", "green"))
21 |
22 |
def test():
    """Poll the cluster until deployed model URIs match MLflow's Staging sources.

    Collects the ``source`` of every latest model version whose stage is
    "Staging", then repeatedly reads the custom resource selected by
    ``sys.argv[1]`` ("seldon" or "kserve") and compares its model URIs with
    those sources.

    Raises:
        TimeoutError: when the module-level ``timeout`` deadline passes
            before the URIs match.
    """
    mlflow_client = MlflowClient()
    mlflow_models_metadata = {}
    for registered_model in mlflow_client.list_registered_models():
        for version in registered_model.latest_versions:
            if version.current_stage != "Staging":
                continue
            model_details = dict(version)
            run_details = dict(mlflow_client.get_run(model_details["run_id"]).info)
            name = model_details["name"]
            mlflow_models_metadata[name] = {
                "name": name,
                "run_id": model_details["run_id"],
                "source": model_details["source"],
                "status": model_details["status"],
                "version": model_details["version"],
                "artifact_uri": run_details["artifact_uri"],
            }

    # Initialised so the timeout branch can print it even when sys.argv[1]
    # selects neither server and nothing was fetched.
    manifest = None
    while True:
        if sys.argv[1] == "seldon":
            manifest = kube_client.get_namespaced_custom_object(
                group="machinelearning.seldon.io",
                version="v1",
                plural="seldondeployments",
                namespace="staging",
                name="mlflow-var-minio",
            )
            graph = manifest["spec"]["predictors"][0]["graph"]
            demo1 = graph["children"][0]["modelUri"]
            demo2 = graph["children"][0]["children"][0]["modelUri"]
            demo3 = graph["children"][1]["modelUri"]
            demo4 = graph["modelUri"]
            # NOTE(review): demo3 is read and printed but not compared -
            # kept as in the original check.
            if (
                demo1 == mlflow_models_metadata["iris demo1"]["source"]
                and demo2 == mlflow_models_metadata["iris demo2"]["source"]
                and demo4 == mlflow_models_metadata["iris demo4"]["source"]
            ):
                print(demo1, demo2, demo3, demo4)
                print("test passed", mlflow_models_metadata)
                break
        elif sys.argv[1] == "kserve":
            manifest = kube_client.get_namespaced_custom_object(
                group="serving.kserve.io",
                version="v1beta1",
                plural="inferenceservices",
                namespace="staging",
                name="sklearn-iris-minio",
            )
            demo2 = manifest["spec"]["predictor"]["model"]["storageUri"]
            if demo2 == mlflow_models_metadata["iris demo2"]["source"]:
                print(demo2)
                print("test passed", mlflow_models_metadata)
                break
        if time.time() > timeout:
            print(mlflow_models_metadata)
            print(manifest)
            print(sys.argv[1])
            # A bare string is not an exception; raising one produced a
            # TypeError instead of a timeout.
            raise TimeoutError("Timeout error")
        # Pause between polls instead of hammering the API server.
        time.sleep(5)
90 |
91 |
92 | test()
93 |
94 | # Test transition
95 |
96 | print(colored("Test", "red"), colored("no2", "green"))
97 |
98 | for i in range(5):
99 | main(MODEL_NAME=f"iris demo{i}", version=2, stage="Staging")
100 |
101 | test()
102 |
103 | # Test removal
104 | print(colored("Test", "red"), colored("no3", "green"))
105 |
PATH_OF_GIT_REPO = "tests/repo-test"
COMMIT_MESSAGE = "comment from python script"


def _git_remove_and_push(manifest_path):
    """Delete *manifest_path* locally, then commit and push the removal."""
    import os

    os.remove(manifest_path)
    try:
        repo = Repo(PATH_OF_GIT_REPO)
        repo.git.add(update=True)
        repo.index.commit(COMMIT_MESSAGE)
        repo.remote(name="origin").push()
    except Exception as exc:
        # Still best-effort (a failed push shows up as a timeout below), but
        # no longer swallows KeyboardInterrupt/SystemExit or hides the cause.
        print("Some error occured while pushing the code")
        print(exc)


def _wait_for_deletion(group, version, plural, name):
    """Poll the "staging" namespace until no resource called *name* remains.

    Raises:
        TimeoutError: when the module-level ``timeout`` deadline has passed.
    """
    while True:
        if time.time() > timeout:
            raise TimeoutError("Timeout error")
        listing = kube_client.list_namespaced_custom_object(
            group=group,
            version=version,
            plural=plural,
            namespace="staging",
        )
        model_names = [item["metadata"]["name"] for item in listing["items"]]
        if name not in model_names:
            print(model_names)
            print("Deletion test passed")
            break
        # Pause between polls instead of hammering the API server.
        time.sleep(5)


if sys.argv[1] == "kserve":
    _git_remove_and_push("tests/repo-test/staging/kserve-s3.yaml")
    _wait_for_deletion(
        group="serving.kserve.io",
        version="v1beta1",
        plural="inferenceservices",
        name="sklearn-iris-minio",
    )

if sys.argv[1] == "seldon":
    _git_remove_and_push("tests/repo-test/staging/seldon-s3.yaml")
    # The seldon path waits a minute after the push before it starts polling.
    time.sleep(60)
    _wait_for_deletion(
        group="machinelearning.seldon.io",
        version="v1",
        plural="seldondeployments",
        name="mlflow-var-minio",
    )
182 |
--------------------------------------------------------------------------------
/tests/mlflow/test_deploy.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | from kubernetes import client as KubeClient
4 | from kubernetes import config
5 |
# Prefer a local kubeconfig; fall back to in-cluster credentials.
try:
    config.load_kube_config()
except config.ConfigException:
    config.load_incluster_config()
kube_client = KubeClient.CustomObjectsApi()
status = ""
timeout = time.time() + 60 * 10

# Poll the "mlflow" SeldonDeployment in "staging" every 30 seconds until it
# reports state "Available", or fail once the ten-minute deadline passes.
while True:
    test = kube_client.get_namespaced_custom_object(
        group="machinelearning.seldon.io",
        version="v1",
        plural="seldondeployments",
        namespace="staging",
        name="mlflow",
    )
    # The status section may be missing on a resource that has only just
    # been created, so read it defensively rather than raising KeyError.
    seldon_status = test.get("status", {})
    status = seldon_status.get("state", "")
    print(status)
    if status == "Available":
        break
    print(seldon_status)
    time.sleep(30)
    if time.time() > timeout:
        # Dump the first backing Deployment, if any, to help diagnose.
        deployment_statuses = seldon_status.get("deploymentStatus") or {}
        if deployment_statuses:
            deploy_name = next(iter(deployment_statuses))
            apps_client = KubeClient.AppsV1Api()
            deployment = apps_client.read_namespaced_deployment(
                name=deploy_name, namespace="staging"
            )
            print(deployment)
        # A bare string is not an exception; raising one produced a
        # TypeError instead of a timeout.
        raise TimeoutError("Timeout error")
38 |
--------------------------------------------------------------------------------
/tests/pf_mlflow.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Start background port-forwards for MLflow (5000) and MinIO (9000), record
# their PIDs in pids.env, then create a "test" experiment through the
# forwarded MLflow REST API.
set -euo pipefail

kubectl port-forward -n mlflow svc/mlflow-service 5000:5000 &
MLFLOW_PID=$!

echo "Started mlflow port-forward, pid: $MLFLOW_PID"
echo MLFLOW_PID=$MLFLOW_PID >> pids.env

sleep 1


kubectl port-forward --namespace mlflow svc/minio 9000:9000 &
MINIO_PID=$!

# This message previously said "mlflow" for the minio forward.
echo "Started minio port-forward, pid: $MINIO_PID"
echo MINIO_PID=$MINIO_PID >> pids.env

sleep 1

curl -X POST http://localhost:5000/api/2.0/preview/mlflow/experiments/create -d '{"name":"test"}'
--------------------------------------------------------------------------------
/tests/repo-test/production/kserve-s3.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: "serving.kserve.io/v1beta1"
2 | kind: "InferenceService"
3 | metadata:
4 | name: "sklearn-iris-minio"
5 | spec:
6 | predictor:
7 | model:
8 | modelFormat:
9 | name: mlflow
10 | storageUri: '{{ mlflow.s3["iris demo2"] }}'
--------------------------------------------------------------------------------
/tests/repo-test/production/seldon-s3.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: machinelearning.seldon.io/v1
2 | kind: SeldonDeployment
3 | metadata:
4 | name: mlflow-var-minio
5 | spec:
6 | name: iris
7 | predictors:
8 | - graph:
9 | children:
10 | - name: step-one
11 | modelUri: '{{ mlflow.s3["iris demo1"] }}'
12 | envSecretRefName: seldon-rclone-secret
13 | implementation: MLFLOW_SERVER
14 | type: MODEL
15 | children:
16 | - name: step-two
17 | modelUri: '{{ mlflow.s3["iris demo2"] }}'
18 | envSecretRefName: seldon-rclone-secret
19 | implementation: MLFLOW_SERVER
20 | type: MODEL
21 | children: []
22 | - name: step-three
23 | implementation: MLFLOW_SERVER
24 | modelUri: '{{ mlflow.s3["iris demo3"] }}'
25 | envSecretRefName: seldon-rclone-secret
26 | type: MODEL
27 | children: []
28 | implementation: MLFLOW_SERVER
29 | modelUri: '{{ mlflow.s3["iris demo4"] }}'
30 | envSecretRefName: seldon-rclone-secret
31 | name: classifier
32 | name: default
33 | replicas: 1
--------------------------------------------------------------------------------
/tests/repo-test/staging/kserve-s3.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: "serving.kserve.io/v1beta1"
2 | kind: "InferenceService"
3 | metadata:
4 | name: "sklearn-iris-minio"
5 | spec:
6 | predictor:
7 | serviceAccountName: sa
8 | model:
9 | modelFormat:
10 | name: mlflow
11 | storageUri: '{{ mlflow.s3["iris demo2"] }}'
--------------------------------------------------------------------------------
/tests/repo-test/staging/kserve-s3t.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: "serving.kserve.io/v1beta1"
2 | kind: "InferenceService"
3 | metadata:
4 | name: "sklearn-iris-miniot"
5 | spec:
6 | predictor:
7 | serviceAccountName: sa
8 | model:
9 | modelFormat:
10 | name: mlflow
11 | storageUri: '{{ mlflow.s3["iris demo2"] }}'
--------------------------------------------------------------------------------
/tests/repo-test/staging/kserve-sa.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Secret
3 | metadata:
4 | name: s3creds
5 | annotations:
6 | serving.kserve.io/s3-endpoint: minio.mlflow.svc.cluster.local:9000 # replace with your s3 endpoint e.g minio-service.kubeflow:9000
7 | serving.kserve.io/s3-usehttps: "0" # by default 1, if testing with minio you can set to 0
8 | serving.kserve.io/s3-region: "us-east-2"
9 | serving.kserve.io/s3-useanoncredential: "false" # omitting this is the same as false, if true will ignore provided credential and use anonymous credentials
10 | type: Opaque
11 | stringData: # use `stringData` for raw credential string or `data` for base64 encoded string
12 | AWS_ACCESS_KEY_ID: minioadmin
13 | AWS_SECRET_ACCESS_KEY: minioadmin
14 | ---
15 |
16 | apiVersion: v1
17 | kind: ServiceAccount
18 | metadata:
19 | name: sa
20 | secrets:
21 | - name: s3creds
--------------------------------------------------------------------------------
/tests/repo-test/staging/seldon-s3.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: machinelearning.seldon.io/v1
2 | kind: SeldonDeployment
3 | metadata:
4 | name: mlflow-var-minio
5 | spec:
6 | name: iris
7 | predictors:
8 | - graph:
9 | children:
10 | - name: step-one
11 | modelUri: '{{ mlflow.s3["iris demo1"] }}'
12 | envSecretRefName: seldon-rclone-secret
13 | implementation: MLFLOW_SERVER
14 | type: MODEL
15 | children:
16 | - name: step-two
17 | modelUri: '{{ mlflow.s3["iris demo2"] }}'
18 | envSecretRefName: seldon-rclone-secret
19 | implementation: MLFLOW_SERVER
20 | type: MODEL
21 | children: []
22 | - name: step-three
23 | implementation: MLFLOW_SERVER
24 | modelUri: '{{ mlflow.s3["iris demo3"] }}'
25 | envSecretRefName: seldon-rclone-secret
26 | type: MODEL
27 | children: []
28 | implementation: MLFLOW_SERVER
29 | modelUri: '{{ mlflow.s3["iris demo4"] }}'
30 | envSecretRefName: seldon-rclone-secret
31 | logger:
32 | url: http://broker-ingress.knative-eventing.svc.cluster.local/demo/default
33 | mode: all
34 | name: classifier
35 | name: default
36 | replicas: 1
--------------------------------------------------------------------------------
/tests/repo-test/staging/seldon-secret.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Secret
3 | metadata:
4 | name: seldon-init-container-secret
5 | namespace: staging
6 | type: Opaque
7 | stringData:
8 | RCLONE_CONFIG_S3_TYPE: s3
9 | RCLONE_CONFIG_S3_PROVIDER: minio
10 | RCLONE_CONFIG_S3_ACCESS_KEY_ID: minioadmin
11 | RCLONE_CONFIG_S3_SECRET_ACCESS_KEY: minioadmin
12 | RCLONE_CONFIG_S3_ENDPOINT: http://minio.mlflow.svc.cluster.local:9000
13 | RCLONE_CONFIG_S3_ENV_AUTH: "false"
--------------------------------------------------------------------------------
/tests/repo-test/staging/seldon-single-model.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: machinelearning.seldon.io/v1
2 | kind: SeldonDeployment
3 | metadata:
4 | name: mlflow
5 | spec:
6 | name: iris
7 | predictors:
8 | - componentSpecs:
9 | - spec:
10 | containers:
11 | - name: classifier
12 | livenessProbe:
13 | initialDelaySeconds: 80
14 | failureThreshold: 200
15 | periodSeconds: 25
16 | successThreshold: 1
17 | httpGet:
18 | path: /health/ping
19 | port: http
20 | scheme: HTTP
21 | readinessProbe:
22 | initialDelaySeconds: 80
23 | failureThreshold: 20
24 | periodSeconds: 25
25 | successThreshold: 1
26 | httpGet:
27 | path: /health/ping
28 | port: http
29 | scheme: HTTP
30 | graph:
31 | implementation: MLFLOW_SERVER
32 | modelUri: '{{ mlflow.s3["iris demo3"] }}'
33 | envSecretRefName: seldon-init-container-secret
34 | name: classifier
35 | name: default
36 | replicas: 1
--------------------------------------------------------------------------------
/tests/setup_git_repo.sh:
--------------------------------------------------------------------------------
1 | curl -X 'POST' \
2 | 'http://localhost:3000/api/v1/user/repos' \
3 | -H 'accept: application/json' \
4 | -H 'authorization: Basic bWRjYWRtaW46cGFzc3dvcmQ=' \
5 | -H 'Content-Type: application/json' \
6 | -d '{
7 | "auto_init": false,
8 | "default_branch": "main",
9 | "description": "demo",
10 | "name": "repo-test",
11 | "private": false,
12 | "template": false,
13 | "trust_model": "default"
14 | }'
15 |
16 | git config --global user.email "mdcadmin@example.com"
17 | git config --global user.name "mdcadmin"
18 | cd tests/repo-test
19 | git init
20 | git add .
21 | git checkout -b main
22 | git commit -m "first commit"
23 | git remote add origin "http://mdcadmin:password@localhost:3000/mdcadmin/repo-test"
24 | git push -u origin main
25 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [flake8]
2 | # it's not a bug that we aren't using all of hacking, ignore:
3 | # F812: list comprehension redefines ...
4 | # H101: Use TODO(NAME)
5 | # H202: assertRaises Exception too broad
6 | # H233: Python 3.x incompatible use of print operator
7 | # H301: one import per line
8 | # H306: imports not in alphabetical order (time, os)
9 | # H401: docstring should not start with a space
10 | # H403: multi line docstrings should end on a new line
11 | # H404: multi line docstring should start without a leading new line
12 | # H405: multi line docstring summary not separated with an empty line
13 | # H501: Do not use self.__dict__ for string formatting
14 | ignore = E501,W503
15 |
--------------------------------------------------------------------------------
/ui/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM continuumio/miniconda3:4.11.0
2 | COPY requirements.txt requirements.txt
3 | RUN pip install -r requirements.txt
4 | WORKDIR /ui
5 | COPY . /ui
6 | CMD ["python", "app.py"]
--------------------------------------------------------------------------------
/ui/app.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import dash
4 | import dash_bootstrap_components as dbc
5 | import dash_html_components as html
6 | import pandas as pd
7 | from dash import Input, Output, dcc
8 | from kubernetes import client as kube_client
9 | from kubernetes import config
10 |
11 | MLFLOW_NAMESPACE = os.getenv("namespace", "mlflow")
12 | MDC_LABEL = os.getenv("MDC_LABEL", "mdc-staging")
13 |
14 |
15 | app = dash.Dash(
16 | __name__,
17 | use_pages=True,
18 | external_stylesheets=[dbc.themes.FLATLY, dbc.icons.BOOTSTRAP],
19 | )
20 |
21 | navbar = dbc.NavbarSimple(
22 | [
23 | dbc.Button("Home", href="/", color="secondary", className="me-1"),
24 | dbc.Button("Logs", href="/logs", color="secondary", className="me-1"),
25 | ],
26 | brand="Mlflow Deployment Controller",
27 | color="primary",
28 | dark=True,
29 | className="mb-2",
30 | )
31 |
32 |
def serve_layout():
    """Return the root layout: the navbar followed by the Dash page container."""
    children = [navbar, dash.page_container]
    return html.Div(children)
38 |
39 |
40 | app.layout = serve_layout
41 |
42 | try:
43 | config.load_kube_config()
44 | except config.ConfigException:
45 | config.load_incluster_config()
46 |
47 |
def dataf():
    """Build a table of the SeldonDeployments labelled as managed by mdc.

    Lists SeldonDeployment custom objects cluster-wide, filtered by the label
    ``app.kubernetes.io/managed-by=mdc``, and renders one row per deployment
    with a link to its detail page, namespace, replica count and state.

    Returns:
        The ``dbc.Table`` built from the collected rows.
    """
    api = kube_client.CustomObjectsApi()
    manifests = api.list_cluster_custom_object(
        group="machinelearning.seldon.io",
        version="v1",
        plural="seldondeployments",
        label_selector="app.kubernetes.io/managed-by=mdc",
    )
    model_name = []
    namespace = []
    state = []
    replicas = []
    for item in manifests["items"]:
        metadata = item["metadata"]
        # A deployment that has not reported status yet may lack the status
        # section or its deploymentStatus map; show placeholders rather than
        # raising inside the periodic callback.
        status = item.get("status") or {}
        deployment_status = status.get("deploymentStatus") or {}
        first_deployment = next(iter(deployment_status.values()), {})

        model_name.append(metadata["name"])
        namespace.append(metadata["namespace"])
        state.append(status.get("state", "Unknown"))
        replicas.append(first_deployment.get("replicas", 0))

    df = pd.DataFrame(
        {
            "models": model_name,
            "namespace": namespace,
            "replicas": replicas,
            "state": state,
        }
    )
    # Replace plain names with links to the per-deployment page.
    df["models"] = [dcc.Link(f"{i}", href=f"/seldon/{i}") for i in df.models.values]
    table = dbc.Table.from_dataframe(df, striped=True, bordered=True, hover=True)

    return table
81 |
82 |
@app.callback(
    Output("table-deployments", "children"),
    [Input("interval-component", "n_intervals")],
)
def interval_deployment(n_intervals):
    """Rebuild the deployments table each time ``interval-component`` ticks."""
    table = dataf()
    return table
89 |
90 |
@app.callback(
    Output("seldon-deployment", "children"),
    [Input("interval-component-seldon", "n_intervals")],
)
def internal_seldon_deployment(n_intervals):
    """Return no children for ``seldon-deployment`` on every interval tick."""
    no_children = []
    return no_children
97 |
98 |
@app.callback(Output("live-graph", "children"), [Input("graph-update", "n_intervals")])
def update_graph_scatter(n_intervals):
    """Render the last 60 seconds of controller pod logs.

    Looks up pods in ``MLFLOW_NAMESPACE`` labelled
    ``app.kubernetes.io/instance=<MDC_LABEL>`` and reads the log of the
    first match.

    Args:
        n_intervals: Tick count of the ``graph-update`` interval; only used
            as the callback trigger.

    Returns:
        A list of Dash components: a line break, a heading and the log text.
    """
    v1 = kube_client.CoreV1Api()
    pods = v1.list_namespaced_pod(
        namespace=MLFLOW_NAMESPACE,
        label_selector=f"app.kubernetes.io/instance={MDC_LABEL}",
    )
    if pods.items:
        lines = v1.read_namespaced_pod_log(
            name=pods.items[0].metadata.name,
            pretty=True,
            since_seconds=60,
            namespace=MLFLOW_NAMESPACE,
            follow=False,
            _preload_content=True,
        )
    else:
        # No matching pod (e.g. the controller is restarting): show a
        # message instead of failing the callback with an IndexError.
        lines = (
            f"No controller pod found for instance {MDC_LABEL} "
            f"in namespace {MLFLOW_NAMESPACE}"
        )
    return [
        html.Br(),
        html.H4("Controller Logs"),
        html.Plaintext(
            lines,
            style={
                "display": "inline-block",
                "fontSize": 15,
                "color": "white",
                "backgroundColor": "black",
            },
        ),
    ]
132 |
133 |
@app.callback(
    Output("collapse0", "is_open"),
    Output("collapse1", "is_open"),
    Output("collapse2", "is_open"),
    Output("collapse3", "is_open"),
    Output("collapse4", "is_open"),
    Output("collapse5", "is_open"),
    Output("collapse-button0", "n_clicks"),
    Output("collapse-button1", "n_clicks"),
    Output("collapse-button2", "n_clicks"),
    Output("collapse-button3", "n_clicks"),
    Output("collapse-button4", "n_clicks"),
    Output("collapse-button5", "n_clicks"),
    [
        Input("collapse-button0", "n_clicks"),
        Input("collapse-button1", "n_clicks"),
        Input("collapse-button2", "n_clicks"),
        Input("collapse-button3", "n_clicks"),
        Input("collapse-button4", "n_clicks"),
        Input("collapse-button5", "n_clicks"),
    ],
)
def toggle_collapse(n, n1, n2, n3, n4, n5):
    """Open the panel of the first clicked button and reset all click counts.

    Returns a 12-tuple: six ``is_open`` flags (at most one True, for the
    lowest-numbered button with a truthy click count) followed by six zeros
    that reset every button's ``n_clicks``.
    """
    clicks = (n, n1, n2, n3, n4, n5)
    # Index of the first button with a truthy click count, or None.
    opened = next((idx for idx, count in enumerate(clicks) if count), None)
    open_flags = tuple(idx == opened for idx in range(len(clicks)))
    return open_flags + (0,) * len(clicks)
170 |
171 |
172 | if __name__ == "__main__":
173 | app.run_server(host="0.0.0.0", port=8000, debug=False)
174 |
--------------------------------------------------------------------------------
/ui/pages/deployments.py:
--------------------------------------------------------------------------------
1 | from dash import dcc, html, register_page
2 | from kubernetes import config
3 |
4 | register_page(__name__, path="/")
5 |
6 |
7 | try:
8 | config.load_kube_config()
9 | except config.ConfigException:
10 | config.load_incluster_config()
11 |
12 |
13 | layout = html.Div(
14 | [
15 | html.H5(
16 | "Seldon Deployments",
17 | className="mt-5",
18 | ),
19 | dcc.Interval(
20 | id="interval-component", interval=1 * 1000, n_intervals=0 # in milliseconds
21 | ),
22 | html.Div(id="table-deployments"),
23 | ]
24 | )
25 |
--------------------------------------------------------------------------------
/ui/pages/logs.py:
--------------------------------------------------------------------------------
1 | import dash
2 | import dash_core_components as dcc
3 | import dash_html_components as html
4 | from kubernetes import config
5 |
6 | try:
7 | config.load_kube_config()
8 | except config.ConfigException:
9 | config.load_incluster_config()
10 |
11 |
12 | def title():
13 | return "Logs"
14 |
15 |
16 | def description(ticker=None):
17 | return "Controller Logs"
18 |
19 |
20 | dash.register_page(
21 | __name__,
22 | path_template="/logs",
23 | title=title,
24 | description=description,
25 | path="/logs",
26 | )
27 |
28 |
29 | def layout(ticker=None, **other_unknown_query_strings):
30 | return html.Div(
31 | [
32 | html.Div(id="live-graph"),
33 | dcc.Interval(id="graph-update", interval=1 * 10000, n_intervals=0),
34 | ]
35 | )
36 |
--------------------------------------------------------------------------------
/ui/pages/not_found_404.py:
--------------------------------------------------------------------------------
1 | import dash
2 | from dash import html
3 |
4 | dash.register_page(__name__, path="/404")
5 |
6 |
7 | layout = html.H1("404 Not found")
8 |
--------------------------------------------------------------------------------
/ui/pages/seldon.py:
--------------------------------------------------------------------------------
1 | import dash
2 | from kubernetes import config
3 | from seldon_deployments.card import card_layout
4 |
5 | try:
6 | config.load_kube_config()
7 | except config.ConfigException:
8 | config.load_incluster_config()
9 |
10 |
11 | def title(ticker=None):
12 | return f"{ticker} Status"
13 |
14 |
15 | def description(ticker=None):
16 | return f"Deployment status {ticker}"
17 |
18 |
# The template must declare the `<ticker>` path variable: Dash passes
# variables captured by the template to `layout`, `title` and `description`
# as keyword arguments. Without it, `ticker` stayed None for every URL.
dash.register_page(
    __name__,
    path_template="/seldon/<ticker>",
    title=title,
    description=description,
    path="/seldon/mlflow",
)
26 |
27 |
28 | def layout(ticker=None, **other_unknown_query_strings):
29 | return card_layout(ticker)
30 |
--------------------------------------------------------------------------------
/ui/requirements.txt:
--------------------------------------------------------------------------------
1 | mlflow
2 | kubernetes
3 | google
4 | gcloud
5 | google-apitools==0.5.32
6 | google-auth==2.1.0
7 | google-auth-oauthlib==0.4.6
8 | google-cloud==0.34.0
9 | google-cloud-core==2.0.0
10 | google-cloud-storage==1.42.2
11 | google-crc32c==1.2.0
12 | google-pasta==0.2.0
13 | google-reauth==0.1.1
14 | google-resumable-media==2.0.3
15 | googleapis-common-protos==1.52.0
16 | apscheduler
17 | plotly
18 | dash_core_components
19 | dash
20 | dash_html_components
21 | dash_bootstrap_components
--------------------------------------------------------------------------------
/ui/seldon_deployments/card.py:
--------------------------------------------------------------------------------
1 | import ast
2 | import json
3 | import os
4 |
5 | import dash
6 | import dash_bootstrap_components as dbc
7 | import yaml
8 | from dash import dcc, html
9 | from kubernetes import config
10 | from seldon_deployments.data import dataf
11 |
# Configure the Kubernetes client at import time: try the local kubeconfig
# first and fall back to the in-cluster configuration when that raises
# ConfigException.
try:
    config.load_kube_config()
except config.ConfigException:
    config.load_incluster_config()
# Namespace passed to ``dataf``; read from the "namespace" environment
# variable, defaulting to "staging".
GLOBAL_NAMESPACE = os.getenv("namespace", "staging")
# Base URL passed to ``dataf`` as ``seldon_url``; read from the "seldon_url"
# environment variable.
SELDON_URL = os.getenv("seldon_url", "https://example.mlops.com")
18 |
19 |
def _labelled_item(label, *children):
    """Return a list-group row: a bold ``label`` followed by ``children``."""
    return dbc.ListGroupItem(
        [html.A(label, style={"font-weight": "bold"}), *children]
    )


def _endpoint_item(label, element_id, url):
    """Return a row showing ``url`` as a link plus a copy-to-clipboard control."""
    return _labelled_item(
        label,
        html.A(id=element_id, href=url, children=url, target="_blank"),
        html.A(" "),
        dcc.Clipboard(
            target_id=element_id,
            title="copy",
            style={
                "display": "inline-block",
                "fontSize": 20,
                "verticalAlign": "top",
            },
        ),
    )


def _condition_collapses(conditions):
    """Return one button + collapsible card per status condition."""
    collapses = []
    for index, condition in enumerate(conditions):
        color = "secondary" if condition["status"] == "False" else "success"
        condition_type = condition["type"]
        # Conditions without a reason fall back to showing their type.
        reason = condition.get("reason", condition_type)
        collapses.append(
            html.Div(
                [
                    dbc.Button(
                        condition_type,
                        id=f"collapse-button{index}",
                        className="mb-3",
                        color=color,
                        n_clicks=0,
                    ),
                    dbc.Collapse(
                        dbc.Card(dbc.CardBody(reason)),
                        id=f"collapse{index}",
                        is_open=False,
                    ),
                ]
            )
        )
    return collapses


def _model_card(model):
    """Return the detail card (run id, source, version, artifact URI) of a model."""
    # NOTE(review): every model card reuses the id "seldon-deployment"; with
    # more than one model this yields duplicate component ids -- confirm
    # whether any callback depends on this id before changing it.
    return dbc.Card(
        [
            dbc.CardBody(
                [
                    html.H4(
                        model["name"], id="seldon-deployment", className="card-title"
                    ),
                    dbc.ListGroup(
                        [
                            _labelled_item("Run id: ", html.A(model["run_id"])),
                            _labelled_item("Source: ", html.A(model["source"])),
                            _labelled_item("Version: ", html.A(model["version"])),
                            _labelled_item(
                                "Artifact Uri: ", html.A(model["artifact_uri"])
                            ),
                        ]
                    ),
                ]
            ),
        ],
    )


def card_layout(deploy_name=None):
    """Build the detail page for one Seldon deployment.

    Data is fetched through ``dataf`` using ``GLOBAL_NAMESPACE`` and
    ``SELDON_URL``. The page shows the deployment name, an "Overview" tab
    (endpoints, status and one collapsible entry per status condition), a
    "Model Details" tab, a "Yaml" tab with the manifest, and -- only when the
    status is "Available" -- a "Doc" tab embedding the external URL.

    Args:
        deploy_name: Name of the Seldon deployment to display.

    Returns:
        The ``html.Div`` containing the heading, the tabs and an interval
        component with id ``interval-component-seldon``.
    """
    (
        model_manifests,
        name,
        external_url,
        internal_url,
        status,
        status_message,
        status_reason,
        status_button,
        manifest,
    ) = dataf(name=deploy_name, namespace=GLOBAL_NAMESPACE, seldon_url=SELDON_URL)

    # A manifest without conditions simply yields no collapsible entries.
    conditions = manifest["status"].get("conditions") or []
    collapses = _condition_collapses(conditions)

    # Dump the manifest directly: a JSON -> ast.literal_eval round trip fails
    # on true/false/null values. The newline after the fence keeps the first
    # YAML line out of the fence's info string.
    manifest_yaml = yaml.safe_dump(manifest, default_flow_style=False)
    code = f"```yaml\n{manifest_yaml}```"

    model_cards = [_model_card(model) for model in model_manifests]

    overview_tab = dcc.Tab(
        label="Overview",
        children=[
            dbc.Card(
                dbc.ListGroup(
                    [
                        _endpoint_item(
                            "External Endpoint: ", "external_url", external_url
                        ),
                        _endpoint_item(
                            "Internal Endpoint: ", "internal_url", internal_url
                        ),
                        _labelled_item("Status Message: ", html.A(status_message)),
                        _labelled_item("Status Reason: ", html.A(status_reason)),
                        _labelled_item("Status: ", status_button),
                    ],
                    flush=True,
                ),
            )
        ]
        + collapses,
    )

    tabs = [
        overview_tab,
        dcc.Tab(label="Model Details", children=model_cards),
        dcc.Tab(label="Yaml", children=[dcc.Markdown(code)]),
    ]
    if status == "Available":
        tabs.append(
            dcc.Tab(
                label="Doc",
                children=[
                    html.Iframe(
                        src=external_url, style={"height": "1067px", "width": "100%"}
                    )
                ],
            )
        )

    return html.Div(
        [
            dash.html.H3(f"{name}"),
            dcc.Tabs(tabs),
            dcc.Interval(
                id="interval-component-seldon",
                interval=1 * 1000,  # in milliseconds
                n_intervals=0,
            ),
        ]
    )
229 |
--------------------------------------------------------------------------------
/ui/seldon_deployments/data.py:
--------------------------------------------------------------------------------
1 | import ast
2 |
3 | import dash_bootstrap_components as dbc
4 | from dash import html
5 | from kubernetes import client as KubeClient
6 | from kubernetes import config
7 |
# Configure the Kubernetes client at import time: try the local kubeconfig
# first and fall back to the in-cluster configuration when that raises
# ConfigException.
try:
    config.load_kube_config()
except config.ConfigException:
    config.load_incluster_config()
12 |
13 |
def _unhealthy_waiting_state(container_status, marker):
    """Return the waiting state of a not-started/not-ready container, or None.

    The state is returned only when its message contains ``marker``.
    """
    if container_status.started is not False and container_status.ready is not False:
        return None
    waiting = container_status.state.waiting if container_status.state else None
    if waiting is None or waiting.message is None:
        return None
    return waiting if marker in waiting.message else None


def pod_status(namespace, deploy_name):
    """Report a problem state for the pods of a deployment.

    Pods in ``namespace`` are listed and only those whose own ``app`` label
    equals ``deploy_name`` are inspected.

    Args:
        namespace: Namespace whose pods are listed.
        deploy_name: Expected value of the pod's ``app`` label.

    Returns:
        * ``(pod_name, message)`` for the first matching pod that has neither
          container nor init-container statuses; ``message`` is the message
          of its first condition (``None`` when it has no conditions).
        * ``(pod_name, "CrashLoopBackOff", message)`` for the first matching
          pod whose first container or first init container is waiting for
          that reason.
        * ``None`` when no matching pod is in either situation.
    """
    v1 = KubeClient.CoreV1Api()
    api_response = v1.list_namespaced_pod(namespace)
    for pod in api_response.items:
        # Check the label of the pod being inspected; labels may be absent.
        labels = pod.metadata.labels or {}
        if labels.get("app") != deploy_name:
            continue

        container_statuses = pod.status.container_statuses
        init_statuses = pod.status.init_container_statuses
        if container_statuses is None and init_statuses is None:
            conditions = pod.status.conditions or []
            message = conditions[0].message if conditions else None
            return (pod.metadata.name, message)

        status = pod.status.phase
        waiting_message = None
        # The main container is checked for "Error" messages, the init
        # container for "failed" ones; a later match overrides an earlier one.
        checks = (
            (container_statuses, "Error"),
            (init_statuses, "failed"),
        )
        for statuses, marker in checks:
            if not statuses:
                continue
            waiting = _unhealthy_waiting_state(statuses[0], marker)
            if waiting is not None:
                status = waiting.reason
                waiting_message = waiting.message

        if status == "CrashLoopBackOff":
            return (pod.metadata.name, status, waiting_message)
    return None
52 |
53 |
def dataf(
    name="mlflow-var", namespace="staging", seldon_url="https://seldon.mlops.wianai.com"
):
    """Collect everything the UI shows about one Seldon deployment.

    The ``seldondeployments`` custom object is read, its model annotations
    are parsed, and the status of the first Kubernetes deployment listed in
    ``status.deploymentStatus`` is summarised.

    Args:
        name: Name of the Seldon deployment.
        namespace: Namespace the deployment lives in.
        seldon_url: Base URL used to build the external documentation URL.

    Returns:
        A 9-tuple ``(models, name, external_url, internal_url, status,
        status_message, status_reason, status_button, manifest)``. ``models``
        is the list of parsed annotation values whose key contains "mdc" but
        not "mlflow-stage". ``status`` is "Available", "Progressing", the
        reason of a failed "Progressing" condition, or "" when no condition
        matched; in that last case the message and reason are empty strings
        and the button reads "Unknown".
    """
    v1 = KubeClient.CustomObjectsApi()
    manifest = v1.get_namespaced_custom_object(
        group="machinelearning.seldon.io",
        version="v1",
        plural="seldondeployments",
        namespace=namespace,
        name=name,
    )

    # Annotations may be absent on a freshly created object.
    annotations = manifest["metadata"].get("annotations") or {}
    model = [
        ast.literal_eval(value)
        for key, value in annotations.items()
        if ("mdc" in key) and ("mlflow-stage" not in key)
    ]

    name = manifest["metadata"]["name"]
    external_url = f"{seldon_url}/seldon/{namespace}/{name}/api/v1.0/doc/"
    internal_url = manifest["status"]["address"]["url"]
    deploy_name = list(manifest["status"]["deploymentStatus"].keys())[0]
    kube_client = KubeClient.AppsV1Api()
    deployment = kube_client.read_namespaced_deployment(
        name=deploy_name, namespace=namespace
    )

    # Defaults keep the return values defined when no condition matches.
    status = ""
    status_message = ""
    status_reason = ""
    status_button = dbc.Button(
        [html.I(className="bi bi-question-circle-fill me-2"), " Unknown"],
        color="secondary",
        disabled=True,
    )
    for condition in deployment.status.conditions or []:
        if (condition.type == "Available") and (condition.status == "True"):
            status = "Available"
            status_message = condition.message
            status_reason = condition.reason
            status_button = dbc.Button(
                [html.I(className="bi bi-check-circle-fill me-2"), " Available"],
                color="success",
                disabled=True,
            )
        elif status != "Available" and condition.type == "Progressing":
            # Once "Available" has been seen, "Progressing" conditions no
            # longer override it, regardless of their order in the list.
            if condition.status == "True":
                status = "Progressing"
                status_message = condition.message
                status_reason = condition.reason
                status_button = dbc.Button(
                    [dbc.Spinner(size="sm"), " Progressing..."],
                    color="primary",
                    disabled=True,
                )
            elif condition.status == "False":
                status = condition.reason
                status_message = condition.message
                status_reason = condition.reason
                status_button = dbc.Button(
                    [html.I(className="bi bi-x-octagon-fill me-2"), " Failed"],
                    color="danger",
                    disabled=True,
                )
    return (
        model,
        name,
        external_url,
        internal_url,
        status,
        status_message,
        status_reason,
        status_button,
        manifest,
    )
121 |
--------------------------------------------------------------------------------