├── .github
    ├── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   └── feature_request.md
    └── workflows
    │   ├── codeql.yml
    │   ├── integration_test_minio_gitops.yaml
    │   ├── linter-py.yaml
    │   └── main.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── charts
    └── mlflow-controller
    │   ├── .helmignore
    │   ├── Chart.yaml
    │   ├── README.md
    │   ├── templates
    │       ├── _helpers.tpl
    │       ├── deployment-ui.yaml
    │       ├── deployment.yaml
    │       ├── gitops-cm.yaml
    │       ├── ingress.yaml
    │       ├── mlflow-cm.yaml
    │       ├── service.yaml
    │       └── serviceaccount.yaml
    │   └── values.yaml
├── doc
    ├── Mlflow Deployment controller.drawio
    ├── doc.md
    └── gitops.md
├── examples
    ├── argo-manifest
    │   ├── mlflow-controller-production.yaml
    │   ├── mlflow-controller.yaml
    │   ├── mlflow.yaml
    │   └── seldon-core.yaml
    ├── gitops
    │   └── gitops.ipynb
    ├── notebook
    │   ├── deploy.yaml
    │   └── mlflow.ipynb
    └── readme.md
├── main.py
├── mlflow_controller
    ├── __init__.py
    ├── controller.py
    ├── gitops.py
    ├── mlflow_direct.py
    ├── mlservers
    │   ├── kserve.py
    │   ├── rclone.py
    │   ├── seldon.py
    │   └── utils.py
    ├── registries
    │   ├── mlflow.py
    │   └── mlflow_backend.py
    └── utils
    │   └── var_extract.py
├── requirements.txt
├── test.py
├── tests
    ├── docker_build_push.sh
    ├── install_gitea.sh
    ├── install_istio.sh
    ├── install_kserve.sh
    ├── install_kserve_deployment_controller.sh
    ├── install_mlflow.sh
    ├── install_seldon_core.sh
    ├── install_seldon_deployment_controller.sh
    ├── kind-cluster-1-24.yaml
    ├── log_mlflow_model.sh
    ├── mlflow-cm.yaml
    ├── mlflow
    │   ├── iris.py
    │   ├── list_model.py
    │   └── test_deploy.py
    ├── pf_mlflow.sh
    ├── repo-test
    │   ├── production
    │   │   ├── kserve-s3.yaml
    │   │   └── seldon-s3.yaml
    │   └── staging
    │   │   ├── kserve-s3.yaml
    │   │   ├── kserve-s3t.yaml
    │   │   ├── kserve-sa.yaml
    │   │   ├── seldon-s3.yaml
    │   │   ├── seldon-secret.yaml
    │   │   └── seldon-single-model.yaml
    └── setup_git_repo.sh
├── tox.ini
└── ui
    ├── Dockerfile
    ├── app.py
    ├── pages
        ├── deployments.py
        ├── logs.py
        ├── not_found_404.py
        └── seldon.py
    ├── requirements.txt
    └── seldon_deployments
        ├── card.py
        └── data.py


/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 | 
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 | 
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 | 
26 | **Desktop (please complete the following information):**
27 |  - OS: [e.g. iOS]
28 |  - Browser [e.g. chrome, safari]
29 |  - Version [e.g. 22]
30 | 
31 | **Smartphone (please complete the following information):**
32 |  - Device: [e.g. iPhone6]
33 |  - OS: [e.g. iOS8.1]
34 |  - Browser [e.g. stock browser, safari]
35 |  - Version [e.g. 22]
36 | 
37 | **Additional context**
38 | Add any other context about the problem here.
39 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 


--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
 1 | # For most projects, this workflow file will not need changing; you simply need
 2 | # to commit it to your repository.
 3 | #
 4 | # You may wish to alter this file to override the set of languages analyzed,
 5 | # or to provide custom queries or build logic.
 6 | #
 7 | # ******** NOTE ********
 8 | # We have attempted to detect the languages in your repository. Please check
 9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
11 | #
12 | name: CodeQL
13 | 
14 | # Triggers: pushes to main/gh-pages, pull requests against main, and a
15 | # scheduled run described by the cron expression below.
16 | on:
17 |   push:
18 |     branches:
19 |       - main
20 |       - gh-pages
21 |   pull_request:
22 |     # Must stay a subset of the push branches listed above.
23 |     branches:
24 |       - main
25 |   schedule:
26 |     - cron: '33 5 * * 0'
27 | 
28 | jobs:
29 |   analyze:
30 |     name: Analyze
31 |     runs-on: ubuntu-latest
32 |     permissions:
33 |       actions: read
34 |       contents: read
35 |       security-events: write
36 | 
37 |     strategy:
38 |       fail-fast: false
39 |       matrix:
40 |         # CodeQL supports: cpp, csharp, go, java, javascript, python, ruby.
41 |         # 'java' also covers Kotlin; 'javascript' also covers TypeScript.
42 |         # Language support details: https://aka.ms/codeql-docs/language-support
43 |         language:
44 |           - python
45 | 
46 |     steps:
47 |       - name: Checkout repository
48 |         uses: actions/checkout@v3
49 | 
50 |       # Set up the CodeQL tooling for the language selected by the matrix.
51 |       - name: Initialize CodeQL
52 |         uses: github/codeql-action/init@v2
53 |         with:
54 |           languages: ${{ matrix.language }}
55 |           # Custom queries can be listed here or in a config file; entries
56 |           # here override the config file unless the list is prefixed with "+".
57 |           # Query packs: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
58 |           # queries: security-extended,security-and-quality
59 | 
60 |       # Autobuild attempts to build compiled languages (C/C++, C#, Go, Java).
61 |       # If it fails, remove this step and build manually with a `run` step, e.g.
62 |       #
63 |       #   - run: |
64 |       #       echo "Run, Build Application using script"
65 |       #       ./location_of_script_within_repo/buildscript.sh
66 |       #
67 |       # See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
68 |       - name: Autobuild
69 |         uses: github/codeql-action/autobuild@v2
70 | 
71 |       - name: Perform CodeQL Analysis
72 |         uses: github/codeql-action/analyze@v2
73 |         with:
74 |           category: "/language:${{matrix.language}}"


--------------------------------------------------------------------------------
/.github/workflows/integration_test_minio_gitops.yaml:
--------------------------------------------------------------------------------
 1 | name: Integration test gitops in KinD [minio]
 2 | on:
 3 |   pull_request:
 4 | 
 5 | jobs:
 6 |   build:
 7 |     runs-on: ubuntu-latest
 8 |     strategy:
 9 |       fail-fast: false
10 |       matrix:
11 |         # One job per model server; the value selects the controller
12 |         # install script in the last step.
13 |         mlserver:
14 |           - seldon
15 |           - kserve
16 |     steps:
17 |       - name: Checkout
18 |         uses: actions/checkout@v3
19 |         with:
20 |           ref: ${{ github.event.pull_request.head.sha }}
21 | 
22 |       # Cluster creation is skipped here; the cluster is created explicitly
23 |       # from the repository's kind config a few steps below.
24 |       - uses: engineerd/setup-kind@v0.5.0
25 |         with:
26 |           skipClusterCreation: "true"
27 |           version: v0.17.0
28 | 
29 |       - name: Install Helm
30 |         uses: azure/setup-helm@v1
31 |         with:
32 |           version: v3.8.1
33 | 
34 |       - name: Create KinD Cluster
35 |         run: kind create cluster --config tests/kind-cluster-1-24.yaml
36 | 
37 |       # Print cluster diagnostics and block until every node reports Ready.
38 |       - name: Testing
39 |         run: |
40 |           kubectl cluster-info
41 |           kubectl get pods -n kube-system
42 |           echo "current-context:" $(kubectl config current-context)
43 |           echo "environment-kubeconfig:" ${KUBECONFIG}
44 |           kubectl get nodes
45 |           kubectl wait --for=condition=Ready nodes --all --timeout=600s
46 | 
47 |       - name: Install Gitea
48 |         run: ./tests/install_gitea.sh
49 | 
50 |       - name: setup git repo
51 |         run: ./tests/setup_git_repo.sh
52 | 
53 |       - name: Install mlflow
54 |         run: ./tests/install_mlflow.sh
55 | 
56 |       - name: PF Mlflow
57 |         run: ./tests/pf_mlflow.sh
58 | 
59 |       - uses: actions/setup-python@v4
60 |         with:
61 |           python-version: '3.7'
62 | 
63 |       - name: Log model Mlflow
64 |         run: ./tests/log_mlflow_model.sh
65 | 
66 |       - name: Install Kserve
67 |         run: ./tests/install_kserve.sh
68 | 
69 |       - name: Install Seldon Core
70 |         run: ./tests/install_seldon_core.sh
71 | 
72 |       - name: Build and Push image
73 |         run: ./tests/docker_build_push.sh
74 | 
75 |       # The matrix value picks the script and is also exported to it as
76 |       # the `mlserver` environment variable.
77 |       - name: Install deployment controller ${{matrix.mlserver}}
78 |         run: ./tests/install_${{matrix.mlserver}}_deployment_controller.sh
79 |         env:
80 |           mlserver: ${{matrix.mlserver}}


--------------------------------------------------------------------------------
/.github/workflows/linter-py.yaml:
--------------------------------------------------------------------------------
 1 | name: linter
 2 | 
 3 | on: [pull_request]
 4 | 
 5 | jobs:
 6 |   # Runs the repository's `make lint-python-check` target on every pull request.
 7 |   lint-python:
 8 |     runs-on: ubuntu-latest
 9 |     env:
10 |       # Quoted: an unquoted version is parsed as a YAML float, which would
11 |       # silently turn e.g. 3.10 into 3.1 on the next version bump.
12 |       PYTHON: "3.8"
13 |     steps:
14 |       - uses: actions/checkout@v2
15 |       - name: Setup Python
16 |         id: setup-python
17 |         uses: actions/setup-python@v2
18 |         with:
19 |           python-version: "3.8"
20 |           architecture: x64
21 |       - name: Upgrade pip version
22 |         run: |
23 |           pip install --upgrade "pip>=21.3.1,<22.1"
24 |       # Only the lint tools are installed; requirements.txt itself is not.
25 |       - name: Install linters
26 |         id: pip-requirements
27 |         run: |
28 |           pip install isort black flake8
29 | 
30 |       - name: Lint python
31 |         run: make lint-python-check


--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | name: Release Charts
 2 | 
 3 | # Runs when a push to main touches anything under charts/.
 4 | on:
 5 |   push:
 6 |     branches:
 7 |       - main
 8 |     paths:
 9 |       - 'charts/**'
10 | 
11 | # Least privilege: chart-releaser only needs to push to the repository and
12 | # create releases, and the docker job only reads it, so the token is limited
13 | # to `contents: write` instead of write access to every scope.
14 | permissions:
15 |   contents: write
16 | 
17 | jobs:
18 |   # Package and publish the Helm charts.
19 |   release:
20 |     runs-on: ubuntu-latest
21 |     steps:
22 |       - name: Checkout
23 |         uses: actions/checkout@v2
24 |         with:
25 |           # Full history (all commits and tags), not a shallow clone.
26 |           fetch-depth: 0
27 | 
28 |       - name: Configure Git
29 |         run: |
30 |           git config user.name "$GITHUB_ACTOR"
31 |           git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
32 | 
33 |       - name: Install Helm
34 |         uses: azure/setup-helm@v1
35 |         with:
36 |           version: v3.8.1
37 | 
38 |       - name: Run chart-releaser
39 |         uses: helm/chart-releaser-action@v1.4.0
40 |         env:
41 |           CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
42 | 
43 |   # Build and push the controller and UI images once the charts are released.
44 |   docker:
45 |     runs-on: ubuntu-latest
46 |     needs: ["release"]
47 |     steps:
48 |       - name: Checkout
49 |         uses: actions/checkout@v2
50 |         with:
51 |           fetch-depth: 0
52 | 
53 |       - name: Set up QEMU
54 |         uses: docker/setup-qemu-action@v2
55 | 
56 |       - name: Set up Docker Buildx
57 |         uses: docker/setup-buildx-action@v2
58 | 
59 |       # The resolved tag (1.0.0 when none is found) is used as the image tag
60 |       # for both pushes below.
61 |       - name: 'Get Previous tag'
62 |         id: previoustag
63 |         uses: "WyriHaximus/github-action-get-previous-tag@v1"
64 |         with:
65 |           fallback: 1.0.0
66 | 
67 |       - name: Login to DockerHub
68 |         uses: docker/login-action@v2
69 |         with:
70 |           username: ${{ secrets.DOCKERHUB_USERNAME }}
71 |           password: ${{ secrets.DOCKERHUB_TOKEN }}
72 | 
73 |       # Step names are distinct so the two pushes can be told apart in the
74 |       # run log.
75 |       - name: Build and push controller image
76 |         uses: docker/build-push-action@v3
77 |         with:
78 |           push: true
79 |           tags: tachyongroup/mlflow-deployment-controller:${{ steps.previoustag.outputs.tag }}
80 | 
81 |       - name: Build and push UI image
82 |         uses: docker/build-push-action@v3
83 |         with:
84 |           push: true
85 |           context: ui/
86 |           tags: tachyongroup/mlflow-deployment-controller-ui:${{ steps.previoustag.outputs.tag }}


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | mdc/*
  6 | *.DS_Store
  7 | tmp/*
  8 | # C extensions
  9 | *.so
 10 | scripts/*
 11 | env*
 12 | mdc/*
 13 | *DS_Store
 14 | live.py
 15 | # Distribution / packaging
 16 | .Python
 17 | build/
 18 | develop-eggs/
 19 | dist/
 20 | downloads/
 21 | eggs/
 22 | .eggs/
 23 | lib/
 24 | lib64/
 25 | parts/
 26 | sdist/
 27 | var/
 28 | wheels/
 29 | share/python-wheels/
 30 | *.egg-info/
 31 | .installed.cfg
 32 | *.egg
 33 | MANIFEST
 34 | 
 35 | # PyInstaller
 36 | #  Usually these files are written by a python script from a template
 37 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 38 | *.manifest
 39 | *.spec
 40 | 
 41 | # Installer logs
 42 | pip-log.txt
 43 | pip-delete-this-directory.txt
 44 | 
 45 | # Unit test / coverage reports
 46 | htmlcov/
 47 | .tox/
 48 | .nox/
 49 | .coverage
 50 | .coverage.*
 51 | .cache
 52 | nosetests.xml
 53 | coverage.xml
 54 | *.cover
 55 | *.py,cover
 56 | .hypothesis/
 57 | .pytest_cache/
 58 | cover/
 59 | 
 60 | # Translations
 61 | *.mo
 62 | *.pot
 63 | 
 64 | # Django stuff:
 65 | *.log
 66 | local_settings.py
 67 | db.sqlite3
 68 | db.sqlite3-journal
 69 | 
 70 | # Flask stuff:
 71 | instance/
 72 | .webassets-cache
 73 | 
 74 | # Scrapy stuff:
 75 | .scrapy
 76 | 
 77 | # Sphinx documentation
 78 | docs/_build/
 79 | 
 80 | # PyBuilder
 81 | .pybuilder/
 82 | target/
 83 | 
 84 | # Jupyter Notebook
 85 | .ipynb_checkpoints
 86 | 
 87 | # IPython
 88 | profile_default/
 89 | ipython_config.py
 90 | 
 91 | # pyenv
 92 | #   For a library or package, you might want to ignore these files since the code is
 93 | #   intended to run in multiple environments; otherwise, check them in:
 94 | # .python-version
 95 | 
 96 | # pipenv
 97 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 98 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 99 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
100 | #   install all needed dependencies.
101 | #Pipfile.lock
102 | 
103 | # poetry
104 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
106 | #   commonly ignored for libraries.
107 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 | 
110 | # pdm
111 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | #   in version control.
115 | #   https://pdm.fming.dev/#use-with-ide
116 | .pdm.toml
117 | 
118 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
119 | __pypackages__/
120 | 
121 | # Celery stuff
122 | celerybeat-schedule
123 | celerybeat.pid
124 | 
125 | # SageMath parsed files
126 | *.sage.py
127 | 
128 | # Environments
129 | .env
130 | .venv
131 | env/
132 | venv/
133 | ENV/
134 | env.bak/
135 | venv.bak/
136 | 
137 | # Spyder project settings
138 | .spyderproject
139 | .spyproject
140 | 
141 | # Rope project settings
142 | .ropeproject
143 | 
144 | # mkdocs documentation
145 | /site
146 | 
147 | # mypy
148 | .mypy_cache/
149 | .dmypy.json
150 | dmypy.json
151 | 
152 | # Pyre type checker
153 | .pyre/
154 | 
155 | # pytype static type analyzer
156 | .pytype/
157 | 
158 | # Cython debug symbols
159 | cython_debug/
160 | 
161 | # PyCharm
162 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
163 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
164 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
165 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
166 | #.idea/
167 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.8.16-slim-buster
2 | RUN apt-get -y update
3 | RUN apt-get -y install git
4 | COPY requirements.txt requirements.txt
5 | RUN pip install -r requirements.txt
6 | RUN pip install protobuf==3.20
7 | WORKDIR /app
8 | COPY . /app
9 | CMD ["python", "main.py"]


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 ROCKET9
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Directory containing this Makefile, so targets work from any working directory.
 2 | ROOT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
 3 | 
 4 | # Both targets are commands, not files; declaring them phony means a stray
 5 | # file or directory with the same name can never make them look up to date.
 6 | .PHONY: lint-python lint-python-check
 7 | 
 8 | # Format the code base in place (isort, black), then report flake8 findings.
 9 | # `&&` instead of `;` so nothing runs from the wrong directory if `cd` fails.
10 | lint-python:
11 | 	cd "${ROOT_DIR}" && python -m isort . --recursive --atomic
12 | 	cd "${ROOT_DIR}" && python -m black  .
13 | 	cd "${ROOT_DIR}" && python -m flake8 mlflow_controller/
14 | 	cd "${ROOT_DIR}" && python -m flake8 ui/
15 | 	# autoflake --remove-all-unused-imports -i -r .
16 | 
17 | 
18 | # Read-only variant of the checks above: fails on violations, rewrites nothing.
19 | lint-python-check:
20 | 	# cd ${ROOT_DIR}; python -m isort mlflow_controller/  --check-only
21 | 	cd "${ROOT_DIR}" && python -m flake8 mlflow_controller/
22 | 	cd "${ROOT_DIR}" && python -m black --check mlflow_controller
23 | 	cd "${ROOT_DIR}" && python -m black --check ui


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <div align="center" id="top"> 
  2 |   <img src="./.github/app.gif" alt="Hellopmlops" />
  3 | 
  4 |   &#xa0;
  5 | 
  6 |  
  7 | </div>
  8 | 
  9 | <h1 align="center">Mlflow Deployment Controller</h1>
 10 | 
 11 | [![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/mlflow-deployment-controller)](https://artifacthub.io/packages/search?repo=mlflow-deployment-controller)
 12 | 
 13 | 
 14 | <br>
 15 | 
 16 | ## :dart: About ##
 17 | 
 18 | Mlflow does not have an integration with model servers (e.g. Seldon-core) for automatically deploying models when they are registered or promoted to different stages; the Mlflow deployment controller tries to solve this problem. It is a Python-based controller which periodically compares the state in Mlflow with the model server's CRDs in k8s and acts accordingly. Every stage in Mlflow needs a separate controller, since in the real world each stage would usually have its own cluster. You can configure the controller to manage the state for a certain stage based on the use case.
 19 | 
 20 | <img width="808" alt="Screenshot 2022-12-17 at 5 36 52 PM" src="https://user-images.githubusercontent.com/62284209/208241065-a297f111-6e2f-4e68-b430-a8b5ba455804.png">
 21 | 
 22 | 
 23 | ## :rocket: Technologies ##
 24 | 
 25 | The following tools were used in this project:
 26 | 
 27 | - [Seldon-Core](https://docs.seldon.io/projects/seldon-core/en/latest/index.html)
 28 | - [Mlflow](https://www.mlflow.org/docs/latest/index.html)
 29 | 
 30 | ## :white_check_mark: Requirements ##
 31 | 
 32 | Before starting :checkered_flag:, you need to have [Helm](https://helm.sh/docs/helm/helm_install/) 
 33 | 
 34 | ## :checkered_flag: Starting ##
 35 | 
 36 | ```bash
 37 | $ helm repo add rocket9-code https://rocket9-code.github.io/helm-charts
 38 | 
 39 | $ helm install mlflow-deployment-controller rocket9-code/mlflow-deployment-controller
 40 | 
 41 | ```
 42 | 
 43 | ## To Setup Deployment controller in different environments
 44 | 
 45 | ### For Staging environment
 46 | 
 47 | Deployment controller will look for models logged with deploy.yaml in Mlflow Staging Environment and deploys the model in staging Namespace
 48 | 
 49 | ```bash
 50 | $ helm repo add rocket9-code https://rocket9-code.github.io/mlflow-deployment-controller/
 51 | 
 52 | $ helm install mlflow-deployment-controller-staging  rocket9-code/mlflow-deployment-controller --set mlflow.stage=Staging --set mlflow.namespace=staging
 53 | 
 54 | ```
 55 | 
 56 | ### For Production environment
 57 | 
 58 | Deployment controller will look for models logged with deploy.yaml in Mlflow Production Environment and deploys the model in production Namespace
 59 | 
 60 | ```bash
 61 | $ helm repo add rocket9-code https://rocket9-code.github.io/helm-charts
 62 | 
 63 | $ helm install mlflow-deployment-controller-production  rocket9-code/mlflow-deployment-controller --set mlflow.stage=Production --set mlflow.namespace=production
 64 | 
 65 | ```
 66 | 
 67 | Quick Start using argocd
 68 | ---
 69 | 
 70 | Setup Mlflow and Mlflow controllers for different stages using argocd
 71 | 
 72 | ```
 73 | kubectl apply -f  examples/argo-manifest
 74 | ```
 75 | 
 76 | #### Log a Mlflow model with Seldon deployment configuration with the name deploy.yaml
 77 | 
 78 |   Model Uri parameter will be overwritten by controller so it can be left blank
 79 |   
 80 | <img width="783" alt="Screenshot 2022-07-10 at 6 26 01 PM" src="https://user-images.githubusercontent.com/62284209/178153282-9c107398-9f9f-4fc3-8bfc-ca9d5c9a9f3a.png">
 81 |   
 82 |   If any Model in mlflow is registered with deploy.yaml deployment controller will start deploying or managing the model server based on the config
 83 |   
 84 | <img width="1409" alt="Screenshot 2022-07-10 at 6 25 47 PM" src="https://user-images.githubusercontent.com/62284209/178153272-ae254b27-47ed-4251-aa69-07a305223aee.png">
 85 | 
 86 | 
 87 | 
 88 | Once the model is logged with deploy.yaml, the deployment controller will deploy it to the predefined namespace.
 89 | Currently, the deployment controller does not have a UI (it is on our roadmap), so you can check the logs of the Mlflow deployment controller to see the model deployments and any errors.
 90 | 
 91 |         
 92 | 
 93 | ```
 94 | kubectl logs -f deployment/mlflow-deploment-controller
 95 | ```
 96 | 
 97 | 
 98 | 
 99 | <img width="1038" alt="Screenshot 2022-07-10 at 6 27 11 PM" src="https://user-images.githubusercontent.com/62284209/178153334-8909cecb-162e-4f86-ac22-f6cff0a7859d.png">
100 | 
101 | 
102 | https://user-images.githubusercontent.com/62284209/182024746-1fa281ac-a388-467e-98cd-98e9f40a0ed0.mp4
103 | 
104 | 
105 | ## Gitops based deployment controller 
106 | 
107 | The Gitops based deployment controller helps to version control seldon deployments as well as the models in ml registries in an automated way.
108 | The controller expects a templated variable in place of the modelUri in the deployment files, which the controller replaces with the latest version
109 | available in the registry for a certain stage. For example, if a controller manages the prod namespace and the production stage in mlflow and watches the git repository
110 | under the folder production, it will get the manifests from that folder of the git repo and the latest model versions from mlflow and deploy the model servers.
111 | 
112 | 
113 | 
114 | Create a new repository for the deployment controller and add a seldon manifest to it. In place of the modelUri, use a template such as '{{ mlflow.blob["iris demo1"] }}'
115 | to specify the model metadata. The syntax of the template is {{ registry.backend["MODEL NAME IN REGISTRY"]}}
116 | 
117 | Example deployment file deploying multiple models in seldon-core 
118 | <details>
119 |   <summary>Expand me</summary>
120 |   
121 |  ```
122 | apiVersion: machinelearning.seldon.io/v1
123 | kind: SeldonDeployment
124 | metadata:
125 |   name: mlflow-var
126 | spec:
127 |   name: iris
128 |   predictors:
129 |   - graph:
130 |       children:
131 |         - name: step-one
132 |           modelUri: '{{ mlflow.blob["iris demo1"] }}'
133 |           envSecretRefName: seldon-rclone-secret
134 |           implementation: MLFLOW_SERVER
135 |           type: MODEL
136 |           children: 
137 |               - name: step-two
138 |                 modelUri: '{{ mlflow.blob["iris demo2"] }}'
139 |                 envSecretRefName: seldon-rclone-secret
140 |                 implementation: MLFLOW_SERVER
141 |                 type: MODEL
142 |                 children: []
143 |         - name: step-three
144 |           implementation: MLFLOW_SERVER
145 |           modelUri: '{{ mlflow.blob["iris demo3"] }}'
146 |           envSecretRefName: seldon-rclone-secret
147 |           type: MODEL
148 |           children: []
149 |       implementation: MLFLOW_SERVER
150 |       modelUri: '{{ mlflow.blob["iris demo4"] }}'
151 |       envSecretRefName: seldon-rclone-secret
152 |       logger:
153 |         url: http://broker-ingress.knative-eventing.svc.cluster.local/demo/default
154 |         mode: all
155 |       name: classifier
156 |     name: default
157 |     replicas: 1
158 | ```
159 | </details>
160 | 
161 | 
162 | The template values are updated by the controller with the latest version from the registry, as shown below, and submitted to the kubernetes api
163 | 
164 | <details>
165 |   <summary>Expand me</summary>
166 |   
167 |  ```
168 | apiVersion: machinelearning.seldon.io/v1
169 | kind: SeldonDeployment
170 | metadata:
171 |   name: mlflow-var
172 |   namespace: staging
173 | spec:
174 |   name: iris
175 |   predictors:
176 |     - graph:
177 |         children:
178 |           - children:
179 |               - children: []
180 |                 envSecretRefName: seldon-rclone-secret
181 |                 implementation: MLFLOW_SERVER
182 |                 modelUri: '{{ mlflow.blob["iris demo2"] }}'
183 |                 name: step-two
184 |                 type: MODEL
185 |             envSecretRefName: seldon-rclone-secret
186 |             implementation: MLFLOW_SERVER
187 |             modelUri: '{{ mlflow.blob["iris demo1"] }}'
188 |             name: step-one
189 |             type: MODEL
190 |           - children: []
191 |             envSecretRefName: seldon-rclone-secret
192 |             implementation: MLFLOW_SERVER
193 |             modelUri: >-
194 |               wasbs://artifacts/mlflow/10/262bee84b7dd4b039973084383880b57/artifacts/model
195 |             name: step-three
196 |             type: MODEL
197 |         envSecretRefName: seldon-rclone-secret
198 |         implementation: MLFLOW_SERVER
199 |         logger:
200 |           mode: all
201 |           url: >-
202 |             http://broker-ingress.knative-eventing.svc.cluster.local/demo/default
203 |         modelUri: '{{ mlflow.blob["iris demo4"] }}'
204 |         name: classifier
205 |       name: default
206 | ```
207 | </details>
208 | 
209 | 
210 | To enable gitops in the controller 
211 | 
212 | ```
213 | ! helm repo add rocket9-code https://rocket9-code.github.io/helm-charts
214 | 
215 | ! helm install mlflow-controller rocket9-code/mlflow-deployment-controller  -n mlflow --set gitops.enabled=true  
216 | ```
217 | Supported values:
218 | - registry: mlflow
219 | - backend: blob, gcs, s3
220 | 
221 | In future releases we can support azureml registries and databricks mlflow.
222 | 
223 | 
224 | ## To Setup Deployment controller in different environments with Gitops Enabled
225 | 
226 | ### For Staging environment
227 | 
228 | Deployment controller will look for yaml files in the staging folder and models in the Mlflow Staging Environment and deploys the models in the staging Namespace
229 | 
230 | ```bash
231 | $ helm repo add rocket9-code https://rocket9-code.github.io/mlflow-deployment-controller/
232 | 
233 | $ helm install mlflow-deployment-controller-staging  rocket9-code/mlflow-deployment-controller --set gitops.enabled=true \
234 |                                                       --set gitops.repository=github.com/rocket9-code/model-deployments \
235 |                                                       --set gitops.deploymentLocation=staging --set mlflow.stage=Staging \
236 |                                                       --set mlflow.namespace=staging
237 | 
238 | ```
239 | 
240 | ### For Production environment
241 | 
242 | Deployment controller will look for  yaml files in production folder and model in Mlflow Production Environment and deploys the model in production Namespace
243 | 
244 | ```bash
245 | $ helm repo add rocket9-code https://rocket9-code.github.io/helm-charts
246 | 
247 | $ helm install mlflow-deployment-controller-production  rocket9-code/mlflow-deployment-controller --set gitops.enabled=true \
248 |                                                           --set gitops.repository=github.com/rocket9-code/model-deployments \
249 |                                                           --set gitops.deploymentLocation=production --set mlflow.stage=Production \
250 |                                                           --set mlflow.namespace=production
251 | 
252 | ```
253 | 
254 | quick start example is available at examples/gitops
255 | 
256 | Support matrix
257 | | Ml endpoints | Seldon core |  Kserve |  Databricks | Azure ml | Vertex AI | SageMaker | 
258 | |-----|---------|---------|---------|---------|---------|---------|
259 | | Registries | | | | | |
260 | | mlflow oss  gcs | :white_check_mark: |  :white_check_mark: |  ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | 
261 | | mlflow oss blob | :white_check_mark: |  :white_check_mark: |  ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | 
262 | | mlflow oss s3 | :white_check_mark: |  :white_check_mark: | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | 
263 | | databricks mlflow| ✖️ (in roadmap) |  ✖️ (in roadmap) | --- | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | 
264 | | azureml | ✖️ (in roadmap) |  ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | 
265 | | vertexai  registry | ✖️ (in roadmap) |  ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | 
266 | 
267 | ## :memo: License ##
268 | 
269 | This project is licensed under the MIT License. For more details, see the [LICENSE](LICENSE) file.
270 | 
271 | <a href="#top">Back to top</a>
272 | 


--------------------------------------------------------------------------------
/charts/mlflow-controller/.helmignore:
--------------------------------------------------------------------------------
 1 | # Patterns to ignore when building packages.
 2 | # This supports shell glob matching, relative path matching, and
 3 | # negation (prefixed with !). Only one pattern per line.
 4 | .DS_Store
 5 | # Common VCS directories and their ignore files
 6 | .git/
 7 | .gitignore
 8 | .bzr/
 9 | .bzrignore
10 | .hg/
11 | .hgignore
12 | .svn/
13 | # Common backup and temporary files
14 | *.swp
15 | *.bak
16 | *.tmp
17 | *.orig
18 | *~
19 | # IDE and editor project metadata
20 | .project
21 | .idea/
22 | *.tmproj
23 | .vscode/
24 | 


--------------------------------------------------------------------------------
/charts/mlflow-controller/Chart.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v2
 2 | name: mlflow-controller
 3 | description: A Helm chart for Mlflow Deployment Controller and MDC ui
 4 | 
 5 | # A chart can be either an 'application' or a 'library' chart.
 6 | #
 7 | # Application charts are a collection of templates that can be packaged into versioned archives to be deployed.
 8 | # Library charts provide useful utilities or functions for the chart developer. They're included as
 9 | # a dependency of application charts to inject those utilities and functions into the rendering
10 | # pipeline. Library charts do not define any templates and therefore cannot be deployed.
11 | type: application
12 | # This is the chart version. This version number should be incremented each time you make changes
13 | # to the chart and its templates, including the app version.
14 | # Versions are expected to follow Semantic Versioning (https://semver.org/)
15 | version: 0.1.8
16 | 
17 | # This is the version number of the application being deployed. This version number should be
18 | # incremented each time you make changes to the application. Versions are not expected to
19 | # follow Semantic Versioning. They should reflect the version the application is using.
20 | # It is recommended to use it with quotes.
21 | appVersion: "0.1.8"
22 | 


--------------------------------------------------------------------------------
/charts/mlflow-controller/README.md:
--------------------------------------------------------------------------------
 1 | # mlflow-controller
 2 | 
 3 | ![Version: 0.1.8](https://img.shields.io/badge/Version-0.1.8-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.1.8](https://img.shields.io/badge/AppVersion-0.1.8-informational?style=flat-square)
 4 | 
 5 | A Helm chart for Mlflow Deployment Controller
 6 | 
 7 | ## Values
 8 | 
 9 | | Key | Type | Default | Description |
10 | |-----|------|---------|-------------|
11 | | affinity | object | `{}` | affinity |
12 | | envFromSecret | string | `""` | additional ENV from secret |
13 | | fullnameOverride | string | `""` |  |
14 | | gitops.BRANCH | string | `"main"` |  |
15 | | gitops.deploymentLocation | string | `"staging/"` | deployment files folder location |
16 | | gitops.enabled | bool | `true` | enable/disable gitops |
17 | | gitops.gitPasswordSecretKey | string | `"githubtoken"` | git password secret key |
18 | | gitops.gitPasswordSecretName | string | `"github-secret"` | git password secret name |
19 | | gitops.gitUser | string | `"raghulkrishna"` | git username |
20 | | gitops.protocol | string | `"https"` | git repo protocol |
21 | | gitops.repository | string | `"github.com/rocket9-code/model-deployments"` | git repository |
22 | | image.pullPolicy | string | `"Always"` | image pull policy |
23 | | image.repository | string | `"tachyongroup/mlflow-deployment-controller"` | image repository   |
24 | | image.tag | string | `"mlflow-controller-0.1.8"` | image tag |
25 | | imagePullSecrets | list | `[]` |  |
26 | | mlflow.MLFLOW_TRACKING_URI | string | `"http://mlflow-service:5000"` | mlflow tracking uri |
27 | | mlflow.backend | string | `"blob"` | Object Storage Used by mlflow supported gcs , blob , s3  |
28 | | mlflow.enabled | bool | `true` |  |
29 | | mlflow.namespace | string | `"staging"` | Namespace model to be deployed |
30 | | mlflow.stage | string | `"Staging"` | Stage To be Tracked From Mlflow |
31 | | mlserver | string | `"seldon"` | mlserver one of [seldon, kserve] |
32 | | nameOverride | string | `""` |  |
33 | | nodeSelector | object | `{}` | node selector |
34 | | podAnnotations | object | `{}` | pod annotations |
35 | | podSecurityContext | object | `{}` |  |
36 | | replicaCount | int | `1` | replica count |
37 | | resources | object | `{}` | cpu memory resource config |
38 | | securityContext | object | `{}` | security context |
39 | | serviceAccount.annotations | object | `{}` | Annotations to add to the service account |
40 | | serviceAccount.create | bool | `true` | Specifies whether a service account should be created |
41 | | serviceAccount.name | string | `""` | If not set and create is true, a name is generated using the fullname template |
42 | | tolerations | list | `[]` | tolerations |
43 | 
44 | ----------------------------------------------
45 | Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0)
46 | 


--------------------------------------------------------------------------------
/charts/mlflow-controller/templates/_helpers.tpl:
--------------------------------------------------------------------------------
 1 | {{/*
 2 | Short chart name: nameOverride when set, otherwise the chart's own name,
 3 | cut to 63 characters with any trailing "-" removed.
 4 | */}}
 5 | {{- define "mlflow-controller.name" -}}
 6 | {{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
 7 | {{- end }}
 8 | 
 9 | {{/*
10 | Fully qualified app name, limited to 63 characters because some Kubernetes
11 | name fields are restricted to that length by the DNS naming spec.
12 | Precedence: fullnameOverride, then the release name alone when it already
13 | contains the chart name, then "<release name>-<chart name>".
14 | */}}
15 | {{- define "mlflow-controller.fullname" -}}
16 | {{- $chartName := .Values.nameOverride | default .Chart.Name }}
17 | {{- if .Values.fullnameOverride }}
18 | {{- trimSuffix "-" (trunc 63 .Values.fullnameOverride) }}
19 | {{- else if contains $chartName .Release.Name }}
20 | {{- trimSuffix "-" (trunc 63 .Release.Name) }}
21 | {{- else }}
22 | {{- trimSuffix "-" (trunc 63 (printf "%s-%s" .Release.Name $chartName)) }}
23 | {{- end }}
24 | {{- end }}
25 | 
26 | {{/*
27 | "<chart name>-<chart version>" for the helm.sh/chart label; "+" is replaced
28 | with "_" and the result is cut to 63 characters.
29 | */}}
30 | {{- define "mlflow-controller.chart" -}}
31 | {{- $nameVersion := printf "%s-%s" .Chart.Name .Chart.Version }}
32 | {{- $nameVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
33 | {{- end }}
34 | 
35 | {{/*
36 | Labels applied to chart resources: chart label, selector labels, the app
37 | version (only when the chart defines one) and the managing service.
38 | */}}
39 | {{- define "mlflow-controller.labels" -}}
40 | helm.sh/chart: {{ include "mlflow-controller.chart" . }}
41 | {{ include "mlflow-controller.selectorLabels" . }}
42 | {{- with .Chart.AppVersion }}
43 | app.kubernetes.io/version: {{ quote . }}
44 | {{- end }}
45 | app.kubernetes.io/managed-by: {{ .Release.Service }}
46 | {{- end }}
47 | 
48 | {{/*
49 | Labels used in selectors: the short chart name and the release name.
50 | */}}
51 | {{- define "mlflow-controller.selectorLabels" -}}
52 | app.kubernetes.io/name: {{ include "mlflow-controller.name" . }}
53 | app.kubernetes.io/instance: {{ .Release.Name }}
54 | {{- end }}
55 | 
56 | {{/*
57 | Service account name: serviceAccount.name when set; otherwise the fullname
58 | if the chart creates the account, or "default" if it does not.
59 | */}}
60 | {{- define "mlflow-controller.serviceAccountName" -}}
61 | {{- $fallback := "default" }}
62 | {{- if .Values.serviceAccount.create }}
63 | {{- $fallback = include "mlflow-controller.fullname" . }}
64 | {{- end }}
65 | {{- .Values.serviceAccount.name | default $fallback }}
66 | {{- end }}
67 | 


--------------------------------------------------------------------------------
/charts/mlflow-controller/templates/deployment-ui.yaml:
--------------------------------------------------------------------------------
 1 | {{/*
 2 | Deployment for the MDC UI; rendered only when ui.enabled is true.
 3 | The pod label app: mlflow-controller-ui is the label the UI Service selects on.
 4 | */}}
 5 | {{- if .Values.ui.enabled }}
 6 | apiVersion: apps/v1
 7 | kind: Deployment
 8 | metadata:
 9 |   name: {{ include "mlflow-controller.fullname" . }}-ui
10 |   labels:
11 |     app: mlflow-controller-ui
12 | spec:
13 |   {{- if not .Values.autoscaling.enabled }}
14 |   replicas: {{ .Values.replicaCount }}
15 |   {{- end }}
16 |   selector:
17 |     matchLabels:
18 |       app: mlflow-controller-ui
19 |   template:
20 |     metadata:
21 |       {{- with .Values.podAnnotations }}
22 |       annotations:
23 |         {{- toYaml . | nindent 8 }}
24 |       {{- end }}
25 |       labels:
26 |         app: mlflow-controller-ui
27 |     spec:
28 |       {{- with .Values.imagePullSecrets }}
29 |       imagePullSecrets:
30 |         {{- toYaml . | nindent 8 }}
31 |       {{- end }}
32 |       serviceAccountName: {{ include "mlflow-controller.serviceAccountName" . }}
33 |       securityContext:
34 |         {{- toYaml .Values.podSecurityContext | nindent 8 }}
35 |       containers:
36 |         - name: {{ .Chart.Name }}
37 |           securityContext:
38 |             {{- toYaml .Values.securityContext | nindent 12 }}
39 |           image: "{{ .Values.ui.image.repository }}:{{ .Values.ui.image.tag | default .Chart.AppVersion }}"
40 |           {{- /* Honour the UI image's own pull policy; fall back to the controller's when it is unset. */}}
41 |           imagePullPolicy: {{ .Values.ui.image.pullPolicy | default .Values.image.pullPolicy }}
42 |           env:
43 |           {{- /* Env values must be strings, so quote them in case they look like numbers or booleans. */}}
44 |           - name: seldon_url
45 |             value: {{ .Values.ui.seldon_url | quote }}
46 |           - name: namespace
47 |             value: {{ .Values.mlflow.namespace | quote }}
48 |           ports:
49 |             - containerPort: 8000
50 |               name: http
51 |           resources:
52 |             {{- toYaml .Values.resources | nindent 12 }}
53 |       {{- with .Values.nodeSelector }}
54 |       nodeSelector:
55 |         {{- toYaml . | nindent 8 }}
56 |       {{- end }}
57 |       {{- with .Values.affinity }}
58 |       affinity:
59 |         {{- toYaml . | nindent 8 }}
60 |       {{- end }}
61 |       {{- with .Values.tolerations }}
62 |       tolerations:
63 |         {{- toYaml . | nindent 8 }}
64 |       {{- end }}
65 | {{- end }}
66 | 


--------------------------------------------------------------------------------
/charts/mlflow-controller/templates/deployment.yaml:
--------------------------------------------------------------------------------
 1 | {{/*
 2 | Deployment for the controller itself.
 3 | Configuration reaches the container through env / envFrom:
 4 |   - ML_SERVER from .Values.mlserver
 5 |   - GIT_PASSWORD from the configured secret, only when gitops is enabled
 6 |   - the mlflow and gitops ConfigMaps, only when the matching feature is enabled
 7 |     (those ConfigMaps are not created otherwise, so referencing them would block pod start)
 8 |   - an optional extra secret named by .Values.envFromSecret
 9 | */}}
10 | apiVersion: apps/v1
11 | kind: Deployment
12 | metadata:
13 |   name: {{ include "mlflow-controller.fullname" . }}
14 |   labels:
15 |     {{- include "mlflow-controller.labels" . | nindent 4 }}
16 | spec:
17 |   replicas: {{ .Values.replicaCount }}
18 |   selector:
19 |     matchLabels:
20 |       {{- include "mlflow-controller.selectorLabels" . | nindent 6 }}
21 |   template:
22 |     metadata:
23 |       {{- with .Values.podAnnotations }}
24 |       annotations:
25 |         {{- toYaml . | nindent 8 }}
26 |       {{- end }}
27 |       labels:
28 |         {{- include "mlflow-controller.selectorLabels" . | nindent 8 }}
29 |     spec:
30 |       {{- with .Values.imagePullSecrets }}
31 |       imagePullSecrets:
32 |         {{- toYaml . | nindent 8 }}
33 |       {{- end }}
34 |       serviceAccountName: {{ include "mlflow-controller.serviceAccountName" . }}
35 |       securityContext:
36 |         {{- toYaml .Values.podSecurityContext | nindent 8 }}
37 |       containers:
38 |         - name: {{ .Chart.Name }}
39 |           securityContext:
40 |             {{- toYaml .Values.securityContext | nindent 12 }}
41 |           image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
42 |           imagePullPolicy: {{ .Values.image.pullPolicy }}
43 |           resources:
44 |             {{- toYaml .Values.resources | nindent 12 }}
45 |           env:
46 |           - name: ML_SERVER
47 |             value: {{ .Values.mlserver | quote }}
48 |           {{- /* The secret reference is non-optional, so only add it when gitops is actually in use. */}}
49 |           {{- if and .Values.gitops.enabled .Values.gitops.gitPasswordSecretName }}
50 |           - name: GIT_PASSWORD
51 |             valueFrom:
52 |               secretKeyRef:
53 |                 name: {{ .Values.gitops.gitPasswordSecretName }}
54 |                 key: {{ .Values.gitops.gitPasswordSecretKey }}
55 |                 optional: false
56 |           {{- end }}
57 |           {{- /* Omit envFrom entirely when no source is enabled, instead of rendering an empty key. */}}
58 |           {{- if or .Values.mlflow.enabled .Values.envFromSecret .Values.gitops.enabled }}
59 |           envFrom:
60 |           {{- if .Values.mlflow.enabled }}
61 |           - configMapRef:
62 |               name: {{ include "mlflow-controller.fullname" . }}-mlflow-cm
63 |           {{- end }}
64 |           {{- if .Values.envFromSecret }}
65 |           - secretRef:
66 |               name: {{ .Values.envFromSecret }}
67 |           {{- end }}
68 |           {{- if .Values.gitops.enabled }}
69 |           - configMapRef:
70 |               name: {{ include "mlflow-controller.fullname" . }}-gitops-cm
71 |           {{- end }}
72 |           {{- end }}
73 |       {{- with .Values.nodeSelector }}
74 |       nodeSelector:
75 |         {{- toYaml . | nindent 8 }}
76 |       {{- end }}
77 |       {{- with .Values.affinity }}
78 |       affinity:
79 |         {{- toYaml . | nindent 8 }}
80 |       {{- end }}
81 |       {{- with .Values.tolerations }}
82 |       tolerations:
83 |         {{- toYaml . | nindent 8 }}
84 |       {{- end }}
85 | 


--------------------------------------------------------------------------------
/charts/mlflow-controller/templates/gitops-cm.yaml:
--------------------------------------------------------------------------------
 1 | {{/*
 2 | ConfigMap carrying the gitops settings; rendered only when gitops.enabled is true.
 3 | ConfigMap data values must be strings, so every templated value is quoted to keep
 4 | number- or boolean-looking settings (for example a branch named "1.0") valid.
 5 | */}}
 6 | {{- if .Values.gitops.enabled }}
 7 | apiVersion: v1
 8 | kind: ConfigMap
 9 | metadata:
10 |   name: {{ include "mlflow-controller.fullname" . }}-gitops-cm
11 |   labels:
12 |     {{- include "mlflow-controller.labels" . | nindent 4 }}
13 | data:
14 |   GIT_USER: {{ .Values.gitops.gitUser | quote }}
15 |   MANIFEST_LOCATION: {{ .Values.gitops.deploymentLocation | quote }}
16 |   GIT_REPO: {{ .Values.gitops.repository | quote }}
17 |   BRANCH: {{ .Values.gitops.BRANCH | quote }}
18 |   GITOPS_ENABLED: "True"
19 |   GIT_PROTOCOL: {{ .Values.gitops.protocol | quote }}
20 | {{- end }}


--------------------------------------------------------------------------------
/charts/mlflow-controller/templates/ingress.yaml:
--------------------------------------------------------------------------------
 1 | {{/*
 2 | Ingress for the MDC UI; rendered only when ingress.enabled is true.
 3 | The apiVersion and backend shape are chosen from the cluster's Kubernetes version.
 4 | Both backend shapes point at the "<fullname>-ui" Service on .Values.service.port.
 5 | */}}
 6 | {{- if .Values.ingress.enabled -}}
 7 | {{- $fullName := include "mlflow-controller.fullname" . -}}
 8 | {{- $svcPort := .Values.service.port -}}
 9 | {{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
10 |   {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
11 |   {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
12 |   {{- end }}
13 | {{- end }}
14 | {{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
15 | apiVersion: networking.k8s.io/v1
16 | {{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
17 | apiVersion: networking.k8s.io/v1beta1
18 | {{- else -}}
19 | apiVersion: extensions/v1beta1
20 | {{- end }}
21 | kind: Ingress
22 | metadata:
23 |   name: {{ $fullName }}
24 |   labels:
25 |     app: mlflow-controller-ui
26 |   {{- with .Values.ingress.annotations }}
27 |   annotations:
28 |     {{- toYaml . | nindent 4 }}
29 |   {{- end }}
30 | spec:
31 |   {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
32 |   ingressClassName: {{ .Values.ingress.className }}
33 |   {{- end }}
34 |   {{- if .Values.ingress.tls }}
35 |   tls:
36 |     {{- range .Values.ingress.tls }}
37 |     - hosts:
38 |         {{- range .hosts }}
39 |         - {{ . | quote }}
40 |         {{- end }}
41 |       secretName: {{ .secretName }}
42 |     {{- end }}
43 |   {{- end }}
44 |   rules:
45 |     {{- range .Values.ingress.hosts }}
46 |     - host: {{ .host | quote }}
47 |       http:
48 |         paths:
49 |           {{- range .paths }}
50 |           - path: {{ .path }}
51 |             {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
52 |             pathType: {{ .pathType }}
53 |             {{- end }}
54 |             backend:
55 |               {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
56 |               service:
57 |                 name: {{ $fullName }}-ui
58 |                 port:
59 |                   number: {{ $svcPort }}
60 |               {{- else }}
61 |               {{- /* The legacy backend must target the same "-ui" Service as the v1 backend above. */}}
62 |               serviceName: {{ $fullName }}-ui
63 |               servicePort: {{ $svcPort }}
64 |               {{- end }}
65 |           {{- end }}
66 |     {{- end }}
67 | {{- end }}
68 | 


--------------------------------------------------------------------------------
/charts/mlflow-controller/templates/mlflow-cm.yaml:
--------------------------------------------------------------------------------
 1 | {{/*
 2 | ConfigMap carrying the mlflow settings; rendered only when mlflow.enabled is true.
 3 | ConfigMap data values must be strings, so every templated value is quoted to keep
 4 | number- or boolean-looking settings (for example a namespace named "123") valid.
 5 | */}}
 6 | {{- if .Values.mlflow.enabled }}
 7 | apiVersion: v1
 8 | kind: ConfigMap
 9 | metadata:
10 |   name: {{ include "mlflow-controller.fullname" . }}-mlflow-cm
11 |   labels:
12 |     {{- include "mlflow-controller.labels" . | nindent 4 }}
13 | data:
14 |   MLFLOW_TRACKING_URI: {{ .Values.mlflow.MLFLOW_TRACKING_URI | quote }}
15 |   stage: {{ .Values.mlflow.stage | quote }}
16 |   namespace: {{ .Values.mlflow.namespace | quote }}
17 |   backend: {{ .Values.mlflow.backend | quote }}
18 |   MLFLOW_ENABLED: "True"
19 | {{- end }}


--------------------------------------------------------------------------------
/charts/mlflow-controller/templates/service.yaml:
--------------------------------------------------------------------------------
 1 | {{/*
 2 | Service in front of the MDC UI pods; rendered only when ui.enabled is true.
 3 | It selects pods labelled app: mlflow-controller-ui and forwards to their "http" port.
 4 | */}}
 5 | {{- if .Values.ui.enabled }}
 6 | {{- $uiName := printf "%s-ui" (include "mlflow-controller.fullname" .) }}
 7 | apiVersion: v1
 8 | kind: Service
 9 | metadata:
10 |   name: {{ $uiName }}
11 |   labels:
12 |     app: mlflow-controller-ui
13 | spec:
14 |   type: {{ .Values.service.type }}
15 |   selector:
16 |     app: mlflow-controller-ui
17 |   ports:
18 |     - name: http
19 |       protocol: TCP
20 |       port: {{ .Values.service.port }}
21 |       targetPort: http
22 | {{- end }}
23 | 


--------------------------------------------------------------------------------
/charts/mlflow-controller/templates/serviceaccount.yaml:
--------------------------------------------------------------------------------
 1 | {{/*
 2 | ServiceAccount plus the cluster-wide RBAC bound to it.
 3 | All three resources are rendered only when serviceAccount.create is true,
 4 | and all three share the resolved service account name.
 5 | */}}
 6 | {{- if .Values.serviceAccount.create -}}
 7 | {{- $saName := include "mlflow-controller.serviceAccountName" . -}}
 8 | apiVersion: v1
 9 | kind: ServiceAccount
10 | metadata:
11 |   name: {{ $saName }}
12 |   labels:
13 |     {{- include "mlflow-controller.labels" . | nindent 4 }}
14 |   {{- with .Values.serviceAccount.annotations }}
15 |   annotations:
16 |     {{- toYaml . | nindent 4 }}
17 |   {{- end }}
18 | 
19 | ---
20 | 
21 | apiVersion: rbac.authorization.k8s.io/v1
22 | kind: ClusterRole
23 | metadata:
24 |   name: {{ $saName }}
25 | rules:
26 |   # Read and write access to SeldonDeployment resources.
27 |   - apiGroups: [machinelearning.seldon.io]
28 |     resources:
29 |       - seldondeployments
30 |     verbs: [get, list, watch, create, delete, deletecollection, patch, update]
31 |   # Read-only (get/list) access to Deployments.
32 |   - apiGroups: [apps]
33 |     resources:
34 |       - deployments
35 |     verbs: [get, list]
36 |   # Read and write access to KServe InferenceServices and their status.
37 |   - apiGroups: [serving.kserve.io]
38 |     resources:
39 |       - inferenceservices
40 |       - inferenceservices/status
41 |     verbs: [get, list, watch, create, delete, deletecollection, patch, update]
42 |   # Read-only (get/list) access to the Knative Serving resources listed below.
43 |   - apiGroups: [serving.knative.dev]
44 |     resources:
45 |       - services
46 |       - services/status
47 |       - routes
48 |       - routes/status
49 |       - configurations
50 |       - configurations/status
51 |       - revisions
52 |       - revisions/status
53 |     verbs: [get, list]
54 | ---
55 | 
56 | apiVersion: rbac.authorization.k8s.io/v1
57 | kind: ClusterRoleBinding
58 | metadata:
59 |   name: {{ $saName }}
60 | roleRef:
61 |   apiGroup: rbac.authorization.k8s.io
62 |   kind: ClusterRole
63 |   name: {{ $saName }}
64 | subjects:
65 |   - kind: ServiceAccount
66 |     name: {{ $saName }}
67 |     namespace: {{ .Release.Namespace | quote }}
68 | 
69 | {{- end }}
70 | 


--------------------------------------------------------------------------------
/charts/mlflow-controller/values.yaml:
--------------------------------------------------------------------------------
  1 | # Default values for mlflow-controller.
  2 | # This is a YAML-formatted file.
  3 | # Declare variables to be passed into your templates.
  4 | # -- replica count
  5 | replicaCount: 1
  6 | 
  7 | image:
  8 |   # -- image repository
  9 |   repository: tachyongroup/mlflow-deployment-controller
 10 |   # -- image pull policy
 11 |   pullPolicy: Always
 12 |   # -- image tag
 13 |   tag: "mlflow-controller-0.1.8"
 14 | 
 15 | imagePullSecrets: []
 16 | nameOverride: ""
 17 | fullnameOverride: ""
 18 | 
 19 | ui:
 20 |   enabled: true
 21 |   # -- Seldon URL handed to the UI container as the seldon_url environment variable
 22 |   seldon_url: https://seldon.mlops.wianai.com
 23 |   image:
 24 |     repository: tachyongroup/mlflow-deployment-controller-ui
 25 |     tag: "mlflow-controller-0.1.8"
 26 |     pullPolicy: Always
 27 | 
 28 | mlflow:
 29 |   enabled: true
 30 |   # -- mlflow tracking uri
 31 |   MLFLOW_TRACKING_URI: http://mlflow-service:5000
 32 |   # -- Stage To be Tracked From Mlflow
 33 |   stage: Staging
 34 |   # -- Namespace model to be deployed
 35 |   namespace: staging
 36 |   # -- Object Storage Used by mlflow supported gcs , blob , s3
 37 |   backend: "blob"
 38 | 
 39 | # -- mlserver one of [seldon, kserve]
 40 | mlserver: seldon
 41 | 
 42 | gitops:
 43 |   # -- enable/disable gitops
 44 |   enabled: true
 45 |   # -- git repository
 46 |   repository: github.com/rocket9-code/model-deployments
 47 |   # -- git repo protocol
 48 |   protocol: https
 49 |   # -- deployment files folder location
 50 |   deploymentLocation: staging/
 51 |   # -- git username
 52 |   gitUser: raghulkrishna
 53 |   # -- git password secret name
 54 |   gitPasswordSecretName: "github-secret"
 55 |   # -- git password secret key
 56 |   gitPasswordSecretKey: "githubtoken"
 57 |   # -- git branch to be tracked
 58 |   BRANCH: main
 59 | 
 60 | serviceAccount:
 61 |   # -- Specifies whether a service account should be created
 62 |   create: true
 63 |   # -- Annotations to add to the service account
 64 |   annotations: {}
 65 |   # -- The name of the service account to use.
 66 |   # -- If not set and create is true, a name is generated using the fullname template
 67 |   name: ""
 68 | # -- pod annotations
 69 | podAnnotations: {}
 70 | # -- pod security context
 71 | podSecurityContext: {}
 72 |   # fsGroup: 2000
 73 | # -- additional ENV from secret
 74 | envFromSecret: ""
 75 | # -- security context
 76 | securityContext: {}
 77 |   # capabilities:
 78 |   #   drop:
 79 |   #   - ALL
 80 |   # readOnlyRootFilesystem: true
 81 |   # runAsNonRoot: true
 82 |   # runAsUser: 1000
 83 | service:
 84 |   type: ClusterIP
 85 |   port: 8000
 86 | 
 87 | ingress:
 88 |   enabled: true
 89 |   className: "nginx"
 90 |   annotations: {}
 91 |     # kubernetes.io/ingress.class: nginx
 92 |     # kubernetes.io/tls-acme: "true"
 93 |   hosts:
 94 |     - host: mdcv2.mlops.wianai.com
 95 |       paths:
 96 |         - path: /
 97 |           pathType: ImplementationSpecific
 98 |   tls:  # secretName is the TLS Secret; hosts are the hostnames it covers (must match hosts above)
 99 |     - secretName: aui-secret
100 |       hosts:
101 |         - mdcv2.mlops.wianai.com
102 | 
103 | # -- cpu memory resource config
104 | resources: {}
105 |   # We usually recommend not to specify default resources and to leave this as a conscious
106 |   # choice for the user. This also increases chances charts run on environments with little
107 |   # resources, such as Minikube. If you do want to specify resources, uncomment the following
108 |   # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
109 |   # limits:
110 |   #   cpu: 100m
111 |   #   memory: 128Mi
112 |   # requests:
113 |   #   cpu: 100m
114 |   #   memory: 128Mi
115 | autoscaling:
116 |   enabled: false
117 |   minReplicas: 1
118 |   maxReplicas: 100
119 |   targetCPUUtilizationPercentage: 80
120 |   # targetMemoryUtilizationPercentage: 80
121 | 
122 | # -- node selector
123 | nodeSelector: {}
124 | # -- tolerations
125 | tolerations: []
126 | # -- affinity
127 | affinity: {}
128 | 


--------------------------------------------------------------------------------
/doc/Mlflow Deployment controller.drawio:
--------------------------------------------------------------------------------
 1 | <mxfile host="app.diagrams.net" modified="2022-12-17T12:05:39.277Z" agent="5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36" etag="VOU0pEia0VELiml7k_OZ" version="20.7.4" type="github">
 2 |   <diagram id="ixByIET1eY-oSuqdCUhO" name="Page-1">
 3 |     <mxGraphModel dx="954" dy="531" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0">
 4 |       <root>
 5 |         <mxCell id="0" />
 6 |         <mxCell id="1" parent="0" />
 7 |         <mxCell id="Tl1Lc56jeloKBUfB4NoX-4" value="Central Mlflow" style="rounded=1;whiteSpace=wrap;html=1;" parent="1" vertex="1">
 8 |           <mxGeometry x="20" y="470" width="120" height="60" as="geometry" />
 9 |         </mxCell>
10 |         <mxCell id="Tl1Lc56jeloKBUfB4NoX-12" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=1;entryY=0.5;entryDx=0;entryDy=0;" parent="1" source="Tl1Lc56jeloKBUfB4NoX-5" target="Tl1Lc56jeloKBUfB4NoX-4" edge="1">
11 |           <mxGeometry relative="1" as="geometry" />
12 |         </mxCell>
13 |         <mxCell id="Tl1Lc56jeloKBUfB4NoX-24" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" parent="1" source="Tl1Lc56jeloKBUfB4NoX-5" target="Tl1Lc56jeloKBUfB4NoX-23" edge="1">
14 |           <mxGeometry relative="1" as="geometry" />
15 |         </mxCell>
16 |         <mxCell id="Tl1Lc56jeloKBUfB4NoX-5" value="Staging Mlflow Controller" style="rounded=1;whiteSpace=wrap;html=1;" parent="1" vertex="1">
17 |           <mxGeometry x="180" y="250" width="200" height="60" as="geometry" />
18 |         </mxCell>
19 |         <mxCell id="Tl1Lc56jeloKBUfB4NoX-22" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=1;entryY=0.5;entryDx=0;entryDy=0;" parent="1" source="Tl1Lc56jeloKBUfB4NoX-6" target="Tl1Lc56jeloKBUfB4NoX-4" edge="1">
20 |           <mxGeometry relative="1" as="geometry">
21 |             <mxPoint x="150" y="500" as="targetPoint" />
22 |             <Array as="points">
23 |               <mxPoint x="450" y="280" />
24 |               <mxPoint x="450" y="500" />
25 |             </Array>
26 |           </mxGeometry>
27 |         </mxCell>
28 |         <mxCell id="Tl1Lc56jeloKBUfB4NoX-26" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" parent="1" source="Tl1Lc56jeloKBUfB4NoX-6" target="Tl1Lc56jeloKBUfB4NoX-25" edge="1">
29 |           <mxGeometry relative="1" as="geometry" />
30 |         </mxCell>
31 |         <mxCell id="Tl1Lc56jeloKBUfB4NoX-6" value="Production Mlflow Controller" style="rounded=1;whiteSpace=wrap;html=1;" parent="1" vertex="1">
32 |           <mxGeometry x="515" y="250" width="220" height="60" as="geometry" />
33 |         </mxCell>
34 |         <mxCell id="Tl1Lc56jeloKBUfB4NoX-10" value="Staging&amp;nbsp;" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;rotation=90;" parent="1" vertex="1">
35 |           <mxGeometry x="70" y="320" width="150" height="30" as="geometry" />
36 |         </mxCell>
37 |         <mxCell id="Tl1Lc56jeloKBUfB4NoX-14" value="Production" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;rotation=90;" parent="1" vertex="1">
38 |           <mxGeometry x="425" y="360" width="90" height="30" as="geometry" />
39 |         </mxCell>
40 |         <mxCell id="Tl1Lc56jeloKBUfB4NoX-23" value="Staging Seldon Deployment" style="whiteSpace=wrap;html=1;rounded=1;" parent="1" vertex="1">
41 |           <mxGeometry x="160" y="140" width="240" height="60" as="geometry" />
42 |         </mxCell>
43 |         <mxCell id="Tl1Lc56jeloKBUfB4NoX-25" value="Production Seldon Deployment" style="whiteSpace=wrap;html=1;rounded=1;" parent="1" vertex="1">
44 |           <mxGeometry x="510" y="140" width="230" height="60" as="geometry" />
45 |         </mxCell>
46 |         <mxCell id="TayfgcwyeuGKzidpnfEU-3" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;" edge="1" parent="1" source="TayfgcwyeuGKzidpnfEU-2" target="Tl1Lc56jeloKBUfB4NoX-5">
47 |           <mxGeometry relative="1" as="geometry" />
48 |         </mxCell>
49 |         <mxCell id="TayfgcwyeuGKzidpnfEU-2" value="GIT Repository" style="rounded=1;whiteSpace=wrap;html=1;" vertex="1" parent="1">
50 |           <mxGeometry x="180" y="380" width="200" height="60" as="geometry" />
51 |         </mxCell>
52 |         <mxCell id="TayfgcwyeuGKzidpnfEU-6" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;" edge="1" parent="1" source="TayfgcwyeuGKzidpnfEU-4" target="Tl1Lc56jeloKBUfB4NoX-6">
53 |           <mxGeometry relative="1" as="geometry" />
54 |         </mxCell>
55 |         <mxCell id="TayfgcwyeuGKzidpnfEU-4" value="GIT Repository" style="rounded=1;whiteSpace=wrap;html=1;" vertex="1" parent="1">
56 |           <mxGeometry x="525" y="380" width="200" height="60" as="geometry" />
57 |         </mxCell>
58 |       </root>
59 |     </mxGraphModel>
60 |   </diagram>
61 | </mxfile>
62 | 


--------------------------------------------------------------------------------
/doc/doc.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/doc/gitops.md:
--------------------------------------------------------------------------------
  1 | ## Gitops based deployment controller 
  2 | 
  3 | The GitOps-based deployment controller helps to version control Seldon deployments, as well as the models in ML registries, in an automated way. 
  4 | The controller expects a templated variable in place of the modelUri in the deployment files, which it replaces with the latest version 
  5 | available in the registry for a given stage. For example, if a controller is configured for the prod namespace and the Production stage in MLflow, and is watching the 
  6 | production folder of the git repository, it will fetch the manifests from that folder and the latest model versions from MLflow, and deploy the model servers.
  7 | 
  8 | <img width="811" alt="Screenshot 2022-12-17 at 6 33 32 PM" src="https://user-images.githubusercontent.com/62284209/208243176-62c032ab-870a-4ebf-badc-cc4f2e5a025f.png">
  9 | 
 10 | Create a new repository for the deployment controller and add a Seldon manifest. In place of modelUri, use a template such as '{{ mlflow.blob["iris demo1"] }}'
 11 | to specify the model metadata. The syntax of the template is {{ registry.backend["MODEL NAME IN REGISTRY"] }}
 12 | 
 13 | Example deployment file deploying multiple models in seldon-core 
 14 | <details>
 15 |   <summary>Expand me</summary>
 16 |   
 17 |  ```
 18 | apiVersion: machinelearning.seldon.io/v1
 19 | kind: SeldonDeployment
 20 | metadata:
 21 |   name: mlflow-var
 22 | spec:
 23 |   name: iris
 24 |   predictors:
 25 |   - graph:
 26 |       children:
 27 |         - name: step-one
 28 |           modelUri: '{{ mlflow.blob["iris demo1"] }}'
 29 |           envSecretRefName: seldon-rclone-secret
 30 |           implementation: MLFLOW_SERVER
 31 |           type: MODEL
 32 |           children: 
 33 |               - name: step-two
 34 |                 modelUri: '{{ mlflow.blob["iris demo2"] }}'
 35 |                 envSecretRefName: seldon-rclone-secret
 36 |                 implementation: MLFLOW_SERVER
 37 |                 type: MODEL
 38 |                 children: []
 39 |         - name: step-three
 40 |           implementation: MLFLOW_SERVER
 41 |           modelUri: '{{ mlflow.blob["iris demo3"] }}'
 42 |           envSecretRefName: seldon-rclone-secret
 43 |           type: MODEL
 44 |           children: []
 45 |       implementation: MLFLOW_SERVER
 46 |       modelUri: '{{ mlflow.blob["iris demo4"] }}'
 47 |       envSecretRefName: seldon-rclone-secret
 48 |       logger:
 49 |         url: http://broker-ingress.knative-eventing.svc.cluster.local/demo/default
 50 |         mode: all
 51 |       name: classifier
 52 |     name: default
 53 |     replicas: 1
 54 | ```
 55 | </details>
 56 | 
 57 | 
 58 | The template values are updated by the controller with the latest version from the registry, as shown below, and submitted to the Kubernetes API.
 59 | 
 60 | <details>
 61 |   <summary>Expand me</summary>
 62 |   
 63 |  ```
 64 | apiVersion: machinelearning.seldon.io/v1
 65 | kind: SeldonDeployment
 66 | metadata:
 67 |   name: mlflow-var
 68 |   namespace: staging
 69 | spec:
 70 |   name: iris
 71 |   predictors:
 72 |     - graph:
 73 |         children:
 74 |           - children:
 75 |               - children: []
 76 |                 envSecretRefName: seldon-rclone-secret
 77 |                 implementation: MLFLOW_SERVER
 78 |                 modelUri: wasbs://artifacts/mlflow/9/10e8b48f3cfc451da361fabccb6e1c08/artifacts/model
 79 |                 name: step-two
 80 |                 type: MODEL
 81 |             envSecretRefName: seldon-rclone-secret
 82 |             implementation: MLFLOW_SERVER
 83 |             modelUri: wasbs://artifacts/mlflow/8/4083c71c946e47e19422218b69a5d67c/artifacts/model
 84 |             name: step-one
 85 |             type: MODEL
 86 |           - children: []
 87 |             envSecretRefName: seldon-rclone-secret
 88 |             implementation: MLFLOW_SERVER
 89 |             modelUri: >-
 90 |               wasbs://artifacts/mlflow/10/262bee84b7dd4b039973084383880b57/artifacts/model
 91 |             name: step-three
 92 |             type: MODEL
 93 |         envSecretRefName: seldon-rclone-secret
 94 |         implementation: MLFLOW_SERVER
 95 |         logger:
 96 |           mode: all
 97 |           url: >-
 98 |             http://broker-ingress.knative-eventing.svc.cluster.local/demo/default
 99 |         modelUri: wasbs://artifacts/mlflow/11/0dd0c915e3e0446d9139fb81b0b6ad83/artifacts/model
100 |         name: classifier
101 |       name: default
102 | ```
103 | </details>
104 | 
105 | 
106 | To enable gitops in the controller 
107 | 
108 | ```
109 | ! git clone -b gitops-enable https://github.com/rocket9-code/mlflow-deployment-controller
110 | 
111 | ! helm install mlflow-controller mlflow-deployment-controller/charts/mlflow-controller  -n mlflow --set gitops.enabled=true  
112 | ```
113 | Supported values:
114 | - registries: mlflow
115 | - backend: blob, gcs, s3
116 | 
117 | Future releases may add support for Azure ML registries and Databricks MLflow.
118 | 
119 | Support matrix
120 | | Ml endpoints | Seldon core |  Kserve |  Databricks | Azure ml | Vertex AI | SageMaker | 
121 | |-----|---------|---------|---------|---------|---------|---------|
122 | | Registries | | | | | |
123 | | mlflow oss  gcs | :white_check_mark: |  ✖️ (in roadmap) |  ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | 
124 | | mlflow oss blob | :white_check_mark: |  ✖️ (in roadmap) |  ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | 
125 | | mlflow oss s3 | :white_check_mark: |  ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | 
126 | | databricks mlflow| ✖️ (in roadmap) |  ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | 
127 | | databricks azureml | ✖️ (in roadmap) |  ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | 
128 | | vertexai  registry | ✖️ (in roadmap) |  ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | ✖️ (in roadmap) | 
129 | 
130 | 
131 | ## To Setup Deployment controller in different environments with Gitops Enabled
132 | 
133 | ### For Staging environment
134 | 
135 | The deployment controller looks for YAML files in the staging folder and for models in the MLflow Staging stage, and deploys them to the staging namespace.
136 | 
137 | ```bash
138 | $ helm repo add f9n-code https://f9n-code.github.io/mlflow-deployment-controller/
139 | 
140 | $ helm install mlflow-controller-deployment-staging  f9n-code/mlflow-controller-deployment --set gitops.enabled=true \
141 |                                                       --set gitops.repository=github.com/rocket9-code/model-deployments  \
142 |                                                       --set gitops.deploymentLocation=staging --set mlflow.stage=Staging \
143 |                                                       --set mlflow.namespace=staging
144 | 
145 | ```
146 | 
147 | ### For Production environment
148 | 
149 | The deployment controller looks for YAML files in the production folder and for models in the MLflow Production stage, and deploys them to the production namespace.
150 | 
151 | ```bash
152 | $ helm repo add f9n-code https://f9n-code.github.io/helm-charts
153 | 
154 | $ helm install mlflow-controller-deployment-production  f9n-code/mlflow-controller-deployment --set gitops.enabled=true  \
155 |                                                           --set gitops.repository=github.com/rocket9-code/model-deployments \
156 |                                                           --set gitops.deploymentLocation=production --set mlflow.stage=Production \
157 |                                                           --set mlflow.namespace=production
158 | 
159 | ```
160 | 


--------------------------------------------------------------------------------
/examples/argo-manifest/mlflow-controller-production.yaml:
--------------------------------------------------------------------------------
 1 | # Argo CD Application that installs the mlflow-controller Helm chart with
 2 | # mlflow.stage=Production and mlflow.namespace=production.
 3 | apiVersion: argoproj.io/v1alpha1
 4 | kind: Application
 5 | metadata:
 6 |   name: mlflow-deployment-controller-production
 7 |   namespace: argocd
 8 |   finalizers:
 9 |     # Cascade-delete the deployed resources when this Application is deleted.
10 |     - resources-finalizer.argocd.argoproj.io
11 | spec:
12 |   project: default
13 |   source:
14 |     # Chart is read from the charts/mlflow-controller path of this repository.
15 |     repoURL: https://github.com/wianai/mlflow-deployment-controller
16 |     path: charts/mlflow-controller
17 |     targetRevision: main
18 |     helm:
19 |       releaseName: mlflow-deployment-controller-production
20 |       parameters:
21 |         - name: "mlflow.stage"
22 |           value: "Production"
23 |         - name: "mlflow.namespace"
24 |           value: "production"
25 |   syncPolicy:
26 |     automated:
27 |       # Remove resources that are no longer defined in the chart.
28 |       prune: true
29 |       allowEmpty: true
30 |       # Revert manual changes made to the live resources.
31 |       selfHeal: true
32 |   destination:
33 |     # The release itself is installed into the mlflow namespace.
34 |     server: "https://kubernetes.default.svc"
35 |     namespace: mlflow


--------------------------------------------------------------------------------
/examples/argo-manifest/mlflow-controller.yaml:
--------------------------------------------------------------------------------
 1 | # Argo CD Application that installs the mlflow-controller Helm chart with
 2 | # mlflow.stage=Staging and mlflow.namespace=staging.
 3 | apiVersion: argoproj.io/v1alpha1
 4 | kind: Application
 5 | metadata:
 6 |   name: mlflow-deployment-controller-staging
 7 |   namespace: argocd
 8 |   finalizers:
 9 |     # Cascade-delete the deployed resources when this Application is deleted.
10 |     - resources-finalizer.argocd.argoproj.io
11 | spec:
12 |   project: default
13 |   source:
14 |     # Chart is read from the charts/mlflow-controller path of this repository.
15 |     repoURL: https://github.com/wianai/mlflow-deployment-controller
16 |     path: charts/mlflow-controller
17 |     targetRevision: main
18 |     helm:
19 |       releaseName: mlflow-deployment-controller-staging
20 |       parameters:
21 |         - name: "mlflow.stage"
22 |           value: "Staging"
23 |         - name: "mlflow.namespace"
24 |           value: "staging"
25 |   syncPolicy:
26 |     automated:
27 |       # Remove resources that are no longer defined in the chart.
28 |       prune: true
29 |       allowEmpty: true
30 |       # Revert manual changes made to the live resources.
31 |       selfHeal: true
32 |   destination:
33 |     # The release itself is installed into the mlflow namespace.
34 |     server: "https://kubernetes.default.svc"
35 |     namespace: mlflow


--------------------------------------------------------------------------------
/examples/argo-manifest/mlflow.yaml:
--------------------------------------------------------------------------------
 1 | # Argo CD Application that installs the mlflow Helm chart and points its
 2 | # artifact root (artifact.ArtifactRoot) at a GCS bucket path.
 3 | apiVersion: argoproj.io/v1alpha1
 4 | kind: Application
 5 | metadata:
 6 |   name: mlflow
 7 |   namespace: argocd
 8 |   finalizers:
 9 |     # Cascade-delete the deployed resources when this Application is deleted.
10 |     - resources-finalizer.argocd.argoproj.io
11 | spec:
12 |   project: default
13 |   source:
14 |     # Chart is read from the charts/mlflow path of this repository.
15 |     repoURL: https://github.com/wianai/hello-mlflow
16 |     path: charts/mlflow
17 |     targetRevision: main
18 |     helm:
19 |       releaseName: mlflow
20 |       parameters:
21 |         - name: "artifact.ArtifactRoot"
22 |           value: "gs://wian-ai-lab-mlflow/mlflow_artifacts/"
23 |   syncPolicy:
24 |     automated:
25 |       # Remove resources that are no longer defined in the chart.
26 |       prune: true
27 |       allowEmpty: true
28 |       # Revert manual changes made to the live resources.
29 |       selfHeal: true
30 |   destination:
31 |     server: "https://kubernetes.default.svc"
32 |     namespace: mlflow


--------------------------------------------------------------------------------
/examples/argo-manifest/seldon-core.yaml:
--------------------------------------------------------------------------------
 1 | # Argo CD Application that installs the seldon-core-operator chart from the
 2 | # Seldon Helm repository into the seldon-system namespace.
 3 | apiVersion: argoproj.io/v1alpha1
 4 | kind: Application
 5 | metadata:
 6 |   name: seldon-core-operator
 7 |   namespace: argocd
 8 |   finalizers:
 9 |     # Cascade-delete the deployed resources when this Application is deleted.
10 |     - resources-finalizer.argocd.argoproj.io
11 | spec:
12 |   project: default
13 |   source:
14 |     # Helm repository source: the chart is selected by name and version.
15 |     repoURL: https://storage.googleapis.com/seldon-charts
16 |     chart: seldon-core-operator
17 |     # Quoted so the chart version is always read as a string.
18 |     targetRevision: "1.14.0"
19 |     helm:
20 |       releaseName: seldon-core-operator
21 |       parameters:
22 |         - name: "usageMetrics.enabled"
23 |           value: "false"
24 |         - name: "istio.enabled"
25 |           value: "true"
26 |   syncPolicy:
27 |     syncOptions:
28 |       # Create the destination namespace if it does not exist yet.
29 |       - CreateNamespace=true
30 |     automated:
31 |       # Remove resources that are no longer defined in the chart.
32 |       prune: true
33 |       allowEmpty: true
34 |       # Revert manual changes made to the live resources.
35 |       selfHeal: true
36 |   destination:
37 |     server: "https://kubernetes.default.svc"
38 |     namespace: seldon-system
39 | 


--------------------------------------------------------------------------------
/examples/gitops/gitops.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Gitops example"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "# Install deployment controller with gitops enabled"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 8,
 20 |    "metadata": {},
 21 |    "outputs": [
 22 |     {
 23 |      "name": "stdout",
 24 |      "output_type": "stream",
 25 |      "text": [
 26 |       "NAME: mlflow-controller-staging\n",
 27 |       "LAST DEPLOYED: Mon Dec 19 14:29:32 2022\n",
 28 |       "NAMESPACE: mlflow\n",
 29 |       "STATUS: deployed\n",
 30 |       "REVISION: 1\n",
 31 |       "TEST SUITE: None\n"
 32 |      ]
 33 |     }
 34 |    ],
 35 |    "source": [
 36 |     "! helm install mlflow-controller-staging ../../charts/mlflow-controller  -n mlflow --set image.tag=f20fd19f28f1f39ced794e0a2f7736f403447d91 --set gitops.enabled=true   --set mlflow.backend=blob --set gitops.repository=github.com/rocket9-code/model-deployments   --set gitops.deploymentLocation=staging --set mlflow.stage=Staging --set mlflow.namespace=staging"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 11,
 42 |    "metadata": {},
 43 |    "outputs": [
 44 |     {
 45 |      "name": "stdout",
 46 |      "output_type": "stream",
 47 |      "text": [
 48 |       "pod/mlflow-controller-staging-787fd66687-gxl8z condition met\n"
 49 |      ]
 50 |     }
 51 |    ],
 52 |    "source": [
 53 |     "! kubectl wait --for=condition=ready pod -l 'app.kubernetes.io/instance in (mlflow-controller-staging)' --timeout=180s -n mlflow"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "code",
 58 |    "execution_count": null,
 59 |    "metadata": {},
 60 |    "outputs": [],
 61 |    "source": [
 62 |     "! kubectl port-forward -n mlflow svc/mlflow-service 5000:5000 "
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "markdown",
 67 |    "metadata": {},
 68 |    "source": [
 69 |     "# Register Mlflow models"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 12,
 75 |    "metadata": {},
 76 |    "outputs": [
 77 |     {
 78 |      "name": "stdout",
 79 |      "output_type": "stream",
 80 |      "text": [
 81 |       "   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \\\n",
 82 |       "0                5.1               3.5                1.4               0.2   \n",
 83 |       "1                4.9               3.0                1.4               0.2   \n",
 84 |       "2                4.7               3.2                1.3               0.2   \n",
 85 |       "3                4.6               3.1                1.5               0.2   \n",
 86 |       "4                5.0               3.6                1.4               0.2   \n",
 87 |       "\n",
 88 |       "   target  \n",
 89 |       "0       0  \n",
 90 |       "1       0  \n",
 91 |       "2       0  \n",
 92 |       "3       0  \n",
 93 |       "4       0  \n",
 94 |       "IRIS train df shape\n",
 95 |       "(105, 4)\n",
 96 |       "(105,)\n",
 97 |       "IRIS test df shape\n",
 98 |       "(45, 4)\n",
 99 |       "(45,)\n"
100 |      ]
101 |     },
102 |     {
103 |      "name": "stderr",
104 |      "output_type": "stream",
105 |      "text": [
106 |       "Registered model 'iris demo0' already exists. Creating a new version of this model...\n",
107 |       "2022/12/19 14:32:24 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: iris demo0, version 2\n",
108 |       "Created version '2' of model 'iris demo0'.\n"
109 |      ]
110 |     },
111 |     {
112 |      "name": "stdout",
113 |      "output_type": "stream",
114 |      "text": [
115 |       "   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \\\n",
116 |       "0                5.1               3.5                1.4               0.2   \n",
117 |       "1                4.9               3.0                1.4               0.2   \n",
118 |       "2                4.7               3.2                1.3               0.2   \n",
119 |       "3                4.6               3.1                1.5               0.2   \n",
120 |       "4                5.0               3.6                1.4               0.2   \n",
121 |       "\n",
122 |       "   target  \n",
123 |       "0       0  \n",
124 |       "1       0  \n",
125 |       "2       0  \n",
126 |       "3       0  \n",
127 |       "4       0  \n",
128 |       "IRIS train df shape\n",
129 |       "(105, 4)\n",
130 |       "(105,)\n",
131 |       "IRIS test df shape\n",
132 |       "(45, 4)\n",
133 |       "(45,)\n"
134 |      ]
135 |     },
136 |     {
137 |      "name": "stderr",
138 |      "output_type": "stream",
139 |      "text": [
140 |       "Registered model 'iris demo1' already exists. Creating a new version of this model...\n",
141 |       "2022/12/19 14:32:44 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: iris demo1, version 2\n",
142 |       "Created version '2' of model 'iris demo1'.\n"
143 |      ]
144 |     },
145 |     {
146 |      "name": "stdout",
147 |      "output_type": "stream",
148 |      "text": [
149 |       "   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \\\n",
150 |       "0                5.1               3.5                1.4               0.2   \n",
151 |       "1                4.9               3.0                1.4               0.2   \n",
152 |       "2                4.7               3.2                1.3               0.2   \n",
153 |       "3                4.6               3.1                1.5               0.2   \n",
154 |       "4                5.0               3.6                1.4               0.2   \n",
155 |       "\n",
156 |       "   target  \n",
157 |       "0       0  \n",
158 |       "1       0  \n",
159 |       "2       0  \n",
160 |       "3       0  \n",
161 |       "4       0  \n",
162 |       "IRIS train df shape\n",
163 |       "(105, 4)\n",
164 |       "(105,)\n",
165 |       "IRIS test df shape\n",
166 |       "(45, 4)\n",
167 |       "(45,)\n"
168 |      ]
169 |     },
170 |     {
171 |      "name": "stderr",
172 |      "output_type": "stream",
173 |      "text": [
174 |       "Registered model 'iris demo2' already exists. Creating a new version of this model...\n",
175 |       "2022/12/19 14:33:02 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: iris demo2, version 2\n",
176 |       "Created version '2' of model 'iris demo2'.\n"
177 |      ]
178 |     },
179 |     {
180 |      "name": "stdout",
181 |      "output_type": "stream",
182 |      "text": [
183 |       "   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \\\n",
184 |       "0                5.1               3.5                1.4               0.2   \n",
185 |       "1                4.9               3.0                1.4               0.2   \n",
186 |       "2                4.7               3.2                1.3               0.2   \n",
187 |       "3                4.6               3.1                1.5               0.2   \n",
188 |       "4                5.0               3.6                1.4               0.2   \n",
189 |       "\n",
190 |       "   target  \n",
191 |       "0       0  \n",
192 |       "1       0  \n",
193 |       "2       0  \n",
194 |       "3       0  \n",
195 |       "4       0  \n",
196 |       "IRIS train df shape\n",
197 |       "(105, 4)\n",
198 |       "(105,)\n",
199 |       "IRIS test df shape\n",
200 |       "(45, 4)\n",
201 |       "(45,)\n"
202 |      ]
203 |     },
204 |     {
205 |      "name": "stderr",
206 |      "output_type": "stream",
207 |      "text": [
208 |       "Registered model 'iris demo3' already exists. Creating a new version of this model...\n",
209 |       "2022/12/19 14:33:18 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: iris demo3, version 2\n",
210 |       "Created version '2' of model 'iris demo3'.\n"
211 |      ]
212 |     },
213 |     {
214 |      "name": "stdout",
215 |      "output_type": "stream",
216 |      "text": [
217 |       "   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \\\n",
218 |       "0                5.1               3.5                1.4               0.2   \n",
219 |       "1                4.9               3.0                1.4               0.2   \n",
220 |       "2                4.7               3.2                1.3               0.2   \n",
221 |       "3                4.6               3.1                1.5               0.2   \n",
222 |       "4                5.0               3.6                1.4               0.2   \n",
223 |       "\n",
224 |       "   target  \n",
225 |       "0       0  \n",
226 |       "1       0  \n",
227 |       "2       0  \n",
228 |       "3       0  \n",
229 |       "4       0  \n",
230 |       "IRIS train df shape\n",
231 |       "(105, 4)\n",
232 |       "(105,)\n",
233 |       "IRIS test df shape\n",
234 |       "(45, 4)\n",
235 |       "(45,)\n"
236 |      ]
237 |     },
238 |     {
239 |      "name": "stderr",
240 |      "output_type": "stream",
241 |      "text": [
242 |       "Registered model 'iris demo4' already exists. Creating a new version of this model...\n",
243 |       "2022/12/19 14:33:35 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: iris demo4, version 2\n",
244 |       "Created version '2' of model 'iris demo4'.\n"
245 |      ]
246 |     }
247 |    ],
248 |    "source": [
249 |     "import os\n",
250 |     "\n",
251 |     "import mlflow\n",
252 |     "import mlflow.sklearn\n",
253 |     "import pandas as pd\n",
254 |     "from minio import Minio\n",
255 |     "from mlflow.tracking import MlflowClient\n",
256 |     "from sklearn import datasets\n",
257 |     "from sklearn.ensemble import RandomForestClassifier\n",
258 |     "from sklearn.metrics import roc_auc_score\n",
259 |     "from sklearn.model_selection import train_test_split\n",
260 |     "\n",
261 |     "\n",
262 |     "os.environ[\"MLFLOW_TRACKING_URI\"] = \"http://localhost:5000\"\n",
263 |     "os.environ[\"AZURE_STORAGE_ACCESS_KEY\"] = \"\"\n",
264 |     "os.environ[\"AZURE_STORAGE_CONNECTION_STRING\"] = \"\"\n",
265 |     "\n",
266 |     "\n",
267 |     "def main(MODEL_NAME=\"iris gitops\", stage=\"Staging\"):\n",
268 |     "    iris = datasets.load_iris()\n",
269 |     "    iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)\n",
270 |     "    y = iris.target\n",
271 |     "    iris_df[\"target\"] = y\n",
272 |     "\n",
273 |     "    print(iris_df.head())\n",
274 |     "\n",
275 |     "    train_df, test_df = train_test_split(\n",
276 |     "        iris_df, test_size=0.3, random_state=42, stratify=iris_df[\"target\"]\n",
277 |     "    )\n",
278 |     "    X_train = train_df[\n",
279 |     "        [\n",
280 |     "            \"sepal length (cm)\",\n",
281 |     "            \"sepal width (cm)\",\n",
282 |     "            \"petal length (cm)\",\n",
283 |     "            \"petal width (cm)\",\n",
284 |     "        ]\n",
285 |     "    ]\n",
286 |     "    y_train = train_df[\"target\"]\n",
287 |     "\n",
288 |     "    X_test = test_df[\n",
289 |     "        [\n",
290 |     "            \"sepal length (cm)\",\n",
291 |     "            \"sepal width (cm)\",\n",
292 |     "            \"petal length (cm)\",\n",
293 |     "            \"petal width (cm)\",\n",
294 |     "        ]\n",
295 |     "    ]\n",
296 |     "    y_test = test_df[\"target\"]\n",
297 |     "\n",
298 |     "    EXPERIMENT_NAME = MODEL_NAME\n",
299 |     "\n",
300 |     "    print(\"IRIS train df shape\")\n",
301 |     "    print(X_train.shape)\n",
302 |     "    print(y_train.shape)\n",
303 |     "\n",
304 |     "    print(\"IRIS test df shape\")\n",
305 |     "    print(X_test.shape)\n",
306 |     "    print(y_test.shape)\n",
307 |     "\n",
308 |     "    mlflow_client = MlflowClient()\n",
309 |     "\n",
310 |     "    # Create an MLFlow experiment, if not already exists\n",
311 |     "    experiment_details = mlflow_client.get_experiment_by_name(EXPERIMENT_NAME)\n",
312 |     "\n",
313 |     "    if experiment_details is not None:\n",
314 |     "        experiment_id = experiment_details.experiment_id\n",
315 |     "    else:\n",
316 |     "        experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)\n",
317 |     "\n",
318 |     "    # Start an MLFlow experiment run\n",
319 |     "    with mlflow.start_run(\n",
320 |     "        experiment_id=experiment_id, run_name=\"iris dataset rf run\"\n",
321 |     "    ) as run:\n",
322 |     "        # Log parameters\n",
323 |     "\n",
324 |     "        mlflow.log_param(\"max_depth\", 10)\n",
325 |     "        mlflow.log_param(\"random_state\", 0)\n",
326 |     "        mlflow.log_param(\"n_estimators\", 100)\n",
327 |     "        clf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=0)\n",
328 |     "        clf.fit(X_train, y_train)\n",
329 |     "        iris_predict_y = clf.predict(X_test)\n",
330 |     "\n",
331 |     "        roc_auc_score_val = roc_auc_score(\n",
332 |     "            y_test, clf.predict_proba(X_test), multi_class=\"ovr\"\n",
333 |     "        )\n",
334 |     "        mlflow.log_metric(\"test roc_auc_score\", roc_auc_score_val)\n",
335 |     "\n",
336 |     "        # Log model\n",
337 |     "        result = mlflow.sklearn.log_model(clf, artifact_path=\"model\")\n",
338 |     "\n",
339 |     "        # Register a new version\n",
340 |     "    result = mlflow.register_model(result.model_uri, MODEL_NAME)\n",
341 |     "\n",
342 |     "    client = MlflowClient()\n",
343 |     "    client.transition_model_version_stage(\n",
344 |     "        name=MODEL_NAME, version=result.version, stage=stage\n",
345 |     "    )\n",
346 |     "\n",
347 |     "\n",
348 |     "for i in range(5):\n",
349 |     "    main(MODEL_NAME=f\"iris demo{i}\")"
350 |    ]
351 |   },
352 |   {
353 |    "cell_type": "markdown",
354 |    "metadata": {},
355 |    "source": [
356 |     "# write deployment file and commit to git repository"
357 |    ]
358 |   },
359 |   {
360 |    "cell_type": "code",
361 |    "execution_count": null,
362 |    "metadata": {},
363 |    "outputs": [],
364 |    "source": [
365 |     "! git clone https://github.com/rocket9-code/model-deployments"
366 |    ]
367 |   },
368 |   {
369 |    "cell_type": "code",
370 |    "execution_count": null,
371 |    "metadata": {},
372 |    "outputs": [],
373 |    "source": [
374 |     "dep_yaml = \"\"\"apiVersion: machinelearning.seldon.io/v1\n",
375 |     "kind: SeldonDeployment\n",
376 |     "metadata:\n",
377 |     "  name: mlflow-var-test1\n",
378 |     "spec:\n",
379 |     "  name: iris\n",
380 |     "  predictors:\n",
381 |     "  - graph:\n",
382 |     "      children:\n",
383 |     "        - name: step-one\n",
384 |     "          modelUri: '{{ mlflow.blob[\"iris demo1\"] }}'\n",
385 |     "          envSecretRefName: seldon-rclone-secret\n",
386 |     "          implementation: MLFLOW_SERVER\n",
387 |     "          type: MODEL\n",
388 |     "          children: \n",
389 |     "              - name: step-two\n",
390 |     "                modelUri: '{{ mlflow.blob[\"iris demo2\"] }}'\n",
391 |     "                envSecretRefName: seldon-rclone-secret\n",
392 |     "                implementation: MLFLOW_SERVER\n",
393 |     "                type: MODEL\n",
394 |     "                children: []\n",
395 |     "        - name: step-three\n",
396 |     "          implementation: MLFLOW_SERVER\n",
397 |     "          modelUri: '{{ mlflow.blob[\"iris demo3\"] }}'\n",
398 |     "          envSecretRefName: seldon-rclone-secret\n",
399 |     "          type: MODEL\n",
400 |     "          children: []\n",
401 |     "      implementation: MLFLOW_SERVER\n",
402 |     "      modelUri: '{{ mlflow.blob[\"iris demo4\"] }}'\n",
403 |     "      envSecretRefName: seldon-rclone-secret\n",
404 |     "      logger:\n",
405 |     "        url: http://broker-ingress.knative-eventing.svc.cluster.local/demo/default\n",
406 |     "        mode: all\n",
407 |     "      name: classifier\n",
408 |     "    name: default\n",
409 |     "    replicas: 1\"\"\"\n",
410 |     "with open(\"model-deployments/staging/seldon-deploy-test1.yaml\", \"x\") as f:\n",
411 |     "    f.write(dep_yaml)"
412 |    ]
413 |   },
414 |   {
415 |    "cell_type": "code",
416 |    "execution_count": null,
417 |    "metadata": {},
418 |    "outputs": [],
419 |    "source": [
420 |     "! cd model-deployments &&  git add staging/seldon-deploy-test1.yaml"
421 |    ]
422 |   },
423 |   {
424 |    "cell_type": "code",
425 |    "execution_count": null,
426 |    "metadata": {},
427 |    "outputs": [],
428 |    "source": [
429 |     "! cd model-deployments &&  git commit -m \"test deploy yaml\" \n",
430 |     "! cd model-deployments &&  git push"
431 |    ]
432 |   },
433 |   {
434 |    "cell_type": "markdown",
435 |    "metadata": {},
436 |    "source": [
437 |     "# Wait for the controller to pick up the changes and create the new deployment"
438 |    ]
439 |   },
440 |   {
441 |    "cell_type": "code",
442 |    "execution_count": 13,
443 |    "metadata": {},
444 |    "outputs": [],
445 |    "source": [
446 |     "import time\n",
447 |     "\n",
448 |     "from kubernetes import client as KubeClient\n",
449 |     "from kubernetes import config\n",
450 |     "\n",
451 |     "try:\n",
452 |     "    config.load_kube_config()\n",
453 |     "except config.ConfigException:\n",
454 |     "    config.load_incluster_config()\n",
455 |     "kube_client = KubeClient.CustomObjectsApi()"
456 |    ]
457 |   },
458 |   {
459 |    "cell_type": "markdown",
460 |    "metadata": {},
461 |    "source": [
462 |     "You can see that the controller updated each modelUri with the latest model version."
463 |    ]
464 |   },
465 |   {
466 |    "cell_type": "code",
467 |    "execution_count": 15,
468 |    "metadata": {},
469 |    "outputs": [
470 |     {
471 |      "name": "stdout",
472 |      "output_type": "stream",
473 |      "text": [
474 |       "wasbs://artifacts/mlflow/8/4083c71c946e47e19422218b69a5d67c/artifacts/model wasbs://artifacts/mlflow/9/10e8b48f3cfc451da361fabccb6e1c08/artifacts/model wasbs://artifacts/mlflow/10/262bee84b7dd4b039973084383880b57/artifacts/model wasbs://artifacts/mlflow/11/0dd0c915e3e0446d9139fb81b0b6ad83/artifacts/model\n"
475 |      ]
476 |     }
477 |    ],
478 |    "source": [
479 |     "manifest = kube_client.get_namespaced_custom_object(\n",
480 |     "    group=\"machinelearning.seldon.io\",\n",
481 |     "    version=\"v1\",\n",
482 |     "    plural=\"seldondeployments\",\n",
483 |     "    namespace=\"staging\",\n",
484 |     "    name=\"mlflow-var-test1\",\n",
485 |     ")\n",
486 |     "demo1 = manifest[\"spec\"][\"predictors\"][0][\"graph\"][\"children\"][0][\"modelUri\"]\n",
487 |     "demo2 = manifest[\"spec\"][\"predictors\"][0][\"graph\"][\"children\"][0][\"children\"][0][\n",
488 |     "    \"modelUri\"\n",
489 |     "]\n",
490 |     "demo3 = manifest[\"spec\"][\"predictors\"][0][\"graph\"][\"children\"][1][\"modelUri\"]\n",
491 |     "demo4 = manifest[\"spec\"][\"predictors\"][0][\"graph\"][\"modelUri\"]\n",
492 |     "\n",
493 |     "print(demo1, demo2, demo3, demo4)"
494 |    ]
495 |   },
496 |   {
497 |    "cell_type": "code",
498 |    "execution_count": 16,
499 |    "metadata": {},
500 |    "outputs": [
501 |     {
502 |      "name": "stdout",
503 |      "output_type": "stream",
504 |      "text": [
505 |       "release \"mlflow-controller-staging\" uninstalled\n"
506 |      ]
507 |     }
508 |    ],
509 |    "source": [
510 |     "! helm delete mlflow-controller-staging -n mlflow"
511 |    ]
512 |   },
513 |   {
514 |    "cell_type": "code",
515 |    "execution_count": null,
516 |    "metadata": {},
517 |    "outputs": [],
518 |    "source": []
519 |   }
520 |  ],
521 |  "metadata": {
522 |   "kernelspec": {
523 |    "display_name": "Python 3",
524 |    "language": "python",
525 |    "name": "python3"
526 |   },
527 |   "language_info": {
528 |    "codemirror_mode": {
529 |     "name": "ipython",
530 |     "version": 3
531 |    },
532 |    "file_extension": ".py",
533 |    "mimetype": "text/x-python",
534 |    "name": "python",
535 |    "nbconvert_exporter": "python",
536 |    "pygments_lexer": "ipython3",
537 |    "version": "3.7.7"
538 |   }
539 |  },
540 |  "nbformat": 4,
541 |  "nbformat_minor": 4
542 | }
543 | 


--------------------------------------------------------------------------------
/examples/notebook/deploy.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: machinelearning.seldon.io/v1
 2 | kind: SeldonDeployment
 3 | metadata:
 4 |   name: mlflow
 5 |   labels:
 6 |     app.kubernetes.io/managed-by: mlflow-seldon
 7 |     app.kubernetes.io/name: mlflow
 8 | spec:
 9 |   name: iris
10 |   predictors:
11 |   - componentSpecs:
12 |     - spec:
13 |         # We set a very high failureThreshold because installing conda dependencies
14 |         # can take a long time and we want to avoid k8s killing the container prematurely
15 |         containers:
16 |         # - image: seldonio/mlflowserver:1.14.0-dev
17 |         #   imagePullPolicy: IfNotPresent
18 |         #   name: classifier
19 |         - name: classifier
20 |           livenessProbe:
21 |             initialDelaySeconds: 800
22 |             failureThreshold: 20000000
23 |             periodSeconds: 25
24 |             successThreshold: 1
25 |             httpGet:
26 |               path: /health/ping
27 |               port: http
28 |               scheme: HTTP
29 |           readinessProbe:
30 |             initialDelaySeconds: 800
31 |             failureThreshold: 2000000
32 |             periodSeconds: 25
33 |             successThreshold: 1
34 |             httpGet:
35 |               path: /health/ping
36 |               port: http
37 |               scheme: HTTP
38 | 
39 |     graph:
40 |       children: []
41 |       implementation: MLFLOW_SERVER
42 |       modelUri: gs://hellomlops-mlflow/mlflow_artifacts/1/6887f98225b9419f9681d68e7cdd9335/artifacts/random-forest-model
43 |       logger:
44 |         url: http://broker-ingress.knative-eventing.svc.cluster.local/demo/default
45 |         mode: all
46 |       name: classifier
47 |     name: default
48 |     replicas: 1


--------------------------------------------------------------------------------
/examples/notebook/mlflow.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 2,
  6 |    "id": "3bab55b3-a167-48c1-b3b2-0ca66f4c7c21",
  7 |    "metadata": {},
  8 |    "outputs": [
  9 |     {
 10 |      "name": "stdout",
 11 |      "output_type": "stream",
 12 |      "text": [
 13 |       "   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \\\n",
 14 |       "0                5.1               3.5                1.4               0.2   \n",
 15 |       "1                4.9               3.0                1.4               0.2   \n",
 16 |       "2                4.7               3.2                1.3               0.2   \n",
 17 |       "3                4.6               3.1                1.5               0.2   \n",
 18 |       "4                5.0               3.6                1.4               0.2   \n",
 19 |       "\n",
 20 |       "   target  \n",
 21 |       "0       0  \n",
 22 |       "1       0  \n",
 23 |       "2       0  \n",
 24 |       "3       0  \n",
 25 |       "4       0  \n",
 26 |       "IRIS train df shape\n",
 27 |       "(105, 4)\n",
 28 |       "(105,)\n",
 29 |       "IRIS test df shape\n",
 30 |       "(45, 4)\n",
 31 |       "(45,)\n"
 32 |      ]
 33 |     }
 34 |    ],
 35 |    "source": [
 36 |     "import pandas as pd\n",
 37 |     "from sklearn import datasets\n",
 38 |     "from sklearn.ensemble import RandomForestClassifier\n",
 39 |     "import mlflow, os\n",
 40 |     "import mlflow.sklearn\n",
 41 |     "from mlflow.tracking import MlflowClient\n",
 42 |     "from sklearn.metrics import roc_auc_score, accuracy_score\n",
 43 |     "from sklearn.model_selection import train_test_split\n",
 44 |     "\n",
 45 |     "os.environ[\"MLFLOW_TRACKING_URI\"] = \"http://localhost:5000\"\n",
 46 |     "iris = datasets.load_iris()\n",
 47 |     "iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)\n",
 48 |     "y = iris.target\n",
 49 |     "iris_df[\"target\"] = y\n",
 50 |     "\n",
 51 |     "print(iris_df.head())\n",
 52 |     "\n",
 53 |     "\n",
 54 |     "train_df, test_df = train_test_split(\n",
 55 |     "    iris_df, test_size=0.3, random_state=42, stratify=iris_df[\"target\"]\n",
 56 |     ")\n",
 57 |     "X_train = train_df[\n",
 58 |     "    [\"sepal length (cm)\", \"sepal width (cm)\", \"petal length (cm)\", \"petal width (cm)\"]\n",
 59 |     "]\n",
 60 |     "y_train = train_df[\"target\"]\n",
 61 |     "\n",
 62 |     "X_test = test_df[\n",
 63 |     "    [\"sepal length (cm)\", \"sepal width (cm)\", \"petal length (cm)\", \"petal width (cm)\"]\n",
 64 |     "]\n",
 65 |     "y_test = test_df[\"target\"]\n",
 66 |     "# print(iris)\n",
 67 |     "# print(iris_df.head())\n",
 68 |     "\n",
 69 |     "\n",
 70 |     "EXPERIMENT_NAME = \"IRIS dataset classification\"\n",
 71 |     "\n",
 72 |     "\n",
 73 |     "print(\"IRIS train df shape\")\n",
 74 |     "print(X_train.shape)\n",
 75 |     "print(y_train.shape)\n",
 76 |     "\n",
 77 |     "print(\"IRIS test df shape\")\n",
 78 |     "print(X_test.shape)\n",
 79 |     "print(y_test.shape)\n",
 80 |     "\n",
 81 |     "mlflow_client = MlflowClient()\n",
 82 |     "\n",
 83 |     "# Create an MLFlow experiment, if not already exists\n",
 84 |     "experiment_details = mlflow_client.get_experiment_by_name(EXPERIMENT_NAME)\n",
 85 |     "\n",
 86 |     "if experiment_details is not None:\n",
 87 |     "    experiment_id = experiment_details.experiment_id\n",
 88 |     "else:\n",
 89 |     "    experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)\n",
 90 |     "\n",
 91 |     "# Start an MLFlow experiment run\n",
 92 |     "with mlflow.start_run(\n",
 93 |     "    experiment_id=experiment_id, run_name=\"iris dataset rf run\"\n",
 94 |     ") as run:\n",
 95 |     "    # Log parameters\n",
 96 |     "\n",
 97 |     "    mlflow.log_param(\"max_depth\", 10)\n",
 98 |     "    mlflow.log_param(\"random_state\", 0)\n",
 99 |     "    mlflow.log_param(\"n_estimators\", 100)\n",
100 |     "    clf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=0)\n",
101 |     "    clf.fit(X_train, y_train)\n",
102 |     "    iris_predict_y = clf.predict(X_test)\n",
103 |     "\n",
104 |     "    roc_auc_score_val = roc_auc_score(\n",
105 |     "        y_test, clf.predict_proba(X_test), multi_class=\"ovr\"\n",
106 |     "    )\n",
107 |     "    mlflow.log_metric(\"test roc_auc_score\", roc_auc_score_val)\n",
108 |     "\n",
109 |     "    accuracy_score = accuracy_score(y_test, iris_predict_y)\n",
110 |     "    mlflow.log_metric(\"test accuracy_score\", accuracy_score)\n",
111 |     "    mlflow.log_artifact(\"deploy.yaml\")\n",
112 |     "\n",
113 |     "    # Log model\n",
114 |     "    mlflow.sklearn.log_model(clf, artifact_path=\"model\")"
115 |    ]
116 |   }
117 |  ],
118 |  "metadata": {
119 |   "interpreter": {
120 |    "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
121 |   },
122 |   "kernelspec": {
123 |    "display_name": "Python 3",
124 |    "language": "python",
125 |    "name": "python3"
126 |   },
127 |   "language_info": {
128 |    "codemirror_mode": {
129 |     "name": "ipython",
130 |     "version": 3
131 |    },
132 |    "file_extension": ".py",
133 |    "mimetype": "text/x-python",
134 |    "name": "python",
135 |    "nbconvert_exporter": "python",
136 |    "pygments_lexer": "ipython3",
137 |    "version": "3.9.12"
138 |   }
139 |  },
140 |  "nbformat": 4,
141 |  "nbformat_minor": 5
142 | }
143 | 


--------------------------------------------------------------------------------
/examples/readme.md:
--------------------------------------------------------------------------------
 1 | Example deployment using Argo CD
 2 | ---
 3 | 
 4 | Set up MLflow and the MLflow controllers for the different stages using Argo CD:
 5 | 
 6 | ```
 7 | kubectl apply -f  argo-manifest
 8 | ```
 9 | 
10 | Log an MLflow model together with its Seldon deployment configuration, stored as an artifact named `deploy.yaml`:
11 | 
12 | <img width="783" alt="Screenshot 2022-07-10 at 6 26 01 PM" src="https://user-images.githubusercontent.com/62284209/178153282-9c107398-9f9f-4fc3-8bfc-ca9d5c9a9f3a.png">
13 | 
14 | <img width="1409" alt="Screenshot 2022-07-10 at 6 25 47 PM" src="https://user-images.githubusercontent.com/62284209/178153272-ae254b27-47ed-4251-aa69-07a305223aee.png">
15 | 
16 | 
17 | The MLflow controllers will then deploy the models to the appropriate namespaces based on that configuration.
18 | 
19 | <img width="1038" alt="Screenshot 2022-07-10 at 6 27 11 PM" src="https://user-images.githubusercontent.com/62284209/178153334-8909cecb-162e-4f86-ac22-f6cff0a7859d.png">
20 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | """
 2 | __author__ = "Raghul Krishna"
 3 | __copyright__ = ""
 4 | __credits__ = ""
 5 | __license__ = ""
 6 | __version__ = ""
 7 | __maintainer__ = "raghul Krishna"
 8 | __email__ = "rrkraghulkrishna@gmail.com"
 9 | 
10 | """
11 | import logging
12 | import os
13 | from time import sleep
14 | 
15 | from apscheduler.schedulers.background import BackgroundScheduler
16 | 
17 | from mlflow_controller.gitops import GitopsMDC
18 | from mlflow_controller.mlflow_direct import DeployConroller
19 | 
20 | logging.getLogger("apscheduler").setLevel(logging.ERROR)
21 | 
22 | if __name__ == "__main__":
23 |     scheduler = BackgroundScheduler()
24 |     controller = DeployConroller()
25 |     giopsmdc = GitopsMDC()
26 |     # scheduler.add_job(
27 |     #     controller.deploy_controller, CronTrigger.from_crontab("* * * * *")
28 |     # )
29 |     # scheduler.add_job(
30 |     #     id="controller",
31 |     #     func=controller.deploy_controller,
32 |     #     trigger="interval",
33 |     #     seconds=15,
34 |     # )
35 |     if os.getenv("GITOPS_ENABLED", "False"):
36 |         scheduler.add_job(
37 |             id="gitopsmdc",
38 |             func=giopsmdc.gitops_mlflow_controller,
39 |             trigger="interval",
40 |             seconds=15,
41 |         )
42 |     scheduler.start()
43 |     while True:
44 |         sleep(1)
45 | 


--------------------------------------------------------------------------------
/mlflow_controller/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rocket9-code/mlflow-deployment-controller/9bd5eefa87f8351bfe6837754f373fcab0ac86c0/mlflow_controller/__init__.py


--------------------------------------------------------------------------------
/mlflow_controller/controller.py:
--------------------------------------------------------------------------------
  1 | """
  2 | __author__ = "Raghul Krishna"
  3 | __copyright__ = ""
  4 | __credits__ = ""
  5 | __license__ = ""
  6 | __version__ = ""
  7 | __maintainer__ = "raghul Krishna"
  8 | __email__ = "rrkraghulkrishna@gmail.com"
  9 | 
 10 | """
 11 | import logging
 12 | import os
 13 | import re
 14 | 
 15 | from kubernetes import client as KubeClient
 16 | from kubernetes import config
 17 | from mlflow.tracking import MlflowClient
 18 | 
 19 | import mlflow_controller.storage
 20 | 
 21 | logger = logging.getLogger(__name__)  # module-level logger for the controller
 22 | logger.setLevel(logging.DEBUG)
 23 | # Line format shared by both handlers: "<timestamp>:<logger name>:<message>".
 24 | formatter = logging.Formatter("%(asctime)s:%(name)s:%(message)s")
 25 | # File handler: only ERROR and above are written to log.log.
 26 | file_handler = logging.FileHandler("log.log")
 27 | file_handler.setLevel(logging.ERROR)
 28 | file_handler.setFormatter(formatter)
 29 | # Stream handler: shares the format; no handler-level threshold is set here.
 30 | stream_handler = logging.StreamHandler()
 31 | stream_handler.setFormatter(formatter)
 32 | # Attach both handlers to this module's logger.
 33 | logger.addHandler(file_handler)
 34 | logger.addHandler(stream_handler)
 35 | # Root logger level comes from the LOG_LEVEL environment variable (default "INFO").
 36 | logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))
 37 | 
 38 | # os.environ["MLFLOW_TRACKING_URI"] = "http://localhost:5000"
 39 | 
 40 | 
 41 | class DeployConroller:
 42 |     """
 43 |     A class to Matain the controller
 44 |     ...
 45 | 
 46 |     Methods
 47 |     -------
 48 |     deploy_controller():
 49 |         Manages the deployments from Mlflow
 50 |     """
 51 | 
 52 |     def __init__(self):
 53 |         self.mlflow_client = MlflowClient()
 54 |         logger.info("Mlflow client initialized")
 55 |         self.object_init = mlflow_controller.storage.Artifact()
 56 |         try:
 57 |             config.load_kube_config()
 58 |         except config.ConfigException:
 59 |             config.load_incluster_config()
 60 |         self.kube_client = KubeClient.CustomObjectsApi()
 61 |         logger.info("KubeClient initialized")
 62 |         self.mlflow_deploy_config = "deploy.yaml"
 63 |         self.stage = os.environ["stage"]
 64 |         self.model_details = []
 65 |         self.Namespace = os.environ["namespace"]
 66 |         self.cloud = os.environ["cloud"]
 67 |         self.label = "app.kubernetes.io/managed-by=mdc-mlflow"
 68 | 
 69 |     def __str__(self):
 70 |         return self.__class__.__name__
 71 | 
 72 |     def state_manager(self):
 73 |         """To delete resources deleted in Mlflow"""
 74 |         manifests = self.kube_client.list_namespaced_custom_object(
 75 |             group="machinelearning.seldon.io",
 76 |             version="v1",
 77 |             plural="seldondeployments",
 78 |             namespace=self.Namespace,
 79 |             label_selector=self.label,
 80 |         )
 81 |         for manifest in manifests["items"]:
 82 |             model_names = self.model_details
 83 |             manifest_name = manifest["metadata"]["name"]
 84 |             manifest_namespace = manifest["metadata"]["namespace"]
 85 |             print(model_names, manifest_name, manifest_namespace)
 86 |             model = next(
 87 |                 (
 88 |                     item
 89 |                     for item in model_names
 90 |                     if item["deploy_name"] == manifest_name
 91 |                     and item["Namespace"] == manifest_namespace
 92 |                 ),
 93 |                 None,
 94 |             )
 95 |             if model:
 96 |                 logger.info(
 97 |                     "Model %s Namespace %s in Sync ",
 98 |                     manifest["metadata"]["name"],
 99 |                     manifest["metadata"]["namespace"],
100 |                 )
101 |             else:
102 |                 logger.info(
103 |                     "Deleting a Deployment %s Namespace %s",
104 |                     manifest["metadata"]["name"],
105 |                     manifest["metadata"]["namespace"],
106 |                 )
107 |                 self.kube_client.delete_namespaced_custom_object(
108 |                     group="machinelearning.seldon.io",
109 |                     version="v1",
110 |                     plural="seldondeployments",
111 |                     name=manifest["metadata"]["name"],
112 |                     namespace=manifest["metadata"]["namespace"],
113 |                 )
114 |         self.model_details = []
115 | 
116 |     def deploy_controller(self):
117 |         """
118 |         Manages the deployments from Mlflow
119 |         """
120 |         model_versions = []
121 |         for registered_model in self.mlflow_client.list_registered_models():
122 |             for version in registered_model.latest_versions:
123 |                 model_versions.append(version)
124 |         for version in model_versions:
125 |             if version.current_stage == self.stage:
126 |                 print(version.current_stage)
127 |                 for file in self.mlflow_client.list_artifacts(version.run_id):
128 |                     if file.path == self.mlflow_deploy_config:
129 |                         model_name = version.name.lower()
130 |                         model_run_id = version.run_id
131 |                         run_details = self.mlflow_client.get_run(version.run_id)
132 |                         model_version = version.version
133 |                         artifact_uri = run_details.info.artifact_uri
134 |                         if self.cloud == "gcp":
135 |                             model_source = version.source
136 |                             deploy_yaml = self.object_init.gcp_bucket(artifact_uri)
137 |                         elif self.cloud == "azure_blob":
138 |                             model_source = re.sub(
139 |                                 r"(?=\@)(.*?)(?=\/)", "", version.source
140 |                             )
141 |                             deploy_yaml = self.object_init.azure_blob(artifact_uri)
142 |                         elif self.cloud == "aws_s3":
143 |                             model_source = re.sub(
144 |                                 r"(?=\@)(.*?)(?=\/)", "", version.source
145 |                             )
146 |                             deploy_yaml = self.object_init.azure_blob(artifact_uri)
147 | 
148 |                         else:
149 |                             raise ("unsupported Object Storage")
150 |                         model_deploy_name = model_name.replace(" ", "").replace(
151 |                             "_", "-"
152 |                         )
153 |                         deploy_yaml["spec"]["predictors"][0]["graph"][
154 |                             "modelUri"
155 |                         ] = model_source
156 |                         deploy_yaml["spec"]["predictors"][0]["annotations"][
157 |                             "predictor_version"
158 |                         ] = model_version
159 |                         deploy_yaml["metadata"]["name"] = model_deploy_name
160 |                         try:
161 |                             deploy_yaml["metadata"]["annotations"]
162 |                         except KeyError:
163 |                             deploy_yaml["metadata"]["annotations"] = {}
164 |                         deploy_yaml["metadata"]["labels"][
165 |                             "app.kubernetes.io/managed-by"
166 |                         ] = "mdc-mlflow"
167 |                         logger.info(
168 |                             "Model Name: %s, Model Run Id: %s",
169 |                             model_name,
170 |                             model_run_id,
171 |                         )
172 |                         self.model_details.append(
173 |                             {
174 |                                 "name": model_name,
175 |                                 "deploy_name": deploy_yaml["metadata"]["name"],
176 |                                 "Namespace": self.Namespace,
177 |                             }
178 |                         )
179 |                         try:
180 |                             self.kube_client.create_namespaced_custom_object(
181 |                                 group="machinelearning.seldon.io",
182 |                                 version="v1",
183 |                                 plural="seldondeployments",
184 |                                 body=deploy_yaml,
185 |                                 namespace=self.Namespace,
186 |                             )
187 |                             logger.info(
188 |                                 "Created a Deployment %s Namespace %s",
189 |                                 model_name,
190 |                                 self.Namespace,
191 |                             )
192 |                         except KubeClient.rest.ApiException:
193 |                             self.kube_client.patch_namespaced_custom_object(
194 |                                 group="machinelearning.seldon.io",
195 |                                 version="v1",
196 |                                 plural="seldondeployments",
197 |                                 body=deploy_yaml,
198 |                                 name=deploy_yaml["metadata"]["name"],
199 |                                 namespace=self.Namespace,
200 |                             )
201 |                             logger.info(
202 |                                 "Patched a Deployment %s  Namespace %s",
203 |                                 model_name,
204 |                                 self.Namespace,
205 |                             )
206 |         self.state_manager()
207 | 


--------------------------------------------------------------------------------
/mlflow_controller/gitops.py:
--------------------------------------------------------------------------------
  1 | import glob
  2 | import logging
  3 | import os
  4 | import shutil
  5 | import uuid
  6 | 
  7 | import yaml
  8 | from git import Repo
  9 | from kubernetes import config
 10 | 
 11 | from mlflow_controller.mlservers import kserve, seldon
 12 | from mlflow_controller.registries.mlflow import MLflowMetadata
 13 | 
 14 | logger = logging.getLogger(__name__)  # module-level logger for the GitOps sync
 15 | logger.setLevel(logging.DEBUG)
 16 | # Line format shared by both handlers: "<timestamp>:<logger name>:<message>".
 17 | formatter = logging.Formatter("%(asctime)s:%(name)s:%(message)s")
 18 | # File handler: only ERROR and above are written to log.log.
 19 | file_handler = logging.FileHandler("log.log")
 20 | file_handler.setLevel(logging.ERROR)
 21 | file_handler.setFormatter(formatter)
 22 | # Stream handler: shares the format; no handler-level threshold is set here.
 23 | stream_handler = logging.StreamHandler()
 24 | stream_handler.setFormatter(formatter)
 25 | # Attach both handlers to this module's logger.
 26 | logger.addHandler(file_handler)
 27 | logger.addHandler(stream_handler)
 28 | # Root logger level comes from the LOG_LEVEL environment variable (default "INFO").
 29 | logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))
 30 | # The settings below are read from the environment once, when the module is imported.
 31 | TRACKING_URI = os.getenv("MLFLOW_TRACKING_URI", "http://localhost:9000")
 32 | GIT_USER = os.getenv("GIT_USER", "")
 33 | GIT_PASSWORD = os.getenv("GIT_PASSWORD", "")
 34 | GIT_PROTOCOL = os.getenv("GIT_PROTOCOL", "https")
 35 | GIT_REPO = os.getenv("GIT_REPO", "github.com/rocket9-code/model-deployments")
 36 | if GIT_PASSWORD:  # credentials are embedded in the clone URL only when a password is set
 37 |     GIT_URL = f"{GIT_PROTOCOL}://{GIT_USER}:{GIT_PASSWORD}@{GIT_REPO}"
 38 | else:
 39 |     GIT_URL = f"{GIT_PROTOCOL}://{GIT_REPO}"
 40 | # Manifest folder inside the repo, target namespace, Mlflow stage, backend name, Git branch, model server.
 41 | MANIFEST_LOCATION = os.getenv("MANIFEST_LOCATION", "staging")
 42 | GLOBAL_NAMESPACE = os.getenv("namespace", "staging")
 43 | MLFLOW_STAGE = os.getenv("stage", "Staging")
 44 | backend = os.getenv("backend", "")
 45 | BRANCH = os.getenv("BRANCH", "main")
 46 | ML_SERVER = os.getenv("ML_SERVER", "kserve")
 48 | 
 49 | class GitopsMDC:
 50 |     def gitops_mlflow_controller(self):
 51 |         folder_name = str(uuid.uuid4())
 52 |         path = "./tmp/" + folder_name
 53 |         if not os.path.exists(path):
 54 |             os.makedirs(path)
 55 |         logger.info(f"Cloning repo {GIT_REPO} with branch {BRANCH}")
 56 |         Repo.clone_from(GIT_URL, path, single_branch=True, branch=BRANCH)
 57 |         try:
 58 |             config.load_kube_config()
 59 |         except config.ConfigException:
 60 |             config.load_incluster_config()
 61 |         manifest_path = path + "/" + MANIFEST_LOCATION
 62 |         deploy_yamls = glob.glob(f"{manifest_path}/*.yaml") + glob.glob(
 63 |             f"{manifest_path}/*.yml"
 64 |         )
 65 |         mlflowcontroller = MLflowMetadata(tracking_uri=TRACKING_URI, stage=MLFLOW_STAGE)
 66 |         logger.info(f"Mlflow tracking uri {TRACKING_URI}")
 67 |         logger.info(f"Mlflow Stage {MLFLOW_STAGE}")
 68 |         logger.info(f"backend {backend}")
 69 |         mlflow_models_metadata, _ = mlflowcontroller.get_model_metadata(
 70 |             check_deploy=False, backend=backend
 71 |         )
 72 |         read_seldon_deploy_yamls = []
 73 |         for i in deploy_yamls:
 74 |             with open(i, "r") as stream:
 75 |                 try:
 76 |                     deploy_yaml = yaml.safe_load(stream)
 77 |                     resource_group = deploy_yaml["apiVersion"].split("/")[0]
 78 |                     if ML_SERVER == "seldon":
 79 |                         if resource_group == "machinelearning.seldon.io":
 80 |                             read_seldon_deploy_yamls.append(deploy_yaml)
 81 |                     elif ML_SERVER == "kserve":
 82 |                         if resource_group == "serving.kserve.io":
 83 |                             read_seldon_deploy_yamls.append(deploy_yaml)
 84 |                 except yaml.YAMLError as exc:
 85 |                     logger.error(exc)
 86 |         if len(mlflow_models_metadata.keys()) > 0:
 87 |             if ML_SERVER == "seldon":
 88 |                 seldon.sync(
 89 |                     read_seldon_deploy_yamls,
 90 |                     mlflow_models_metadata,
 91 |                     MLFLOW_STAGE,
 92 |                     GLOBAL_NAMESPACE,
 93 |                     f"mdc-gitops-{backend}-mlflow-seldon",
 94 |                     "mlflow",
 95 |                     backend,
 96 |                 )
 97 |             elif ML_SERVER == "kserve":
 98 |                 kserve.sync(
 99 |                     read_seldon_deploy_yamls,
100 |                     mlflow_models_metadata,
101 |                     MLFLOW_STAGE,
102 |                     GLOBAL_NAMESPACE,
103 |                     f"mdc-gitops-{backend}-mlflow-kserve",
104 |                     "mlflow",
105 |                     backend,
106 |                 )
107 |         shutil.rmtree(path, ignore_errors=True)
108 | 


--------------------------------------------------------------------------------
/mlflow_controller/mlflow_direct.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | 
 4 | from mlflow_controller.mlservers import kserve, seldon
 5 | from mlflow_controller.registries.mlflow import MLflowMetadata
 6 | 
 7 | logger = logging.getLogger(__name__)  # module-level logger for the direct controller
 8 | logger.setLevel(logging.DEBUG)
 9 | # Line format shared by both handlers: "<timestamp>:<logger name>:<message>".
10 | formatter = logging.Formatter("%(asctime)s:%(name)s:%(message)s")
11 | # File handler: only ERROR and above are written to log.log.
12 | file_handler = logging.FileHandler("log.log")
13 | file_handler.setLevel(logging.ERROR)
14 | file_handler.setFormatter(formatter)
15 | # Stream handler: shares the format; no handler-level threshold is set here.
16 | stream_handler = logging.StreamHandler()
17 | stream_handler.setFormatter(formatter)
18 | # Attach both handlers to this module's logger.
19 | logger.addHandler(file_handler)
20 | logger.addHandler(stream_handler)
21 | # Root logger level and the controller settings are read from the environment at import time.
22 | logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))
23 | TRACKING_URI = os.getenv("MLFLOW_TRACKING_URI", "http://localhost:5000")
24 | GLOBAL_NAMESPACE = os.getenv("namespace", "staging")
25 | MLFLOW_STAGE = os.getenv("stage", "Staging")
26 | backend = os.getenv("backend", "")
27 | ML_SERVER = os.getenv("ML_SERVER", "kserve")
28 | 
29 | 
30 | class DeployConroller:
31 |     """
32 |     A class to Matain the controller
33 | 
34 |     ...
35 | 
36 |     Methods
37 |     -------
38 |     deploy_controller():
39 |         Manages the deployments from Mlflow
40 |     """
41 | 
42 |     def __init__(self):
43 |         self.managed_label = "mdc-direct"
44 | 
45 |     def __str__(self):
46 |         return self.__class__.__name__
47 | 
48 |     def deploy_controller(self):
49 |         """
50 |         Manages the deployments from Mlflow
51 |         """
52 |         mlflowcontroller = MLflowMetadata(tracking_uri=TRACKING_URI, stage=MLFLOW_STAGE)
53 |         logger.info(f"Mlflow tracking uri {TRACKING_URI}")
54 |         logger.info(f"Mlflow Stage {MLFLOW_STAGE}")
55 |         logger.info(f"backend {backend}")
56 |         mlflow_models_metadata, read_deploy_yaml = mlflowcontroller.get_model_metadata(
57 |             check_deploy=True,
58 |             backend=backend,
59 |             manager_label=self.managed_label,
60 |             mlflow_deploy_config="deploy.yaml",
61 |         )
62 |         if len(mlflow_models_metadata.keys()) > 0:
63 |             if ML_SERVER == "seldon":
64 |                 seldon.sync(
65 |                     read_deploy_yaml,
66 |                     mlflow_models_metadata,
67 |                     MLFLOW_STAGE,
68 |                     GLOBAL_NAMESPACE,
69 |                     f"{self.managed_label}-mlflow-{backend}-seldon",
70 |                     "mlflow",
71 |                     backend,
72 |                 )
73 |             elif ML_SERVER == "kserve":
74 |                 kserve.sync(
75 |                     read_deploy_yaml,
76 |                     mlflow_models_metadata,
77 |                     MLFLOW_STAGE,
78 |                     GLOBAL_NAMESPACE,
79 |                     f"{self.managed_label}-mlflow-{backend}-kserve",
80 |                     "mlflow",
81 |                     backend,
82 |                 )
83 | 


--------------------------------------------------------------------------------
/mlflow_controller/mlservers/kserve.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import re
  3 | 
  4 | from kubernetes import client as KubeClient
  5 | from kubernetes import config
  6 | 
  7 | from mlflow_controller.mlservers.utils import mlflow_model_search, update_modeluris
  8 | from mlflow_controller.utils.var_extract import var_parser
  9 | 
 10 | logger = logging.getLogger(__name__)  # module-level logger for the kserve sync
 11 | logger.setLevel(logging.DEBUG)
 12 | # Line format shared by both handlers: "<timestamp>:<logger name>:<message>".
 13 | formatter = logging.Formatter("%(asctime)s:%(name)s:%(message)s")
 14 | # File handler: only ERROR and above are written to log.log.
 15 | file_handler = logging.FileHandler("log.log")
 16 | file_handler.setLevel(logging.ERROR)
 17 | file_handler.setFormatter(formatter)
 18 | # Stream handler: shares the format; no handler-level threshold is set here.
 19 | stream_handler = logging.StreamHandler()
 20 | stream_handler.setFormatter(formatter)
 21 | # Attach both handlers to this module's logger.
 22 | logger.addHandler(file_handler)
 23 | logger.addHandler(stream_handler)
 24 | # Kubernetes client is set up at import time: local kubeconfig first, then in-cluster config.
 25 | try:
 26 |     config.load_kube_config()
 27 | except config.ConfigException:
 28 |     config.load_incluster_config()
 29 | kube_client = KubeClient.CustomObjectsApi()
 30 | 
 31 | 
 32 | class InvalidVariable(Exception):
 33 |     "Raised when a template variable names a backend or registry other than the one being synced"
 34 | 
 35 | 
 36 | def sync(
 37 |     deploy_yamls,
 38 |     model_metadata,
 39 |     stage,
 40 |     GLOBAL_NAMESPACE,
 41 |     controller_label_value,
 42 |     registry_name,
 43 |     backend,
 44 | ):
 45 |     git_models = []
 46 |     for deploy_yaml in deploy_yamls:
 47 |         logger.info(deploy_yamls)
 48 |         resource_group = deploy_yaml["apiVersion"].split("/")[0]
 49 |         logger.info(resource_group)
 50 |         if resource_group == "serving.kserve.io":
 51 |             models = list(
 52 |                 set(mlflow_model_search("storageUri", deploy_yaml, search_result=[]))
 53 |             )
 54 |             logger.info(f"models {models}")
 55 |             rep_deploy_yaml = deploy_yaml
 56 |             try:
 57 |                 rep_deploy_yaml["metadata"]["annotations"]
 58 | 
 59 |             except KeyError:
 60 |                 rep_deploy_yaml["metadata"]["annotations"] = {}
 61 |             try:
 62 |                 rep_deploy_yaml["metadata"]["labels"]
 63 | 
 64 |             except KeyError:
 65 |                 rep_deploy_yaml["metadata"]["labels"] = {}
 66 |             deploy = False
 67 |             for m in models:
 68 |                 try:
 69 |                     pattern = r"\{\{\s(.*)\s\}\}"
 70 |                     model_jinja = re.findall(pattern, m)[0]
 71 |                     model_name, bk_name, rg_name = var_parser(model_jinja)
 72 |                     if (bk_name != backend) or (rg_name != registry_name):
 73 |                         raise InvalidVariable
 74 |                     model = model_metadata[registry_name][backend][model_name]
 75 |                     run_id = model["run_id"]
 76 |                     if backend == "blob":
 77 |                         model_source = model["source"].replace("wasbs", "https")
 78 |                     else:
 79 |                         model_source = model["source"]
 80 |                     rep_deploy_yaml = update_modeluris(
 81 |                         rep_deploy_yaml,
 82 |                         f'{{{{ {registry_name}.{backend}["{model_name}"] }}}}',
 83 |                         model_source,
 84 |                     )
 85 |                     rep_deploy_yaml["metadata"]["annotations"][
 86 |                         f"mdc/mlflow-{run_id}"
 87 |                     ] = str(model)
 88 |                     rep_deploy_yaml["metadata"]["annotations"][
 89 |                         "mdc/mlflow-stage"
 90 |                     ] = stage
 91 |                     rep_deploy_yaml["metadata"]["labels"][
 92 |                         "app.kubernetes.io/mdc-type"
 93 |                     ] = controller_label_value
 94 |                     rep_deploy_yaml["metadata"]["labels"][
 95 |                         "app.kubernetes.io/managed-by"
 96 |                     ] = "mdc"
 97 |                     deploy = True
 98 |                     name = rep_deploy_yaml["metadata"]["name"]
 99 |                 except InvalidVariable:
100 |                     deploy = False
101 |                     logger.error(
102 |                         f"Error in variable for model {m} backend {bk_name} registry {rg_name}"
103 |                     )
104 |                 except Exception as e:
105 |                     deploy = False
106 |                     logger.error(
107 |                         f"Error deploying {name} Model {m} not found in mlflow {e}"
108 |                     )
109 |         if deploy:
110 |             logger.info(
111 |                 f"deploying kserve deployment {name} in namespace {GLOBAL_NAMESPACE}"
112 |             )
113 |             try:
114 |                 manifest = kube_client.get_namespaced_custom_object(
115 |                     group=resource_group,
116 |                     version="v1beta1",
117 |                     plural="inferenceservices",
118 |                     namespace=GLOBAL_NAMESPACE,
119 |                     name=rep_deploy_yaml["metadata"]["name"],
120 |                 )
121 |                 resourceVersion = manifest["metadata"]["resourceVersion"]
122 |                 manifest["metadata"].pop("creationTimestamp")
123 |                 manifest["metadata"].pop("generation")
124 |                 manifest["metadata"].pop("managedFields")
125 |                 manifest["metadata"].pop("resourceVersion")
126 |                 manifest["metadata"].pop("uid")
127 |                 manifest["metadata"].pop("namespace")
128 |                 manifest.pop("status")
129 |                 _name = rep_deploy_yaml["metadata"]["name"]
130 |                 if rep_deploy_yaml == manifest:
131 |                     logger.info(f"Kserve deployment {_name} in sync")
132 |                 else:
133 |                     rep_deploy_yaml["metadata"]["resourceVersion"] = resourceVersion
134 |                     kube_client.replace_namespaced_custom_object(
135 |                         group=resource_group,
136 |                         version="v1beta1",
137 |                         plural="inferenceservices",
138 |                         body=rep_deploy_yaml,
139 |                         name=_name,
140 |                         namespace=GLOBAL_NAMESPACE,
141 |                     )
142 |             except KubeClient.rest.ApiException:
143 |                 kube_client.create_namespaced_custom_object(
144 |                     group=resource_group,
145 |                     version="v1beta1",
146 |                     plural="inferenceservices",
147 |                     body=rep_deploy_yaml,
148 |                     namespace=GLOBAL_NAMESPACE,
149 |                 )
150 |             git_models.append(rep_deploy_yaml["metadata"]["name"])
151 |     manifests = kube_client.list_namespaced_custom_object(
152 |         group="serving.kserve.io",
153 |         version="v1beta1",
154 |         plural="inferenceservices",
155 |         namespace=GLOBAL_NAMESPACE,
156 |         label_selector=f"app.kubernetes.io/mdc-type={controller_label_value}",
157 |     )
158 |     for i in manifests["items"]:
159 |         model_name = i["metadata"]["name"]
160 |         if model_name in git_models:
161 |             logger.info(f"kserve dpeloyment in sync {model_name}")
162 |         else:
163 |             kube_client.delete_namespaced_custom_object(
164 |                 group="serving.kserve.io",
165 |                 version="v1beta1",
166 |                 plural="inferenceservices",
167 |                 name=model_name,
168 |                 namespace=GLOBAL_NAMESPACE,
169 |             )
170 | 


--------------------------------------------------------------------------------
/mlflow_controller/mlservers/rclone.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | 
 4 | def rclone_source(source, backend):
 5 |     if backend == "blob":
 6 |         pattern = r"(?<=net/).*"
 7 |         rclonesource = re.search(pattern, source).group()
 8 |         conatiner_pattern = r"(?<=/)\w+"
 9 |         conatiner_name = re.search(conatiner_pattern, source).group()
10 |         return "wasbs://" + conatiner_name + "/" + rclonesource
11 |     else:
12 |         return source
13 | 


--------------------------------------------------------------------------------
/mlflow_controller/mlservers/seldon.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import re
  3 | 
  4 | from kubernetes import client as KubeClient
  5 | from kubernetes import config
  6 | 
  7 | from mlflow_controller.mlservers.rclone import rclone_source
  8 | from mlflow_controller.mlservers.utils import mlflow_model_search, update_modeluris
  9 | from mlflow_controller.utils.var_extract import var_parser
 10 | 
# Module logger: accepts DEBUG and above; handlers below decide what is emitted.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

formatter = logging.Formatter("%(asctime)s:%(name)s:%(message)s")

# ERROR and above are also written to "log.log" (path relative to the
# process working directory).
file_handler = logging.FileHandler("log.log")
file_handler.setLevel(logging.ERROR)
file_handler.setFormatter(formatter)

# Stream handler has no level of its own, so it emits everything the logger accepts.
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(formatter)

logger.addHandler(file_handler)
logger.addHandler(stream_handler)

# Runs at import time: try the local kubeconfig first and fall back to the
# in-cluster configuration when that raises ConfigException.
try:
    config.load_kube_config()
except config.ConfigException:
    config.load_incluster_config()
# Shared client for custom resources, used by sync() below.
kube_client = KubeClient.CustomObjectsApi()
 31 | 
 32 | 
 33 | class InvalidVariable(Exception):
 34 |     "Raised when wrong templates"
 35 | 
 36 | 
 37 | def sync(
 38 |     deploy_yamls,
 39 |     model_metadata,
 40 |     stage,
 41 |     GLOBAL_NAMESPACE,
 42 |     controller_label_value,
 43 |     registry_name,
 44 |     backend,
 45 | ):
 46 |     git_models = []
 47 |     for deploy_yaml in deploy_yamls:
 48 |         resource_group = deploy_yaml["apiVersion"].split("/")[0]
 49 |         if resource_group == "machinelearning.seldon.io":
 50 |             models = list(
 51 |                 set(mlflow_model_search("modelUri", deploy_yaml, search_result=[]))
 52 |             )
 53 |             logger.info(f"models {models}")
 54 |             rep_deploy_yaml = deploy_yaml
 55 |             try:
 56 |                 rep_deploy_yaml["metadata"]["annotations"]
 57 | 
 58 |             except KeyError:
 59 |                 rep_deploy_yaml["metadata"]["annotations"] = {}
 60 |             try:
 61 |                 rep_deploy_yaml["metadata"]["labels"]
 62 | 
 63 |             except KeyError:
 64 |                 rep_deploy_yaml["metadata"]["labels"] = {}
 65 |             deploy = False
 66 |             for m in models:
 67 |                 try:
 68 |                     pattern = r"\{\{\s(.*)\s\}\}"
 69 |                     model_jinja = re.findall(pattern, m)[0]
 70 |                     model_name, bk_name, rg_name = var_parser(model_jinja)
 71 |                     if (bk_name != backend) or (rg_name != registry_name):
 72 |                         raise InvalidVariable
 73 |                     model = model_metadata[registry_name][backend][model_name]
 74 |                     run_id = model["run_id"]
 75 |                     rep_deploy_yaml = update_modeluris(
 76 |                         rep_deploy_yaml,
 77 |                         f'{{{{ {registry_name}.{backend}["{model_name}"] }}}}',
 78 |                         rclone_source(model["source"], backend),
 79 |                     )
 80 |                     rep_deploy_yaml["metadata"]["annotations"][
 81 |                         f"mdc/mlflow-{run_id}"
 82 |                     ] = str(model)
 83 |                     rep_deploy_yaml["metadata"]["annotations"][
 84 |                         "mdc/mlflow-stage"
 85 |                     ] = stage
 86 |                     rep_deploy_yaml["metadata"]["labels"][
 87 |                         "app.kubernetes.io/mdc-type"
 88 |                     ] = controller_label_value
 89 |                     rep_deploy_yaml["metadata"]["labels"][
 90 |                         "app.kubernetes.io/managed-by"
 91 |                     ] = "mdc"
 92 |                     deploy = True
 93 |                     name = rep_deploy_yaml["metadata"]["name"]
 94 |                 except InvalidVariable:
 95 |                     deploy = False
 96 |                     logger.error(
 97 |                         f"Error in variable for model {m} backend {bk_name} registry {rg_name}"
 98 |                     )
 99 |                 except Exception as e:
100 |                     deploy = False
101 |                     logger.error(
102 |                         f"Error deploying {name} Model {m} not found in mlflow {e}"
103 |                     )
104 |         if deploy:
105 |             logger.info(
106 |                 f"deploying seldon deployment {name} in namespace {GLOBAL_NAMESPACE}"
107 |             )
108 |             try:
109 |                 manifest = kube_client.get_namespaced_custom_object(
110 |                     group=resource_group,
111 |                     version="v1",
112 |                     plural="seldondeployments",
113 |                     namespace=GLOBAL_NAMESPACE,
114 |                     name=rep_deploy_yaml["metadata"]["name"],
115 |                 )
116 |                 resourceVersion = manifest["metadata"]["resourceVersion"]
117 |                 manifest["metadata"].pop("creationTimestamp")
118 |                 manifest["metadata"].pop("generation")
119 |                 manifest["metadata"].pop("managedFields")
120 |                 manifest["metadata"].pop("resourceVersion")
121 |                 manifest["metadata"].pop("uid")
122 |                 manifest["metadata"].pop("namespace")
123 |                 manifest.pop("status")
124 |                 _name = rep_deploy_yaml["metadata"]["name"]
125 |                 if rep_deploy_yaml == manifest:
126 |                     logger.info(f"seldon deployment {_name} in sync")
127 |                 else:
128 |                     rep_deploy_yaml["metadata"]["resourceVersion"] = resourceVersion
129 |                     kube_client.replace_namespaced_custom_object(
130 |                         group=resource_group,
131 |                         version="v1",
132 |                         plural="seldondeployments",
133 |                         body=rep_deploy_yaml,
134 |                         name=_name,
135 |                         namespace=GLOBAL_NAMESPACE,
136 |                     )
137 | 
138 |             except KubeClient.rest.ApiException:
139 |                 kube_client.create_namespaced_custom_object(
140 |                     group=resource_group,
141 |                     version="v1",
142 |                     plural="seldondeployments",
143 |                     body=rep_deploy_yaml,
144 |                     namespace=GLOBAL_NAMESPACE,
145 |                 )
146 |             git_models.append(rep_deploy_yaml["metadata"]["name"])
147 |     manifests = kube_client.list_namespaced_custom_object(
148 |         group="machinelearning.seldon.io",
149 |         version="v1",
150 |         plural="seldondeployments",
151 |         namespace=GLOBAL_NAMESPACE,
152 |         label_selector=f"app.kubernetes.io/mdc-type={controller_label_value}",
153 |     )
154 |     for i in manifests["items"]:
155 |         model_name = i["metadata"]["name"]
156 |         if model_name in git_models:
157 |             logger.info(f"seldon deployment in sync {model_name}")
158 |         else:
159 |             kube_client.delete_namespaced_custom_object(
160 |                 group="machinelearning.seldon.io",
161 |                 version="v1",
162 |                 plural="seldondeployments",
163 |                 name=model_name,
164 |                 namespace=GLOBAL_NAMESPACE,
165 |             )
166 | 


--------------------------------------------------------------------------------
/mlflow_controller/mlservers/utils.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | 
 4 | def mlflow_model_search(lookup_key, json_dict, search_result=[]):
 5 |     if type(json_dict) == dict:
 6 |         for key, value in json_dict.items():
 7 |             if key == lookup_key:
 8 |                 search_result.append(value)
 9 |             mlflow_model_search(lookup_key, value, search_result)
10 |     elif type(json_dict) == list:
11 |         for element in json_dict:
12 |             mlflow_model_search(lookup_key, element, search_result)
13 |     return search_result
14 | 
15 | 
def update_modeluris(json_para, search_para, replace_para):
    """Return a JSON round-tripped copy of ``json_para`` with values swapped.

    Every dictionary value that is equal to ``search_para`` is replaced by
    ``replace_para``. Only values held directly by a dict are considered;
    items of a list are left as they are. The input is not modified.
    """

    def _swap_values(mapping):
        # Called by the JSON decoder for each decoded object, innermost first.
        return {
            key: (replace_para if value == search_para else value)
            for key, value in mapping.items()
        }

    serialized = json.dumps(json_para)
    return json.loads(serialized, object_hook=_swap_values)
25 | 


--------------------------------------------------------------------------------
/mlflow_controller/registries/mlflow.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | 
 4 | from mlflow.tracking import MlflowClient
 5 | 
 6 | from mlflow_controller.registries import mlflow_backend
 7 | 
# Module logger: accepts DEBUG and above; handlers below decide what is emitted.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

formatter = logging.Formatter("%(asctime)s:%(name)s:%(message)s")

# ERROR and above are also written to "log.log" (path relative to the
# process working directory).
file_handler = logging.FileHandler("log.log")
file_handler.setLevel(logging.ERROR)
file_handler.setFormatter(formatter)

# Stream handler has no level of its own, so it emits everything the logger accepts.
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(formatter)

logger.addHandler(file_handler)
logger.addHandler(stream_handler)

# Root logger level comes from the LOG_LEVEL environment variable ("INFO" when unset).
# NOTE(review): records from this module also propagate to the root logger, so
# they may be printed twice — confirm whether that is intended.
logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))
24 | 
25 | 
class MLflowMetadata:
    """Reads registered-model metadata, and optionally deploy manifests, from MLflow."""

    def __init__(self, tracking_uri, stage):
        """Create the MLflow client and remember which stage to report.

        Args:
            tracking_uri: passed to ``MlflowClient(tracking_uri=...)``.
            stage: only model versions whose ``current_stage`` equals this
                value are returned by ``get_model_metadata``.
        """
        self.mlflow_client = MlflowClient(tracking_uri=tracking_uri)
        logger.debug("Mlflow client initialized")
        self.object_init = mlflow_backend.Artifact()
        self.stage = stage

    def __str__(self):
        return self.__class__.__name__

    def _load_deploy_yaml(self, backend, artifact_uri):
        """Fetch the deploy manifest for ``artifact_uri`` from the given backend."""
        loaders = {
            "gcs": self.object_init.gcp_bucket,
            "blob": self.object_init.azure_blob,
            "s3": self.object_init.aws_s3,
        }
        try:
            loader = loaders[backend]
        except KeyError:
            raise ValueError(f"unsupported Object Storage: {backend!r}") from None
        return loader(artifact_uri)

    def get_model_metadata(
        self,
        check_deploy=False,
        manager_label="mdc-mlflow-direct",
        backend="",
        mlflow_deploy_config="deploy.yaml",
    ):
        """Collect metadata for model versions in ``self.stage``.

        Args:
            check_deploy: when true, also load the deploy manifest of every
                matching run that lists ``mlflow_deploy_config`` among its
                artifacts.
            manager_label: value written to the manifest's
                ``app.kubernetes.io/mdc-type`` label.
            backend: storage backend (``"gcs"``, ``"blob"`` or ``"s3"``); used
                as the key in the returned metadata and in the model template.
            mlflow_deploy_config: artifact path that marks a run as deployable.
                NOTE(review): the storage loaders read their own configured
                file name — confirm both are kept in step.

        Returns:
            ``(ml_metadata, read_deploy_yaml)`` where ``ml_metadata`` is
            ``{"mlflow": {backend: {model_name: {...}}}}`` and
            ``read_deploy_yaml`` is the list of loaded, templated manifests.

        Raises:
            ValueError: if a manifest has to be loaded and ``backend`` is not
                one of the supported values.
        """
        mlflow_models_metadata = {}
        read_deploy_yaml = []
        # NOTE(review): list_registered_models may return only one page of
        # results — confirm whether pagination is needed.
        registered_models = self.mlflow_client.list_registered_models()
        for registered_model in registered_models:
            for version in registered_model.latest_versions:
                if version.current_stage != self.stage:
                    continue
                model_details = dict(version)
                model_run_id = model_details["run_id"]
                run_details = dict(self.mlflow_client.get_run(model_run_id).info)
                name = model_details["name"]
                artifact_uri = run_details["artifact_uri"]
                mlflow_models_metadata[name] = {
                    "name": name,
                    "run_id": model_details["run_id"],
                    "source": model_details["source"],
                    "status": model_details["status"],
                    "version": model_details["version"],
                    "artifact_uri": artifact_uri,
                }
                logger.debug(artifact_uri)
                if not check_deploy:
                    continue

                # Load the manifest once per run, and only when the run really
                # ships one, instead of once per artifact in the listing.
                has_deploy_config = any(
                    file.path == mlflow_deploy_config
                    for file in self.mlflow_client.list_artifacts(model_run_id)
                )
                if not has_deploy_config:
                    continue
                deploy_yaml = self._load_deploy_yaml(backend, artifact_uri)

                predictor = deploy_yaml["spec"]["predictors"][0]
                # Template placeholder that is resolved later by the sync step.
                predictor["graph"]["modelUri"] = f'{{{{ mlflow.{backend}["{name}"] }}}}'
                if predictor.get("annotations") is None:
                    predictor["annotations"] = {}
                predictor["annotations"]["predictor_version"] = model_details[
                    "version"
                ]

                metadata = deploy_yaml["metadata"]
                for section in ("annotations", "labels"):
                    if metadata.get(section) is None:
                        metadata[section] = {}
                metadata["labels"]["app.kubernetes.io/mdc-type"] = manager_label
                read_deploy_yaml.append(deploy_yaml)
        ml_metadata = {"mlflow": {f"{backend}": mlflow_models_metadata}}
        return ml_metadata, read_deploy_yaml
95 | 


--------------------------------------------------------------------------------
/mlflow_controller/registries/mlflow_backend.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | import re
 4 | from io import BytesIO
 5 | 
 6 | import boto3
 7 | import yaml
 8 | from azure.identity import DefaultAzureCredential
 9 | from azure.storage.blob import BlobServiceClient
10 | from google.cloud.storage import Client as GoogleClient
11 | 
# Module logger: accepts DEBUG and above; handlers below decide what is emitted.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

formatter = logging.Formatter("%(asctime)s:%(name)s:%(message)s")

# ERROR and above are also written to "log.log" (path relative to the
# process working directory).
file_handler = logging.FileHandler("log.log")
file_handler.setLevel(logging.ERROR)
file_handler.setFormatter(formatter)

# Stream handler has no level of its own, so it emits everything the logger accepts.
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(formatter)

logger.addHandler(file_handler)
logger.addHandler(stream_handler)

# Root logger level comes from the LOG_LEVEL environment variable ("INFO" when unset).
logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))
28 | 
29 | 
class Artifact:
    """Downloads and parses a run's deploy manifest from object storage."""

    def __init__(self):
        print("Class Artifact initalized")
        # File name of the manifest, appended to the run's artifact location.
        self.mlflow_deploy_config = "deploy.yaml"

    def gcp_bucket(self, artifact_uri):
        """Load the manifest from Google Cloud Storage.

        Args:
            artifact_uri: run artifact URI; the third ``/``-separated element
                is used as the bucket and the rest as the object prefix
                (presumably ``gs://<bucket>/<prefix>`` — confirm with caller).

        Returns:
            The parsed YAML document.

        Raises:
            FileNotFoundError: if the manifest object does not exist.
        """
        google_client = GoogleClient()
        uri_parts = artifact_uri.split("/")
        bucket_name = uri_parts[2]
        object_name = "/".join(uri_parts[3:]) + f"/{self.mlflow_deploy_config}"
        bucket = google_client.get_bucket(bucket_name)
        blob = bucket.get_blob(object_name)
        if blob is None:
            raise FileNotFoundError(
                f"{object_name} not found in bucket {bucket_name}"
            )
        downloaded_file = blob.download_as_text(encoding="utf-8")
        return yaml.safe_load(downloaded_file)

    def azure_blob(self, artifact_uri):
        """Load the manifest from Azure Blob Storage.

        Args:
            artifact_uri: run artifact URI containing ``//<container>@<host>``
                and ``blob.core.windows.net``; the first dot-separated label of
                ``<host>`` is used as the storage account name.

        Returns:
            The parsed YAML document.

        Raises:
            ValueError: if the container or account cannot be extracted.
        """
        # Text between "//" and "@" is the container; text after "@" up to the
        # first "." is the storage account.
        container_re = r"(?<=\/\/)(.*)(?=\@)"
        acc_name_re = r"(?<=\@)(.*)(?=[\.])"
        container_match = re.search(container_re, artifact_uri)
        acc_name_match = re.search(acc_name_re, artifact_uri)
        if container_match is None or acc_name_match is None:
            raise ValueError(f"cannot parse Azure artifact URI {artifact_uri!r}")
        container = container_match.group(1)
        acc_name = acc_name_match.group(1).split(".")[0]
        STORAGEACCOUNTURL = f"https://{acc_name}.blob.core.windows.net"
        default_credential = DefaultAzureCredential()
        blob_service_client_instance = BlobServiceClient(
            account_url=STORAGEACCOUNTURL, credential=default_credential
        )
        # Drops the last path component of the URI and appends
        # "/artifacts/<manifest>" in its place.
        blob_location = (
            "/".join(artifact_uri.split("blob.core.windows.net")[1].split("/")[1:-1])
            + f"/artifacts/{self.mlflow_deploy_config}"
        )
        blob_client_instance = blob_service_client_instance.get_blob_client(
            container, blob_location, snapshot=None
        )
        blob_data = blob_client_instance.download_blob()
        bl = blob_data.readall()
        # Remote content: parsed with the safe loader, matching gcp_bucket
        # (this call previously used yaml.load with FullLoader).
        return yaml.safe_load(bl)

    def aws_s3(self, artifact_uri):
        """Load the manifest from S3 (or an S3-compatible store).

        Args:
            artifact_uri: run artifact URI of the form ``s3://<bucket>/<prefix>``.

        Returns:
            The parsed YAML document.
        """
        session = boto3.Session()
        s3_client = session.client("s3")
        path_parts = artifact_uri.replace("s3://", "").split("/")
        bucket = path_parts.pop(0)
        # Use the configured manifest name, as the other backends do.
        key = "/".join(path_parts) + f"/{self.mlflow_deploy_config}"
        f = BytesIO()
        s3_client.download_fileobj(bucket, key, f)

        # Remote content: parsed with the safe loader, matching gcp_bucket
        # (this call previously used yaml.load with FullLoader).
        return yaml.safe_load(f.getvalue())
80 | 


--------------------------------------------------------------------------------
/mlflow_controller/utils/var_extract.py:
--------------------------------------------------------------------------------
 1 | import ast
 2 | import re
 3 | 
 4 | 
 5 | def var_parser(placeholder):
 6 |     model_pattern = r"\[.*\]"
 7 |     model = re.search(model_pattern, placeholder)
 8 |     model_name = ast.literal_eval(model.group())[0]
 9 |     vendor_pattern = r"\..*\["
10 |     vendor = re.search(vendor_pattern, placeholder)
11 |     vendor_name = vendor.group().replace(".", "").replace("[", "")
12 |     registry_pattern = r"^[a-zA-Z0-9_]*"
13 |     registry = re.search(registry_pattern, placeholder)
14 |     registry_name = registry.group()
15 |     return model_name, vendor_name, registry_name
16 | 
17 | 
def validate_variable(placeholder):
    """Match ``placeholder`` against the ``<word>.<word>["<text>"]`` shape.

    The check is anchored at the start of the string only and ignores case.
    Returns the match object on success and ``None`` otherwise.
    """
    return re.match(r"\w+\.\w+\[\".+\"\]", placeholder, re.IGNORECASE)
21 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | mlflow==1.25
 2 | kubernetes==22.6.0
 3 | google==3.0.0
 4 | gcloud==0.18.3
 5 | apscheduler
 6 | azure-storage-blob==12.14.1
 7 | azure-identity==1.12.0
 8 | boto3==1.26.25
 9 | GitPython>=3.1.30
10 | google-apitools==0.5.32
11 | google-auth==2.1.0
12 | google-auth-oauthlib==0.4.6
13 | google-cloud==0.34.0
14 | google-cloud-core==2.0.0
15 | google-cloud-storage==1.42.2
16 | google-crc32c==1.2.0
17 | google-pasta==0.2.0
18 | google-reauth==0.1.1
19 | google-resumable-media==2.0.3
20 | googleapis-common-protos==1.52.0
21 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | from mlflow_controller.gitops import GitopsMDC
2 | 
# Manual entry point: build the GitOps controller and run it.
controller = GitopsMDC()
controller.gitops_mlflow_controller()

# Alternative (disabled) entry point kept for reference.
# controller = DeployConroller()
# controller.deploy_controller()
8 | 


--------------------------------------------------------------------------------
/tests/docker_build_push.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Builds the controller test image tagged with the commit SHA and loads it
# into the kind cluster (the push to the registry is disabled).
set -e

# Fail early with a clear message instead of building an image with an empty tag.
image="tachyongroup/mdc-test:${GITHUB_SHA:?GITHUB_SHA must be set}"

echo "Installing build test image and push ..."
docker build -t "$image" .
# docker push "$image"
kind load docker-image "$image"
7 | 


--------------------------------------------------------------------------------
/tests/install_gitea.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -euo pipefail
 3 | 
 4 | helm repo add gitea-charts https://dl.gitea.io/charts/
 5 | helm install gitea gitea-charts/gitea --set "gitea.admin.username=mdcadmin" --set "gitea.admin.password=password" --set "gitea.admin.email=mdcadmin@local.domain"
 6 | sleep 30
 7 | kubectl wait --for=condition=ready pod -l 'app.kubernetes.io/name in (gitea)' --timeout=180s
 8 | 
 9 | kubectl --namespace default port-forward svc/gitea-http 3000:3000 &
10 | GITEA_PID=$!
11 | 


--------------------------------------------------------------------------------
/tests/install_istio.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | echo "Installing Istio service mesh ..."
 4 | helm repo add istio https://istio-release.storage.googleapis.com/charts
 5 | helm repo update
 6 | kubectl create namespace istio-system
 7 | helm install istio-base istio/base -n istio-system
 8 | helm install istiod istio/istiod -n istio-system --wait
 9 | helm status istiod -n istio-system
10 | 
11 | echo "Waiting for Istio service mesh to be ready ..."
12 | kubectl wait --for=condition=ready pod -l 'app in (istiod)' --timeout=180s -n  istio-system


--------------------------------------------------------------------------------
/tests/install_kserve.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Installs KServe using the upstream release-0.9 quick-install script.
set -e
set -o pipefail
echo "Installing Kserve ..."

# Download to a temporary file first so that a failed or truncated download
# aborts the script instead of piping partial content into bash.
installer="$(mktemp)"
trap 'rm -f "$installer"' EXIT
curl -fsS "https://raw.githubusercontent.com/kserve/kserve/release-0.9/hack/quick_install.sh" -o "$installer"
bash "$installer"
5 | 


--------------------------------------------------------------------------------
/tests/install_kserve_deployment_controller.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | echo "Installing Kserve Deployment Controller ..."
 4 | kubectl create ns staging
 5 | kubectl create ns production
 6 | kubectl create secret generic github-secret -n mlflow --from-literal=githubtoken=password
 7 | kubectl apply -f tests/repo-test/staging/kserve-sa.yaml -n staging
 8 | 
 9 | helm install mdc-staging charts/mlflow-controller  -n mlflow  --set image.tag=$GITHUB_SHA  --set image.pullPolicy=Never  --set image.repository=docker.io/tachyongroup/mdc-test --set mlflow.backend=s3 --set gitops.deploymentLocation=staging/ --set mlserver=kserve --set gitops.repository=gitea-http.default.svc.cluster.local:3000/mdcadmin/repo-test --set  gitops.protocol=http
10 | 
11 | kubectl get deployment -n mlflow
12 | kubectl get cm -n mlflow
13 | kubectl get po -n mlflow
14 | echo "Waiting for Deployment Controller to be ready ..."
15 | export POD_NAME=$(kubectl get pods --namespace mlflow -l "app.kubernetes.io/instance=mdc-staging" -o jsonpath="{.items[0].metadata.name}")
16 | 
17 | kubectl describe po $POD_NAME -n mlflow
18 | sleep 180
19 | kubectl logs deployment/mdc-staging-mlflow-controller -n mlflow
20 | #kubectl get inferenceservice --all-namespaces
21 | kubectl get inferenceservice sklearn-iris-minio  -n staging -o yaml
22 | 
23 | export MLFLOW_S3_ENDPOINT_URL=http://localhost:9000
24 | export AWS_ACCESS_KEY_ID=minioadmin
25 | export AWS_SECRET_ACCESS_KEY=minioadmin
26 | export MLFLOW_TRACKING_URI=http://localhost:5000
27 | python ./tests/mlflow/list_model.py $mlserver
28 | 
29 | kubectl wait --for=condition=ready  inferenceservice sklearn-iris-miniot  -n staging  --timeout=380s
30 | kubectl describe inferenceservice sklearn-iris-miniot  -n staging


--------------------------------------------------------------------------------
/tests/install_mlflow.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | echo "Installing Mlflow ..."
 4 | kubectl create ns mlflow
 5 | helm repo add minio https://charts.bitnami.com/bitnami
 6 | helm install minio minio/minio -n mlflow --set auth.rootUser=minioadmin --set auth.rootPassword=minioadmin --set livenessProbe.enabled=false --set readinessProbe.enabled=false #--set mode=distributed
 7 | 
 8 | export ROOT_USER=$(kubectl get secret --namespace mlflow minio -o jsonpath="{.data.root-user}" | base64 -d)
 9 | export ROOT_PASSWORD=$(kubectl get secret --namespace mlflow minio -o jsonpath="{.data.root-password}" | base64 -d)
10 | 
11 | kubectl wait --for=condition=ready pod -l 'app.kubernetes.io/name in (minio)' --timeout=380s -n mlflow
12 | kubectl apply -f tests/mlflow-cm.yaml -n mlflow
13 | helm repo add rocket9-code https://rocket9-code.github.io/hello-mlflow
14 | helm install mlflow rocket9-code/mlflow  -n mlflow --set artifact.ArtifactRoot=s3://artifacts  --set envFromconfigMap=minio-cm --set image.pullPolicy=Always
15 | kubectl get po -n mlflow
16 | export POD_NAME=$(kubectl get pods --namespace mlflow -l "app.kubernetes.io/name=mlflow,app.kubernetes.io/instance=mlflow" -o jsonpath="{.items[0].metadata.name}")
17 | kubectl describe po $POD_NAME -n mlflow
18 | kubectl wait --for=condition=ready pod -l 'app.kubernetes.io/name in (mlflow)' --timeout=380s -n mlflow
19 | 


--------------------------------------------------------------------------------
/tests/install_seldon_core.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | echo "Installing Seldon Core ..."
 4 | kubectl create namespace seldon-system
 5 | helm install seldon-core seldon-core-operator \
 6 |     --repo https://storage.googleapis.com/seldon-charts \
 7 |     --set usageMetrics.enabled=true \
 8 |     --set istio.enabled=true \
 9 |     --namespace seldon-system
10 | echo "Waiting for Seldon Core to be ready ..."
11 | kubectl wait --for=condition=ready pod -l 'app in (seldon)' --timeout=180s -n seldon-system


--------------------------------------------------------------------------------
/tests/install_seldon_deployment_controller.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | echo "Installing Seldon Deployment Controller ..."
 4 | kubectl create ns staging
 5 | kubectl create secret generic github-secret -n mlflow --from-literal=githubtoken=password
 6 | 
 7 | helm install mdc-staging charts/mlflow-controller  -n mlflow  --set image.tag=$GITHUB_SHA  --set image.pullPolicy=Never  --set image.repository=docker.io/tachyongroup/mdc-test --set mlflow.backend=s3 --set gitops.deploymentLocation=staging/ --set mlserver=seldon --set gitops.repository=gitea-http.default.svc.cluster.local:3000/mdcadmin/repo-test --set  gitops.protocol=http
 8 | kubectl apply -f tests/repo-test/staging/seldon-secret.yaml -n staging
 9 | kubectl get deployment -n mlflow
10 | kubectl get cm -n mlflow
11 | kubectl get po -n mlflow
12 | 
13 | echo "Waiting for Deployment Controller to be ready ..."
14 | export POD_NAME=$(kubectl get pods --namespace mlflow -l "app.kubernetes.io/instance=mdc-staging" -o jsonpath="{.items[0].metadata.name}")
15 | sleep 180
16 | kubectl describe po $POD_NAME -n mlflow
17 | kubectl wait --for=condition=ready pod -l 'app.kubernetes.io/instance in (mdc-staging)' --timeout=380s -n mlflow
18 | 
19 | 
20 | kubectl describe po $POD_NAME -n mlflow
21 | sleep 180
22 | kubectl logs deployment/mdc-staging-mlflow-controller -n mlflow
23 | kubectl get seldondeployment --all-namespaces
24 | kubectl get seldondeployment mlflow-var-minio   -n staging -o yaml
25 | 
26 | export MLFLOW_S3_ENDPOINT_URL=http://localhost:9000
27 | export AWS_ACCESS_KEY_ID=minioadmin
28 | export AWS_SECRET_ACCESS_KEY=minioadmin
29 | export MLFLOW_TRACKING_URI=http://localhost:5000
30 | python ./tests/mlflow/list_model.py $mlserver
31 | 
32 | 
33 | python ./tests/mlflow/test_deploy.py


--------------------------------------------------------------------------------
/tests/kind-cluster-1-24.yaml:
--------------------------------------------------------------------------------
 1 | # This testing option is available for testing projects that don't yet support k8s 1.25
 2 | apiVersion: kind.x-k8s.io/v1alpha4
 3 | kind: Cluster
 4 | # Configure registry for KinD.
 5 | containerdConfigPatches:
 6 | - |-
 7 |   [plugins."io.containerd.grpc.v1.cri".registry.mirrors."$REGISTRY_NAME:$REGISTRY_PORT"]
 8 |     endpoint = ["http://$REGISTRY_NAME:$REGISTRY_PORT"]
 9 | # This is needed in order to support projected volumes with service account tokens.
10 | # See: https://kubernetes.slack.com/archives/CEKK1KTN2/p1600268272383600
11 | kubeadmConfigPatches:
12 |   - |
13 |     apiVersion: kubeadm.k8s.io/v1beta2
14 |     kind: ClusterConfiguration
15 |     metadata:
16 |       name: config
17 |     apiServer:
18 |       extraArgs:
19 |         "service-account-issuer": "kubernetes.default.svc"
20 |         "service-account-signing-key-file": "/etc/kubernetes/pki/sa.key"
21 | nodes:
22 | - role: control-plane
23 |   image: kindest/node:v1.22.15@sha256:7d9708c4b0873f0fe2e171e2b1b7f45ae89482617778c1c875f1053d4cef2e41
24 | - role: worker
25 |   image: kindest/node:v1.22.15@sha256:7d9708c4b0873f0fe2e171e2b1b7f45ae89482617778c1c875f1053d4cef2e41
26 | - role: worker
27 |   image: kindest/node:v1.22.15@sha256:7d9708c4b0873f0fe2e171e2b1b7f45ae89482617778c1c875f1053d4cef2e41
28 | 
29 | - role: worker
30 |   image: kindest/node:v1.22.15@sha256:7d9708c4b0873f0fe2e171e2b1b7f45ae89482617778c1c875f1053d4cef2e41


--------------------------------------------------------------------------------
/tests/log_mlflow_model.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | echo "Installing Mlflow ..."
 4 | pip install mlflow==1.25.1
 5 | pip install protobuf==3.20.*
 6 | pip install scikit-learn==0.23.2
 7 | pip install pandas==0.23.4
 8 | pip install boto3==1.22.9
 9 | pip install minio
10 | pip install kubernetes
11 | pip install termcolor
12 | export MLFLOW_S3_ENDPOINT_URL=http://localhost:9000
13 | export AWS_ACCESS_KEY_ID=minioadmin
14 | export AWS_SECRET_ACCESS_KEY=minioadmin
15 | export MLFLOW_TRACKING_URI=http://localhost:5000
16 | python ./tests/mlflow/iris.py 1 staging
17 | 
18 | 


--------------------------------------------------------------------------------
/tests/mlflow-cm.yaml:
--------------------------------------------------------------------------------
 1 | kind: ConfigMap
 2 | apiVersion: v1
 3 | metadata:
 4 |   name: minio-cm
 5 |   namespace: mlflow
 6 | data:
 7 |   MLFLOW_S3_ENDPOINT_URL: 'http://minio.mlflow.svc.cluster.local'
 8 |   AWS_ACCESS_KEY_ID: minioadmin
 9 |   AWS_SECRET_ACCESS_KEY: minioadmin
10 | 


--------------------------------------------------------------------------------
/tests/mlflow/iris.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | 
  3 | import mlflow
  4 | import mlflow.sklearn
  5 | import pandas as pd
  6 | from minio import Minio
  7 | from mlflow.tracking import MlflowClient
  8 | from sklearn import datasets
  9 | from sklearn.ensemble import RandomForestClassifier
 10 | from sklearn.metrics import roc_auc_score
 11 | from sklearn.model_selection import train_test_split
 12 | 
 13 | try:
 14 |     client = Minio(
 15 |         "localhost:9000", access_key="minioadmin", secret_key="minioadmin", secure=False
 16 |     )
 17 | 
 18 |     # Create bucket.
 19 |     client.make_bucket("artifacts")
 20 |     policy = '{"Version":"2012-10-17","Statement":[{"Action":["s3:GetBucketLocation","s3:ListBucket","s3:ListBucketMultipartUploads"],"Effect":"Allow","Principal":{"AWS":["*"]},"Resource":["arn:aws:s3:::artifacts"],"Sid":""},{"Action":["s3:AbortMultipartUpload","s3:DeleteObject","s3:GetObject","s3:ListMultipartUploadParts","s3:PutObject"],"Effect":"Allow","Principal":{"AWS":["*"]},"Resource":["arn:aws:s3:::artifacts/*"],"Sid":""}]}'
 21 |     client.set_bucket_policy(bucket_name="artifacts", policy=policy)
 22 | except Exception as e:
 23 |     print(e)
 24 | 
 25 | 
 26 | def main(version, stage, MODEL_NAME):
 27 |     iris = datasets.load_iris()
 28 |     iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
 29 |     y = iris.target
 30 |     iris_df["target"] = y
 31 | 
 32 |     train_df, test_df = train_test_split(
 33 |         iris_df, test_size=0.3, random_state=42, stratify=iris_df["target"]
 34 |     )
 35 |     X_train = train_df[
 36 |         [
 37 |             "sepal length (cm)",
 38 |             "sepal width (cm)",
 39 |             "petal length (cm)",
 40 |             "petal width (cm)",
 41 |         ]
 42 |     ]
 43 |     y_train = train_df["target"]
 44 | 
 45 |     X_test = test_df[
 46 |         [
 47 |             "sepal length (cm)",
 48 |             "sepal width (cm)",
 49 |             "petal length (cm)",
 50 |             "petal width (cm)",
 51 |         ]
 52 |     ]
 53 |     y_test = test_df["target"]
 54 | 
 55 |     EXPERIMENT_NAME = MODEL_NAME
 56 | 
 57 |     # print("IRIS train df shape")
 58 |     # print(X_train.shape)
 59 |     # print(y_train.shape)
 60 | 
 61 |     # print("IRIS test df shape")
 62 |     # print(X_test.shape)
 63 |     # print(y_test.shape)
 64 | 
 65 |     mlflow_client = MlflowClient()
 66 | 
 67 |     # Create an MLFlow experiment, if not already exists
 68 |     experiment_details = mlflow_client.get_experiment_by_name(EXPERIMENT_NAME)
 69 | 
 70 |     if experiment_details is not None:
 71 |         experiment_id = experiment_details.experiment_id
 72 |     else:
 73 |         experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)
 74 | 
 75 |     # Start an MLFlow experiment run
 76 |     with mlflow.start_run(
 77 |         experiment_id=experiment_id, run_name="iris dataset rf run"
 78 |     ) as run:
 79 |         # Log parameters
 80 | 
 81 |         mlflow.log_param("max_depth", 10)
 82 |         mlflow.log_param("random_state", 0)
 83 |         mlflow.log_param("n_estimators", 100)
 84 |         clf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=0)
 85 |         clf.fit(X_train, y_train)
 86 |         iris_predict_y = clf.predict(X_test)
 87 | 
 88 |         roc_auc_score_val = roc_auc_score(
 89 |             y_test, clf.predict_proba(X_test), multi_class="ovr"
 90 |         )
 91 |         mlflow.log_metric("test roc_auc_score", roc_auc_score_val)
 92 | 
 93 |         # Log model
 94 |         result = mlflow.sklearn.log_model(clf, artifact_path="model")
 95 | 
 96 |         # Register a new version
 97 |     result = mlflow.register_model(result.model_uri, MODEL_NAME)
 98 | 
 99 |     mlflow_client.transition_model_version_stage(
100 |         name=MODEL_NAME, version=version, stage=stage
101 |     )
102 |     registered_models = mlflow_client.list_registered_models()
103 | 
104 | 
if __name__ == "__main__":
    # Register five demo models, "iris demo0" .. "iris demo4". The version and
    # stage come from the first two command-line arguments.
    for idx in range(5):
        model_name = f"iris demo{idx}"
        print(model_name)
        main(version=sys.argv[1], stage=sys.argv[2], MODEL_NAME=model_name)
111 | 


--------------------------------------------------------------------------------
/tests/mlflow/list_model.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import time
  3 | 
  4 | from git import Repo
  5 | from iris import main
  6 | from kubernetes import client as KubeClient
  7 | from kubernetes import config
  8 | from mlflow.tracking import MlflowClient
  9 | from termcolor import colored
 10 | 
 11 | try:
 12 |     config.load_kube_config()
 13 | except config.ConfigException:
 14 |     config.load_incluster_config()
 15 | kube_client = KubeClient.CustomObjectsApi()
 16 | 
 17 | timeout = time.time() + 60 * 2
 18 | 
 19 | 
 20 | print(colored("Test", "red"), colored("no1", "green"))
 21 | 
 22 | 
 23 | def test():
 24 |     backend = "s3"
 25 |     mlflow_client = MlflowClient()
 26 |     registered_models = mlflow_client.list_registered_models()
 27 |     mlflow_models_metadata = {}
 28 |     for registered_model in registered_models:
 29 |         for version in registered_model.latest_versions:
 30 |             if version.current_stage == "Staging":
 31 |                 model_details = dict(version)
 32 |                 model_run_id = model_details["run_id"]
 33 |                 run_details = dict(mlflow_client.get_run(model_run_id).info)
 34 |                 name = model_details["name"]
 35 |                 model_template = f'{{{{ mlflow.{backend}["{name}"] }}}}'
 36 |                 artifact_uri = run_details["artifact_uri"]
 37 |                 mlflow_models_metadata[name] = {
 38 |                     "name": name,
 39 |                     "run_id": model_details["run_id"],
 40 |                     "source": model_details["source"],
 41 |                     "status": model_details["status"],
 42 |                     "version": model_details["version"],
 43 |                     "artifact_uri": artifact_uri,
 44 |                 }
 45 |     while True:
 46 |         if sys.argv[1] == "seldon":
 47 |             manifest = kube_client.get_namespaced_custom_object(
 48 |                 group="machinelearning.seldon.io",
 49 |                 version="v1",
 50 |                 plural="seldondeployments",
 51 |                 namespace="staging",
 52 |                 name="mlflow-var-minio",
 53 |             )
 54 |             demo1 = manifest["spec"]["predictors"][0]["graph"]["children"][0][
 55 |                 "modelUri"
 56 |             ]
 57 |             demo2 = manifest["spec"]["predictors"][0]["graph"]["children"][0][
 58 |                 "children"
 59 |             ][0]["modelUri"]
 60 |             demo3 = manifest["spec"]["predictors"][0]["graph"]["children"][1][
 61 |                 "modelUri"
 62 |             ]
 63 |             demo4 = manifest["spec"]["predictors"][0]["graph"]["modelUri"]
 64 |             if (
 65 |                 (demo1 == mlflow_models_metadata["iris demo1"]["source"])
 66 |                 & (demo2 == mlflow_models_metadata["iris demo2"]["source"])
 67 |                 & (demo4 == mlflow_models_metadata["iris demo4"]["source"])
 68 |             ):
 69 |                 print(demo1, demo2, demo3, demo4)
 70 |                 print("test passed", mlflow_models_metadata)
 71 |                 break
 72 |         elif sys.argv[1] == "kserve":
 73 |             manifest = kube_client.get_namespaced_custom_object(
 74 |                 group="serving.kserve.io",
 75 |                 version="v1beta1",
 76 |                 plural="inferenceservices",
 77 |                 namespace="staging",
 78 |                 name="sklearn-iris-minio",
 79 |             )
 80 |             demo2 = manifest["spec"]["predictor"]["model"]["storageUri"]
 81 |             if demo2 == mlflow_models_metadata["iris demo2"]["source"]:
 82 |                 print(demo2)
 83 |                 print("test passed", mlflow_models_metadata)
 84 |                 break
 85 |         if time.time() > timeout:
 86 |             print(mlflow_models_metadata)
 87 |             print(manifest)
 88 |             print(sys.argv[1])
 89 |             raise ("Timeout error")
 90 | 
 91 | 
 92 | test()
 93 | 
 94 | # Test transition
 95 | 
 96 | print(colored("Test", "red"), colored("no2", "green"))
 97 | 
 98 | for i in range(5):
 99 |     main(MODEL_NAME=f"iris demo{i}", version=2, stage="Staging")
100 | 
101 | test()
102 | 
103 | # Test removal
104 | print(colored("Test", "red"), colored("no3", "green"))
105 | 
106 | if sys.argv[1] == "kserve":
107 |     PATH_OF_GIT_REPO = "tests/repo-test"
108 |     COMMIT_MESSAGE = "comment from python script"
109 | 
110 |     def git_push():
111 |         import os
112 | 
113 |         os.remove("tests/repo-test/staging/kserve-s3.yaml")
114 |         try:
115 |             repo = Repo(PATH_OF_GIT_REPO)
116 |             repo.git.add(update=True)
117 |             repo.index.commit(COMMIT_MESSAGE)
118 |             origin = repo.remote(name="origin")
119 |             origin.push()
120 |         except:
121 |             print("Some error occured while pushing the code")
122 | 
123 |     git_push()
124 | 
125 |     while True:
126 |         if time.time() > timeout:
127 |             raise ("Timeout error")
128 |         manifest = kube_client.list_namespaced_custom_object(
129 |             group="serving.kserve.io",
130 |             version="v1beta1",
131 |             plural="inferenceservices",
132 |             namespace="staging",
133 |         )
134 |         model_names = []
135 |         for i in manifest["items"]:
136 |             model_names.append(i["metadata"]["name"])
137 |         if "sklearn-iris-minio" in model_names:
138 |             pass
139 |         else:
140 |             print(model_names)
141 |             print("Deletion test passed")
142 |             break
143 | 
144 | if sys.argv[1] == "seldon":
145 |     PATH_OF_GIT_REPO = "tests/repo-test"
146 |     COMMIT_MESSAGE = "comment from python script"
147 | 
148 |     def git_push():
149 |         import os
150 | 
151 |         os.remove("tests/repo-test/staging/seldon-s3.yaml")
152 |         try:
153 |             repo = Repo(PATH_OF_GIT_REPO)
154 |             repo.git.add(update=True)
155 |             repo.index.commit(COMMIT_MESSAGE)
156 |             origin = repo.remote(name="origin")
157 |             origin.push()
158 |         except:
159 |             print("Some error occured while pushing the code")
160 | 
161 |     git_push()
162 |     time.sleep(60)
163 | 
164 |     while True:
165 |         if time.time() > timeout:
166 |             raise ("Timeout error")
167 |         manifest = kube_client.list_namespaced_custom_object(
168 |             group="machinelearning.seldon.io",
169 |             version="v1",
170 |             plural="seldondeployments",
171 |             namespace="staging",
172 |         )
173 |         model_names = []
174 |         for i in manifest["items"]:
175 |             model_names.append(i["metadata"]["name"])
176 |         if "mlflow-var-minio" in model_names:
177 |             pass
178 |         else:
179 |             print(model_names)
180 |             print("Deletion test passed")
181 |             break
182 | 


--------------------------------------------------------------------------------
/tests/mlflow/test_deploy.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | 
 3 | from kubernetes import client as KubeClient
 4 | from kubernetes import config
 5 | 
 6 | try:
 7 |     config.load_kube_config()
 8 | except config.ConfigException:
 9 |     config.load_incluster_config()
10 | kube_client = KubeClient.CustomObjectsApi()
11 | status = ""
12 | timeout = time.time() + 60 * 10
13 | 
14 | while True:
15 |     test = kube_client.get_namespaced_custom_object(
16 |         group="machinelearning.seldon.io",
17 |         version="v1",
18 |         plural="seldondeployments",
19 |         namespace="staging",
20 |         name="mlflow",
21 |     )
22 |     status = test["status"]["state"]
23 |     print(status)
24 |     if status == "Available":
25 |         break
26 |     else:
27 |         print(test["status"])
28 |         time.sleep(30)
29 |     if time.time() > timeout:
30 |         # print(test)
31 |         deploy_name = list(test["status"]["deploymentStatus"].keys())[0]
32 |         kube_client = KubeClient.AppsV1Api()
33 |         deployment = kube_client.read_namespaced_deployment(
34 |             name=deploy_name, namespace="staging"
35 |         )
36 |         print(deployment)
37 |         raise ("Timeout error")
38 | 


--------------------------------------------------------------------------------
/tests/pf_mlflow.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -euo pipefail
 3 | 
 4 | kubectl port-forward -n mlflow svc/mlflow-service 5000:5000 &
 5 | MLFLOW_PID=$!
 6 | 
 7 | echo "Started mlflow port-forward, pid: $MLFLOW_PID"
 8 | echo MLFLOW_PID=$MLFLOW_PID >> pids.env
 9 | 
10 | sleep 1
11 | 
12 | 
13 | kubectl port-forward --namespace mlflow svc/minio 9000:9000 &
14 | MINIO_PID=$!
15 | 
16 | echo "Started mlflow port-forward, pid: $MINIO_PID"
17 | echo MINIO_PID=$MINIO_PID >> pids.env
18 | 
19 | sleep 1
20 | 
21 | curl -X POST http://localhost:5000/api/2.0/preview/mlflow/experiments/create -d '{"name":"test"}'


--------------------------------------------------------------------------------
/tests/repo-test/production/kserve-s3.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: "serving.kserve.io/v1beta1"
 2 | kind: "InferenceService"
 3 | metadata:
 4 |   name: "sklearn-iris-minio"
 5 | spec:
 6 |   predictor:
 7 |     model:
 8 |       modelFormat:
 9 |         name: mlflow
10 |       storageUri: '{{ mlflow.s3["iris demo2"] }}'


--------------------------------------------------------------------------------
/tests/repo-test/production/seldon-s3.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: machinelearning.seldon.io/v1
 2 | kind: SeldonDeployment
 3 | metadata:
 4 |   name: mlflow-var-minio
 5 | spec:
 6 |   name: iris
 7 |   predictors:
 8 |   - graph:
 9 |       children:
10 |         - name: step-one
11 |           modelUri: '{{ mlflow.s3["iris demo1"] }}'
12 |           envSecretRefName: seldon-rclone-secret
13 |           implementation: MLFLOW_SERVER
14 |           type: MODEL
15 |           children: 
16 |               - name: step-two
17 |                 modelUri: '{{ mlflow.s3["iris demo2"] }}'
18 |                 envSecretRefName: seldon-rclone-secret
19 |                 implementation: MLFLOW_SERVER
20 |                 type: MODEL
21 |                 children: []
22 |         - name: step-three
23 |           implementation: MLFLOW_SERVER
24 |           modelUri: '{{ mlflow.s3["iris demo3"] }}'
25 |           envSecretRefName: seldon-rclone-secret
26 |           type: MODEL
27 |           children: []
28 |       implementation: MLFLOW_SERVER
29 |       modelUri: '{{ mlflow.s3["iris demo4"] }}'
30 |       envSecretRefName: seldon-rclone-secret
31 |       name: classifier
32 |     name: default
33 |     replicas: 1


--------------------------------------------------------------------------------
/tests/repo-test/staging/kserve-s3.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: "serving.kserve.io/v1beta1"
 2 | kind: "InferenceService"
 3 | metadata:
 4 |   name: "sklearn-iris-minio"
 5 | spec:
 6 |   predictor:
 7 |     serviceAccountName: sa
 8 |     model:
 9 |       modelFormat:
10 |         name: mlflow
11 |       storageUri: '{{ mlflow.s3["iris demo2"] }}'


--------------------------------------------------------------------------------
/tests/repo-test/staging/kserve-s3t.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: "serving.kserve.io/v1beta1"
 2 | kind: "InferenceService"
 3 | metadata:
 4 |   name: "sklearn-iris-miniot"
 5 | spec:
 6 |   predictor:
 7 |     serviceAccountName: sa
 8 |     model:
 9 |       modelFormat:
10 |         name: mlflow
11 |       storageUri: '{{ mlflow.s3["iris demo2"] }}'


--------------------------------------------------------------------------------
/tests/repo-test/staging/kserve-sa.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: Secret
 3 | metadata:
 4 |   name: s3creds
 5 |   annotations:
 6 |      serving.kserve.io/s3-endpoint: minio.mlflow.svc.cluster.local:9000 # replace with your s3 endpoint e.g minio-service.kubeflow:9000
 7 |      serving.kserve.io/s3-usehttps: "0" # by default 1, if testing with minio you can set to 0
 8 |      serving.kserve.io/s3-region: "us-east-2"
 9 |      serving.kserve.io/s3-useanoncredential: "false" # omitting this is the same as false, if true will ignore provided credential and use anonymous credentials
10 | type: Opaque
11 | stringData: # use `stringData` for raw credential string or `data` for base64 encoded string
12 |   AWS_ACCESS_KEY_ID: minioadmin
13 |   AWS_SECRET_ACCESS_KEY: minioadmin
14 | ---
15 | 
16 | apiVersion: v1
17 | kind: ServiceAccount
18 | metadata:
19 |   name: sa
20 | secrets:
21 | - name: s3creds


--------------------------------------------------------------------------------
/tests/repo-test/staging/seldon-s3.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: machinelearning.seldon.io/v1
 2 | kind: SeldonDeployment
 3 | metadata:
 4 |   name: mlflow-var-minio
 5 | spec:
 6 |   name: iris
 7 |   predictors:
 8 |   - graph:
 9 |       children:
10 |         - name: step-one
11 |           modelUri: '{{ mlflow.s3["iris demo1"] }}'
12 |           envSecretRefName: seldon-rclone-secret
13 |           implementation: MLFLOW_SERVER
14 |           type: MODEL
15 |           children: 
16 |               - name: step-two
17 |                 modelUri: '{{ mlflow.s3["iris demo2"] }}'
18 |                 envSecretRefName: seldon-rclone-secret
19 |                 implementation: MLFLOW_SERVER
20 |                 type: MODEL
21 |                 children: []
22 |         - name: step-three
23 |           implementation: MLFLOW_SERVER
24 |           modelUri: '{{ mlflow.s3["iris demo3"] }}'
25 |           envSecretRefName: seldon-rclone-secret
26 |           type: MODEL
27 |           children: []
28 |       implementation: MLFLOW_SERVER
29 |       modelUri: '{{ mlflow.s3["iris demo4"] }}'
30 |       envSecretRefName: seldon-rclone-secret
31 |       logger:
32 |         url: http://broker-ingress.knative-eventing.svc.cluster.local/demo/default
33 |         mode: all
34 |       name: classifier
35 |     name: default
36 |     replicas: 1


--------------------------------------------------------------------------------
/tests/repo-test/staging/seldon-secret.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: Secret
 3 | metadata:
 4 |   name: seldon-init-container-secret
 5 |   namespace: staging
 6 | type: Opaque
 7 | stringData:
 8 |   RCLONE_CONFIG_S3_TYPE: s3
 9 |   RCLONE_CONFIG_S3_PROVIDER: minio
10 |   RCLONE_CONFIG_S3_ACCESS_KEY_ID: minioadmin
11 |   RCLONE_CONFIG_S3_SECRET_ACCESS_KEY: minioadmin
12 |   RCLONE_CONFIG_S3_ENDPOINT: http://minio.mlflow.svc.cluster.local:9000
13 |   RCLONE_CONFIG_S3_ENV_AUTH: "false"


--------------------------------------------------------------------------------
/tests/repo-test/staging/seldon-single-model.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: machinelearning.seldon.io/v1
 2 | kind: SeldonDeployment
 3 | metadata:
 4 |   name: mlflow
 5 | spec:
 6 |   name: iris
 7 |   predictors:
 8 |   - componentSpecs:
 9 |     - spec:
10 |         containers:
11 |         - name: classifier
12 |           livenessProbe:
13 |             initialDelaySeconds: 80
14 |             failureThreshold: 200
15 |             periodSeconds: 25
16 |             successThreshold: 1
17 |             httpGet:
18 |               path: /health/ping
19 |               port: http
20 |               scheme: HTTP
21 |           readinessProbe:
22 |             initialDelaySeconds: 80
23 |             failureThreshold: 20
24 |             periodSeconds: 25
25 |             successThreshold: 1
26 |             httpGet:
27 |               path: /health/ping
28 |               port: http
29 |               scheme: HTTP
30 |     graph:
31 |       implementation: MLFLOW_SERVER
32 |       modelUri: '{{ mlflow.s3["iris demo3"] }}'
33 |       envSecretRefName: seldon-init-container-secret
34 |       name: classifier
35 |     name: default
36 |     replicas: 1


--------------------------------------------------------------------------------
/tests/setup_git_repo.sh:
--------------------------------------------------------------------------------
 1 | curl -X 'POST' \
 2 |   'http://localhost:3000/api/v1/user/repos' \
 3 |   -H 'accept: application/json' \
 4 |   -H 'authorization: Basic bWRjYWRtaW46cGFzc3dvcmQ=' \
 5 |   -H 'Content-Type: application/json' \
 6 |   -d '{
 7 |   "auto_init": false,
 8 |   "default_branch": "main",
 9 |   "description": "demo",
10 |   "name": "repo-test",
11 |   "private": false,
12 |   "template": false,
13 |   "trust_model": "default"
14 | }'
15 | 
16 | git config --global user.email "mdcadmin@example.com"
17 | git config --global user.name "mdcadmin"
18 | cd tests/repo-test
19 | git init
20 | git add .
21 | git checkout -b main
22 | git commit -m "first commit"
23 | git remote add origin "http://mdcadmin:password@localhost:3000/mdcadmin/repo-test"
24 | git push -u origin main
25 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [flake8]
# Only the checks listed below are ignored (the F812 and H* codes of the
# `hacking` plugin are NOT ignored by this configuration):
# E501: line too long
# W503: line break before binary operator
14 | ignore = E501,W503
15 | 


--------------------------------------------------------------------------------
/ui/Dockerfile:
--------------------------------------------------------------------------------
# Image for the Dash UI: install the Python requirements, copy the sources to
# /ui and start app.py.
FROM continuumio/miniconda3:4.11.0
# Copy the requirements first so this layer is reused while only sources change.
COPY requirements.txt requirements.txt
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt
WORKDIR /ui
COPY . /ui
CMD ["python", "app.py"]


--------------------------------------------------------------------------------
/ui/app.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | import dash
  4 | import dash_bootstrap_components as dbc
  5 | import dash_html_components as html
  6 | import pandas as pd
  7 | from dash import Input, Output, dcc
  8 | from kubernetes import client as kube_client
  9 | from kubernetes import config
 10 | 
 11 | MLFLOW_NAMESPACE = os.getenv("namespace", "mlflow")
 12 | MDC_LABEL = os.getenv("MDC_LABEL", "mdc-staging")
 13 | 
 14 | 
 15 | app = dash.Dash(
 16 |     __name__,
 17 |     use_pages=True,
 18 |     external_stylesheets=[dbc.themes.FLATLY, dbc.icons.BOOTSTRAP],
 19 | )
 20 | 
 21 | navbar = dbc.NavbarSimple(
 22 |     [
 23 |         dbc.Button("Home", href="/", color="secondary", className="me-1"),
 24 |         dbc.Button("Logs", href="/logs", color="secondary", className="me-1"),
 25 |     ],
 26 |     brand="Mlflow Deployment Controller",
 27 |     color="primary",
 28 |     dark=True,
 29 |     className="mb-2",
 30 | )
 31 | 
 32 | 
 33 | def serve_layout():
 34 |     return html.Div(
 35 |         [navbar, dash.page_container],
 36 |         # fluid=True,
 37 |     )
 38 | 
 39 | 
 40 | app.layout = serve_layout
 41 | 
 42 | try:
 43 |     config.load_kube_config()
 44 | except config.ConfigException:
 45 |     config.load_incluster_config()
 46 | 
 47 | 
 48 | def dataf():
 49 |     v1 = kube_client.CustomObjectsApi()
 50 |     manifests = v1.list_cluster_custom_object(
 51 |         group="machinelearning.seldon.io",
 52 |         version="v1",
 53 |         plural="seldondeployments",
 54 |         label_selector="app.kubernetes.io/managed-by=mdc",
 55 |     )
 56 |     model_name = []
 57 |     namespace = []
 58 |     state = []
 59 |     replicas = []
 60 |     for i in manifests["items"]:
 61 |         model_name.append(i["metadata"]["name"])
 62 |         namespace.append(i["metadata"]["namespace"])
 63 |         for _id in i["metadata"]["annotations"].keys():
 64 |             if "mdc" in _id:
 65 |                 pass
 66 |         state.append(i["status"]["state"])
 67 |         deploy_name = list(i["status"]["deploymentStatus"].keys())[0]
 68 |         replicas.append(i["status"]["deploymentStatus"][deploy_name]["replicas"])
 69 |     df = pd.DataFrame(
 70 |         {
 71 |             "models": model_name,
 72 |             "namespace": namespace,
 73 |             "replicas": replicas,
 74 |             "state": state,
 75 |         }
 76 |     )
 77 |     df["models"] = [dcc.Link(f"{i}", href=f"/seldon/{i}") for i in df.models.values]
 78 |     table = dbc.Table.from_dataframe(df, striped=True, bordered=True, hover=True)
 79 | 
 80 |     return table
 81 | 
 82 | 
 83 | @app.callback(
 84 |     dash.dependencies.Output("table-deployments", "children"),
 85 |     [dash.dependencies.Input("interval-component", "n_intervals")],
 86 | )
 87 | def interval_deployment(n_intervals):
 88 |     return dataf()
 89 | 
 90 | 
 91 | @app.callback(
 92 |     dash.dependencies.Output("seldon-deployment", "children"),
 93 |     [dash.dependencies.Input("interval-component-seldon", "n_intervals")],
 94 | )
 95 | def internal_seldon_deployment(n_intervals):
 96 |     return []
 97 | 
 98 | 
 99 | @app.callback(Output("live-graph", "children"), [Input("graph-update", "n_intervals")])
100 | def update_graph_scatter(n_intervals):
101 |     print(n_intervals)
102 |     v1 = kube_client.CoreV1Api()
103 |     pod_name = v1.list_namespaced_pod(
104 |         namespace=MLFLOW_NAMESPACE,
105 |         label_selector=f"app.kubernetes.io/instance={MDC_LABEL}",
106 |     )
107 |     pod_name = pod_name.items[0].metadata.name
108 |     lines = []
109 |     lines = v1.read_namespaced_pod_log(
110 |         name=pod_name,
111 |         pretty=True,
112 |         since_seconds=60,
113 |         namespace=MLFLOW_NAMESPACE,
114 |         follow=False,
115 |         _preload_content=True,
116 |     )
117 |     # print(lines)
118 |     return [
119 |         html.Br(),
120 |         html.H4("Controller Logs"),
121 |         html.Plaintext(
122 |             lines,
123 |             style={
124 |                 "display": "inline-block",
125 |                 "fontSize": 15,
126 |                 # "verticalAlign": "top",
127 |                 "color": "white",
128 |                 "backgroundColor": "black",
129 |             },
130 |         ),
131 |     ]
132 | 
133 | 
@app.callback(
    *[Output(f"collapse{idx}", "is_open") for idx in range(6)],
    *[Output(f"collapse-button{idx}", "n_clicks") for idx in range(6)],
    [Input(f"collapse-button{idx}", "n_clicks") for idx in range(6)],
)
def toggle_collapse(n, n1, n2, n3, n4, n5):
    """Open the panel of the first clicked button and reset all click counters.

    Returns a 12-tuple: six ``is_open`` flags (at most one True, belonging to
    the first button with a truthy click count) followed by six zeros.
    """
    click_counts = (n, n1, n2, n3, n4, n5)
    # Position of the first button with a truthy click count, if any.
    opened = next((pos for pos, count in enumerate(click_counts) if count), None)
    open_flags = tuple(pos == opened for pos in range(len(click_counts)))
    return open_flags + (0,) * len(click_counts)
170 | 
171 | 
# When executed as a script, serve the Dash app on all interfaces at port 8000
# with debug mode turned off.
if __name__ == "__main__":
    app.run_server(host="0.0.0.0", port=8000, debug=False)
174 | 


--------------------------------------------------------------------------------
/ui/pages/deployments.py:
--------------------------------------------------------------------------------
 1 | from dash import dcc, html, register_page
 2 | from kubernetes import config
 3 | 
# Register this module as the Dash page served at the root path.
register_page(__name__, path="/")


# Try the local kubeconfig first; if loading it raises ConfigException,
# fall back to the in-cluster configuration.
try:
    config.load_kube_config()
except config.ConfigException:
    config.load_incluster_config()


# Static page layout: a heading, an interval timer firing every 1000 ms and
# an initially empty "table-deployments" container.
# NOTE(review): presumably a callback elsewhere fills "table-deployments" on
# each tick — confirm.
layout = html.Div(
    [
        html.H5(
            "Seldon Deployments",
            className="mt-5",
        ),
        dcc.Interval(
            id="interval-component", interval=1 * 1000, n_intervals=0  # in milliseconds
        ),
        html.Div(id="table-deployments"),
    ]
)
25 | 


--------------------------------------------------------------------------------
/ui/pages/logs.py:
--------------------------------------------------------------------------------
 1 | import dash
 2 | import dash_core_components as dcc
 3 | import dash_html_components as html
 4 | from kubernetes import config
 5 | 
# Try the local kubeconfig first; if loading it raises ConfigException,
# fall back to the in-cluster configuration.
try:
    config.load_kube_config()
except config.ConfigException:
    config.load_incluster_config()


def title():
    """Return the page title, ``"Logs"``."""
    return "Logs"


def description(ticker=None):
    """Return the page description, ``"Controller Logs"``.

    Args:
        ticker: Accepted but not used by this page.
    """
    return "Controller Logs"


# Register this module as the Dash page served at "/logs", using the
# functions above for its title and description.
dash.register_page(
    __name__,
    path_template="/logs",
    title=title,
    description=description,
    path="/logs",
)


def layout(ticker=None, **other_unknown_query_strings):
    """Build the logs page layout.

    Args:
        ticker: Accepted but not used by this page.
        **other_unknown_query_strings: Any further keyword arguments; ignored.

    Returns:
        html.Div: An empty ``live-graph`` container plus a ``graph-update``
        interval that fires every 10000 ms.
    """
    return html.Div(
        [
            html.Div(id="live-graph"),
            dcc.Interval(id="graph-update", interval=1 * 10000, n_intervals=0),
        ]
    )
36 | 


--------------------------------------------------------------------------------
/ui/pages/not_found_404.py:
--------------------------------------------------------------------------------
1 | import dash
2 | from dash import html
3 | 
# Register this module as the Dash page served at "/404".
dash.register_page(__name__, path="/404")


# The whole page is a single heading.
layout = html.H1("404 Not found")
8 | 


--------------------------------------------------------------------------------
/ui/pages/seldon.py:
--------------------------------------------------------------------------------
 1 | import dash
 2 | from kubernetes import config
 3 | from seldon_deployments.card import card_layout
 4 | 
# Try the local kubeconfig first; if loading it raises ConfigException,
# fall back to the in-cluster configuration.
try:
    config.load_kube_config()
except config.ConfigException:
    config.load_incluster_config()


def title(ticker=None):
    """Return the page title, ``"<ticker> Status"``.

    Args:
        ticker: Value interpolated into the title.
            NOTE(review): presumably the ``<ticker>`` segment of the page
            path, i.e. the deployment name — confirm.
    """
    return f"{ticker} Status"


def description(ticker=None):
    """Return the page description, ``"Deployment status <ticker>"``.

    Args:
        ticker: Value interpolated into the description.
    """
    return f"Deployment status {ticker}"


# Register this module as a Dash page with the path template
# "/seldon/<ticker>"; "/seldon/mlflow" is given as its concrete path.
dash.register_page(
    __name__,
    path_template="/seldon/<ticker>",
    title=title,
    description=description,
    path="/seldon/mlflow",
)


def layout(ticker=None, **other_unknown_query_strings):
    """Build the page layout by delegating to ``card_layout``.

    Args:
        ticker: Passed to ``card_layout`` as the deployment name.
        **other_unknown_query_strings: Any further keyword arguments; ignored.

    Returns:
        Whatever ``card_layout(ticker)`` returns.
    """
    return card_layout(ticker)
30 | 


--------------------------------------------------------------------------------
/ui/requirements.txt:
--------------------------------------------------------------------------------
 1 | mlflow
 2 | kubernetes
 3 | google
 4 | gcloud
 5 | google-apitools==0.5.32
 6 | google-auth==2.1.0
 7 | google-auth-oauthlib==0.4.6
 8 | google-cloud==0.34.0
 9 | google-cloud-core==2.0.0
10 | google-cloud-storage==1.42.2
11 | google-crc32c==1.2.0
12 | google-pasta==0.2.0
13 | google-reauth==0.1.1
14 | google-resumable-media==2.0.3
15 | googleapis-common-protos==1.52.0
16 | apscheduler
17 | plotly
18 | dash_core_components
19 | dash
20 | dash_html_components
21 | dash_bootstrap_components


--------------------------------------------------------------------------------
/ui/seldon_deployments/card.py:
--------------------------------------------------------------------------------
  1 | import ast
  2 | import json
  3 | import os
  4 | 
  5 | import dash
  6 | import dash_bootstrap_components as dbc
  7 | import yaml
  8 | from dash import dcc, html
  9 | from kubernetes import config
 10 | from seldon_deployments.data import dataf
 11 | 
 12 | try:
 13 |     config.load_kube_config()
 14 | except config.ConfigException:
 15 |     config.load_incluster_config()
 16 | GLOBAL_NAMESPACE = os.getenv("namespace", "staging")
 17 | SELDON_URL = os.getenv("seldon_url", "https://example.mlops.com")
 18 | 
 19 | 
 20 | def card_layout(deploy_name=None):
 21 |     (
 22 |         model_manifests,
 23 |         name,
 24 |         external_url,
 25 |         internal_url,
 26 |         status,
 27 |         status_message,
 28 |         status_reason,
 29 |         status_button,
 30 |         manifest,
 31 |     ) = dataf(name=deploy_name, namespace=GLOBAL_NAMESPACE, seldon_url=SELDON_URL)
 32 |     conditions = manifest["status"]["conditions"]
 33 |     collapses = []
 34 |     for i in range(len(conditions)):
 35 |         if conditions[i]["status"] == "False":
 36 |             color = "secondary"
 37 |         else:
 38 |             color = "success"
 39 |         type = conditions[i]["type"]
 40 |         try:
 41 |             reason = conditions[i]["reason"]
 42 |         except Exception as e:
 43 |             print(e)
 44 |             reason = type
 45 |         collapse = html.Div(
 46 |             [
 47 |                 dbc.Button(
 48 |                     type,
 49 |                     id=f"collapse-button{i}",
 50 |                     className="mb-3",
 51 |                     color=color,
 52 |                     n_clicks=0,
 53 |                 ),
 54 |                 dbc.Collapse(
 55 |                     dbc.Card(dbc.CardBody(reason)),
 56 |                     id=f"collapse{i}",
 57 |                     is_open=False,
 58 |                 ),
 59 |             ]
 60 |         )
 61 |         collapses.append(collapse)
 62 | 
 63 |     res = ast.literal_eval(json.dumps(manifest))
 64 |     res = yaml.safe_dump(res, default_flow_style=False)
 65 |     code = f"```yaml{res}```"
 66 |     model_cards = []
 67 |     for i in model_manifests:
 68 |         model_card = dbc.Card(
 69 |             [
 70 |                 dbc.CardBody(
 71 |                     [
 72 |                         html.H4(
 73 |                             i["name"], id="seldon-deployment", className="card-title"
 74 |                         ),
 75 |                         dbc.ListGroup(
 76 |                             [
 77 |                                 dbc.ListGroupItem(
 78 |                                     [
 79 |                                         html.A(
 80 |                                             "Run id: ", style={"font-weight": "bold"}
 81 |                                         ),
 82 |                                         html.A(i["run_id"]),
 83 |                                     ]
 84 |                                 ),
 85 |                                 dbc.ListGroupItem(
 86 |                                     [
 87 |                                         html.A(
 88 |                                             "Source: ", style={"font-weight": "bold"}
 89 |                                         ),
 90 |                                         html.A(i["source"]),
 91 |                                     ]
 92 |                                 ),
 93 |                                 dbc.ListGroupItem(
 94 |                                     [
 95 |                                         html.A(
 96 |                                             "Version: ", style={"font-weight": "bold"}
 97 |                                         ),
 98 |                                         html.A(i["version"]),
 99 |                                     ]
100 |                                 ),
101 |                                 dbc.ListGroupItem(
102 |                                     [
103 |                                         html.A(
104 |                                             "Artifacu Uri: ",
105 |                                             style={"font-weight": "bold"},
106 |                                         ),
107 |                                         html.A(i["artifact_uri"]),
108 |                                     ]
109 |                                 ),
110 |                             ]
111 |                         ),
112 |                     ]
113 |                 ),
114 |             ],
115 |         )
116 |         model_cards.append(model_card)
117 | 
118 |     Overview_tab = dcc.Tab(
119 |         label="Overview",
120 |         children=[
121 |             dbc.Card(
122 |                 dbc.ListGroup(
123 |                     [
124 |                         dbc.ListGroupItem(
125 |                             [
126 |                                 html.A(
127 |                                     "External Endpoint: ", style={"font-weight": "bold"}
128 |                                 ),
129 |                                 html.A(
130 |                                     id="external_url",
131 |                                     href=external_url,
132 |                                     children=external_url,
133 |                                     target="_blank",
134 |                                 ),
135 |                                 html.A(" "),
136 |                                 dcc.Clipboard(
137 |                                     target_id="external_url",
138 |                                     title="copy",
139 |                                     style={
140 |                                         "display": "inline-block",
141 |                                         "fontSize": 20,
142 |                                         "verticalAlign": "top",
143 |                                     },
144 |                                 ),
145 |                             ]
146 |                         ),
147 |                         dbc.ListGroupItem(
148 |                             [
149 |                                 html.A(
150 |                                     "Internal Endpoint: ", style={"font-weight": "bold"}
151 |                                 ),
152 |                                 html.A(
153 |                                     id="internal_url",
154 |                                     href=internal_url,
155 |                                     children=internal_url,
156 |                                     target="_blank",
157 |                                 ),
158 |                                 html.A(" "),
159 |                                 dcc.Clipboard(
160 |                                     target_id="internal_url",
161 |                                     title="copy",
162 |                                     style={
163 |                                         "display": "inline-block",
164 |                                         "fontSize": 20,
165 |                                         "verticalAlign": "top",
166 |                                     },
167 |                                 ),
168 |                             ]
169 |                         ),
170 |                         dbc.ListGroupItem(
171 |                             [
172 |                                 html.A(
173 |                                     "Status Message: ", style={"font-weight": "bold"}
174 |                                 ),
175 |                                 html.A(status_message),
176 |                             ]
177 |                         ),
178 |                         dbc.ListGroupItem(
179 |                             [
180 |                                 html.A(
181 |                                     "Status Message: ", style={"font-weight": "bold"}
182 |                                 ),
183 |                                 html.A(status_reason),
184 |                             ]
185 |                         ),
186 |                         dbc.ListGroupItem(
187 |                             [
188 |                                 html.A("Status: ", style={"font-weight": "bold"}),
189 |                                 status_button,
190 |                             ]
191 |                         ),
192 |                     ],
193 |                     flush=True,
194 |                 ),
195 |             )
196 |         ]
197 |         + collapses,
198 |     )
199 | 
200 |     tabs = [
201 |         Overview_tab,
202 |         dcc.Tab(label="Model Details", children=model_cards),
203 |         dcc.Tab(label="Yaml", children=[dcc.Markdown(str(code))]),
204 |     ]
205 |     if status == "Available":
206 |         tabs.append(
207 |             dcc.Tab(
208 |                 label="Doc",
209 |                 children=[
210 |                     html.Iframe(
211 |                         src=external_url, style={"height": "1067px", "width": "100%"}
212 |                     )
213 |                 ],
214 |             )
215 |         )
216 | 
217 |     layout = html.Div(
218 |         [
219 |             dash.html.H3(f"{name}"),
220 |             dcc.Tabs(tabs),
221 |             dcc.Interval(
222 |                 id="interval-component-seldon",
223 |                 interval=1 * 1000,  # in milliseconds
224 |                 n_intervals=0,
225 |             ),
226 |         ]
227 |     )
228 |     return layout
229 | 


--------------------------------------------------------------------------------
/ui/seldon_deployments/data.py:
--------------------------------------------------------------------------------
  1 | import ast
  2 | 
  3 | import dash_bootstrap_components as dbc
  4 | from dash import html
  5 | from kubernetes import client as KubeClient
  6 | from kubernetes import config
  7 | 
  8 | try:
  9 |     config.load_kube_config()
 10 | except config.ConfigException:
 11 |     config.load_incluster_config()
 12 | 
 13 | 
 14 | def pod_status(namespace, deploy_name):
 15 |     v1 = KubeClient.CoreV1Api()
 16 |     api_response = v1.list_namespaced_pod(namespace)
 17 |     for pod in api_response.items:
 18 |         if (pod.status.container_statuses is None) and (
 19 |             pod.status.init_container_statuses is None
 20 |         ):
 21 |             status = pod.status.conditions[0].message
 22 |             return (pod.metadata.name, status)
 23 | 
 24 |         if api_response.items[0].metadata.labels["app"] == deploy_name:
 25 |             status = pod.status.phase
 26 |             container_status = pod.status.container_statuses[0]
 27 | 
 28 |             if container_status.started is False or container_status.ready is False:
 29 |                 waiting_state = container_status.state.waiting
 30 |                 if (
 31 |                     waiting_state.message is not None
 32 |                     and "Error" in waiting_state.message
 33 |                 ):
 34 |                     status = waiting_state.reason
 35 |             try:
 36 |                 init_container_statuses = pod.status.init_container_statuses[0]
 37 |                 if (
 38 |                     init_container_statuses.started is False
 39 |                     or init_container_statuses.ready is False
 40 |                 ):
 41 |                     waiting_state = init_container_statuses.state.waiting
 42 |                     if (
 43 |                         waiting_state.message is not None
 44 |                         and "failed" in waiting_state.message
 45 |                     ):
 46 |                         status = waiting_state.reason
 47 |             except Exception as e:
 48 |                 print(e)
 49 |                 print("No init container found")
 50 |             if status == "CrashLoopBackOff":
 51 |                 return (pod.metadata.name, status, waiting_state.message)
 52 | 
 53 | 
 54 | def dataf(
 55 |     name="mlflow-var", namespace="staging", seldon_url="https://seldon.mlops.wianai.com"
 56 | ):
 57 |     v1 = KubeClient.CustomObjectsApi()
 58 |     manifest = v1.get_namespaced_custom_object(
 59 |         group="machinelearning.seldon.io",
 60 |         version="v1",
 61 |         plural="seldondeployments",
 62 |         namespace=namespace,
 63 |         name=name,
 64 |     )
 65 |     models = []
 66 |     print(manifest["metadata"]["annotations"].keys())
 67 |     for _id in manifest["metadata"]["annotations"].keys():
 68 |         if ("mdc" in _id) and ("mlflow-stage" not in _id):
 69 |             models.append(manifest["metadata"]["annotations"][_id])
 70 |     model = [ast.literal_eval(i) for i in models]
 71 |     name = manifest["metadata"]["name"]
 72 |     external_url = f"{seldon_url}/seldon/{namespace}/{name}/api/v1.0/doc/"
 73 |     internal_url = manifest["status"]["address"]["url"]
 74 |     deploy_name = list(manifest["status"]["deploymentStatus"].keys())[0]
 75 |     kube_client = KubeClient.AppsV1Api()
 76 |     deployment = kube_client.read_namespaced_deployment(
 77 |         name=deploy_name, namespace=namespace
 78 |     )
 79 |     # label = deployment.metadata.labels["app"]
 80 |     status = ""
 81 |     for condition in deployment.status.conditions:
 82 |         if (condition.type == "Available") and (condition.status == "True"):
 83 |             status = "Available"
 84 |             status_button = dbc.Button(
 85 |                 [html.I(className="bi bi-check-circle-fill me-2"), " Available"],
 86 |                 color="success",
 87 |                 disabled=True,
 88 |             )
 89 |             status_message = condition.message
 90 |             status_reason = condition.reason
 91 |         if status != "Available":
 92 |             if (condition.type == "Progressing") and (condition.status == "True"):
 93 |                 status = "Progressing"
 94 |                 status_message = condition.message
 95 |                 status_reason = condition.reason
 96 |                 status_button = dbc.Button(
 97 |                     [dbc.Spinner(size="sm"), " Progressing..."],
 98 |                     color="primary",
 99 |                     disabled=True,
100 |                 )
101 |             elif (condition.type == "Progressing") and (condition.status == "False"):
102 |                 status = condition.reason
103 |                 status_message = condition.message
104 |                 status_reason = condition.reason
105 |                 status_button = dbc.Button(
106 |                     [html.I(className="bi bi-x-octagon-fill me-2"), " Failed"],
107 |                     color="danger",
108 |                     disabled=True,
109 |                 )
110 |     return (
111 |         model,
112 |         name,
113 |         external_url,
114 |         internal_url,
115 |         status,
116 |         status_message,
117 |         status_reason,
118 |         status_button,
119 |         manifest,
120 |     )
121 | 


--------------------------------------------------------------------------------