├── .gitignore
├── README.md
├── first_experiment.ipynb
├── ml_flow_binary_classification.ipynb
├── ml_flow_dagshub.ipynb
└── ml_flow_model_management.ipynb


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110 | .pdm.toml
111 | .pdm-python
112 | .pdm-build/
113 | 
114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115 | __pypackages__/
116 | 
117 | # Celery stuff
118 | celerybeat-schedule
119 | celerybeat.pid
120 | 
121 | # SageMath parsed files
122 | *.sage.py
123 | 
124 | # Environments
125 | .env
126 | .venv
127 | env/
128 | venv/
129 | ENV/
130 | env.bak/
131 | venv.bak/
132 | 
133 | # Spyder project settings
134 | .spyderproject
135 | .spyproject
136 | 
137 | # Rope project settings
138 | .ropeproject
139 | 
140 | # mkdocs documentation
141 | /site
142 | 
143 | # mypy
144 | .mypy_cache/
145 | .dmypy.json
146 | dmypy.json
147 | 
148 | # Pyre type checker
149 | .pyre/
150 | 
151 | # pytype static type analyzer
152 | .pytype/
153 | 
154 | # Cython debug symbols
155 | cython_debug/
156 | 
157 | # PyCharm
158 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
161 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
162 | #.idea/
163 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # mlflow_dagshub_demo
2 | Demo notebooks showing how to track machine-learning experiments with MLflow, both on a local tracking server and on DagsHub.
3 | 


--------------------------------------------------------------------------------
/first_experiment.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 2,
  6 |    "id": "5f05cdda",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "import numpy as np\n",
 11 |     "from sklearn.datasets import make_classification\n",
 12 |     "from sklearn.model_selection import train_test_split\n",
 13 |     "from sklearn.linear_model import LogisticRegression\n",
 14 |     "from sklearn.ensemble import RandomForestClassifier\n",
 15 |     "from xgboost import XGBClassifier\n",
 16 |     "from sklearn.metrics import classification_report\n",
 17 |     "import warnings\n",
 18 |     "warnings.filterwarnings('ignore')"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": 3,
 24 |    "id": "56014f75",
 25 |    "metadata": {},
 26 |    "outputs": [
 27 |     {
 28 |      "data": {
 29 |       "text/plain": [
 30 |        "(array([0, 1]), array([900, 100], dtype=int64))"
 31 |       ]
 32 |      },
 33 |      "execution_count": 3,
 34 |      "metadata": {},
 35 |      "output_type": "execute_result"
 36 |     }
 37 |    ],
 38 |    "source": [
 39 |     "# Step 1: Create an imbalanced binary classification dataset\n",
 40 |     "X, y = make_classification(n_samples=1000, n_features=10, n_informative=2, n_redundant=8, \n",
 41 |     "                           weights=[0.9, 0.1], flip_y=0, random_state=42)\n",
 42 |     "\n",
 43 |     "np.unique(y, return_counts=True)"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 5,
 49 |    "id": "e94ae830",
 50 |    "metadata": {},
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "# Split the dataset into training and testing sets\n",
 54 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": 13,
 60 |    "id": "f7d21a3c",
 61 |    "metadata": {
 62 |     "scrolled": false
 63 |    },
 64 |    "outputs": [
 65 |     {
 66 |      "name": "stdout",
 67 |      "output_type": "stream",
 68 |      "text": [
 69 |       "              precision    recall  f1-score   support\n",
 70 |       "\n",
 71 |       "           0       0.95      0.97      0.96       270\n",
 72 |       "           1       0.62      0.50      0.56        30\n",
 73 |       "\n",
 74 |       "    accuracy                           0.92       300\n",
 75 |       "   macro avg       0.79      0.73      0.76       300\n",
 76 |       "weighted avg       0.91      0.92      0.92       300\n",
 77 |       "\n"
 78 |      ]
 79 |     }
 80 |    ],
 81 |    "source": [
 82 |     "# Define the model hyperparameters\n",
 83 |     "params = {\n",
 84 |     "    \"solver\": \"lbfgs\",\n",
 85 |     "    \"max_iter\": 1000,\n",
 86 |     "    \"multi_class\": \"auto\",\n",
 87 |     "    \"random_state\": 8888,\n",
 88 |     "}\n",
 89 |     "\n",
 90 |     "# Train the model\n",
 91 |     "lr = LogisticRegression(**params)\n",
 92 |     "lr.fit(X_train, y_train)\n",
 93 |     "\n",
 94 |     "# Predict on the test set\n",
 95 |     "y_pred = lr.predict(X_test)\n",
 96 |     "\n",
 97 |     "report = classification_report(y_test, y_pred)\n",
 98 |     "print(report)"
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": 15,
104 |    "id": "c37eb3c8",
105 |    "metadata": {},
106 |    "outputs": [
107 |     {
108 |      "data": {
109 |       "text/plain": [
110 |        "{'0': {'precision': 0.9456521739130435,\n",
111 |        "  'recall': 0.9666666666666667,\n",
112 |        "  'f1-score': 0.956043956043956,\n",
113 |        "  'support': 270.0},\n",
114 |        " '1': {'precision': 0.625,\n",
115 |        "  'recall': 0.5,\n",
116 |        "  'f1-score': 0.5555555555555556,\n",
117 |        "  'support': 30.0},\n",
118 |        " 'accuracy': 0.92,\n",
119 |        " 'macro avg': {'precision': 0.7853260869565217,\n",
120 |        "  'recall': 0.7333333333333334,\n",
121 |        "  'f1-score': 0.7557997557997558,\n",
122 |        "  'support': 300.0},\n",
123 |        " 'weighted avg': {'precision': 0.9135869565217392,\n",
124 |        "  'recall': 0.92,\n",
125 |        "  'f1-score': 0.915995115995116,\n",
126 |        "  'support': 300.0}}"
127 |       ]
128 |      },
129 |      "execution_count": 15,
130 |      "metadata": {},
131 |      "output_type": "execute_result"
132 |     }
133 |    ],
134 |    "source": [
135 |     "report_dict = classification_report(y_test, y_pred, output_dict=True)\n",
136 |     "report_dict"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "code",
141 |    "execution_count": 7,
142 |    "id": "66f89a13",
143 |    "metadata": {},
144 |    "outputs": [],
145 |    "source": [
146 |     "import mlflow"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": 23,
152 |    "id": "f380ca75",
153 |    "metadata": {},
154 |    "outputs": [
155 |     {
156 |      "name": "stderr",
157 |      "output_type": "stream",
158 |      "text": [
159 |       "2024/07/29 13:57:02 INFO mlflow.tracking.fluent: Experiment with name 'First Experiment' does not exist. Creating a new experiment.\n",
160 |       "Registered model 'tracking-quickstart' already exists. Creating a new version of this model...\n",
161 |       "2024/07/29 13:57:04 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-quickstart, version 2\n",
162 |       "Created version '2' of model 'tracking-quickstart'.\n"
163 |      ]
164 |     }
165 |    ],
166 |    "source": [
167 |     "# Set the tracking URI first: set_experiment looks up (or creates) the experiment on whichever store is active when it is called\n",
168 |     "mlflow.set_tracking_uri(uri=\"http://127.0.0.1:5000/\")\n",
169 |     "mlflow.set_experiment(\"First Experiment\")\n",
170 |     "with mlflow.start_run():\n",
171 |     "    mlflow.log_params(params)\n",
172 |     "    mlflow.log_metrics({\n",
173 |     "        'accuracy': report_dict['accuracy'],\n",
174 |     "        'recall_class_0': report_dict['0']['recall'],\n",
175 |     "        'recall_class_1': report_dict['1']['recall'],\n",
176 |     "        'f1_score_macro': report_dict['macro avg']['f1-score']\n",
177 |     "    })\n",
178 |     "    mlflow.sklearn.log_model(lr, \"Logistic Regression\")  "
179 |    ]
180 |   }
181 |  ],
182 |  "metadata": {
183 |   "kernelspec": {
184 |    "display_name": "Python 3 (ipykernel)",
185 |    "language": "python",
186 |    "name": "python3"
187 |   },
188 |   "language_info": {
189 |    "codemirror_mode": {
190 |     "name": "ipython",
191 |     "version": 3
192 |    },
193 |    "file_extension": ".py",
194 |    "mimetype": "text/x-python",
195 |    "name": "python",
196 |    "nbconvert_exporter": "python",
197 |    "pygments_lexer": "ipython3",
198 |    "version": "3.10.11"
199 |   }
200 |  },
201 |  "nbformat": 4,
202 |  "nbformat_minor": 5
203 | }
204 | 


--------------------------------------------------------------------------------
/ml_flow_binary_classification.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "f4e36302",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "<h2 align='center'>Codebasics ML Course: MLflow Tutorial</h2>"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": 37,
 14 |    "id": "295e5486",
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import numpy as np\n",
 19 |     "from sklearn.datasets import make_classification\n",
 20 |     "from sklearn.model_selection import train_test_split\n",
 21 |     "from sklearn.linear_model import LogisticRegression\n",
 22 |     "from sklearn.ensemble import RandomForestClassifier\n",
 23 |     "from xgboost import XGBClassifier\n",
 24 |     "from sklearn.metrics import classification_report\n",
 25 |     "import warnings\n",
 26 |     "warnings.filterwarnings('ignore')"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 38,
 32 |    "id": "ac73cd36",
 33 |    "metadata": {},
 34 |    "outputs": [
 35 |     {
 36 |      "data": {
 37 |       "text/plain": [
 38 |        "(array([0, 1]), array([900, 100], dtype=int64))"
 39 |       ]
 40 |      },
 41 |      "execution_count": 38,
 42 |      "metadata": {},
 43 |      "output_type": "execute_result"
 44 |     }
 45 |    ],
 46 |    "source": [
 47 |     "# Step 1: Create an imbalanced binary classification dataset\n",
 48 |     "X, y = make_classification(n_samples=1000, n_features=10, n_informative=2, n_redundant=8, \n",
 49 |     "                           weights=[0.9, 0.1], flip_y=0, random_state=42)\n",
 50 |     "\n",
 51 |     "np.unique(y, return_counts=True)"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": 39,
 57 |    "id": "0934ac03",
 58 |    "metadata": {},
 59 |    "outputs": [],
 60 |    "source": [
 61 |     "# Split the dataset into training and testing sets\n",
 62 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "markdown",
 67 |    "id": "027f7e0a",
 68 |    "metadata": {},
 69 |    "source": [
 70 |     "### Experiment 1: Train Logistic Regression Classifier"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": 40,
 76 |    "id": "df52d46a",
 77 |    "metadata": {
 78 |     "scrolled": true
 79 |    },
 80 |    "outputs": [
 81 |     {
 82 |      "name": "stdout",
 83 |      "output_type": "stream",
 84 |      "text": [
 85 |       "              precision    recall  f1-score   support\n",
 86 |       "\n",
 87 |       "           0       0.95      0.96      0.95       270\n",
 88 |       "           1       0.60      0.50      0.55        30\n",
 89 |       "\n",
 90 |       "    accuracy                           0.92       300\n",
 91 |       "   macro avg       0.77      0.73      0.75       300\n",
 92 |       "weighted avg       0.91      0.92      0.91       300\n",
 93 |       "\n"
 94 |      ]
 95 |     }
 96 |    ],
 97 |    "source": [
 98 |     "log_reg = LogisticRegression(C=1, solver='liblinear')\n",
 99 |     "log_reg.fit(X_train, y_train)\n",
100 |     "y_pred_log_reg = log_reg.predict(X_test)\n",
101 |     "print(classification_report(y_test, y_pred_log_reg))"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "markdown",
106 |    "id": "1468bab4",
107 |    "metadata": {},
108 |    "source": [
109 |     "### Experiment 2: Train Random Forest Classifier"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": 41,
115 |    "id": "2742e30d",
116 |    "metadata": {
117 |     "scrolled": true
118 |    },
119 |    "outputs": [
120 |     {
121 |      "name": "stdout",
122 |      "output_type": "stream",
123 |      "text": [
124 |       "              precision    recall  f1-score   support\n",
125 |       "\n",
126 |       "           0       0.96      1.00      0.98       270\n",
127 |       "           1       0.95      0.67      0.78        30\n",
128 |       "\n",
129 |       "    accuracy                           0.96       300\n",
130 |       "   macro avg       0.96      0.83      0.88       300\n",
131 |       "weighted avg       0.96      0.96      0.96       300\n",
132 |       "\n"
133 |      ]
134 |     }
135 |    ],
136 |    "source": [
137 |     "rf_clf = RandomForestClassifier(n_estimators=30, max_depth=3)\n",
138 |     "rf_clf.fit(X_train, y_train)\n",
139 |     "y_pred_rf = rf_clf.predict(X_test)\n",
140 |     "print(classification_report(y_test, y_pred_rf))"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "markdown",
145 |    "id": "7db18915",
146 |    "metadata": {},
147 |    "source": [
148 |     "### Experiment 3: Train XGBoost"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": 42,
154 |    "id": "fa3fe3e3",
155 |    "metadata": {},
156 |    "outputs": [
157 |     {
158 |      "name": "stdout",
159 |      "output_type": "stream",
160 |      "text": [
161 |       "              precision    recall  f1-score   support\n",
162 |       "\n",
163 |       "           0       0.98      1.00      0.99       270\n",
164 |       "           1       0.96      0.80      0.87        30\n",
165 |       "\n",
166 |       "    accuracy                           0.98       300\n",
167 |       "   macro avg       0.97      0.90      0.93       300\n",
168 |       "weighted avg       0.98      0.98      0.98       300\n",
169 |       "\n"
170 |      ]
171 |     }
172 |    ],
173 |    "source": [
174 |     "xgb_clf = XGBClassifier(use_label_encoder=False, eval_metric='logloss')\n",
175 |     "xgb_clf.fit(X_train, y_train)\n",
176 |     "y_pred_xgb = xgb_clf.predict(X_test)\n",
177 |     "print(classification_report(y_test, y_pred_xgb))"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "markdown",
182 |    "id": "b70bbef1",
183 |    "metadata": {},
184 |    "source": [
185 |     "### Experiment 4: Handle class imbalance using SMOTETomek and then Train XGBoost"
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "code",
190 |    "execution_count": 43,
191 |    "id": "5ecbe6a5",
192 |    "metadata": {},
193 |    "outputs": [
194 |     {
195 |      "data": {
196 |       "text/plain": [
197 |        "(array([0, 1]), array([619, 619], dtype=int64))"
198 |       ]
199 |      },
200 |      "execution_count": 43,
201 |      "metadata": {},
202 |      "output_type": "execute_result"
203 |     }
204 |    ],
205 |    "source": [
206 |     "from imblearn.combine import SMOTETomek\n",
207 |     "\n",
208 |     "smt = SMOTETomek(random_state=42)\n",
209 |     "X_train_res, y_train_res = smt.fit_resample(X_train, y_train)\n",
210 |     "\n",
211 |     "np.unique(y_train_res, return_counts=True)"
212 |    ]
213 |   },
214 |   {
215 |    "cell_type": "code",
216 |    "execution_count": 44,
217 |    "id": "2b931191",
218 |    "metadata": {
219 |     "scrolled": true
220 |    },
221 |    "outputs": [
222 |     {
223 |      "name": "stdout",
224 |      "output_type": "stream",
225 |      "text": [
226 |       "              precision    recall  f1-score   support\n",
227 |       "\n",
228 |       "           0       0.98      0.98      0.98       270\n",
229 |       "           1       0.81      0.83      0.82        30\n",
230 |       "\n",
231 |       "    accuracy                           0.96       300\n",
232 |       "   macro avg       0.89      0.91      0.90       300\n",
233 |       "weighted avg       0.96      0.96      0.96       300\n",
234 |       "\n"
235 |      ]
236 |     }
237 |    ],
238 |    "source": [
239 |     "xgb_clf = XGBClassifier(use_label_encoder=False, eval_metric='logloss')\n",
240 |     "xgb_clf.fit(X_train_res, y_train_res)\n",
241 |     "y_pred_xgb = xgb_clf.predict(X_test)\n",
242 |     "print(classification_report(y_test, y_pred_xgb))"
243 |    ]
244 |   },
245 |   {
246 |    "cell_type": "markdown",
247 |    "id": "8ac546b4",
248 |    "metadata": {},
249 |    "source": [
250 |     "<h2 align=\"center\" style=\"color:blue\">Track Experiments Using MLflow</h2>"
251 |    ]
252 |   },
253 |   {
254 |    "cell_type": "code",
255 |    "execution_count": 59,
256 |    "id": "9fc788a3",
257 |    "metadata": {},
258 |    "outputs": [],
259 |    "source": [
260 |     "models = [\n",
261 |     "    (\n",
262 |     "        \"Logistic Regression\", \n",
263 |     "        LogisticRegression(C=1, solver='liblinear'), \n",
264 |     "        (X_train, y_train),\n",
265 |     "        (X_test, y_test)\n",
266 |     "    ),\n",
267 |     "    (\n",
268 |     "        \"Random Forest\", \n",
269 |     "        RandomForestClassifier(n_estimators=30, max_depth=3), \n",
270 |     "        (X_train, y_train),\n",
271 |     "        (X_test, y_test)\n",
272 |     "    ),\n",
273 |     "    (\n",
274 |     "        \"XGBClassifier\",\n",
275 |     "        XGBClassifier(use_label_encoder=False, eval_metric='logloss'), \n",
276 |     "        (X_train, y_train),\n",
277 |     "        (X_test, y_test)\n",
278 |     "    ),\n",
279 |     "    (\n",
280 |     "        \"XGBClassifier With SMOTE\",\n",
281 |     "        XGBClassifier(use_label_encoder=False, eval_metric='logloss'), \n",
282 |     "        (X_train_res, y_train_res),\n",
283 |     "        (X_test, y_test)\n",
284 |     "    )\n",
285 |     "]"
286 |    ]
287 |   },
288 |   {
289 |    "cell_type": "code",
290 |    "execution_count": 60,
291 |    "id": "1a827a88",
292 |    "metadata": {},
293 |    "outputs": [],
294 |    "source": [
295 |     "reports = []\n",
296 |     "\n",
297 |     "# Unpack each entry into loop-local names so the notebook-level X_train/y_train/X_test/y_test\n",
298 |     "# are not overwritten by the last entry (the SMOTE-resampled training set).\n",
299 |     "for model_name, model, (X_fit, y_fit), (X_eval, y_eval) in models:\n",
300 |     "    # Train on this entry's own training data\n",
301 |     "    model.fit(X_fit, y_fit)\n",
302 |     "    \n",
303 |     "    # Evaluate on this entry's held-out data and keep the report as a dict for MLflow logging\n",
304 |     "    y_pred = model.predict(X_eval)\n",
305 |     "    report = classification_report(y_eval, y_pred, output_dict=True)\n",
306 |     "    reports.append(report)"
307 |    ]
308 |   },
309 |   {
310 |    "cell_type": "code",
311 |    "execution_count": 61,
312 |    "id": "29ca91b0",
313 |    "metadata": {},
314 |    "outputs": [],
315 |    "source": [
316 |     "import mlflow\n",
317 |     "import mlflow.sklearn\n",
318 |     "import mlflow.xgboost"
319 |    ]
320 |   },
321 |   {
322 |    "cell_type": "code",
323 |    "execution_count": 63,
324 |    "id": "420f2511",
325 |    "metadata": {
326 |     "scrolled": false
327 |    },
328 |    "outputs": [],
329 |    "source": [
330 |     "# Initialize MLflow: set the tracking URI first so the experiment is resolved on that server\n",
331 |     "mlflow.set_tracking_uri(\"http://localhost:5000\")\n",
332 |     "mlflow.set_experiment(\"Anomaly Detection\")\n",
333 |     "\n",
334 |     "for i, element in enumerate(models):\n",
335 |     "    model_name = element[0]\n",
336 |     "    model = element[1]\n",
337 |     "    report = reports[i]\n",
338 |     "    \n",
339 |     "    with mlflow.start_run(run_name=model_name):        \n",
340 |     "        mlflow.log_param(\"model\", model_name)\n",
341 |     "        mlflow.log_metric('accuracy', report['accuracy'])\n",
342 |     "        mlflow.log_metric('recall_class_1', report['1']['recall'])\n",
343 |     "        mlflow.log_metric('recall_class_0', report['0']['recall'])\n",
344 |     "        mlflow.log_metric('f1_score_macro', report['macro avg']['f1-score'])        \n",
345 |     "        # XGBoost models are logged with the xgboost flavor, all others with the sklearn flavor\n",
346 |     "        if \"XGB\" in model_name:\n",
347 |     "            mlflow.xgboost.log_model(model, \"model\")\n",
348 |     "        else:\n",
349 |     "            mlflow.sklearn.log_model(model, \"model\")  "
350 |    ]
351 |   }
352 |  ],
353 |  "metadata": {
354 |   "kernelspec": {
355 |    "display_name": "Python 3 (ipykernel)",
356 |    "language": "python",
357 |    "name": "python3"
358 |   },
359 |   "language_info": {
360 |    "codemirror_mode": {
361 |     "name": "ipython",
362 |     "version": 3
363 |    },
364 |    "file_extension": ".py",
365 |    "mimetype": "text/x-python",
366 |    "name": "python",
367 |    "nbconvert_exporter": "python",
368 |    "pygments_lexer": "ipython3",
369 |    "version": "3.10.11"
370 |   }
371 |  },
372 |  "nbformat": 4,
373 |  "nbformat_minor": 5
374 | }
375 | 


--------------------------------------------------------------------------------
/ml_flow_dagshub.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "b4f9d400",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "<h2 align='center'>Codebasics ML Course: MLflow DagsHub Tutorial</h2>"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": 1,
 14 |    "id": "5eb3c2b2",
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import numpy as np\n",
 19 |     "from sklearn.datasets import make_classification\n",
 20 |     "from sklearn.model_selection import train_test_split\n",
 21 |     "from sklearn.linear_model import LogisticRegression\n",
 22 |     "from sklearn.ensemble import RandomForestClassifier\n",
 23 |     "from xgboost import XGBClassifier\n",
 24 |     "from sklearn.metrics import classification_report\n",
 25 |     "import warnings\n",
 26 |     "warnings.filterwarnings('ignore')"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 2,
 32 |    "id": "0878fc4c",
 33 |    "metadata": {},
 34 |    "outputs": [
 35 |     {
 36 |      "data": {
 37 |       "text/plain": [
 38 |        "(array([0, 1]), array([900, 100], dtype=int64))"
 39 |       ]
 40 |      },
 41 |      "execution_count": 2,
 42 |      "metadata": {},
 43 |      "output_type": "execute_result"
 44 |     }
 45 |    ],
 46 |    "source": [
 47 |     "# Step 1: Create an imbalanced binary classification dataset\n",
 48 |     "X, y = make_classification(n_samples=1000, n_features=10, n_informative=2, n_redundant=8, \n",
 49 |     "                           weights=[0.9, 0.1], flip_y=0, random_state=42)\n",
 50 |     "\n",
 51 |     "np.unique(y, return_counts=True)"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": 3,
 57 |    "id": "2a6b80dd",
 58 |    "metadata": {},
 59 |    "outputs": [],
 60 |    "source": [
 61 |     "# Split the dataset into training and testing sets\n",
 62 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "markdown",
 67 |    "id": "f3a6191b",
 68 |    "metadata": {},
 69 |    "source": [
 70 |     "#### Handle class imbalance"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": 4,
 76 |    "id": "3190fd47",
 77 |    "metadata": {},
 78 |    "outputs": [
 79 |     {
 80 |      "data": {
 81 |       "text/plain": [
 82 |        "(array([0, 1]), array([619, 619], dtype=int64))"
 83 |       ]
 84 |      },
 85 |      "execution_count": 4,
 86 |      "metadata": {},
 87 |      "output_type": "execute_result"
 88 |     }
 89 |    ],
 90 |    "source": [
 91 |     "from imblearn.combine import SMOTETomek\n",
 92 |     "\n",
 93 |     "smt = SMOTETomek(random_state=42)\n",
 94 |     "X_train_res, y_train_res = smt.fit_resample(X_train, y_train)\n",
 95 |     "np.unique(y_train_res, return_counts=True)"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "markdown",
100 |    "id": "75b6f715",
101 |    "metadata": {},
102 |    "source": [
103 |     "### Track Experiments"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": 5,
109 |    "id": "1eb49554",
110 |    "metadata": {},
111 |    "outputs": [],
112 |    "source": [
113 |     "models = [\n",
114 |     "    (\n",
115 |     "        \"Logistic Regression\", \n",
116 |     "        {\"C\": 1, \"solver\": 'liblinear'},\n",
117 |     "        LogisticRegression(), \n",
118 |     "        (X_train, y_train),\n",
119 |     "        (X_test, y_test)\n",
120 |     "    ),\n",
121 |     "    (\n",
122 |     "        \"Random Forest\", \n",
123 |     "        {\"n_estimators\": 30, \"max_depth\": 3},\n",
124 |     "        RandomForestClassifier(), \n",
125 |     "        (X_train, y_train),\n",
126 |     "        (X_test, y_test)\n",
127 |     "    ),\n",
128 |     "    (\n",
129 |     "        \"XGBClassifier\",\n",
130 |     "        {\"use_label_encoder\": False, \"eval_metric\": 'logloss'},\n",
131 |     "        XGBClassifier(), \n",
132 |     "        (X_train, y_train),\n",
133 |     "        (X_test, y_test)\n",
134 |     "    ),\n",
135 |     "    (\n",
136 |     "        \"XGBClassifier With SMOTE\",\n",
137 |     "        {\"use_label_encoder\": False, \"eval_metric\": 'logloss'},\n",
138 |     "        XGBClassifier(), \n",
139 |     "        (X_train_res, y_train_res),\n",
140 |     "        (X_test, y_test)\n",
141 |     "    )\n",
142 |     "]"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": 6,
148 |    "id": "a91ad5ae",
149 |    "metadata": {},
150 |    "outputs": [],
151 |    "source": [
152 |     "reports = []\n",
153 |     "\n",
154 |     "# Unpack each entry into loop-local names so the notebook-level X_train/y_train/X_test/y_test\n",
155 |     "# are not overwritten by the last entry (the SMOTE-resampled training set).\n",
156 |     "for model_name, params, model, (X_fit, y_fit), (X_eval, y_eval) in models:\n",
157 |     "    # Apply this entry's hyperparameters to the estimator, then train on its own training data\n",
158 |     "    model.set_params(**params)\n",
159 |     "    model.fit(X_fit, y_fit)\n",
160 |     "    \n",
161 |     "    # Evaluate on this entry's held-out data and keep the report as a dict for MLflow logging\n",
162 |     "    y_pred = model.predict(X_eval)\n",
163 |     "    report = classification_report(y_eval, y_pred, output_dict=True)\n",
164 |     "    reports.append(report)"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": 7,
170 |    "id": "08741b0a",
171 |    "metadata": {},
172 |    "outputs": [],
173 |    "source": [
174 |     "import mlflow\n",
175 |     "import mlflow.sklearn\n",
176 |     "import mlflow.xgboost"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": 8,
182 |    "id": "503f6e93",
183 |    "metadata": {},
184 |    "outputs": [
185 |     {
186 |      "data": {
187 |       "text/html": [
188 |        "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Accessing as dhavalsays\n",
189 |        "</pre>\n"
190 |       ],
191 |       "text/plain": [
192 |        "Accessing as dhavalsays\n"
193 |       ]
194 |      },
195 |      "metadata": {},
196 |      "output_type": "display_data"
197 |     },
198 |     {
199 |      "data": {
200 |       "text/html": [
201 |        "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Initialized MLflow to track repo <span style=\"color: #008000; text-decoration-color: #008000\">\"learnpythonlanguage/mlflow_dagshub_demo\"</span>\n",
202 |        "</pre>\n"
203 |       ],
204 |       "text/plain": [
205 |        "Initialized MLflow to track repo \u001b[32m\"learnpythonlanguage/mlflow_dagshub_demo\"\u001b[0m\n"
206 |       ]
207 |      },
208 |      "metadata": {},
209 |      "output_type": "display_data"
210 |     },
211 |     {
212 |      "data": {
213 |       "text/html": [
214 |        "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Repository learnpythonlanguage/mlflow_dagshub_demo initialized!\n",
215 |        "</pre>\n"
216 |       ],
217 |       "text/plain": [
218 |        "Repository learnpythonlanguage/mlflow_dagshub_demo initialized!\n"
219 |       ]
220 |      },
221 |      "metadata": {},
222 |      "output_type": "display_data"
223 |     }
224 |    ],
225 |    "source": [
226 |     "# dagshub setup\n",
227 |     "\n",
228 |     "import dagshub\n",
229 |     "dagshub.init(repo_owner='learnpythonlanguage', repo_name='mlflow_dagshub_demo', mlflow=True)"
230 |    ]
231 |   },
232 |   {
233 |    "cell_type": "code",
234 |    "execution_count": 12,
235 |    "id": "cfcc9a19",
236 |    "metadata": {
237 |     "scrolled": false
238 |    },
239 |    "outputs": [
240 |     {
241 |      "name": "stderr",
242 |      "output_type": "stream",
243 |      "text": [
244 |       "2024/08/01 11:50:40 INFO mlflow.tracking.fluent: Experiment with name 'Anomaly Detection' does not exist. Creating a new experiment.\n"
245 |      ]
246 |     }
247 |    ],
248 |    "source": [
249 |     "# Ideally you will not need the following 4 lines if you are starting fresh and have no previous DagsHub credentials stored on your computer\n",
250 |     "import os\n",
251 |     "os.environ['MLFLOW_TRACKING_USERNAME'] = 'your user name' # 'learnpythonlanguage'\n",
252 |     "os.environ['MLFLOW_TRACKING_PASSWORD'] = 'your password' # \n",
253 |     "os.environ['MLFLOW_TRACKING_URI'] = 'your dagshub unique uri' # https://dagshub.com/learnpythonlanguage/mlflow_dagshub_demo.mlflow\n",
254 |     "\n",
255 |     "# Initialize MLflow\n",
256 |     "mlflow.set_experiment(\"Anomaly Detection\")\n",
257 |     "# mlflow.set_tracking_uri(\"http://localhost:5000\")\n",
258 |     "\n",
259 |     "for i, element in enumerate(models):\n",
260 |     "    model_name = element[0]\n",
261 |     "    params = element[1]\n",
262 |     "    model = element[2]\n",
263 |     "    report = reports[i]\n",
264 |     "    \n",
265 |     "    with mlflow.start_run(run_name=model_name):        \n",
266 |     "        mlflow.log_params(params)\n",
267 |     "        mlflow.log_metrics({\n",
268 |     "            'accuracy': report['accuracy'],\n",
269 |     "            'recall_class_1': report['1']['recall'],\n",
270 |     "            'recall_class_0': report['0']['recall'],\n",
271 |     "            'f1_score_macro': report['macro avg']['f1-score']\n",
272 |     "        })  \n",
273 |     "        \n",
274 |     "        if \"XGB\" in model_name:\n",
275 |     "            mlflow.xgboost.log_model(model, \"model\")\n",
276 |     "        else:\n",
277 |     "            mlflow.sklearn.log_model(model, \"model\")  "
278 |    ]
279 |   }
280 |  ],
281 |  "metadata": {
282 |   "kernelspec": {
283 |    "display_name": "Python 3 (ipykernel)",
284 |    "language": "python",
285 |    "name": "python3"
286 |   },
287 |   "language_info": {
288 |    "codemirror_mode": {
289 |     "name": "ipython",
290 |     "version": 3
291 |    },
292 |    "file_extension": ".py",
293 |    "mimetype": "text/x-python",
294 |    "name": "python",
295 |    "nbconvert_exporter": "python",
296 |    "pygments_lexer": "ipython3",
297 |    "version": "3.10.11"
298 |   }
299 |  },
300 |  "nbformat": 4,
301 |  "nbformat_minor": 5
302 | }
303 | 


--------------------------------------------------------------------------------
/ml_flow_model_management.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "20086d7c",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "<h2 align='center'>Codebasics ML Course: ML Flow Tutorial</h2>"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": 29,
 14 |    "id": "2134f63a",
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import numpy as np\n",
 19 |     "from sklearn.datasets import make_classification\n",
 20 |     "from sklearn.model_selection import train_test_split\n",
 21 |     "from sklearn.linear_model import LogisticRegression\n",
 22 |     "from sklearn.ensemble import RandomForestClassifier\n",
 23 |     "from xgboost import XGBClassifier\n",
 24 |     "from sklearn.metrics import classification_report\n",
 25 |     "import warnings\n",
 26 |     "warnings.filterwarnings('ignore')"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 30,
 32 |    "id": "8a467445",
 33 |    "metadata": {},
 34 |    "outputs": [
 35 |     {
 36 |      "data": {
 37 |       "text/plain": [
 38 |        "(array([0, 1]), array([900, 100], dtype=int64))"
 39 |       ]
 40 |      },
 41 |      "execution_count": 30,
 42 |      "metadata": {},
 43 |      "output_type": "execute_result"
 44 |     }
 45 |    ],
 46 |    "source": [
 47 |     "# Step 1: Create an imbalanced binary classification dataset\n",
 48 |     "X, y = make_classification(n_samples=1000, n_features=10, n_informative=2, n_redundant=8, \n",
 49 |     "                           weights=[0.9, 0.1], flip_y=0, random_state=42)\n",
 50 |     "\n",
 51 |     "np.unique(y, return_counts=True)"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": 31,
 57 |    "id": "7fc473ad",
 58 |    "metadata": {},
 59 |    "outputs": [],
 60 |    "source": [
 61 |     "# Split the dataset into training and testing sets\n",
 62 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "markdown",
 67 |    "id": "ce174acd",
 68 |    "metadata": {},
 69 |    "source": [
 70 |     "#### Handle class imbalance"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": 32,
 76 |    "id": "0c6d768a",
 77 |    "metadata": {},
 78 |    "outputs": [
 79 |     {
 80 |      "data": {
 81 |       "text/plain": [
 82 |        "(array([0, 1]), array([619, 619], dtype=int64))"
 83 |       ]
 84 |      },
 85 |      "execution_count": 32,
 86 |      "metadata": {},
 87 |      "output_type": "execute_result"
 88 |     }
 89 |    ],
 90 |    "source": [
 91 |     "from imblearn.combine import SMOTETomek\n",
 92 |     "\n",
 93 |     "smt = SMOTETomek(random_state=42)\n",
 94 |     "X_train_res, y_train_res = smt.fit_resample(X_train, y_train)\n",
 95 |     "np.unique(y_train_res, return_counts=True)"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "markdown",
100 |    "id": "f04a52b2",
101 |    "metadata": {},
102 |    "source": [
103 |     "### Track Experiments"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": 33,
109 |    "id": "82fdaf1c",
110 |    "metadata": {},
111 |    "outputs": [],
112 |    "source": [
113 |     "models = [\n",
114 |     "    (\n",
115 |     "        \"Logistic Regression\", \n",
116 |     "        {\"C\": 1, \"solver\": 'liblinear'},\n",
117 |     "        LogisticRegression(), \n",
118 |     "        (X_train, y_train),\n",
119 |     "        (X_test, y_test)\n",
120 |     "    ),\n",
121 |     "    (\n",
122 |     "        \"Random Forest\", \n",
123 |     "        {\"n_estimators\": 30, \"max_depth\": 3},\n",
124 |     "        RandomForestClassifier(), \n",
125 |     "        (X_train, y_train),\n",
126 |     "        (X_test, y_test)\n",
127 |     "    ),\n",
128 |     "    (\n",
129 |     "        \"XGBClassifier\",\n",
130 |     "        {\"use_label_encoder\": False, \"eval_metric\": 'logloss'},\n",
131 |     "        XGBClassifier(), \n",
132 |     "        (X_train, y_train),\n",
133 |     "        (X_test, y_test)\n",
134 |     "    ),\n",
135 |     "    (\n",
136 |     "        \"XGBClassifier With SMOTE\",\n",
137 |     "        {\"use_label_encoder\": False, \"eval_metric\": 'logloss'},\n",
138 |     "        XGBClassifier(), \n",
139 |     "        (X_train_res, y_train_res),\n",
140 |     "        (X_test, y_test)\n",
141 |     "    )\n",
142 |     "]"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": 34,
148 |    "id": "13a992c3",
149 |    "metadata": {},
150 |    "outputs": [],
151 |    "source": [
152 |     "reports = []\n",
153 |     "\n",
154 |     "for model_name, params, model, train_set, test_set in models:\n",
155 |     "    X_train = train_set[0]\n",
156 |     "    y_train = train_set[1]\n",
157 |     "    X_test = test_set[0]\n",
158 |     "    y_test = test_set[1]\n",
159 |     "    \n",
160 |     "    model.set_params(**params)\n",
161 |     "    model.fit(X_train, y_train)\n",
162 |     "    y_pred = model.predict(X_test)\n",
163 |     "    report = classification_report(y_test, y_pred, output_dict=True)\n",
164 |     "    reports.append(report)"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": 35,
170 |    "id": "d9301bc0",
171 |    "metadata": {},
172 |    "outputs": [],
173 |    "source": [
174 |     "import mlflow\n",
175 |     "import mlflow.sklearn\n",
176 |     "import mlflow.xgboost"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": 36,
182 |    "id": "9ad9cf4d",
183 |    "metadata": {
184 |     "scrolled": false
185 |    },
186 |    "outputs": [],
187 |    "source": [
188 |     "# Initialize MLflow\n",
189 |     "mlflow.set_tracking_uri(\"http://localhost:5000\")\n",
190 |     "mlflow.set_experiment(\"Anomaly Detection\")\n",
191 |     "\n",
192 |     "for i, element in enumerate(models):\n",
193 |     "    model_name = element[0]\n",
194 |     "    params = element[1]\n",
195 |     "    model = element[2]\n",
196 |     "    report = reports[i]\n",
197 |     "    \n",
198 |     "    with mlflow.start_run(run_name=model_name):        \n",
199 |     "        mlflow.log_params(params)\n",
200 |     "        mlflow.log_metrics({\n",
201 |     "            'accuracy': report['accuracy'],\n",
202 |     "            'recall_class_1': report['1']['recall'],\n",
203 |     "            'recall_class_0': report['0']['recall'],\n",
204 |     "            'f1_score_macro': report['macro avg']['f1-score']\n",
205 |     "        })  \n",
206 |     "        \n",
207 |     "        if \"XGB\" in model_name:\n",
208 |     "            mlflow.xgboost.log_model(model, \"model\")\n",
209 |     "        else:\n",
210 |     "            mlflow.sklearn.log_model(model, \"model\")  "
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "markdown",
215 |    "id": "7446ae8a",
216 |    "metadata": {},
217 |    "source": [
218 |     "### Register the Model"
219 |    ]
220 |   },
221 |   {
222 |    "cell_type": "code",
223 |    "execution_count": null,
224 |    "id": "51c0013a",
225 |    "metadata": {
226 |     "scrolled": false
227 |    },
228 |    "outputs": [],
229 |    "source": [
230 |     "model_name = 'XGB-Smote'\n",
231 |     "run_id=input('Please type RunID')\n",
232 |     "model_uri = f'runs:/{run_id}/model'\n",
233 |     "\n",
234 |     "with mlflow.start_run(run_id=run_id):\n",
235 |     "    mlflow.register_model(model_uri=model_uri, name=model_name)"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "markdown",
240 |    "id": "1b074a08",
241 |    "metadata": {},
242 |    "source": [
243 |     "### Load the Model"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "code",
248 |    "execution_count": 41,
249 |    "id": "a40fef12",
250 |    "metadata": {
251 |     "scrolled": true
252 |    },
253 |    "outputs": [
254 |     {
255 |      "name": "stderr",
256 |      "output_type": "stream",
257 |      "text": [
258 |       "Downloading artifacts: 100%|██████████| 5/5 [00:02<00:00,  2.33it/s]\n"
259 |      ]
260 |     },
261 |     {
262 |      "data": {
263 |       "text/plain": [
264 |        "array([0, 0, 0, 0])"
265 |       ]
266 |      },
267 |      "execution_count": 41,
268 |      "metadata": {},
269 |      "output_type": "execute_result"
270 |     }
271 |    ],
272 |    "source": [
273 |     "model_version = 1\n",
274 |     "model_uri = f\"models:/{model_name}/{model_version}\"\n",
275 |     "\n",
276 |     "loaded_model = mlflow.xgboost.load_model(model_uri)\n",
277 |     "y_pred = loaded_model.predict(X_test)\n",
278 |     "y_pred[:4]"
279 |    ]
280 |   },
281 |   {
282 |    "cell_type": "markdown",
283 |    "id": "5f8d2893",
284 |    "metadata": {},
285 |    "source": [
286 |     "### Transition the Model to Production"
287 |    ]
288 |   },
289 |   {
290 |    "cell_type": "code",
291 |    "execution_count": 42,
292 |    "id": "9c8ac1e3",
293 |    "metadata": {
294 |     "scrolled": true
295 |    },
296 |    "outputs": [
297 |     {
298 |      "data": {
299 |       "text/plain": [
300 |        "<ModelVersion: aliases=[], creation_timestamp=1722288798033, current_stage='None', description='', last_updated_timestamp=1722288798033, name='anomaly-detection-prod', run_id='3e6b0bdb88ac4df9add7a826594d8d33', run_link='', source='models:/XGB-Smote/1', status='READY', status_message='', tags={}, user_id='', version='1'>"
301 |       ]
302 |      },
303 |      "execution_count": 42,
304 |      "metadata": {},
305 |      "output_type": "execute_result"
306 |     }
307 |    ],
308 |    "source": [
309 |     "current_model_uri = f\"models:/{model_name}@challenger\"\n",
310 |     "production_model_name = \"anomaly-detection-prod\"\n",
311 |     "\n",
312 |     "client = mlflow.MlflowClient()\n",
313 |     "client.copy_model_version(src_model_uri=current_model_uri, dst_name=production_model_name)"
314 |    ]
315 |   },
316 |   {
317 |    "cell_type": "code",
318 |    "execution_count": 43,
319 |    "id": "4297a2fe",
320 |    "metadata": {},
321 |    "outputs": [
322 |     {
323 |      "name": "stderr",
324 |      "output_type": "stream",
325 |      "text": [
326 |       "Downloading artifacts: 100%|██████████| 5/5 [00:02<00:00,  2.33it/s]\n"
327 |      ]
328 |     },
329 |     {
330 |      "data": {
331 |       "text/plain": [
332 |        "array([0, 0, 0, 0])"
333 |       ]
334 |      },
335 |      "execution_count": 43,
336 |      "metadata": {},
337 |      "output_type": "execute_result"
338 |     }
339 |    ],
340 |    "source": [
341 |     "model_version = 1\n",
342 |     "prod_model_uri = f\"models:/{production_model_name}@champion\"\n",
343 |     "\n",
344 |     "loaded_model = mlflow.xgboost.load_model(prod_model_uri)\n",
345 |     "y_pred = loaded_model.predict(X_test)\n",
346 |     "y_pred[:4]"
347 |    ]
348 |   },
349 |   {
350 |    "cell_type": "markdown",
351 |    "id": "ca565a87",
352 |    "metadata": {},
353 |    "source": [
354 |     "Please refer to the following link to learn more about the model registry:\n",
355 |     "\n",
356 |     "https://mlflow.org/docs/latest/model-registry.html#model-registry-workflows"
357 |    ]
358 |   }
359 |  ],
360 |  "metadata": {
361 |   "kernelspec": {
362 |    "display_name": "Python 3 (ipykernel)",
363 |    "language": "python",
364 |    "name": "python3"
365 |   },
366 |   "language_info": {
367 |    "codemirror_mode": {
368 |     "name": "ipython",
369 |     "version": 3
370 |    },
371 |    "file_extension": ".py",
372 |    "mimetype": "text/x-python",
373 |    "name": "python",
374 |    "nbconvert_exporter": "python",
375 |    "pygments_lexer": "ipython3",
376 |    "version": "3.10.11"
377 |   }
378 |  },
379 |  "nbformat": 4,
380 |  "nbformat_minor": 5
381 | }
382 | 


--------------------------------------------------------------------------------