├── README.md
├── kubeflow
│   └── base_images
│       ├── jupyter_lab_config.py
│       ├── jupyter_server_config.py
│       ├── Dockerfile_scipy
│       ├── Dockerfile_pytorch_full
│       └── Dockerfile_pytorch_cuda
├── airflow_mlflow
│   ├── Dockerfile
│   ├── requirements.txt
│   ├── dags
│   │   ├── example.py
│   │   ├── example_3.py
│   │   ├── example_2.py
│   │   └── example_4.py
│   ├── docker-compose-windows.yml
│   ├── docker-compose.yml
│   ├── jupyter-data
│   │   ├── MLFlow_test.ipynb
│   │   └── MLFlow_test_nested.ipynb
│   └── requirements.lock
├── tips_and_tricks
│   └── jupyter_config
│       ├── jupyter_notebook_config.py
│       └── Dockerfile
└── .gitignore

/README.md:
--------------------------------------------------------------------------------
1 | # Repo to share MLOps files
2 | 
3 | ## Airflow_mlflow folder for MLOps.Inception course materials.
4 | Note that docker-compose-windows.yml does not mount the volumes into your local folders.
--------------------------------------------------------------------------------
/kubeflow/base_images/jupyter_lab_config.py:
--------------------------------------------------------------------------------
1 | # Configuration file for lab.
2 | 
3 | c = get_config() #noqa
4 | 
5 | c.FileContentsManager.delete_to_trash = False
6 | c.ResourceUseDisplay.track_cpu_percent = True
7 | 
--------------------------------------------------------------------------------
/airflow_mlflow/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM apache/airflow:2.9.3-python3.10
2 | 
3 | WORKDIR /app
4 | 
5 | COPY requirements.txt /app/requirements.txt
6 | 
7 | RUN pip install --upgrade pip && \
8 |     pip install --no-cache-dir -r requirements.txt && \
9 |     which airflow && airflow version
--------------------------------------------------------------------------------
/airflow_mlflow/requirements.txt:
--------------------------------------------------------------------------------
1 | apache-airflow==2.9.3
2 | apache-airflow-providers-amazon==8.27.0
3 | apache-airflow-providers-postgres==5.11.3
4 | mlflow==2.15.1
5 | flask<2.3
6 | click<9
7 | sqlalchemy<2
8 | boto3==1.34.90
9 | pandas==2.2.2
10 | psycopg2-binary==2.9.9
11 | scikit-learn==1.5.1
12 | 
--------------------------------------------------------------------------------
/tips_and_tricks/jupyter_config/jupyter_notebook_config.py:
--------------------------------------------------------------------------------
1 | # Disable token and password
2 | c.NotebookApp.token = ''
3 | c.NotebookApp.password = ''
4 | 
5 | # Set Jupyter working directory
6 | c.NotebookApp.notebook_dir = '/home/jovyan/work'
7 | 
8 | # Disable trash (deletes are permanent)
9 | c.FileContentsManager.delete_to_trash = False
10 | 
--------------------------------------------------------------------------------
/kubeflow/base_images/jupyter_server_config.py:
--------------------------------------------------------------------------------
1 | # Displayed memory limit - 16 GB
2 | c.ResourceUseDisplay.mem_limit = 17179869184
3 | # Highlight in red within the last 10% of memory
4 | c.ResourceUseDisplay.mem_warning_threshold=0.1
5 | 
6 | # Kill kernels that have been idle for the last 3 days
7 | c.MappingKernelManager.cull_idle_timeout = 60 * 60 * 24 * 3
8 | # Check kernels every hour
9 | c.MappingKernelManager.cull_interval = 60 * 60
--------------------------------------------------------------------------------
/tips_and_tricks/jupyter_config/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM jupyter/base-notebook:python-3.11
2 | 
3 | # Copy config file
4 | COPY jupyter_notebook_config.py /home/jovyan/.jupyter/jupyter_notebook_config.py
5 | 
6 | # Create the work directory and set permissions for it
7 | RUN mkdir -p /home/jovyan/work && \
8 |     chown -R jovyan:users /home/jovyan/.jupyter /home/jovyan/work
9 | 
10 | # Set the Docker working directory
11 | WORKDIR /home/jovyan/work
12 | 
13 | # Install JupyterLab (optional)
14 | RUN pip install --no-cache-dir jupyterlab
15 | 
16 | # Expose the port
17 | EXPOSE 8888
18 | 
19 | # Start notebook with our configs
20 | CMD ["jupyter", "notebook", "--no-browser"]
21 | 
--------------------------------------------------------------------------------
/kubeflow/base_images/Dockerfile_scipy:
--------------------------------------------------------------------------------
1 | FROM kubeflownotebookswg/jupyter-scipy:v1.7.0
2 | 
3 | # Update the package lists, install necessary dependencies, and clean the cache in one step
4 | USER root
5 | RUN apt-get update \
6 |     && apt-get install -y \
7 |     python3-pip \
8 |     dnsutils \
9 |     ffmpeg \
10 |     libsm6 \
11 |     libxext6 \
12 |     && apt-get clean \
13 |     && rm -rf /var/lib/apt/lists/*
14 | 
15 | # Use pre-defined jupyter configs
16 | COPY jupyter_lab_config.py /home/jovyan/.jupyter/jupyter_lab_config.py
17 | COPY jupyter_server_config.py /etc/jupyter/jupyter_server_config.py
18 | COPY jupyter_server_config.py /home/jovyan/.jupyter/jupyter_server_config.py
19 | 
20 | USER 1000
21 | # Install additional Python packages using pip
22 | RUN pip3 install jupyter-resource-usage \
23 |     psycopg2-binary==2.9.5 \
24 |     openpyxl \
25 |     boto3
26 | 
--------------------------------------------------------------------------------
/kubeflow/base_images/Dockerfile_pytorch_full:
--------------------------------------------------------------------------------
1 | FROM kubeflownotebookswg/jupyter-pytorch-full:v1.7.0
2 | 
3 | # Update the package lists, install necessary dependencies, and clean the cache in one step
4 | USER root
5 | RUN apt-get update \
6 |     && apt-get install -y \
7 |     python3-pip \
8 |     dnsutils \
9 |     ffmpeg \
10 |     libsm6 \
11 |     libxext6 \
12 |     && apt-get clean \
13 |     && rm -rf /var/lib/apt/lists/*
14 | 
15 | # Use pre-defined jupyter configs
16 | COPY jupyter_lab_config.py /home/jovyan/.jupyter/jupyter_lab_config.py
17 | COPY jupyter_server_config.py /etc/jupyter/jupyter_server_config.py
18 | COPY jupyter_server_config.py /home/jovyan/.jupyter/jupyter_server_config.py
19 | 
20 | USER 1000
21 | # Install additional Python packages using pip
22 | RUN pip3 install jupyter-resource-usage \
23 |     psycopg2-binary==2.9.5 \
24 |     openpyxl \
25 |     boto3
26 | 
--------------------------------------------------------------------------------
/kubeflow/base_images/Dockerfile_pytorch_cuda:
--------------------------------------------------------------------------------
1 | FROM kubeflownotebookswg/jupyter-pytorch-cuda-full:v1.7.0
2 | 
3 | # Update the package lists, install necessary dependencies, and clean the cache in one step
4 | USER root
5 | RUN apt-get update \
6 |     && apt-get install -y \
7 |     python3-pip \
8 |     dnsutils \
9 |     ffmpeg \
10 |     libsm6 \
11 |     libxext6 \
12 |     && apt-get clean \
13 |     && rm -rf /var/lib/apt/lists/*
14 | ENV JUPYTER_ALLOW_INSECURE_WRITES=1
15 | 
16 | 
17 | # Use pre-defined jupyter configs
18 | COPY jupyter_lab_config.py /home/jovyan/.jupyter/jupyter_lab_config.py
19 | COPY jupyter_server_config.py /etc/jupyter/jupyter_server_config.py
20 | COPY jupyter_server_config.py /home/jovyan/.jupyter/jupyter_server_config.py
21 | 
22 | USER 1000
23 | # Install additional Python packages using pip
24 | RUN pip3 install 
jupyter-resource-usage\ 25 | psycopg2-binary==2.9.5 \ 26 | openpyxl \ 27 | boto3 28 | 29 | ENV JUPYTER_ALLOW_INSECURE_WRITES=1 30 | -------------------------------------------------------------------------------- /airflow_mlflow/dags/example.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from airflow.models import DAG, Variable 4 | from airflow.operators.python_operator import PythonOperator 5 | from airflow.utils.dates import days_ago 6 | 7 | DAG_ID = "mlops" 8 | 9 | logging.basicConfig(filename="my_first_dag.log", level=logging.INFO) 10 | _LOG = logging.getLogger() 11 | _LOG.addHandler(logging.StreamHandler()) 12 | 13 | 14 | args = { 15 | "owner": "Elizaveta Gavrilova", 16 | "email": ["liz.vladi@gmail.com"], 17 | } 18 | 19 | dag = DAG( 20 | dag_id=DAG_ID, 21 | default_args=args, 22 | max_active_runs=1, 23 | concurrency=3, 24 | schedule_interval="0 4 * * *", 25 | start_date=days_ago(1), 26 | tags=["mlops"], 27 | ) 28 | 29 | def download_data() -> None: 30 | import pandas as pd 31 | 32 | from sklearn import datasets 33 | 34 | wine_data = datasets.load_wine() 35 | 36 | X = pd.DataFrame(wine_data['data'], columns = wine_data['feature_names']) 37 | y = wine_data['target'] 38 | 39 | X.to_csv('features.csv') 40 | 41 | task_download_data = PythonOperator(task_id="task_download_data", 42 | python_callable=download_data, 43 | dag=dag) 44 | 45 | task_download_data -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # DS 2 | **/*.DS_Store 3 | *.ipynb_checkpoints 4 | *.ipynb_checkpoints/* 5 | **/*.ipython/* 6 | **/*.jupyter/* 7 | **/*.local/* 8 | **/scikit_learn_data/* 9 | **/logs/* 10 | # Git 11 | .git 12 | .gitignore 13 | .gitattributes 14 | # Docker 15 | .dockerignore 16 | # Byte-compiled / optimized / DLL files 17 | **/__pycache__/ 18 | **/*.py[cod] 19 | # C extensions 20 | *.so 21 | # Distribution / packaging 22 | .Python 23 | env/ 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .coverage 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | # Translations 53 | *.mo 54 | *.pot 55 | # Django stuff: 56 | *.log 57 | # Sphinx documentation 58 | docs/_build/ 59 | # PyBuilder 60 | target/ 61 | # Virtual environment 62 | .env 63 | .venv/ 64 | venv/ 65 | # PyCharm 66 | .idea 67 | # Python mode for VIM 68 | .ropeproject 69 | **/.ropeproject 70 | # Vim swap files 71 | **/*.swp 72 | # VS Code 73 | .vscode/ 74 | -------------------------------------------------------------------------------- /airflow_mlflow/docker-compose-windows.yml: -------------------------------------------------------------------------------- 1 | services: 2 | postgres: 3 | image: postgres:13 4 | environment: 5 | POSTGRES_USER: airflow 6 | POSTGRES_PASSWORD: airflow 7 | POSTGRES_DB: airflow 8 | volumes: 9 | - postgres-db-volume:/var/lib/postgresql/data 10 | 11 | airflow-webserver: 12 | build: 13 | context: . 
14 | dockerfile: Dockerfile 15 | depends_on: 16 | - postgres 17 | environment: 18 | AIRFLOW__CORE__EXECUTOR: LocalExecutor 19 | AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow 20 | AIRFLOW__CORE__LOAD_EXAMPLES: 'false' 21 | volumes: 22 | - ./dags:/opt/airflow/dags 23 | - ./plugins:/opt/airflow/plugins 24 | - airflow_logs:/opt/airflow/logs 25 | ports: 26 | - "8080:8080" 27 | command: webserver 28 | 29 | airflow-scheduler: 30 | build: 31 | context: . 32 | dockerfile: Dockerfile 33 | depends_on: 34 | - postgres 35 | environment: 36 | AIRFLOW__CORE__EXECUTOR: LocalExecutor 37 | AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow 38 | AIRFLOW__CORE__LOAD_EXAMPLES: 'false' 39 | volumes: 40 | - ./dags:/opt/airflow/dags 41 | - ./plugins:/opt/airflow/plugins 42 | - airflow_logs:/opt/airflow/logs 43 | command: scheduler 44 | 45 | airflow-init: 46 | build: 47 | context: . 48 | dockerfile: Dockerfile 49 | depends_on: 50 | - postgres 51 | entrypoint: > 52 | bash -c "airflow db migrate && 53 | airflow users create --username admin --firstname admin --lastname admin --role Admin --email admin@example.com --password admin" 54 | environment: 55 | AIRFLOW__CORE__EXECUTOR: LocalExecutor 56 | AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow 57 | PATH: /home/airflow/.local/bin:/opt/airflow/bin:/usr/local/bin:/usr/bin:/bin 58 | 59 | jupyter: 60 | image: jupyter/datascience-notebook:notebook-6.5.4 61 | ports: 62 | - "8888:8888" 63 | volumes: 64 | - jupyter_data:/home/jovyan/ 65 | - dags_shared:/home/jovyan/dags 66 | environment: 67 | MLFLOW_TRACKING_URI: http://mlflow-service:5000 68 | AWS_ENDPOINT_URL: https://storage.yandexcloud.net 69 | AWS_ACCESS_KEY_ID: your_key 70 | AWS_SECRET_ACCESS_KEY: your_secret 71 | AWS_DEFAULT_REGION: ru-central1 72 | JUPYTER_ENABLE_LAB: "yes" 73 | depends_on: 74 | - mlflow-service 75 | command: 76 | - bash 77 | - -c 78 | - | 79 | pip install mlflow==2.15.1 --quiet 80 | pip install boto3 --quiet 81 | /usr/local/bin/start-notebook.sh --NotebookApp.token='' 82 | 83 | mlflow-service: 84 | image: ghcr.io/mlflow/mlflow:v2.15.1 85 | environment: 86 | MLFLOW_S3_ENDPOINT_URL: https://storage.yandexcloud.net 87 | AWS_ACCESS_KEY_ID: your_key 88 | AWS_SECRET_ACCESS_KEY: your_secret 89 | AWS_DEFAULT_REGION: ru-central1 90 | command: 91 | - bash 92 | - -c 93 | - | 94 | pip install boto3 --quiet 95 | mlflow server --host 0.0.0.0 \ 96 | --default-artifact-root s3://yourbucket/mlflow --serve-artifacts 97 | ports: 98 | - "5050:5000" 99 | volumes: 100 | - mlflow-data:/mlflow 101 | 102 | volumes: 103 | postgres-db-volume: 104 | airflow_logs: 105 | jupyter_data: 106 | dags_shared: 107 | mlflow-data: 108 | -------------------------------------------------------------------------------- /airflow_mlflow/dags/example_3.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | 4 | from airflow.models import DAG, Variable 5 | from airflow.operators.python_operator import PythonOperator 6 | from airflow.providers.amazon.aws.hooks.s3 import S3Hook 7 | from airflow.utils.dates import days_ago 8 | 9 | DAG_ID = "train_dag_with_mlflow" 10 | 11 | 12 | logging.basicConfig(filename="my_third_dag.log", level=logging.INFO) 13 | _LOG = logging.getLogger() 14 | _LOG.addHandler(logging.StreamHandler()) 15 | 16 | FEATURES = [ 17 | "MedInc", "HouseAge", "AveRooms", "AveBedrms", "Population", "AveOccup", 18 | "Latitude", "Longitude" 19 | ] 20 | 
TARGET = "MedHouseVal" 21 | 22 | args = { 23 | "owner": "Elizaveta Gavrilova", 24 | } 25 | 26 | dag = DAG( 27 | dag_id=DAG_ID, 28 | default_args=args, 29 | max_active_runs=1, 30 | concurrency=3, 31 | schedule_interval="0 4 * * *", 32 | start_date=days_ago(1), 33 | tags=["mlops"], 34 | ) 35 | 36 | def configure_mlflow(): 37 | for key in [ 38 | "MLFLOW_TRACKING_URI", 39 | "AWS_ENDPOINT_URL", 40 | "AWS_ACCESS_KEY_ID", 41 | "AWS_SECRET_ACCESS_KEY", 42 | "AWS_DEFAULT_REGION", 43 | ]: 44 | os.environ[key] = Variable.get(key) 45 | 46 | def download_data() -> None: 47 | import io 48 | import pandas as pd 49 | 50 | from sklearn import datasets 51 | 52 | # Получим датасет California housing 53 | housing = datasets.fetch_california_housing(as_frame=True) 54 | # Объединим фичи и таргет в один np.array 55 | data = pd.concat([housing["data"], pd.DataFrame(housing["target"])], axis=1) 56 | 57 | # Сохраняем данные в буффер 58 | filebuffer = io.BytesIO() 59 | data.to_pickle(filebuffer) 60 | filebuffer.seek(0) 61 | 62 | # Сохранить файл в формате pkl на S3 63 | BUCKET = Variable.get("BUCKET") 64 | s3_hook = S3Hook("s3_connection") 65 | s3_hook.load_file_obj( 66 | file_obj=filebuffer, 67 | key="2025/datasets/california_housing.pkl", 68 | bucket_name=BUCKET, 69 | replace=True, 70 | ) 71 | _LOG.info("Data downloaded.") 72 | 73 | 74 | def train_model() -> None: 75 | import mlflow 76 | import pandas as pd 77 | 78 | from mlflow.models import infer_signature 79 | from sklearn.model_selection import train_test_split 80 | from sklearn.preprocessing import StandardScaler 81 | from sklearn.linear_model import LinearRegression 82 | 83 | configure_mlflow() 84 | 85 | # Использовать созданный ранее S3 connection 86 | s3_hook = S3Hook("s3_connection") 87 | BUCKET = Variable.get("BUCKET") 88 | file = s3_hook.download_file(key=f"2025/datasets/california_housing.pkl", bucket_name=BUCKET) 89 | data = pd.read_pickle(file) 90 | 91 | # Сделать препроцессинг 92 | # Разделить на фичи и таргет 93 | X, y = data[FEATURES], data[TARGET] 94 | 95 | # Разделить данные на обучение и тест 96 | X_train, X_test, y_train, y_test = train_test_split( 97 | X, y, test_size=0.2, random_state=42 98 | ) 99 | 100 | # Обучить стандартизатор на train 101 | scaler = StandardScaler() 102 | X_train_fitted = scaler.fit_transform(X_train) 103 | X_test_fitted = scaler.transform(X_test) 104 | 105 | # Обучить стандартизатор на train 106 | scaler = StandardScaler() 107 | X_train_fitted = scaler.fit_transform(X_train) 108 | X_test_fitted = scaler.transform(X_test) 109 | 110 | # Обучить модель 111 | mlflow.set_experiment(experiment_name="MedHouseExp") 112 | with mlflow.start_run(run_name="my_third_run", experiment_id = "135293466297753618"): 113 | # Обучить модель 114 | model = LinearRegression() 115 | model.fit(X_train_fitted, y_train) 116 | y_pred = model.predict(X_test_fitted) 117 | 118 | # Получить описание данных 119 | signature = infer_signature(X_test_fitted, y_pred) 120 | # Сохранить модель в артифактори 121 | model_info = mlflow.sklearn.log_model(model, "MedHouseExp_airflow", signature=signature) 122 | # Сохранить метрики модели 123 | mlflow.evaluate( 124 | model_info.model_uri, 125 | data=X_test_fitted, 126 | targets=y_test.values, 127 | model_type="regressor", 128 | evaluators=["default"], 129 | ) 130 | 131 | 132 | 133 | task_download_data = PythonOperator(task_id="task_download_data", 134 | python_callable=download_data, 135 | dag=dag) 136 | 137 | task_train_model = PythonOperator(task_id="task_train_model", 138 | python_callable=train_model, 139 | 
dag=dag, provide_context=True) 140 | 141 | task_download_data >> task_train_model -------------------------------------------------------------------------------- /airflow_mlflow/dags/example_2.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from typing import Dict, Any 4 | 5 | from airflow.models import DAG, Variable 6 | from airflow.operators.python_operator import PythonOperator 7 | from airflow.providers.amazon.aws.hooks.s3 import S3Hook 8 | from airflow.utils.dates import days_ago 9 | 10 | DAG_ID = "train_dag" 11 | 12 | 13 | logging.basicConfig(filename="my_second_dag.log", level=logging.INFO) 14 | _LOG = logging.getLogger() 15 | _LOG.addHandler(logging.StreamHandler()) 16 | 17 | FEATURES = [ 18 | "MedInc", "HouseAge", "AveRooms", "AveBedrms", "Population", "AveOccup", 19 | "Latitude", "Longitude" 20 | ] 21 | TARGET = "MedHouseVal" 22 | 23 | args = { 24 | "owner": "Elizaveta Gavrilova", 25 | } 26 | 27 | dag = DAG( 28 | dag_id=DAG_ID, 29 | default_args=args, 30 | max_active_runs=1, 31 | concurrency=3, 32 | schedule_interval="0 4 * * *", 33 | start_date=days_ago(1), 34 | tags=["mlops"], 35 | ) 36 | 37 | def download_data() -> None: 38 | import io 39 | import pandas as pd 40 | 41 | from sklearn import datasets 42 | 43 | # Получим датасет California housing 44 | housing = datasets.fetch_california_housing(as_frame=True) 45 | # Объединим фичи и таргет в один np.array 46 | data = pd.concat([housing["data"], pd.DataFrame(housing["target"])], axis=1) 47 | 48 | # Сохраняем данные в буффер 49 | filebuffer = io.BytesIO() 50 | data.to_pickle(filebuffer) 51 | filebuffer.seek(0) 52 | 53 | # Сохранить файл в формате pkl на S3 54 | BUCKET = Variable.get("BUCKET") 55 | s3_hook = S3Hook("s3_connection") 56 | s3_hook.load_file_obj( 57 | file_obj=filebuffer, 58 | key="2025/datasets/california_housing.pkl", 59 | bucket_name=BUCKET, 60 | replace=True, 61 | ) 62 | _LOG.info("Data downloaded.") 63 | 64 | 65 | def train_model() -> Dict[str, Any]: 66 | import pandas as pd 67 | 68 | from sklearn.model_selection import train_test_split 69 | from sklearn.preprocessing import StandardScaler 70 | from sklearn.linear_model import LinearRegression 71 | from sklearn.metrics import mean_squared_error, median_absolute_error, r2_score 72 | 73 | # Использовать созданный ранее S3 connection 74 | s3_hook = S3Hook("s3_connection") 75 | BUCKET = Variable.get("BUCKET") 76 | file = s3_hook.download_file(key=f"2025/datasets/california_housing.pkl", bucket_name=BUCKET) 77 | data = pd.read_pickle(file) 78 | 79 | # Сделать препроцессинг 80 | # Разделить на фичи и таргет 81 | X, y = data[FEATURES], data[TARGET] 82 | 83 | # Разделить данные на обучение и тест 84 | X_train, X_test, y_train, y_test = train_test_split( 85 | X, y, test_size=0.2, random_state=42 86 | ) 87 | 88 | # Обучить стандартизатор на train 89 | scaler = StandardScaler() 90 | X_train_fitted = scaler.fit_transform(X_train) 91 | X_test_fitted = scaler.transform(X_test) 92 | 93 | # Обучить стандартизатор на train 94 | scaler = StandardScaler() 95 | X_train_fitted = scaler.fit_transform(X_train) 96 | X_test_fitted = scaler.transform(X_test) 97 | 98 | # Обучить модель 99 | model = LinearRegression() 100 | model.fit(X_train_fitted, y_train) 101 | y_pred = model.predict(X_test_fitted) 102 | 103 | metrics = {} 104 | metrics["r_squared"] = r2_score(y_test, y_pred) 105 | metrics["RMSE"] = mean_squared_error(y_test, y_pred)**0.5 106 | metrics["MAE"] = median_absolute_error(y_test, y_pred) 107 | 108 | return metrics 
109 | 110 | 111 | def save_results(**kwargs) -> None: 112 | import io 113 | import json 114 | 115 | ti = kwargs['ti'] 116 | metrics = ti.xcom_pull(task_ids='train_model') 117 | 118 | filebuffer = io.BytesIO() 119 | filebuffer.write(json.dumps(metrics).encode()) 120 | filebuffer.seek(0) 121 | 122 | BUCKET = Variable.get("BUCKET") 123 | s3_hook = S3Hook("s3_connection") 124 | s3_hook.load_file_obj( 125 | file_obj=filebuffer, 126 | key=f"2025/linearregression/metrics/metrics.json", 127 | bucket_name=BUCKET, 128 | replace=True, 129 | ) 130 | 131 | 132 | task_download_data = PythonOperator(task_id="task_download_data", 133 | python_callable=download_data, 134 | dag=dag) 135 | 136 | task_train_model = PythonOperator(task_id="task_train_model", 137 | python_callable=train_model, 138 | dag=dag, provide_context=True) 139 | 140 | task_save_results = PythonOperator(task_id="task_save_results", 141 | python_callable=save_results, 142 | dag=dag, provide_context=True) 143 | 144 | task_download_data >> task_train_model >> task_save_results -------------------------------------------------------------------------------- /airflow_mlflow/dags/example_4.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from typing import Dict, Any 4 | 5 | from airflow.models import DAG, Variable 6 | from airflow.operators.python_operator import PythonOperator 7 | from airflow.providers.amazon.aws.hooks.s3 import S3Hook 8 | from airflow.utils.dates import days_ago 9 | 10 | from sklearn.ensemble import RandomForestRegressor, HistGradientBoostingRegressor 11 | from sklearn.linear_model import LinearRegression 12 | 13 | 14 | DAG_ID = "train_multi_models" 15 | 16 | models = dict(zip(["RandomForest", "LinearRegression", "HistGB"], 17 | [RandomForestRegressor(), LinearRegression(), HistGradientBoostingRegressor()])) 18 | 19 | 20 | logging.basicConfig(filename="my_forth_dag.log", level=logging.INFO) 21 | _LOG = logging.getLogger() 22 | _LOG.addHandler(logging.StreamHandler()) 23 | 24 | FEATURES = [ 25 | "MedInc", "HouseAge", "AveRooms", "AveBedrms", "Population", "AveOccup", 26 | "Latitude", "Longitude" 27 | ] 28 | TARGET = "MedHouseVal" 29 | 30 | args = { 31 | "owner": "Elizaveta Gavrilova", 32 | } 33 | 34 | dag = DAG( 35 | dag_id=DAG_ID, 36 | default_args=args, 37 | max_active_runs=1, 38 | concurrency=3, 39 | schedule_interval="0 4 * * *", 40 | start_date=days_ago(1), 41 | tags=["mlops"], 42 | ) 43 | 44 | def download_data() -> None: 45 | import io 46 | import pandas as pd 47 | 48 | from sklearn import datasets 49 | 50 | # Получим датасет California housing 51 | housing = datasets.fetch_california_housing(as_frame=True) 52 | # Объединим фичи и таргет в один np.array 53 | data = pd.concat([housing["data"], pd.DataFrame(housing["target"])], axis=1) 54 | 55 | # Сохраняем данные в буффер 56 | filebuffer = io.BytesIO() 57 | data.to_pickle(filebuffer) 58 | filebuffer.seek(0) 59 | 60 | # Сохранить файл в формате pkl на S3 61 | BUCKET = Variable.get("BUCKET") 62 | s3_hook = S3Hook("s3_connection") 63 | s3_hook.load_file_obj( 64 | file_obj=filebuffer, 65 | key="2025/datasets/california_housing.pkl", 66 | bucket_name=BUCKET, 67 | replace=True, 68 | ) 69 | _LOG.info("Data downloaded.") 70 | 71 | 72 | def train_model(**kwargs) -> Dict[str, Any]: 73 | import pandas as pd 74 | 75 | from sklearn.model_selection import train_test_split 76 | from sklearn.preprocessing import StandardScaler 77 | from sklearn.metrics import mean_squared_error, median_absolute_error, r2_score 78 | 79 | # 
Чтение параметра 80 | ti = kwargs["ti"] 81 | model_name = kwargs["model_name"] 82 | 83 | # Использовать созданный ранее S3 connection 84 | s3_hook = S3Hook("s3_connection") 85 | BUCKET = Variable.get("BUCKET") 86 | file = s3_hook.download_file(key=f"2025/datasets/california_housing.pkl", bucket_name=BUCKET) 87 | data = pd.read_pickle(file) 88 | 89 | # Сделать препроцессинг 90 | # Разделить на фичи и таргет 91 | X, y = data[FEATURES], data[TARGET] 92 | 93 | # Разделить данные на обучение и тест 94 | X_train, X_test, y_train, y_test = train_test_split( 95 | X, y, test_size=0.2, random_state=42 96 | ) 97 | 98 | # Обучить стандартизатор на train 99 | scaler = StandardScaler() 100 | X_train_fitted = scaler.fit_transform(X_train) 101 | X_test_fitted = scaler.transform(X_test) 102 | 103 | # Обучить стандартизатор на train 104 | scaler = StandardScaler() 105 | X_train_fitted = scaler.fit_transform(X_train) 106 | X_test_fitted = scaler.transform(X_test) 107 | 108 | # Обучить модель 109 | model = models[model_name] 110 | model.fit(X_train_fitted, y_train) 111 | y_pred = model.predict(X_test_fitted) 112 | 113 | metrics = {} 114 | metrics[f"{model_name}_r_squared"] = r2_score(y_test, y_pred) 115 | metrics[f"{model_name}_RMSE"] = mean_squared_error(y_test, y_pred)**0.5 116 | metrics[f"{model_name}_MAE"] = median_absolute_error(y_test, y_pred) 117 | 118 | return metrics 119 | 120 | 121 | def save_results(**kwargs) -> None: 122 | import io 123 | import json 124 | 125 | ti = kwargs["ti"] 126 | models_metrics = ti.xcom_pull( 127 | task_ids=[f"task_train_model_{model_name}" for model_name in models.keys()] 128 | ) 129 | result = {} 130 | for model_metrics in models_metrics: 131 | result.update(model_metrics) 132 | 133 | filebuffer = io.BytesIO() 134 | filebuffer.write(json.dumps(result).encode()) 135 | filebuffer.seek(0) 136 | 137 | BUCKET = Variable.get("BUCKET") 138 | s3_hook = S3Hook("s3_connection") 139 | s3_hook.load_file_obj( 140 | file_obj=filebuffer, 141 | key=f"2025/multi_model/metrics/metrics.json", 142 | bucket_name=BUCKET, 143 | replace=True, 144 | ) 145 | 146 | 147 | task_download_data = PythonOperator(task_id="task_download_data", 148 | python_callable=download_data, 149 | dag=dag) 150 | training_model_tasks = [PythonOperator(task_id=f"task_train_model_{model_name}", 151 | python_callable=train_model, 152 | dag=dag, provide_context=True, op_kwargs={"model_name": model_name}) for model_name in models.keys()] 153 | 154 | task_save_results = PythonOperator(task_id="task_save_results", 155 | python_callable=save_results, 156 | dag=dag, provide_context=True) 157 | 158 | task_download_data >> training_model_tasks >> task_save_results -------------------------------------------------------------------------------- /airflow_mlflow/docker-compose.yml: -------------------------------------------------------------------------------- 1 | x-airflow-common: 2 | &airflow-common 3 | build: . 
4 | environment: 5 | &airflow-common-env 6 | AIRFLOW__CORE__EXECUTOR: LocalExecutor 7 | AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow 8 | AIRFLOW__CORE__FERNET_KEY: '' 9 | AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true' 10 | AIRFLOW__CORE__LOAD_EXAMPLES: 'false' 11 | AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session' 12 | AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true' 13 | _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-} 14 | volumes: 15 | - ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags 16 | - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs 17 | - ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config 18 | - ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins 19 | user: "${AIRFLOW_UID:-50000}:0" 20 | depends_on: 21 | &airflow-common-depends-on 22 | postgres: 23 | condition: service_healthy 24 | 25 | services: 26 | postgres: 27 | image: postgres:13 28 | environment: 29 | POSTGRES_USER: airflow 30 | POSTGRES_PASSWORD: airflow 31 | POSTGRES_DB: airflow 32 | volumes: 33 | - postgres-db-volume:/var/lib/postgresql/data 34 | healthcheck: 35 | test: ["CMD", "pg_isready", "-U", "airflow"] 36 | interval: 10s 37 | retries: 5 38 | start_period: 5s 39 | restart: always 40 | 41 | airflow-webserver: 42 | <<: *airflow-common 43 | command: webserver 44 | ports: 45 | - "8080:8080" 46 | healthcheck: 47 | test: ["CMD", "curl", "--fail", "http://localhost:8080/health"] 48 | interval: 30s 49 | timeout: 10s 50 | retries: 5 51 | start_period: 30s 52 | restart: always 53 | depends_on: 54 | <<: *airflow-common-depends-on 55 | airflow-init: 56 | condition: service_completed_successfully 57 | 58 | 59 | airflow-scheduler: 60 | <<: *airflow-common 61 | command: scheduler 62 | healthcheck: 63 | test: ["CMD", "curl", "--fail", "http://localhost:8974/health"] 64 | interval: 30s 65 | timeout: 10s 66 | retries: 5 67 | start_period: 30s 68 | restart: always 69 | depends_on: 70 | <<: *airflow-common-depends-on 71 | airflow-init: 72 | condition: service_completed_successfully 73 | 74 | 75 | airflow-init: 76 | <<: *airflow-common 77 | entrypoint: /bin/bash 78 | # yamllint disable rule:line-length 79 | command: 80 | - -c 81 | - | 82 | if [[ -z "${AIRFLOW_UID}" ]]; then 83 | echo 84 | echo -e "\033[1;33mWARNING!!!: AIRFLOW_UID not set!\e[0m" 85 | echo "If you are on Linux, you SHOULD follow the instructions below to set " 86 | echo "AIRFLOW_UID environment variable, otherwise files will be owned by root." 87 | echo "For other operating systems you can get rid of the warning with manually created .env file:" 88 | echo " See: https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#setting-the-right-airflow-user" 89 | echo 90 | fi 91 | one_meg=1048576 92 | mem_available=$$(($$(getconf _PHYS_PAGES) * $$(getconf PAGE_SIZE) / one_meg)) 93 | cpus_available=$$(grep -cE 'cpu[0-9]+' /proc/stat) 94 | disk_available=$$(df / | tail -1 | awk '{print $$4}') 95 | warning_resources="false" 96 | if (( mem_available < 4000 )) ; then 97 | echo 98 | echo -e "\033[1;33mWARNING!!!: Not enough memory available for Docker.\e[0m" 99 | echo "At least 4GB of memory required. You have $$(numfmt --to iec $$((mem_available * one_meg)))" 100 | echo 101 | warning_resources="true" 102 | fi 103 | if (( cpus_available < 2 )); then 104 | echo 105 | echo -e "\033[1;33mWARNING!!!: Not enough CPUS available for Docker.\e[0m" 106 | echo "At least 2 CPUs recommended. 
You have $${cpus_available}" 107 | echo 108 | warning_resources="true" 109 | fi 110 | if (( disk_available < one_meg * 10 )); then 111 | echo 112 | echo -e "\033[1;33mWARNING!!!: Not enough Disk space available for Docker.\e[0m" 113 | echo "At least 10 GBs recommended. You have $$(numfmt --to iec $$((disk_available * 1024 )))" 114 | echo 115 | warning_resources="true" 116 | fi 117 | if [[ $${warning_resources} == "true" ]]; then 118 | echo 119 | echo -e "\033[1;33mWARNING!!!: You have not enough resources to run Airflow (see above)!\e[0m" 120 | echo "Please follow the instructions to increase amount of resources available:" 121 | echo " https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#before-you-begin" 122 | echo 123 | fi 124 | mkdir -p /sources/logs /sources/dags /sources/plugins 125 | chown -R "${AIRFLOW_UID}:0" /sources/{logs,dags,plugins} 126 | exec /entrypoint airflow version 127 | # yamllint enable rule:line-length 128 | environment: 129 | <<: *airflow-common-env 130 | _AIRFLOW_DB_MIGRATE: 'true' 131 | _AIRFLOW_WWW_USER_CREATE: 'true' 132 | _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow} 133 | _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow} 134 | _PIP_ADDITIONAL_REQUIREMENTS: '' 135 | user: "0:0" 136 | volumes: 137 | - ${AIRFLOW_PROJ_DIR:-.}:/sources 138 | 139 | mlflow-service: 140 | image: ghcr.io/mlflow/mlflow:v2.15.1 141 | environment: 142 | MLFLOW_S3_ENDPOINT_URL: https://storage.yandexcloud.net 143 | AWS_ACCESS_KEY_ID: # YOUR KEY HERE 144 | AWS_SECRET_ACCESS_KEY:# YOUR KEY HERE 145 | AWS_DEFAULT_REGION: ru-central1 146 | command: 147 | - bash 148 | - -c 149 | - | 150 | pip install boto3 --quiet 151 | mlflow server --host 0.0.0.0 \ 152 | --default-artifact-root s3://YOURBUCKET/mlflow --serve-artifacts 153 | expose: 154 | - "5000" 155 | ports: 156 | - "5050:5000" 157 | volumes: 158 | - mlflow-data:/mlflow 159 | 160 | jupyter: 161 | image: jupyter/datascience-notebook:notebook-6.5.4 162 | expose: 163 | - "8888" 164 | ports: 165 | - "8888:8888" 166 | volumes: 167 | - ./jupyter-data:/home/jovyan/ 168 | - ./dags:/home/jovyan/dags 169 | environment: 170 | MLFLOW_TRACKING_URI: http://mlflow-service:5000 171 | AWS_ENDPOINT_URL: https://storage.yandexcloud.net 172 | AWS_ACCESS_KEY_ID: # YOUR KEY HERE 173 | AWS_SECRET_ACCESS_KEY: # YOUR KEY HERE 174 | AWS_DEFAULT_REGION: ru-central1 175 | depends_on: 176 | - mlflow-service 177 | command: 178 | - bash 179 | - -c 180 | - | 181 | pip install mlflow==2.15.1 --quiet 182 | pip install boto3 --quiet 183 | start-notebook.sh --NotebookApp.token='' 184 | 185 | volumes: 186 | postgres-db-volume: 187 | mlflow-data: 188 | -------------------------------------------------------------------------------- /airflow_mlflow/jupyter-data/MLFlow_test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "id": "362f7a85-5027-4f89-b312-356bc7186b52", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import os\n", 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "\n", 14 | "import mlflow\n", 15 | "from mlflow.models import infer_signature\n", 16 | "\n", 17 | "from datetime import datetime, timedelta\n", 18 | "from sklearn.linear_model import LinearRegression\n", 19 | "from sklearn.model_selection import train_test_split\n", 20 | "from sklearn.metrics import mean_squared_error, median_absolute_error, r2_score\n", 21 | "from sklearn.preprocessing import 
StandardScaler\n", 22 | "from sklearn import datasets" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "id": "4a28d796-5f75-4378-a7e2-b5291bf6e784", 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/plain": [ 34 | "'http://mlflow-service:5000'" 35 | ] 36 | }, 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "output_type": "execute_result" 40 | } 41 | ], 42 | "source": [ 43 | "os.getenv(\"MLFLOW_TRACKING_URI\")" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 5, 49 | "id": "5223682b-1897-4862-954e-65e8316f0703", 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "# Получим датасет California housing\n", 54 | "housing = datasets.fetch_california_housing(as_frame=True)\n", 55 | "# Объединим фичи и таргет в один np.array\n", 56 | "data = pd.concat([housing[\"data\"], pd.DataFrame(housing[\"target\"])], axis=1)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 7, 62 | "id": "d3e5875c-1fec-4176-8593-bac16f85f080", 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "FEATURES = [\n", 67 | " \"MedInc\", \"HouseAge\", \"AveRooms\", \"AveBedrms\", \"Population\", \"AveOccup\",\n", 68 | " \"Latitude\", \"Longitude\"\n", 69 | "]\n", 70 | "TARGET = \"MedHouseVal\"" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 8, 76 | "id": "4e603b7e-a7b3-4579-8e43-44b0c18947c5", 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "# Сделать препроцессинг\n", 81 | "# Разделить на фичи и таргет\n", 82 | "X, y = data[FEATURES], data[TARGET]\n", 83 | "\n", 84 | "# Разделить данные на обучение и тест\n", 85 | "X_train, X_test, y_train, y_test = train_test_split(\n", 86 | " X, y, test_size=0.2, random_state=42\n", 87 | ")\n", 88 | "\n", 89 | "# Обучить стандартизатор на train\n", 90 | "scaler = StandardScaler()\n", 91 | "X_train_fitted = scaler.fit_transform(X_train)\n", 92 | "X_test_fitted = scaler.transform(X_test)\n", 93 | "\n", 94 | "# Обучить стандартизатор на train\n", 95 | "scaler = StandardScaler()\n", 96 | "X_train_fitted = scaler.fit_transform(X_train)\n", 97 | "X_test_fitted = scaler.transform(X_test)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 12, 103 | "id": "23756f17-ea6e-4b8c-9f59-1375bc04462b", 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "name": "stderr", 108 | "output_type": "stream", 109 | "text": [ 110 | "2025/04/16 10:41:37 INFO mlflow.tracking.fluent: Experiment with name '135293466297753618' does not exist. 
Creating a new experiment.\n" 111 | ] 112 | }, 113 | { 114 | "data": { 115 | "text/plain": [ 116 | "" 117 | ] 118 | }, 119 | "execution_count": 12, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "name = \"MedHouseExp\"\n", 126 | "experiment_id = mlflow.create_experiment(name)\n", 127 | "mlflow.set_experiment(experiment_id)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 13, 133 | "id": "0c02bb9a-ae75-4a58-ab2c-ddb7ade9ecf4", 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "data": { 138 | "application/vnd.jupyter.widget-view+json": { 139 | "model_id": "260cab37811c47cf9d7bb2cb835470de", 140 | "version_major": 2, 141 | "version_minor": 0 142 | }, 143 | "text/plain": [ 144 | "Downloading artifacts: 0%| | 0/5 [00:00" 118 | ] 119 | }, 120 | "execution_count": 6, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "name = \"MedHouseExp_2\"\n", 127 | "experiment_id = mlflow.create_experiment(name)\n", 128 | "mlflow.set_experiment(experiment_id)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 8, 134 | "id": "69b86ab6-640a-4ee2-a5e9-9183c89be9c4", 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "models = dict(zip([\"RandomForest\", \"LinearRegression\", \"HistGB\"], \n", 139 | " [RandomForestRegressor(), LinearRegression(), HistGradientBoostingRegressor()]))" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 9, 145 | "id": "35169ee2-0848-4c28-a516-4c2d2ae16f9e", 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "def train_model(model, name, X_train, X_test, y_train, y_test):\n", 150 | "\n", 151 | " # Обучить модель\n", 152 | " model.fit(X_train, y_train)\n", 153 | "\n", 154 | " # Сделать predict\n", 155 | " prediction = model.predict(X_test)\n", 156 | "\n", 157 | " # Получить описание данных\n", 158 | " signature = infer_signature(X_test, prediction)\n", 159 | " # Сохранить модель в артифактори\n", 160 | " model_info = mlflow.sklearn.log_model(model, name, signature=signature)\n", 161 | " # Сохранить метрики модели\n", 162 | " mlflow.evaluate(\n", 163 | " model_info.model_uri,\n", 164 | " data=X_test,\n", 165 | " targets=y_test.values,\n", 166 | " model_type=\"regressor\",\n", 167 | " evaluators=[\"default\"],\n", 168 | " )" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 13, 174 | "id": "d91503ba-162a-430d-a3d5-5b310aac6577", 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "name": "stderr", 179 | "output_type": "stream", 180 | "text": [ 181 | "2025/04/16 12:27:13 WARNING mlflow.utils.environment: Encountered an unexpected error while inferring pip requirements (model URI: /tmp/tmprj4kebrt/model/model.pkl, flavor: sklearn). Fall back to return ['scikit-learn==1.3.0', 'cloudpickle==2.2.1']. Set logging level to DEBUG to see the full traceback. \n", 182 | "/opt/conda/lib/python3.11/site-packages/_distutils_hack/__init__.py:18: UserWarning: Distutils was imported before Setuptools, but importing Setuptools also replaces the `distutils` module in `sys.modules`. This may lead to undesirable behaviors or errors. To avoid these issues, avoid using distutils directly, ensure that setuptools is installed in the traditional way (e.g. 
not an editable install), and/or make sure that setuptools is always imported before distutils.\n", 183 | " warnings.warn(\n", 184 | "/opt/conda/lib/python3.11/site-packages/_distutils_hack/__init__.py:33: UserWarning: Setuptools is replacing distutils.\n", 185 | " warnings.warn(\"Setuptools is replacing distutils.\")\n" 186 | ] 187 | }, 188 | { 189 | "data": { 190 | "application/vnd.jupyter.widget-view+json": { 191 | "model_id": "a0fe3df63d0d4113917880f3286b46e3", 192 | "version_major": 2, 193 | "version_minor": 0 194 | }, 195 | "text/plain": [ 196 | "Downloading artifacts: 0%| | 0/5 [00:00 4\u001b[0m \u001b[43mtrain_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodels\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX_train_fitted\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX_test_fitted\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_test\u001b[49m\u001b[43m)\u001b[49m\n", 220 | "Cell \u001b[0;32mIn[9], line 14\u001b[0m, in \u001b[0;36mtrain_model\u001b[0;34m(model, name, X_train, X_test, y_train, y_test)\u001b[0m\n\u001b[1;32m 12\u001b[0m model_info \u001b[38;5;241m=\u001b[39m mlflow\u001b[38;5;241m.\u001b[39msklearn\u001b[38;5;241m.\u001b[39mlog_model(model, name, signature\u001b[38;5;241m=\u001b[39msignature)\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# Сохранить метрики модели\u001b[39;00m\n\u001b[0;32m---> 14\u001b[0m \u001b[43mmlflow\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mevaluate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_info\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_uri\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 16\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mX_test\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 17\u001b[0m \u001b[43m \u001b[49m\u001b[43mtargets\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43my_test\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 18\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mregressor\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[43mevaluators\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdefault\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", 221 | "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/mlflow/models/evaluation/base.py:1608\u001b[0m, in \u001b[0;36mevaluate\u001b[0;34m(model, data, model_type, targets, predictions, dataset_path, feature_names, evaluators, evaluator_config, custom_metrics, extra_metrics, custom_artifacts, validation_thresholds, baseline_model, env_manager, model_config, baseline_config, inference_params)\u001b[0m\n\u001b[1;32m 1605\u001b[0m predictions_expected_in_model_output \u001b[38;5;241m=\u001b[39m predictions \u001b[38;5;28;01mif\u001b[39;00m model \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m 
\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1607\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1608\u001b[0m evaluate_result \u001b[38;5;241m=\u001b[39m \u001b[43m_evaluate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1609\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1610\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1611\u001b[0m \u001b[43m \u001b[49m\u001b[43mdataset\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdataset\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1612\u001b[0m \u001b[43m \u001b[49m\u001b[43mrun_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrun_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1613\u001b[0m \u001b[43m \u001b[49m\u001b[43mevaluator_name_list\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mevaluator_name_list\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1614\u001b[0m \u001b[43m \u001b[49m\u001b[43mevaluator_name_to_conf_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mevaluator_name_to_conf_map\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1615\u001b[0m \u001b[43m \u001b[49m\u001b[43mcustom_metrics\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcustom_metrics\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1616\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_metrics\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_metrics\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1617\u001b[0m \u001b[43m \u001b[49m\u001b[43mcustom_artifacts\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcustom_artifacts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1618\u001b[0m \u001b[43m \u001b[49m\u001b[43mbaseline_model\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbaseline_model\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1619\u001b[0m \u001b[43m \u001b[49m\u001b[43mpredictions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpredictions_expected_in_model_output\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1620\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1621\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 1622\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(model, _ServedPyFuncModel):\n", 222 | "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/mlflow/models/evaluation/base.py:786\u001b[0m, in \u001b[0;36m_evaluate\u001b[0;34m(model, model_type, dataset, run_id, evaluator_name_list, evaluator_name_to_conf_map, custom_metrics, extra_metrics, custom_artifacts, baseline_model, predictions)\u001b[0m\n\u001b[1;32m 784\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m evaluator\u001b[38;5;241m.\u001b[39mcan_evaluate(model_type\u001b[38;5;241m=\u001b[39mmodel_type, evaluator_config\u001b[38;5;241m=\u001b[39mconfig):\n\u001b[1;32m 785\u001b[0m _logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEvaluating the model with the \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mevaluator_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m evaluator.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 786\u001b[0m eval_result \u001b[38;5;241m=\u001b[39m \u001b[43mevaluator\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mevaluate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 787\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 788\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 789\u001b[0m \u001b[43m \u001b[49m\u001b[43mdataset\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdataset\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 790\u001b[0m \u001b[43m \u001b[49m\u001b[43mrun_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrun_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 791\u001b[0m \u001b[43m \u001b[49m\u001b[43mevaluator_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 792\u001b[0m \u001b[43m \u001b[49m\u001b[43mcustom_metrics\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcustom_metrics\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 793\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_metrics\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_metrics\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 794\u001b[0m \u001b[43m \u001b[49m\u001b[43mcustom_artifacts\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcustom_artifacts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 795\u001b[0m \u001b[43m \u001b[49m\u001b[43mbaseline_model\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbaseline_model\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 796\u001b[0m \u001b[43m \u001b[49m\u001b[43mpredictions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpredictions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 797\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 798\u001b[0m eval_results\u001b[38;5;241m.\u001b[39mappend(eval_result)\n\u001b[1;32m 800\u001b[0m _last_failed_evaluator \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", 223 | "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/mlflow/models/evaluation/default_evaluator.py:2110\u001b[0m, in \u001b[0;36mDefaultEvaluator.evaluate\u001b[0;34m(self, model_type, dataset, run_id, evaluator_config, model, custom_metrics, extra_metrics, custom_artifacts, baseline_model, predictions, **kwargs)\u001b[0m\n\u001b[1;32m 2108\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m baseline_model:\n\u001b[1;32m 2109\u001b[0m _logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEvaluating candidate model:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 2110\u001b[0m evaluation_result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_evaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mis_baseline_model\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 2112\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m baseline_model:\n\u001b[1;32m 2113\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m evaluation_result\n", 224 | "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/mlflow/models/evaluation/default_evaluator.py:1936\u001b[0m, in \u001b[0;36mDefaultEvaluator._evaluate\u001b[0;34m(self, model, is_baseline_model, **kwargs)\u001b[0m\n\u001b[1;32m 1928\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_evaluate\u001b[39m(\n\u001b[1;32m 1929\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1930\u001b[0m model: 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmlflow.pyfunc.PyFuncModel\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1931\u001b[0m is_baseline_model\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 1932\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 1933\u001b[0m ):\n\u001b[1;32m 1934\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\n\u001b[0;32m-> 1936\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mwith\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mTempDir\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mas\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mtemp_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmatplotlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrc_context\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_matplotlib_config\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[1;32m 1937\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtemp_dir\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mtemp_dir\u001b[49m\n\u001b[1;32m 1938\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\n", 225 | "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/mlflow/utils/file_utils.py:410\u001b[0m, in \u001b[0;36mTempDir.__enter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__enter__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 410\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_path \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mabspath(\u001b[43mcreate_tmp_dir\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 411\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mexists(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_path)\n\u001b[1;32m 412\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_chdr:\n", 226 | "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/mlflow/utils/file_utils.py:876\u001b[0m, in \u001b[0;36mcreate_tmp_dir\u001b[0;34m()\u001b[0m\n\u001b[1;32m 873\u001b[0m os\u001b[38;5;241m.\u001b[39mmakedirs(directory, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 874\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m tempfile\u001b[38;5;241m.\u001b[39mmkdtemp(\u001b[38;5;28mdir\u001b[39m\u001b[38;5;241m=\u001b[39mdirectory)\n\u001b[0;32m--> 876\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtempfile\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmkdtemp\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", 227 | "File \u001b[0;32m/opt/conda/lib/python3.11/tempfile.py:368\u001b[0m, in \u001b[0;36mmkdtemp\u001b[0;34m(suffix, prefix, dir)\u001b[0m\n\u001b[1;32m 366\u001b[0m _sys\u001b[38;5;241m.\u001b[39maudit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtempfile.mkdtemp\u001b[39m\u001b[38;5;124m\"\u001b[39m, file)\n\u001b[1;32m 367\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 368\u001b[0m _os\u001b[38;5;241m.\u001b[39mmkdir(file, 
\u001b[38;5;241m0o700\u001b[39m)\n\u001b[1;32m 369\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mFileExistsError\u001b[39;00m:\n\u001b[1;32m 370\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m \u001b[38;5;66;03m# try again\u001b[39;00m\n", 228 | "\u001b[0;31mOSError\u001b[0m: [Errno 28] No space left on device: '/tmp/tmpg2ygki9o'" 229 | ] 230 | } 231 | ], 232 | "source": [ 233 | "with mlflow.start_run(run_name=\"Parent_Run\", experiment_id=experiment_id, description=\"Example description\") as parent_run:\n", 234 | " for model in models.keys():\n", 235 | " with mlflow.start_run(run_name=model, experiment_id=experiment_id, nested=True) as child_run:\n", 236 | " train_model(models[model], model, X_train_fitted, X_test_fitted, y_train, y_test)" 237 | ] 238 | } 239 | ], 240 | "metadata": { 241 | "kernelspec": { 242 | "display_name": "Python 3 (ipykernel)", 243 | "language": "python", 244 | "name": "python3" 245 | }, 246 | "language_info": { 247 | "codemirror_mode": { 248 | "name": "ipython", 249 | "version": 3 250 | }, 251 | "file_extension": ".py", 252 | "mimetype": "text/x-python", 253 | "name": "python", 254 | "nbconvert_exporter": "python", 255 | "pygments_lexer": "ipython3", 256 | "version": "3.11.4" 257 | } 258 | }, 259 | "nbformat": 4, 260 | "nbformat_minor": 5 261 | } 262 | --------------------------------------------------------------------------------