├── .gitignore
├── resources
    └── demo.png
├── Dockerfile
├── docker-compose.yml
├── README.md
├── requirements.txt
└── app.py


/.gitignore:
--------------------------------------------------------------------------------
1 | venv
2 | .vscode


--------------------------------------------------------------------------------
/resources/demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mwinterde/mlflow-with-streamlit/HEAD/resources/demo.png


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# Single image shared by the streamlit and mlflow services in docker-compose.yml.
FROM python:3.8-slim
COPY requirements.txt requirements.txt
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt \
    && rm -rf requirements.txt
COPY app.py app.py
# Default command when the image is run on its own. docker-compose.yml sets
# its own `command` for both services, which replaces this CMD there.
CMD mlflow experiments create --experiment-name iris \
    && mlflow experiments create --experiment-name wine \
    && mlflow experiments create --experiment-name diabetes


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
# Two services built from the same Dockerfile in this directory; each one
# supplies its own `command`.
version: "3.3"
services:
  # Streamlit front end, published on host port 8501.
  streamlit:
    build: .
    command: "streamlit run app.py"
    ports:
      - "8501:8501"
    volumes:
      - mlruns:/mlruns  # app writes into mlruns
  # MLflow tracking server / UI, published on host port 5000.
  mlflow:
    build: .
    command: "mlflow server --host 0.0.0.0"
    ports:
      - "5000:5000"
    volumes:
      - mlruns:/mlruns  # mlflow reads from mlruns

# Named volume mounted at /mlruns in both containers so they share run data.
volumes:
  mlruns:


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MLFlow Streamlit Interaction
 2 | 
 3 | ## Motivation
 4 | 
In this repo I bring together my two favorite Python libraries right now:
[Streamlit](https://github.com/streamlit) and [MLflow](https://github.com/mlflow).
It is a simple demonstration of how you can use Streamlit to experiment
with various machine learning models and MLflow to keep track of your
experiments.
10 | 
11 | Watch [this YouTube video](https://youtu.be/2wEbOmsV028) for a simple
12 | demonstration:
13 | 
14 | [![](resources/demo.png)](https://youtu.be/2wEbOmsV028)
15 | 
## Try it yourself
17 | 
18 | Step 1: Clone this repository to your local machine
19 | ```
20 | git clone git@github.com:mwinterde/mlflow-with-streamlit.git
21 | ```
22 | 
Step 2: Build the Docker images for the streamlit and mlflow services.
24 | ```
25 | docker-compose build
26 | ```
27 | 
28 | Step 3: Run the services.
29 | ```
30 | docker-compose up
31 | ```
32 | 
33 | Step 4: Explore.
34 | * Open http://localhost:8501 to interact with the streamlit app.
35 | * Open http://localhost:5000 to access the mlflow user interface.
36 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
  1 | alembic==1.4.1
  2 | altair==4.1.0
  3 | appnope==0.1.0
  4 | argon2-cffi==20.1.0
  5 | astor==0.8.1
  6 | async-generator==1.10
  7 | attrs==20.2.0
  8 | azure-core==1.8.2
  9 | azure-storage-blob==12.5.0
 10 | backcall==0.2.0
 11 | base58==2.0.1
 12 | bleach==3.2.1
 13 | blinker==1.4
 14 | boto3==1.15.12
 15 | botocore==1.18.12
 16 | cachetools==4.1.1
 17 | certifi==2020.6.20
 18 | cffi==1.14.3
 19 | chardet==3.0.4
 20 | click==7.1.2
 21 | cloudpickle==1.6.0
 22 | cryptography==3.1.1
 23 | databricks-cli==0.12.0
 24 | decorator==4.4.2
 25 | defusedxml==0.6.0
 26 | docker==4.3.1
 27 | entrypoints==0.3
 28 | enum-compat==0.0.3
 29 | Flask==1.1.2
 30 | gitdb==4.0.5
 31 | GitPython==3.1.9
 32 | gorilla==0.3.0
 33 | gunicorn==20.0.4
 34 | idna==2.10
 35 | ipykernel==5.3.4
 36 | ipython==7.18.1
 37 | ipython-genutils==0.2.0
 38 | ipywidgets==7.5.1
 39 | isodate==0.6.0
 40 | itsdangerous==1.1.0
 41 | jedi==0.17.2
 42 | Jinja2==2.11.2
 43 | jmespath==0.10.0
 44 | joblib==0.17.0
 45 | jsonschema==3.2.0
 46 | jupyter-client==6.1.7
 47 | jupyter-core==4.6.3
 48 | jupyterlab-pygments==0.1.2
 49 | Mako==1.1.3
 50 | MarkupSafe==1.1.1
 51 | mistune==0.8.4
 52 | mlflow==1.11.0
 53 | msrest==0.6.19
 54 | nbclient==0.5.0
 55 | nbconvert==6.0.7
 56 | nbformat==5.0.7
 57 | nest-asyncio==1.4.1
 58 | notebook==6.1.4
 59 | numpy==1.19.2
 60 | oauthlib==3.1.0
 61 | packaging==20.4
 62 | pandas==1.1.2
 63 | pandocfilters==1.4.2
 64 | parso==0.7.1
 65 | pathtools==0.1.2
 66 | pexpect==4.8.0
 67 | pickleshare==0.7.5
 68 | Pillow==7.2.0
 69 | prometheus-client==0.8.0
 70 | prometheus-flask-exporter==0.18.0
 71 | prompt-toolkit==3.0.7
 72 | protobuf==3.13.0
 73 | ptyprocess==0.6.0
 74 | pyarrow==1.0.1
 75 | pycparser==2.20
 76 | pydeck==0.4.1
 77 | Pygments==2.7.1
 78 | pyparsing==2.4.7
 79 | pyrsistent==0.17.3
 80 | python-dateutil==2.8.1
 81 | python-editor==1.0.4
 82 | pytz==2020.1
 83 | PyYAML==5.3.1
 84 | pyzmq==19.0.2
 85 | querystring-parser==1.2.4
 86 | requests==2.24.0
 87 | requests-oauthlib==1.3.0
 88 | s3transfer==0.3.3
 89 | scikit-learn==0.23.2
 90 | scipy==1.5.2
 91 | Send2Trash==1.5.0
 92 | six==1.15.0
 93 | sklearn==0.0
 94 | smmap==3.0.4
 95 | SQLAlchemy==1.3.13
 96 | sqlparse==0.3.1
 97 | streamlit==0.81.0
 98 | tabulate==0.8.7
 99 | tenacity==6.2.0
100 | terminado==0.9.1
101 | testpath==0.4.4
102 | threadpoolctl==2.1.0
103 | toml==0.10.1
104 | toolz==0.11.1
105 | tornado==6.0.4
106 | traitlets==5.0.4
107 | tzlocal==2.1
108 | urllib3==1.25.10
109 | validators==0.18.1
110 | watchdog==0.10.3
111 | wcwidth==0.2.5
112 | webencodings==0.5.1
113 | websocket-client==0.57.0
114 | Werkzeug==1.0.1
115 | widgetsnbextension==3.5.1
116 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | import mlflow
  3 | from sklearn.datasets import load_iris, load_wine, load_diabetes
  4 | from sklearn.model_selection import train_test_split
  5 | from sklearn.neighbors import KNeighborsClassifier
  6 | from sklearn.svm import SVC
  7 | from sklearn.linear_model import LinearRegression
  8 | from sklearn.ensemble import RandomForestRegressor
  9 | from sklearn.metrics import f1_score, r2_score
 10 | 
 11 | def selectbox_without_default(label, options):
 12 |     options = [''] + options
 13 |     format_func = lambda x: 'Select one option' if x == '' else x
 14 |     return st.selectbox(label, options, format_func=format_func)
 15 | 
 16 | @st.cache
 17 | def load_data(key):
 18 |     data = DATA[key](as_frame=True)
 19 |     df = data['data']
 20 |     df['target'] = data['target']
 21 |     return df
 22 | 
 23 | DATA = {
 24 |     "iris": load_iris,
 25 |     "wine": load_wine,
 26 |     "diabetes": load_diabetes
 27 | }
 28 | 
 29 | PROBLEMS = {
 30 |     "iris": "classification",
 31 |     "wine": "classification",
 32 |     "diabetes": "regression"
 33 | }
 34 | 
 35 | MODELS = {
 36 |     "classification": {
 37 |         "KNN": KNeighborsClassifier,
 38 |         "SVM": SVC
 39 |     },
 40 |     "regression": {
 41 |         "LR": LinearRegression,
 42 |         "RFR": RandomForestRegressor
 43 |     }
 44 | }
 45 | 
 46 | 
 47 | def main():
 48 |     # Title
 49 |     st.title("Model Experimentation with MLflow")
 50 | 
 51 |     # Choose dataset
 52 |     data_options = list(DATA.keys())
 53 |     data_choice = selectbox_without_default("Choose a dataset", data_options)
 54 |     if not data_choice:
 55 |         st.stop()
 56 |     df = load_data(data_choice)
 57 |     st.write(df)
 58 | 
 59 |     # Model selection
 60 |     problem_type = PROBLEMS[data_choice]
 61 |     model_options = list(MODELS[problem_type].keys())
 62 |     model_choice = selectbox_without_default("Choose a model", model_options)
 63 |     if not model_choice:
 64 |         st.stop()
 65 | 
 66 |     # Feature selection
 67 |     feature_options = df.columns.drop('target').tolist()
 68 |     feature_choice = st.multiselect("Choose some features", feature_options)
 69 | 
 70 |     # Mlflow tracking
 71 |     track_with_mlflow = st.checkbox("Track with mlflow?")
 72 | 
 73 |     # Model training
 74 |     start_training = st.button("Start training")
 75 |     if not start_training:
 76 |         st.stop()
 77 | 
 78 |     if track_with_mlflow:
 79 |         mlflow.set_experiment(data_choice)
 80 |         mlflow.start_run()
 81 |         mlflow.log_param('model', model_choice)
 82 |         mlflow.log_param('features', feature_choice)
 83 | 
 84 |     X = df[feature_choice].copy()
 85 |     y = df['target'].copy()
 86 |     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
 87 |     model = MODELS[problem_type][model_choice]()
 88 |     model.fit(X_train, y_train)
 89 | 
 90 |     # Model evaluation
 91 |     preds_train = model.predict(X_train)
 92 |     preds_test = model.predict(X_test)
 93 |     if problem_type=="classification":
 94 |         metric_name = "f1_score"
 95 |         metric_train = f1_score(y_train, preds_train, average='micro')
 96 |         metric_test = f1_score(y_test, preds_test, average='micro')
 97 |     else:
 98 |         metric_name = "r2_score"
 99 |         metric_train = r2_score(y_train, preds_train)
100 |         metric_test = r2_score(y_test, preds_test)
101 |     st.write(metric_name+"_train", round(metric_train, 3))
102 |     st.write(metric_name+"_test", round(metric_test, 3))
103 | 
104 |     if track_with_mlflow:
105 |         mlflow.log_metric(metric_name+"_train", metric_train)
106 |         mlflow.log_metric(metric_name+"_test", metric_test)
107 |         mlflow.end_run()
108 | 
109 | 
110 | if __name__ == '__main__':
111 |     main()


--------------------------------------------------------------------------------