├── .gitignore ├── resources └── demo.png ├── Dockerfile ├── docker-compose.yml ├── README.md ├── requirements.txt └── app.py /.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | .vscode -------------------------------------------------------------------------------- /resources/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mwinterde/mlflow-with-streamlit/HEAD/resources/demo.png -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-slim 2 | COPY requirements.txt requirements.txt 3 | RUN pip install --upgrade pip && pip install -r requirements.txt && rm -rf requirements.txt 4 | COPY app.py app.py 5 | CMD mlflow experiments create --experiment-name iris \ 6 | && mlflow experiments create --experiment-name wine \ 7 | && mlflow experiments create --experiment-name diabetes -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.3" 2 | services: 3 | streamlit: 4 | build: . 5 | command: "streamlit run app.py" 6 | ports: 7 | - "8501:8501" 8 | volumes: 9 | - mlruns:/mlruns # app writes into mlruns 10 | mlflow: 11 | build: . 
12 | command: "mlflow server --host 0.0.0.0" 13 | ports: 14 | - "5000:5000" 15 | volumes: 16 | - mlruns:/mlruns # mlflow reads from mlruns 17 | 18 | volumes: 19 | mlruns: -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MLflow Streamlit Interaction 2 | 3 | ## Motivation 4 | 5 | In this repo I bring together my two favorite Python libraries right now: 6 | [streamlit](https://github.com/streamlit) and [mlflow](https://github.com/mlflow). 7 | It is a simple demonstration of how you can use streamlit to experiment 8 | with various machine learning models and mlflow to keep track of your 9 | experiments. 10 | 11 | Watch [this YouTube video](https://youtu.be/2wEbOmsV028) for a simple 12 | demonstration: 13 | 14 | [![](resources/demo.png)](https://youtu.be/2wEbOmsV028) 15 | 16 | ## Try it yourself 17 | 18 | Step 1: Clone this repository to your local machine. 19 | ``` 20 | git clone git@github.com:mwinterde/mlflow-with-streamlit.git 21 | ``` 22 | 23 | Step 2: Build docker containers for the streamlit and mlflow services. 24 | ``` 25 | docker-compose build 26 | ``` 27 | 28 | Step 3: Run the services. 29 | ``` 30 | docker-compose up 31 | ``` 32 | 33 | Step 4: Explore. 34 | * Open http://localhost:8501 to interact with the streamlit app. 35 | * Open http://localhost:5000 to access the mlflow user interface. 
36 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | alembic==1.4.1 2 | altair==4.1.0 3 | appnope==0.1.0 4 | argon2-cffi==20.1.0 5 | astor==0.8.1 6 | async-generator==1.10 7 | attrs==20.2.0 8 | azure-core==1.8.2 9 | azure-storage-blob==12.5.0 10 | backcall==0.2.0 11 | base58==2.0.1 12 | bleach==3.2.1 13 | blinker==1.4 14 | boto3==1.15.12 15 | botocore==1.18.12 16 | cachetools==4.1.1 17 | certifi==2020.6.20 18 | cffi==1.14.3 19 | chardet==3.0.4 20 | click==7.1.2 21 | cloudpickle==1.6.0 22 | cryptography==3.1.1 23 | databricks-cli==0.12.0 24 | decorator==4.4.2 25 | defusedxml==0.6.0 26 | docker==4.3.1 27 | entrypoints==0.3 28 | enum-compat==0.0.3 29 | Flask==1.1.2 30 | gitdb==4.0.5 31 | GitPython==3.1.9 32 | gorilla==0.3.0 33 | gunicorn==20.0.4 34 | idna==2.10 35 | ipykernel==5.3.4 36 | ipython==7.18.1 37 | ipython-genutils==0.2.0 38 | ipywidgets==7.5.1 39 | isodate==0.6.0 40 | itsdangerous==1.1.0 41 | jedi==0.17.2 42 | Jinja2==2.11.2 43 | jmespath==0.10.0 44 | joblib==0.17.0 45 | jsonschema==3.2.0 46 | jupyter-client==6.1.7 47 | jupyter-core==4.6.3 48 | jupyterlab-pygments==0.1.2 49 | Mako==1.1.3 50 | MarkupSafe==1.1.1 51 | mistune==0.8.4 52 | mlflow==1.11.0 53 | msrest==0.6.19 54 | nbclient==0.5.0 55 | nbconvert==6.0.7 56 | nbformat==5.0.7 57 | nest-asyncio==1.4.1 58 | notebook==6.1.4 59 | numpy==1.19.2 60 | oauthlib==3.1.0 61 | packaging==20.4 62 | pandas==1.1.2 63 | pandocfilters==1.4.2 64 | parso==0.7.1 65 | pathtools==0.1.2 66 | pexpect==4.8.0 67 | pickleshare==0.7.5 68 | Pillow==7.2.0 69 | prometheus-client==0.8.0 70 | prometheus-flask-exporter==0.18.0 71 | prompt-toolkit==3.0.7 72 | protobuf==3.13.0 73 | ptyprocess==0.6.0 74 | pyarrow==1.0.1 75 | pycparser==2.20 76 | pydeck==0.4.1 77 | Pygments==2.7.1 78 | pyparsing==2.4.7 79 | pyrsistent==0.17.3 80 | python-dateutil==2.8.1 81 | python-editor==1.0.4 82 | pytz==2020.1 83 | 
import contextlib

import mlflow
import streamlit as st
from sklearn.datasets import load_iris, load_wine, load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import f1_score, r2_score

# Dataset name -> scikit-learn loader. The name is also passed to
# mlflow.set_experiment(), so it doubles as the MLflow experiment name.
DATA = {
    "iris": load_iris,
    "wine": load_wine,
    "diabetes": load_diabetes,
}

# Dataset name -> problem type; selects the model family and the metric.
PROBLEMS = {
    "iris": "classification",
    "wine": "classification",
    "diabetes": "regression",
}

# Problem type -> {display name: estimator class}. Estimators are
# instantiated with their default hyper-parameters.
MODELS = {
    "classification": {
        "KNN": KNeighborsClassifier,
        "SVM": SVC,
    },
    "regression": {
        "LR": LinearRegression,
        "RFR": RandomForestRegressor,
    },
}


def selectbox_without_default(label, options):
    """Render a select box whose first entry is an empty placeholder.

    An empty string is prepended to ``options`` and displayed as
    "Select one option". Callers treat a falsy return value as
    "nothing chosen yet".
    """
    def _display(option):
        return 'Select one option' if option == '' else option

    return st.selectbox(label, [''] + options, format_func=_display)


@st.cache
def load_data(key):
    """Load the dataset registered under ``key`` in ``DATA``.

    The loader is called with ``as_frame=True``; its target is attached
    to the feature frame as a ``target`` column and that frame is
    returned.
    """
    bunch = DATA[key](as_frame=True)
    df = bunch['data']
    df['target'] = bunch['target']
    return df


def _train_and_score(df, problem_type, model_choice, feature_choice):
    """Fit the chosen model on an 80/20 split and score both parts.

    Returns ``(metric_name, metric_train, metric_test)``: micro-averaged
    F1 for classification problems, R^2 for everything else.
    """
    X = df[feature_choice].copy()
    y = df['target'].copy()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    model = MODELS[problem_type][model_choice]()
    model.fit(X_train, y_train)
    preds_train = model.predict(X_train)
    preds_test = model.predict(X_test)

    if problem_type == "classification":
        return (
            "f1_score",
            f1_score(y_train, preds_train, average='micro'),
            f1_score(y_test, preds_test, average='micro'),
        )
    return (
        "r2_score",
        r2_score(y_train, preds_train),
        r2_score(y_test, preds_test),
    )


def main():
    """Streamlit page: pick dataset, model and features, then train.

    The page stops rendering (``st.stop()``) until a dataset and a model
    are chosen and the "Start training" button is pressed. When the
    tracking checkbox is ticked, the model name, the feature list and
    both metrics are logged to an MLflow run in the experiment named
    after the dataset.
    """
    st.title("Model Experimentation with MLflow")

    # Choose dataset
    data_choice = selectbox_without_default("Choose a dataset", list(DATA))
    if not data_choice:
        st.stop()
    df = load_data(data_choice)
    st.write(df)

    # Model selection
    problem_type = PROBLEMS[data_choice]
    model_choice = selectbox_without_default(
        "Choose a model", list(MODELS[problem_type])
    )
    if not model_choice:
        st.stop()

    # Feature selection
    feature_options = df.columns.drop('target').tolist()
    feature_choice = st.multiselect("Choose some features", feature_options)

    # Mlflow tracking
    track_with_mlflow = st.checkbox("Track with mlflow?")

    # Model training
    if not st.button("Start training"):
        st.stop()

    # An empty selection would hand a zero-column frame to model.fit(),
    # which fails with a traceback; tell the user what is missing instead.
    if not feature_choice:
        st.warning("Please choose at least one feature before training.")
        st.stop()

    if track_with_mlflow:
        mlflow.set_experiment(data_choice)
        run = mlflow.start_run()
    else:
        run = contextlib.nullcontext()

    # The run is used as a context manager so it is always ended, even if
    # training or scoring raises. A run left active would make the next
    # mlflow.start_run() call fail.
    with run:
        if track_with_mlflow:
            mlflow.log_param('model', model_choice)
            mlflow.log_param('features', feature_choice)

        metric_name, metric_train, metric_test = _train_and_score(
            df, problem_type, model_choice, feature_choice
        )

        # Model evaluation
        st.write(f"{metric_name}_train", round(metric_train, 3))
        st.write(f"{metric_name}_test", round(metric_test, 3))

        if track_with_mlflow:
            mlflow.log_metric(f"{metric_name}_train", metric_train)
            mlflow.log_metric(f"{metric_name}_test", metric_test)


if __name__ == '__main__':
    main()