├── Procfile ├── README.md ├── model-building └── penguins-model-building.py ├── penguins-app.py ├── penguins_clf.pkl ├── penguins_example.csv ├── requirements.txt ├── runtime.txt └── setup.sh /Procfile: -------------------------------------------------------------------------------- 1 | web: sh setup.sh && streamlit run penguins-app.py 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Penguins web app deployed on Heroku 2 | 3 | Check out the YouTube video showing the development of this web app at https://youtu.be/zK4Ch6e1zq8 4 | 5 | The deployed web app is live at https://dp-penguins.herokuapp.com/ 6 | 7 | This web app predicts the species of penguins as a function of their input parameters (bill length, bill width, flipper length, body mass, sex and island). 8 | 9 | The web app was built in Python using the following libraries: 10 | * streamlit 11 | * pandas 12 | * numpy 13 | * scikit-learn 14 | * pickle 15 | -------------------------------------------------------------------------------- /model-building/penguins-model-building.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | penguins = pd.read_csv('https://github.com/dataprofessor/data/blob/master/penguins_cleaned.csv') 3 | 4 | # Ordinal feature encoding 5 | # https://www.kaggle.com/pratik1120/penguin-dataset-eda-classification-and-clustering 6 | df = penguins.copy() 7 | target = 'species' 8 | encode = ['sex','island'] 9 | 10 | for col in encode: 11 | dummy = pd.get_dummies(df[col], prefix=col) 12 | df = pd.concat([df,dummy], axis=1) 13 | del df[col] 14 | 15 | target_mapper = {'Adelie':0, 'Chinstrap':1, 'Gentoo':2} 16 | def target_encode(val): 17 | return target_mapper[val] 18 | 19 | df['species'] = df['species'].apply(target_encode) 20 | 21 | # Separating X and y 22 | X = df.drop('species', axis=1) 23 | Y = df['species'] 24 | 25 | # Build random forest model 26 | from sklearn.ensemble import RandomForestClassifier 27 | clf = RandomForestClassifier() 28 | clf.fit(X, Y) 29 | 30 | # Saving the model 31 | import pickle 32 | pickle.dump(clf, open('penguins_clf.pkl', 'wb')) 33 | -------------------------------------------------------------------------------- /penguins-app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import pandas as pd 3 | import numpy as np 4 | import pickle 5 | from sklearn.ensemble import RandomForestClassifier 6 | 7 | st.write(""" 8 | # Penguin Prediction App 9 | 10 | This app predicts the **Palmer Penguin** species! 11 | 12 | Data obtained from the [palmerpenguins library](https://github.com/allisonhorst/palmerpenguins) in R by Allison Horst. 13 | """) 14 | 15 | st.sidebar.header('User Input Features') 16 | 17 | st.sidebar.markdown(""" 18 | [Example CSV input file](https://raw.githubusercontent.com/dataprofessor/data/master/penguins_example.csv) 19 | """) 20 | 21 | # Collects user input features into dataframe 22 | uploaded_file = st.sidebar.file_uploader("Upload your input CSV file", type=["csv"]) 23 | if uploaded_file is not None: 24 | input_df = pd.read_csv(uploaded_file) 25 | else: 26 | def user_input_features(): 27 | island = st.sidebar.selectbox('Island',('Biscoe','Dream','Torgersen')) 28 | sex = st.sidebar.selectbox('Sex',('male','female')) 29 | bill_length_mm = st.sidebar.slider('Bill length (mm)', 32.1,59.6,43.9) 30 | bill_depth_mm = st.sidebar.slider('Bill depth (mm)', 13.1,21.5,17.2) 31 | flipper_length_mm = st.sidebar.slider('Flipper length (mm)', 172.0,231.0,201.0) 32 | body_mass_g = st.sidebar.slider('Body mass (g)', 2700.0,6300.0,4207.0) 33 | data = {'island': island, 34 | 'bill_length_mm': bill_length_mm, 35 | 'bill_depth_mm': bill_depth_mm, 36 | 'flipper_length_mm': flipper_length_mm, 37 | 'body_mass_g': body_mass_g, 38 | 'sex': sex} 39 | features = pd.DataFrame(data, index=[0]) 40 | return features 41 | input_df = user_input_features() 42 | 43 | # Combines user input features with entire penguins dataset 44 | # This will be useful for the encoding phase 45 | penguins_raw = pd.read_csv('https://raw.githubusercontent.com/dataprofessor/data/master/penguins_cleaned.csv') 46 | penguins = penguins_raw.drop(columns=['species'], axis=1) 47 | df = pd.concat([input_df,penguins],axis=0) 48 | 49 | # Encoding of ordinal features 50 | # https://www.kaggle.com/pratik1120/penguin-dataset-eda-classification-and-clustering 51 | encode = ['sex','island'] 52 | for col in encode: 53 | dummy = pd.get_dummies(df[col], prefix=col) 54 | df = pd.concat([df,dummy], axis=1) 55 | del df[col] 56 | df = df[:1] # Selects only the first row (the user input data) 57 | 58 | # Displays the user input features 59 | st.subheader('User Input features') 60 | 61 | if uploaded_file is not None: 62 | st.write(df) 63 | else: 64 | st.write('Awaiting CSV file to be uploaded. Currently using example input parameters (shown below).') 65 | st.write(df) 66 | 67 | # Reads in saved classification model 68 | load_clf = pickle.load(open('penguins_clf.pkl', 'rb')) 69 | 70 | # Apply model to make predictions 71 | prediction = load_clf.predict(df) 72 | prediction_proba = load_clf.predict_proba(df) 73 | 74 | 75 | st.subheader('Prediction') 76 | penguins_species = np.array(['Adelie','Chinstrap','Gentoo']) 77 | st.write(penguins_species[prediction]) 78 | 79 | st.subheader('Prediction Probability') 80 | st.write(prediction_proba) 81 | -------------------------------------------------------------------------------- /penguins_clf.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataprofessor/penguins-heroku/ee13a9b77bf3d5fddcff81860bc94b787037c32f/penguins_clf.pkl -------------------------------------------------------------------------------- /penguins_example.csv: -------------------------------------------------------------------------------- 1 | island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex 2 | Biscoe,43.9,17.2,201.0,4207.0,male 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit==0.61.0 2 | pandas==0.25.3 3 | numpy==1.19 4 | scikit-learn==0.22.1 5 | -------------------------------------------------------------------------------- /runtime.txt: -------------------------------------------------------------------------------- 1 | python-3.7.9 2 | -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- 1 | mkdir -p ~/.streamlit/ 2 | 3 | echo "\ 4 | [server]\n\ 5 | port = $PORT\n\ 6 | enableCORS = false\n\ 7 | headless = true\n\ 8 | \n\ 9 | " > ~/.streamlit/config.toml 10 | --------------------------------------------------------------------------------