├── Procfile
├── README.md
├── model-building
    └── penguins-model-building.py
├── penguins-app.py
├── penguins_clf.pkl
├── penguins_example.csv
├── requirements.txt
├── runtime.txt
└── setup.sh


/Procfile:
--------------------------------------------------------------------------------
1 | web: sh setup.sh && streamlit run penguins-app.py
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Penguins web app deployed on Heroku
 2 | 
 3 | Check out the YouTube video showing the development of this web app at https://youtu.be/zK4Ch6e1zq8
 4 | 
 5 | The deployed web app is live at https://dp-penguins.herokuapp.com/
 6 | 
 7 | This web app predicts the species of penguins as a function of their input parameters (bill length, bill width, flipper length, body mass, sex and island).
 8 | 
 9 | The web app was built in Python using the following libraries:
10 | * streamlit
11 | * pandas
12 | * numpy
13 | * scikit-learn
14 | * pickle
15 | 


--------------------------------------------------------------------------------
/model-building/penguins-model-building.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | penguins = pd.read_csv('https://github.com/dataprofessor/data/blob/master/penguins_cleaned.csv')
 3 | 
 4 | # Ordinal feature encoding
 5 | # https://www.kaggle.com/pratik1120/penguin-dataset-eda-classification-and-clustering
 6 | df = penguins.copy()
 7 | target = 'species'
 8 | encode = ['sex','island']
 9 | 
10 | for col in encode:
11 |     dummy = pd.get_dummies(df[col], prefix=col)
12 |     df = pd.concat([df,dummy], axis=1)
13 |     del df[col]
14 | 
15 | target_mapper = {'Adelie':0, 'Chinstrap':1, 'Gentoo':2}
16 | def target_encode(val):
17 |     return target_mapper[val]
18 | 
19 | df['species'] = df['species'].apply(target_encode)
20 | 
21 | # Separating X and y
22 | X = df.drop('species', axis=1)
23 | Y = df['species']
24 | 
25 | # Build random forest model
26 | from sklearn.ensemble import RandomForestClassifier
27 | clf = RandomForestClassifier()
28 | clf.fit(X, Y)
29 | 
30 | # Saving the model
31 | import pickle
32 | pickle.dump(clf, open('penguins_clf.pkl', 'wb'))
33 | 


--------------------------------------------------------------------------------
/penguins-app.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import pandas as pd
 3 | import numpy as np
 4 | import pickle
 5 | from sklearn.ensemble import RandomForestClassifier
 6 | 
 7 | st.write("""
 8 | # Penguin Prediction App
 9 | 
10 | This app predicts the **Palmer Penguin** species!
11 | 
12 | Data obtained from the [palmerpenguins library](https://github.com/allisonhorst/palmerpenguins) in R by Allison Horst.
13 | """)
14 | 
15 | st.sidebar.header('User Input Features')
16 | 
17 | st.sidebar.markdown("""
18 | [Example CSV input file](https://raw.githubusercontent.com/dataprofessor/data/master/penguins_example.csv)
19 | """)
20 | 
21 | # Collects user input features into dataframe
22 | uploaded_file = st.sidebar.file_uploader("Upload your input CSV file", type=["csv"])
23 | if uploaded_file is not None:
24 |     input_df = pd.read_csv(uploaded_file)
25 | else:
26 |     def user_input_features():
27 |         island = st.sidebar.selectbox('Island',('Biscoe','Dream','Torgersen'))
28 |         sex = st.sidebar.selectbox('Sex',('male','female'))
29 |         bill_length_mm = st.sidebar.slider('Bill length (mm)', 32.1,59.6,43.9)
30 |         bill_depth_mm = st.sidebar.slider('Bill depth (mm)', 13.1,21.5,17.2)
31 |         flipper_length_mm = st.sidebar.slider('Flipper length (mm)', 172.0,231.0,201.0)
32 |         body_mass_g = st.sidebar.slider('Body mass (g)', 2700.0,6300.0,4207.0)
33 |         data = {'island': island,
34 |                 'bill_length_mm': bill_length_mm,
35 |                 'bill_depth_mm': bill_depth_mm,
36 |                 'flipper_length_mm': flipper_length_mm,
37 |                 'body_mass_g': body_mass_g,
38 |                 'sex': sex}
39 |         features = pd.DataFrame(data, index=[0])
40 |         return features
41 |     input_df = user_input_features()
42 | 
43 | # Combines user input features with entire penguins dataset
44 | # This will be useful for the encoding phase
45 | penguins_raw = pd.read_csv('https://raw.githubusercontent.com/dataprofessor/data/master/penguins_cleaned.csv')
46 | penguins = penguins_raw.drop(columns=['species'], axis=1)
47 | df = pd.concat([input_df,penguins],axis=0)
48 | 
49 | # Encoding of ordinal features
50 | # https://www.kaggle.com/pratik1120/penguin-dataset-eda-classification-and-clustering
51 | encode = ['sex','island']
52 | for col in encode:
53 |     dummy = pd.get_dummies(df[col], prefix=col)
54 |     df = pd.concat([df,dummy], axis=1)
55 |     del df[col]
56 | df = df[:1] # Selects only the first row (the user input data)
57 | 
58 | # Displays the user input features
59 | st.subheader('User Input features')
60 | 
61 | if uploaded_file is not None:
62 |     st.write(df)
63 | else:
64 |     st.write('Awaiting CSV file to be uploaded. Currently using example input parameters (shown below).')
65 |     st.write(df)
66 | 
67 | # Reads in saved classification model
68 | load_clf = pickle.load(open('penguins_clf.pkl', 'rb'))
69 | 
70 | # Apply model to make predictions
71 | prediction = load_clf.predict(df)
72 | prediction_proba = load_clf.predict_proba(df)
73 | 
74 | 
75 | st.subheader('Prediction')
76 | penguins_species = np.array(['Adelie','Chinstrap','Gentoo'])
77 | st.write(penguins_species[prediction])
78 | 
79 | st.subheader('Prediction Probability')
80 | st.write(prediction_proba)
81 | 


--------------------------------------------------------------------------------
/penguins_clf.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dataprofessor/penguins-heroku/ee13a9b77bf3d5fddcff81860bc94b787037c32f/penguins_clf.pkl


--------------------------------------------------------------------------------
/penguins_example.csv:
--------------------------------------------------------------------------------
1 | island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
2 | Biscoe,43.9,17.2,201.0,4207.0,male
3 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | streamlit==0.61.0
2 | pandas==0.25.3
3 | numpy==1.19
4 | scikit-learn==0.22.1
5 | 


--------------------------------------------------------------------------------
/runtime.txt:
--------------------------------------------------------------------------------
1 | python-3.7.9
2 | 


--------------------------------------------------------------------------------
/setup.sh:
--------------------------------------------------------------------------------
 1 | mkdir -p ~/.streamlit/
 2 | 
 3 | echo "\
 4 | [server]\n\
 5 | port = $PORT\n\
 6 | enableCORS = false\n\
 7 | headless = true\n\
 8 | \n\
 9 | " > ~/.streamlit/config.toml
10 | 


--------------------------------------------------------------------------------