├── 32_authentication_basics
    ├── .gitignore
    ├── starter.py
    ├── username_starter.py
    ├── main.py
    └── username.py
├── 04_basic_streamlit_app
    └── main.py
├── 11_layout
    ├── data
    │   └── sample.csv
    └── main.py
├── 08_input_widgets
    ├── data
    │   └── sample.csv
    └── main.py
├── 07_charting_elements
    ├── data
    │   ├── sample.csv
    │   └── sample_map.csv
    └── main.py
├── 09_input_widgets_part2
    ├── data
    │   └── sample.csv
    └── main.py
├── 06_data_display_elements
    ├── data
    │   └── sample.csv
    └── main.py
├── 25_state_capstone
    ├── model
    │   └── model.joblib
    ├── model.py
    ├── starter.py
    ├── main.py
    └── model.ipynb
├── 19_caching_capstone2
    ├── model
    │   └── pipe.joblib
    ├── model_starter.py
    ├── model.py
    ├── starter.py
    └── main.py
├── 39_connect_data_capstone
    ├── data
    │   ├── car_parts_monthly_sales.xlsx
    │   └── car_parts_monthly_sales.csv
    ├── starter.py
    └── main.py
├── 35_authentication_capstone
    ├── data
    │   └── segmentation data legend.xlsx
    ├── config.yaml
    ├── starter.py
    └── main.py
├── 37_streamlit_connect_database
    ├── data
    │   ├── car_parts_monthly_sales.xlsx
    │   └── car_parts_monthly_sales.csv
    ├── starter.py
    └── main.py
├── 29_multipage_refactor
    ├── pages
    │   ├── 2_page_2.py
    │   └── 1_page_1.py
    ├── starter.py
    └── home.py
├── 27_multipage
    ├── pages
    │   ├── 2_page_2.py
    │   └── 1_page_1.py
    ├── starter.py
    └── home.py
├── 15_caching_basics
    ├── starter.py
    └── main.py
├── 21_state_basics
    ├── starter.py
    └── main.py
├── 34_streamlit_authenticator
    ├── config.yaml
    └── main.py
├── 23_state_advanced
    ├── starter.py
    └── main.py
├── 05_text_elements
    └── main.py
├── 30_multipage_capstone
    ├── home_starter.py
    ├── home.py
    └── pages
    │   ├── starter.py
    │   └── experiment.py
├── 10_forms
    └── main.py
├── 12_dashboard_capstone
    ├── starter.py
    ├── main.py
    └── data
    │   └── quarterly_canada_population.csv
├── 24_state_exercise
    ├── starter.py
    └── main.py
├── 38_streamlit_api_calls
    ├── starter.py
    └── main.py
├── 18_caching_capstone
    ├── starter.py
    └── main.py
└── 16_caching_refactor
    ├── old.py
    ├── main.py
    └── data
        └── quarterly_canada_population.csv


/32_authentication_basics/.gitignore:
--------------------------------------------------------------------------------
1 | secrets.toml


--------------------------------------------------------------------------------
/04_basic_streamlit_app/main.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | 
3 | st.text("Hello Streamlit and world")


--------------------------------------------------------------------------------
/11_layout/data/sample.csv:
--------------------------------------------------------------------------------
1 | year,col1,col2,col3
2 | 2018,10,15,20
3 | 2019,12,18,22
4 | 2020,14,20,25
5 | 2021,16,22,28
6 | 2022,18,25,30
7 | 


--------------------------------------------------------------------------------
/08_input_widgets/data/sample.csv:
--------------------------------------------------------------------------------
1 | year,col1,col2,col3
2 | 2018,10,15,20
3 | 2019,12,18,22
4 | 2020,14,20,25
5 | 2021,16,22,28
6 | 2022,18,25,30
7 | 


--------------------------------------------------------------------------------
/07_charting_elements/data/sample.csv:
--------------------------------------------------------------------------------
1 | year,col1,col2,col3
2 | 2018,10,15,20
3 | 2019,12,18,22
4 | 2020,14,20,25
5 | 2021,16,22,28
6 | 2022,18,25,30
7 | 


--------------------------------------------------------------------------------
/09_input_widgets_part2/data/sample.csv:
--------------------------------------------------------------------------------
1 | year,col1,col2,col3
2 | 2018,10,15,20
3 | 2019,12,18,22
4 | 2020,14,20,25
5 | 2021,16,22,28
6 | 2022,18,25,30
7 | 


--------------------------------------------------------------------------------
/06_data_display_elements/data/sample.csv:
--------------------------------------------------------------------------------
1 | year,col1,col2,col3
2 | 2018,10,15,20
3 | 2019,12,18,22
4 | 2020,14,20,25
5 | 2021,16,22,28
6 | 2022,18,25,30
7 | 


--------------------------------------------------------------------------------
/25_state_capstone/model/model.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marcopeix/MachineLearningModelDeploymentwithStreamlit/HEAD/25_state_capstone/model/model.joblib


--------------------------------------------------------------------------------
/19_caching_capstone2/model/pipe.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marcopeix/MachineLearningModelDeploymentwithStreamlit/HEAD/19_caching_capstone2/model/pipe.joblib


--------------------------------------------------------------------------------
/07_charting_elements/data/sample_map.csv:
--------------------------------------------------------------------------------
1 | latitude,longitude
2 | 43.6532,-79.3832
3 | 49.2827,-123.1207
4 | 51.0447,-114.0719
5 | 45.4215,-75.6981
6 | 53.5444,-113.4909
7 | 


--------------------------------------------------------------------------------
/39_connect_data_capstone/data/car_parts_monthly_sales.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marcopeix/MachineLearningModelDeploymentwithStreamlit/HEAD/39_connect_data_capstone/data/car_parts_monthly_sales.xlsx


--------------------------------------------------------------------------------
/35_authentication_capstone/data/segmentation data legend.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marcopeix/MachineLearningModelDeploymentwithStreamlit/HEAD/35_authentication_capstone/data/segmentation%20data%20legend.xlsx


--------------------------------------------------------------------------------
/37_streamlit_connect_database/data/car_parts_monthly_sales.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marcopeix/MachineLearningModelDeploymentwithStreamlit/HEAD/37_streamlit_connect_database/data/car_parts_monthly_sales.xlsx


--------------------------------------------------------------------------------
/29_multipage_refactor/pages/2_page_2.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | st.title("Second page")
 4 | 
 5 | product = st.session_state["product"]
 6 | 
 7 | st.subheader(f"The product is {product}🎉")
 8 | 
 9 | st.write(st.session_state)
10 | 


--------------------------------------------------------------------------------
/37_streamlit_connect_database/starter.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import pandas as pd
 3 | from supabase import create_client, Client
 4 | 
 5 | # Initialize connection to db
 6 | 
 7 | # Query the db
 8 | 
 9 | 
10 | st.title("Query a database")
11 | 


--------------------------------------------------------------------------------
/27_multipage/pages/2_page_2.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | st.title("Second page")
 4 | 
 5 | st.write(st.session_state['df'])
 6 | 
 7 | product = st.session_state["product"]
 8 | 
 9 | st.subheader(f"The product is {product}🎉")
10 | 
11 | st.write(st.session_state)
12 | 


--------------------------------------------------------------------------------
/06_data_display_elements/main.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import pandas as pd
 3 | 
 4 | df = pd.read_csv("data/sample.csv", dtype="int")
 5 | 
 6 | st.dataframe(df)
 7 | st.write(df)
 8 | 
 9 | st.table(df)
10 | 
11 | st.metric(label="Expenses", value=900, delta=20, delta_color="inverse")


--------------------------------------------------------------------------------
/15_caching_basics/starter.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import time
 3 | import numpy as np
 4 | from sklearn.linear_model import LinearRegression
 5 | 
 6 | st.title("Caching demonstration")
 7 | 
 8 | st.button('Test cache')
 9 | 
10 | st.subheader("st.cache_data")
11 | 
12 | 
13 | st.subheader("st.cache_resource")
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/29_multipage_refactor/pages/1_page_1.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | st.title("First page")
 4 | 
 5 | x1 = st.session_state['x1']
 6 | x2 = st.session_state['x2']
 7 | 
 8 | st.subheader(f"You chose to multiply {x1} with {x2} 👍")
 9 | st.markdown("""#### Check the second page for the result!""")
10 | 
11 | st.write(st.session_state)
12 | 


--------------------------------------------------------------------------------
/27_multipage/pages/1_page_1.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | st.title("First page")
 4 | 
 5 | st.write(st.session_state['df'])
 6 | 
 7 | x1 = st.session_state['x1']
 8 | x2 = st.session_state['x2']
 9 | 
10 | st.subheader(f"You chose to multiply {x1} with {x2} 👍")
11 | st.markdown("""#### Check the second page for the result!""")
12 | 
13 | st.write(st.session_state)
14 | 


--------------------------------------------------------------------------------
/21_state_basics/starter.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | st.title("Stateful apps")
 4 | 
 5 | st.write("Here is the session state:")
 6 | st.write(st.session_state)
 7 | st.button("Update state")
 8 | 
 9 | # Set the value using the key-value syntax
10 | 
11 | 
12 | # Set the value using the attribute syntax
13 | 
14 | 
15 | # Read value from session state
16 | 
17 | 
18 | # Update values in state
19 | 
20 | 
21 | # Delete item in state
22 | 


--------------------------------------------------------------------------------
/35_authentication_capstone/config.yaml:
--------------------------------------------------------------------------------
 1 | credentials:
 2 |   usernames:
 3 |     marketing:
 4 |       email: marketing@mail.com
 5 |       name: Marketing Team
 6 |       password: "$2b$12$XY6Vau7F7YGRIuIBvB2Zf.1LY/KBHRdIhcp8Xlq/ydo3HJFbpQWnK"
 7 |     datascience:
 8 |       email: datascience@mail.com
 9 |       name: Data Science Team
10 |       password: "$2b$12$cnEz04fT84pF/7bFFEPyaObRuUDBTG80Er0A5XKhFkPcI.tewtEoi"
11 | cookie:
12 |   expiry_days: 30
13 |   key: cookie_signature
14 |   name: cookie_name
15 | 
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/34_streamlit_authenticator/config.yaml:
--------------------------------------------------------------------------------
 1 | cookie:
 2 |   expiry_days: 30
 3 |   key: cookie_signature_key
 4 |   name: cookie_name
 5 | credentials:
 6 |   usernames:
 7 |     jsmith: 
 8 |       email: jsmith@gmail.com
 9 |       name: Jane Smith
10 |       password: $2b$12$NKNwNMANtBgr2qD8qDrGJeKRAK6JzyCem1HWWX1tjouLbxKZMoVB6
11 |     mpolo:
12 |       email: mpolo@gmail.com
13 |       name: marcopolo
14 |       password: $2b$12$Qs7J3IV0qRYLOtPCi6WA/utBa8QNLHioYjmWfHPnBXudJLpdJTbJm
15 | preauthorized:
16 |   emails:
17 |   - admin@gmail.com
18 | 


--------------------------------------------------------------------------------
/27_multipage/starter.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | # Set page configuration
 4 | 
 5 | # Initialize a sample df and store it in the session state
 6 | 
 7 | # Initialize state with the key "product" set to 0
 8 | 
 9 | # Define a function to multiply two numbers
10 | 
11 | if __name__ == "__main__":
12 | 
13 |     st.title("Homepage")
14 | 
15 |     col1, col2 = st.columns(2)
16 | 
17 |     with col1:
18 |         x1 = st.number_input("Pick a number", 0, 10, key="x1")
19 |     with col2:
20 |         x2 = st.number_input("Pick another number", 0, 10, key="x2")
21 | 
22 |     st.button("Multiply!", type="primary")
23 | 
24 | st.write(st.session_state)
25 | 


--------------------------------------------------------------------------------
/23_state_advanced/starter.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | from datetime import datetime, timedelta
 3 | 
 4 | 
 5 | st.title("Advanced State Management")
 6 | 
 7 | # Store widget value in session state
 8 | st.subheader("Store widget value in session state")
 9 | 
10 | 
11 | # Initialize widget value with session state
12 | 
13 | st.subheader("Initialize widget value with session state")
14 | 
15 | 
16 | # Callbacks
17 | st.subheader("Use callbacks")
18 | 
19 | st.markdown("#### Select your time range")
20 | 
21 | st.radio("Select a range", ["7 days", "28 days", "custom"], horizontal=True)
22 | 
23 | col1, col2, col3 = st.columns(3)
24 | 
25 | col1.date_input("Start date")
26 | col2.date_input("End date")
27 | 


--------------------------------------------------------------------------------
/19_caching_capstone2/model_starter.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from joblib import dump
 3 | from sklearn.pipeline import Pipeline
 4 | from sklearn.preprocessing import OrdinalEncoder
 5 | from sklearn.ensemble import GradientBoostingClassifier
 6 | 
 7 | URL = "https://raw.githubusercontent.com/marcopeix/MachineLearningModelDeploymentwithStreamlit/master/17_caching_capstone/data/mushrooms.csv"
 8 | COLS = ['class', 'odor', 'gill-size', 'gill-color', 'stalk-surface-above-ring',
 9 |        'stalk-surface-below-ring', 'stalk-color-above-ring',
10 |        'stalk-color-below-ring', 'ring-type', 'spore-print-color']
11 | 
12 | # Read data
13 | 
14 | # Create pipeline
15 | 
16 | # Fit the pipeline
17 | 
18 | # Save the pipeline


--------------------------------------------------------------------------------
/07_charting_elements/main.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import pandas as pd
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | df = pd.read_csv("data/sample.csv")
 6 | 
 7 | # Streamlit line plot
 8 | st.line_chart(df, x="year", y=["col1", "col2", "col3"])
 9 | 
10 | # Streamlit area chart
11 | st.area_chart(df, x="year", y=["col1", "col2"])
12 | 
13 | # Streamlit bar chart
14 | st.bar_chart(df, x="year", y=["col1", "col2", "col3"])
15 | 
16 | # Streamlit map
17 | geo_df = pd.read_csv("data/sample_map.csv")
18 | 
19 | st.map(geo_df)
20 | 
21 | # Matplotlib
22 | 
23 | fig, ax = plt.subplots()
24 | ax.plot(df.year, df.col1)
25 | ax.set_title("My figure title")
26 | ax.set_xlabel("x label")
27 | ax.set_ylabel("y label")
28 | fig.autofmt_xdate()
29 | 
30 | st.pyplot(fig)
31 | 


--------------------------------------------------------------------------------
/05_text_elements/main.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | # Give your app a title
 4 | st.title("Your title")
 5 | 
 6 | # Header
 7 | st.header("Main header")
 8 | 
 9 | # Subheader
10 | st.subheader("This is a subheader")
11 | 
12 | # Markdown
13 | st.markdown("This is markdown **text**")
14 | st.markdown("# Header1")
15 | st.markdown("## Header 2")
16 | st.markdown("### Header 3")
17 | 
18 | # Caption
19 | st.caption("This is a caption")
20 | 
21 | # Code block
22 | st.code("""import pandas as pd
23 | pd.read_csv(my_csv_file)
24 | """)
25 | 
26 | # Preformatted text
27 | st.text("Some text")
28 | 
29 | # LaTeX
30 | st.latex("x = 2^2")
31 | 
32 | # Divider
33 | st.text('Text above divider')
34 | st.divider()
35 | st.text('Text below divider')
36 | 
37 | #st.write
38 | st.write('Some text')


--------------------------------------------------------------------------------
/21_state_basics/main.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | st.title("Stateful apps")
 4 | 
 5 | st.write("Here is the session state:")
 6 | st.write(st.session_state)
 7 | st.button("Update state")
 8 | 
 9 | # Set the value using the key-value syntax
10 | 
11 | if "key" not in st.session_state:
12 |     st.session_state['key'] = "value"
13 | 
14 | # Set the value using the attribute syntax
15 | 
16 | if "attribute" not in st.session_state:
17 |     st.session_state.attribute = "another value"
18 | 
19 | # Read value from session state
20 | 
21 | st.write(f"Reading with key-value syntax: {st.session_state['key']}")
22 | st.write(f"Reading with attribute syntax: {st.session_state.attribute}")
23 | 
24 | # Update values in state
25 | 
26 | st.session_state['key'] = "new value"
27 | st.session_state.attribute = "updated_value"
28 | 
29 | # Delete item in state
30 | 
31 | del st.session_state['key']


--------------------------------------------------------------------------------
/30_multipage_capstone/home_starter.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import pandas as pd
 3 | 
 4 | # Set page config:
 5 | # The title is "Homepage"
 6 | # Choose an icon for the page
 7 | # The layout is centered
 8 | # The sidebar is set to "auto"
 9 | 
10 | # Initialize the state with the keys: [model, num_features, score]
11 | # This is where we store the info to display the ranking
12 | 
13 | # Write a function to display a DataFrame ranked in descending order of F1-Score
14 | # The DataFrame has 3 columns: Model, Number of Features, F1-Score
15 | 
16 | if __name__ == "__main__":
17 |     st.title("🏆 Model ranking")
18 | 
19 |     if len(st.session_state['model']) == 0:
20 |         st.subheader("Train a model in the next page to see the results 👉")
21 |     else:
22 |         # The function that displays the DataFrame runs here
23 |         pass
24 |     
25 |     st.write(st.session_state)


--------------------------------------------------------------------------------
/32_authentication_basics/starter.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | # Initialize state to:
 4 | # Store the password
 5 | # Check if the password is correct
 6 | # Check if the form is submitted
 7 | 
 8 | # Function to check if the password is correct
 9 | def check_password():
10 |     pass
11 | 
12 | # Function to display the login form
13 | def display_login_form():
14 |     pass
15 | 
16 | if (st.session_state['pwd_correct'] == False and st.session_state['form_submitted'] == False):
17 |     display_login_form()
18 | elif (st.session_state['pwd_correct'] == False and st.session_state["form_submitted"] == True):
19 |     display_login_form()
20 |     st.error("Invalid password")
21 | elif (st.session_state['pwd_correct'] == True and st.session_state["form_submitted"] == True):
22 |     st.write("User logged in")
23 | else:
24 |     display_login_form()
25 | 
26 | st.write(st.session_state)
27 | 
28 | 


--------------------------------------------------------------------------------
/15_caching_basics/main.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import time
 3 | import numpy as np
 4 | from sklearn.linear_model import LinearRegression
 5 | 
 6 | st.title("Caching demonstration")
 7 | 
 8 | st.button('Test cache')
 9 | 
10 | st.subheader("st.cache_data")
11 | 
12 | @st.cache_data
13 | def cache_this_function():
14 |     time.sleep(2)
15 |     out = "I'm done running"
16 |     return out
17 | 
18 | out = cache_this_function()
19 | st.write(out)
20 | 
21 | st.subheader("st.cache_resource")
22 | 
23 | @st.cache_resource
24 | def create_simple_linear_regression():
25 |     time.sleep(2)
26 |     X = np.array([1,2,3,4,5,6,7]).reshape(-1,1)
27 |     y = np.array([1,2,3,4,5,6,7])
28 | 
29 |     model = LinearRegression().fit(X, y)
30 | 
31 |     return model
32 | 
33 | lr = create_simple_linear_regression()
34 | X_pred = np.array([8]).reshape(-1,1)
35 | pred = lr.predict(X_pred)
36 | 
37 | st.write(f"The prediction is: {pred[0]}")
38 | 
39 | 


--------------------------------------------------------------------------------
/37_streamlit_connect_database/main.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import pandas as pd
 3 | from supabase import create_client, Client
 4 | 
 5 | # Initialize connection to db
 6 | 
 7 | 
 8 | @st.cache_resource
 9 | def init_connection():
10 |     url: str = st.secrets['supabase_url']
11 |     key: str = st.secrets['supabase_key']
12 | 
13 |     client: Client = create_client(url, key)
14 | 
15 |     return client
16 | 
17 | 
18 | supabase = init_connection()
19 | 
20 | # Query the db
21 | 
22 | 
23 | @st.cache_data(ttl=600)  # cache clears after 10 minutes
24 | def run_query():
25 |     # Return all data
26 |     return supabase.table('car_parts_monthly_sales').select("*").execute()
27 | 
28 |     # Filter data
29 |     # return supabase.table('car_parts_monthly_sales').select("*").eq("parts_id", 2674).execute()
30 | 
31 | 
32 | st.title("Query a database")
33 | rows = run_query()
34 | 
35 | # Store in dataframe
36 | df = pd.json_normalize(rows.data)
37 | st.write(df)
38 | 


--------------------------------------------------------------------------------
/11_layout/main.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import pandas as pd
 3 | 
 4 | # Sidebar
 5 | with st.sidebar:
 6 |     st.write("Text in the sidebar")
 7 | 
 8 | # Columns
 9 | col1, col2, col3 = st.columns(3)
10 | 
11 | col1.write("Text in a column")
12 | 
13 | slider = col2.slider("Choose a number", min_value=0, max_value=10)
14 | 
15 | col3.write(slider)
16 | 
17 | # Tabs
18 | df = pd.read_csv("data/sample.csv")
19 | 
20 | tab1, tab2 = st.tabs(["Line plot", "Bar plot"])
21 | 
22 | with tab1:
23 |     tab1.write("A line plot")
24 |     st.line_chart(df, x="year", y=["col1", "col2", "col3"])
25 | 
26 | with tab2:
27 |     tab2.write("A bar plot")
28 |     st.bar_chart(df, x="year", y=["col1", "col2", "col3"])
29 | 
30 | # Expander (collapsible element)
31 | with st.expander("Click to expand"):
32 |     st.write("I am text that you only see when you expand")
33 | 
34 | # Container
35 | 
36 | with st.container():
37 |     st.write("This is inside the container")
38 | 
39 | st.write("This is outside the container")
40 | 


--------------------------------------------------------------------------------
/19_caching_capstone2/model.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from joblib import dump
 3 | from sklearn.pipeline import Pipeline
 4 | from sklearn.preprocessing import OrdinalEncoder
 5 | from sklearn.ensemble import GradientBoostingClassifier
 6 | 
 7 | URL = "https://raw.githubusercontent.com/marcopeix/MachineLearningModelDeploymentwithStreamlit/master/17_caching_capstone/data/mushrooms.csv"
 8 | COLS = ['class', 'odor', 'gill-size', 'gill-color', 'stalk-surface-above-ring',
 9 |        'stalk-surface-below-ring', 'stalk-color-above-ring',
10 |        'stalk-color-below-ring', 'ring-type', 'spore-print-color']
11 | 
12 | # Read data
13 | df = pd.read_csv(URL)
14 | df = df[COLS]
15 | 
16 | # Create pipeline
17 | pipe = Pipeline([
18 |     ('encoder', OrdinalEncoder()),
19 |     ('gbc', GradientBoostingClassifier(max_depth=5, random_state=42))
20 | ])
21 | 
22 | # Fit the pipeline
23 | X = df.drop(['class'], axis=1)
24 | y = df['class']
25 | 
26 | pipe.fit(X,y)
27 | 
28 | # Save the pipeline
29 | dump(pipe, 'model/pipe.joblib')


--------------------------------------------------------------------------------
/27_multipage/home.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import pandas as pd
 3 | 
 4 | st.set_page_config(
 5 |     page_title="Homepage",
 6 |     page_icon="🏡",
 7 |     layout="centered",
 8 |     initial_sidebar_state="expanded"
 9 | )
10 | 
11 | df = pd.DataFrame({"col1": [1, 2, 3],
12 |                    "col2": [4, 5, 6]})
13 | 
14 | if "df" not in st.session_state:
15 |     st.session_state['df'] = df
16 | 
17 | if "product" not in st.session_state:
18 |     st.session_state['product'] = 0
19 | 
20 | 
21 | def multiply(x1, x2):
22 |     st.session_state["product"] = x1 * x2
23 | 
24 | 
25 | if __name__ == "__main__":
26 | 
27 |     st.title("Homepage")
28 | 
29 |     col1, col2 = st.columns(2)
30 | 
31 |     with col1:
32 |         x1 = st.number_input("Pick a number", 0, 10, key="x1")
33 |     with col2:
34 |         x2 = st.number_input("Pick another number", 0, 10, key='x2')
35 | 
36 |     st.button("Multiply!", type="primary", on_click=multiply, args=((x1, x2)))
37 | 
38 |     st.write(st.session_state['df'])
39 |     st.write(st.session_state)
40 | 


--------------------------------------------------------------------------------
/10_forms/main.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | with st.form("form_key"):
 4 |     st.write("What would like to order")
 5 |     appetizer = st.selectbox("Appetizers", options=["choice1", "choice2", "choice3"])
 6 |     main = st.selectbox("Main course", options=["choice1", "choice2", "choice3"])
 7 |     dessert = st.selectbox("Dessert", options=["choice1", "choice2", "choice3"])
 8 | 
 9 |     wine = st.checkbox("Are you bringing wine?")
10 | 
11 |     visit_date = st.date_input("When are you coming?")
12 | 
13 |     visit_time = st.time_input("At what time are you coming?")
14 | 
15 |     allergies = st.text_area("Any allergies?", placeholder="Leave us a note for allergies")
16 | 
17 |     submit_btn = st.form_submit_button("Submit")
18 | 
19 | st.write(f"""Your order summary:
20 | 
21 | Appetizer: {appetizer}
22 | 
23 | Main course: {main}
24 | 
25 | Dessert: {dessert}
26 | 
27 | Are you bringing your own wine: {"yes" if wine else "no"}
28 | 
29 | Date of visit: {visit_date}
30 | 
31 | Time of visit: {visit_time}
32 | 
33 | Allergies: {allergies}
34 | """)


--------------------------------------------------------------------------------
/32_authentication_basics/username_starter.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | if all (key not in st.session_state.keys() for key in ('username', 'pwd', 'pwd_correct', 'form_submitted')):
 4 |     st.session_state['username'] = ""
 5 |     st.session_state['pwd'] = ""
 6 |     st.session_state['pwd_correct'] = False
 7 |     st.session_state['form_submitted'] = False
 8 | 
 9 | # Check if login credentials are correct
10 | def check_login():
11 |     pass
12 | 
13 | # Function to display the login form
14 | def display_login_form():
15 |     pass
16 | 
17 | if (st.session_state['pwd_correct'] == False and st.session_state['form_submitted'] == False):
18 |     display_login_form()
19 | elif (st.session_state['pwd_correct'] == False and st.session_state["form_submitted"] == True):
20 |     display_login_form()
21 |     st.error("Invalid user/password")
22 | elif (st.session_state['pwd_correct'] == True and st.session_state["form_submitted"] == True):
23 |     st.write("User logged in")
24 | else:
25 |     display_login_form()
26 | 
27 | st.write(st.session_state)
28 | 


--------------------------------------------------------------------------------
/29_multipage_refactor/starter.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | st.set_page_config(
 4 |     page_title="Homepage",
 5 |     page_icon="🏡",
 6 |     layout="centered",
 7 |     initial_sidebar_state="expanded"
 8 | )
 9 | 
10 | if "product" not in st.session_state:
11 |     st.session_state['product'] = 0
12 | 
13 | 
14 | def multiply(x1, x2):
15 |     st.session_state["product"] = x1 * x2
16 | 
17 | # Workaround from: https://stackoverflow.com/questions/74968179/session-state-is-reset-in-streamlit-multipage-app
18 | 
19 | # Function to keep a value (copy from temporary to permanent)
20 | 
21 | # Function to reassign value to temporary key (copy from permanent to temporary)
22 | 
23 | 
24 | if __name__ == "__main__":
25 | 
26 |     st.title("Homepage")
27 | 
28 |     col1, col2 = st.columns(2)
29 | 
30 |     with col1:
31 |         x1 = st.number_input("Pick a number", 0, 10, key="x1")
32 |     with col2:
33 |         x2 = st.number_input("Pick another number", 0, 10, key='x2')
34 | 
35 |     st.button("Multiply!", type="primary", on_click=multiply, args=((x1, x2)))
36 | 
37 | st.write(st.session_state)
38 | 


--------------------------------------------------------------------------------
/30_multipage_capstone/home.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import pandas as pd
 3 | 
 4 | st.set_page_config(
 5 |     page_title="Homepage",
 6 |     page_icon="🏡",
 7 |     layout="centered",
 8 |     initial_sidebar_state="auto"
 9 | )
10 | 
11 | if all(key not in st.session_state.keys() for key in ('model', 'num_features', 'score')):
12 |     st.session_state['model'] = []
13 |     st.session_state['num_features'] = []
14 |     st.session_state['score'] = []
15 | 
16 | def display_df():
17 |     df = pd.DataFrame({"Model": st.session_state['model'],
18 |                        "Number of features": st.session_state['num_features'],
19 |                        "F1-Score": st.session_state['score']})
20 |     
21 |     sorted_df = df.sort_values(by=['F1-Score'], ascending=False).reset_index(drop=True)
22 | 
23 |     st.write(sorted_df)
24 | 
25 | if __name__ == "__main__":
26 |     st.title("🏆 Model ranking")
27 | 
28 |     if len(st.session_state['model']) == 0:
29 |         st.subheader("Train a model in the next page to see the results 👉")
30 |     else:
31 |         display_df()
32 | 
33 |     st.write(st.session_state)


--------------------------------------------------------------------------------
/08_input_widgets/main.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import pandas as pd
 3 | 
 4 | 
 5 | # Buttons
 6 | primary_btn = st.button(label="Primary", type="primary")
 7 | secondary_btn = st.button(label="Secondary", type="secondary")
 8 | 
 9 | if primary_btn:
10 |     st.write("Hello from primary")
11 | 
12 | if secondary_btn:
13 |     st.write("Hello from secondary")
14 | 
15 | # Checkbox
16 | st.divider()
17 | 
18 | checkbox = st.checkbox("Remember me")
19 | 
20 | if checkbox:
21 |     st.write("I will remember you")
22 | else:
23 |     st.write("I will forget you")
24 | 
25 | # Radio buttons
26 | st.divider()
27 | 
28 | df = pd.read_csv("data/sample.csv")
29 | 
30 | radio = st.radio("Choose a column", options=df.columns[1:], index=1, horizontal=True)
31 | st.write(radio)
32 | 
33 | # Selectbox
34 | st.divider()
35 | 
36 | select = st.selectbox("Choose a column", options=df.columns[1:], index=0)
37 | st.write(select)
38 | 
39 | # Multiselect
40 | st.divider()
41 | 
42 | 
43 | # Slider
44 | st.divider()
45 | 
46 | 
47 | # Text input
48 | st.divider()
49 | 
50 | 
51 | # Number input
52 | st.divider()
53 | 
54 | 
55 | # Text area
56 | st.divider()
57 | 


--------------------------------------------------------------------------------
/12_dashboard_capstone/starter.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import pandas as pd
 3 | import numpy as np
 4 | import matplotlib.pyplot as plt
 5 | 
 6 | URL = "https://raw.githubusercontent.com/marcopeix/MachineLearningModelDeploymentwithStreamlit/master/12_dashboard_capstone/data/quarterly_canada_population.csv"
 7 | 
 8 | df = pd.read_csv(URL, dtype={'Quarter': str, 
 9 |                             'Canada': np.int32,
10 |                             'Newfoundland and Labrador': np.int32,
11 |                             'Prince Edward Island': np.int32,
12 |                             'Nova Scotia': np.int32,
13 |                             'New Brunswick': np.int32,
14 |                             'Quebec': np.int32,
15 |                             'Ontario': np.int32,
16 |                             'Manitoba': np.int32,
17 |                             'Saskatchewan': np.int32,
18 |                             'Alberta': np.int32,
19 |                             'British Columbia': np.int32,
20 |                             'Yukon': np.int32,
21 |                             'Northwest Territories': np.int32,
22 |                             'Nunavut': np.int32})
23 | 
24 |     


--------------------------------------------------------------------------------
/32_authentication_basics/main.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | if all (key not in st.session_state.keys() for key in ('pwd', 'pwd_correct', 'form_submitted')):
 4 |     st.session_state['pwd'] = ""
 5 |     st.session_state['pwd_correct'] = False
 6 |     st.session_state['form_submitted'] = False
 7 | 
 8 | def check_password():
 9 |     st.session_state['form_submitted'] = True
10 | 
11 |     if st.session_state['pwd'] == st.secrets['password']:
12 |         st.session_state['pwd_correct'] = True
13 |         st.session_state['pwd'] = ""
14 |     else:
15 |         st.session_state['pwd_correct'] = False
16 | 
17 | def display_login_form():
18 |     with st.form("login_form"):
19 |         st.text_input("Password", type="password", key="pwd")
20 | 
21 |         st.form_submit_button("Login", on_click=check_password)
22 | 
23 | if (st.session_state['pwd_correct'] == False and st.session_state['form_submitted'] == False):
24 |     display_login_form()
25 | elif (st.session_state['pwd_correct'] == False and st.session_state["form_submitted"] == True):
26 |     display_login_form()
27 |     st.error("Invalid password")
28 | elif (st.session_state['pwd_correct'] == True and st.session_state["form_submitted"] == True):
29 |     st.write("User logged in")
30 | else:
31 |     display_login_form()
32 | 
33 | st.write(st.session_state)
34 | 
35 | 


--------------------------------------------------------------------------------
/24_state_exercise/starter.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | st.title("Exercise: State Management")
 4 | 
 5 | st.subheader("Temperature conversion")
 6 | 
 7 | # Initialize state with temperatures.
 8 | # Use the freezing point of water
 9 | 
10 | # Write a callback to convert the temperature in Celsius
11 | # to Fahrenheit and Kelvin. Change the values in the state
12 | # appropriately.
13 | 
14 | # Same thing, but converting from Fahrenheit to Celsius
15 | # and Kelvin.
16 | 
17 | # Same thing, but converting from Kelvin to Celsius
18 | # and Fahrenheit.
19 | 
20 | # Write a callback that adds whatever number the user
21 | # inputs to the Celsius box. Use args.
22 | 
23 | # Write a callback that sets the temperatures depending on
24 | # which button the user clicks. Use kwargs.
25 | 
26 | col1, col2, col3 = st.columns(3)  # first row: one temperature input per column
27 | 
28 | # Hook up the first 3 callbacks to the input widgets (one callback per widget)
29 | col1.number_input("Celsius", step=0.01, key="celsius")
30 | col2.number_input("Farenheit", step=0.01, key="farenheit")
31 | col3.number_input("Kelvin", step=0.01, key="kelvin")
32 | 
33 | # Hook up the 4th callback to the button. Use args.
34 | col1, _, _ = st.columns(3)  # second row: only its first column is used
35 | num = col1.number_input("Add to Celsius", step=1)
36 | col1.button("Add", type="primary")
37 | 
38 | col1, col2, col3 = st.columns(3)  # third row: one preset button per column
39 | 
40 | # Hook up the last callback to each button. Use kwargs.
41 | col1.button('🧊 Freezing point of water')
42 | col2.button('🔥 Boiling point of water')
43 | col3.button('🥶 Absolute zero')
44 | 
45 | st.write(st.session_state)  # dump the current state on the page


--------------------------------------------------------------------------------
/29_multipage_refactor/home.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | st.set_page_config(
 4 |     page_title="Homepage",
 5 |     page_icon="🏡",
 6 |     layout="centered",
 7 |     initial_sidebar_state="expanded"
 8 | )
 9 | 
10 | if all(key not in st.session_state.keys() for key in ('product', 'x1', 'x2')):
11 |     st.session_state['x1'] = 0
12 |     st.session_state['x2'] = 0
13 |     st.session_state['product'] = 0
14 | 
15 | 
16 | def multiply(x1, x2):
17 |     st.session_state["product"] = x1 * x2
18 | 
19 | # Workaround from: https://stackoverflow.com/questions/74968179/session-state-is-reset-in-streamlit-multipage-app
20 | 
21 | 
22 | def keep(key):
23 |     # Copy from temporary widget key to permanent key
24 |     st.session_state[key] = st.session_state[f"_{key}"]
25 | 
26 | 
27 | def unkeep(key):
28 |     # Copy from permanent key to temporary widget key
29 |     st.session_state[f"_{key}"] = st.session_state[key]
30 | 
31 | 
32 | if __name__ == "__main__":
33 | 
34 |     st.title("Homepage")
35 | 
36 |     col1, col2 = st.columns(2)
37 | 
38 |     with col1:
39 |         unkeep('x1')
40 |         x1 = st.number_input("Pick a number", 0, 10,
41 |                              key="_x1", on_change=keep, args=(("x1",)))
42 |     with col2:
43 |         unkeep('x2')
44 |         x2 = st.number_input("Pick another number", 0, 10,
45 |                              key='_x2', on_change=keep, args=(("x2",)))
46 | 
47 |     st.button("Multiply!", type="primary", on_click=multiply, args=((x1, x2)))
48 | 
49 | st.write(st.session_state)
50 | 


--------------------------------------------------------------------------------
/25_state_capstone/model.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from joblib import dump
 3 | from sklearn.model_selection import train_test_split
 4 | from sklearn.compose import ColumnTransformer
 5 | from sklearn.pipeline import Pipeline
 6 | from sklearn.preprocessing import OrdinalEncoder
 7 | from sklearn.feature_selection import SelectPercentile, mutual_info_regression
 8 | 
 9 | df = pd.read_csv('data/used_car_canada_clean.csv')
10 | 
11 | # Train/test split: 'price' is the target, every other column is a feature
12 | X = df.drop(['price'], axis=1)
13 | y = df['price']
14 | # Stratified on (make, model), 20% of the rows held out, fixed seed for reproducibility
15 | X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=df[['make', 'model']], test_size=0.2, shuffle=True, random_state=42)
16 | 
17 | # Preprocessing pipeline
18 | cat_index = [3,4,5,6,7,8,10,11]  # column positions in X sent to the encoder (presumably the categorical ones; verify against the CSV)
19 | 
20 | cat_features_transformer = Pipeline(
21 |     steps=[
22 |         ("encoder", OrdinalEncoder()),
23 |         ("selector", SelectPercentile(mutual_info_regression, percentile=50))
24 |     ]
25 | )
26 | # No `remainder` is given: ColumnTransformer's default drops the columns not listed in cat_index
27 | preprocessor = ColumnTransformer(
28 |     transformers=[
29 |         ("cat", cat_features_transformer, cat_index)
30 |     ]
31 | )
32 | 
33 | # Modeling pipeline: preprocessing followed by a gradient-boosted regressor
34 | 
35 | from sklearn.ensemble import GradientBoostingRegressor
36 | 
37 | model = Pipeline(
38 |     steps=[
39 |         ("preprocessor", preprocessor),
40 |         ("regressor", GradientBoostingRegressor(random_state=42))
41 |     ]
42 | )
43 | 
44 | # Fit the model on the training split
45 | 
46 | model.fit(X_train, y_train)
47 | 
48 | # Score the model (R2) on the held-out test split
49 | 
50 | print(model.score(X_test, y_test))
51 | 
52 | # Save the model
53 | # NOTE(review): nothing here creates the 'model' directory; presumably it must already exist
54 | dump(model, 'model/model.joblib')


--------------------------------------------------------------------------------
/38_streamlit_api_calls/starter.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import pandas as pd
 3 | import requests
 4 | 
 5 | 
 6 | # Function to make a GET request
 7 | 
 8 | 
 9 | @st.cache_data(show_spinner="Searching...")
10 | def search_gutenberg(author, title):
11 |     # Define your base url
12 | 
13 |     # Replace whitespace with %20 as per the documentation
14 |     # for the search parameters
15 | 
16 |     # Make a url from the search parameters
17 | 
18 |     # Make the final search url (combine base with params url)
19 | 
20 |     try:
21 |         pass  # placeholder that keeps the empty try block valid; replace it with the steps below
22 |         # Make a get request
23 | 
24 |         # Get the JSON response
25 | 
26 |         # If your JSON has no results, return False
27 |         # Else, return the JSON response
28 |     except:
29 |         return False  # any exception raised in the try block makes the function return False
30 | 
31 | # Function to format the JSON response as a DataFrame
32 | 
33 | 
34 | @st.cache_data
35 | def format_json_res(json_res):
36 |     cols = ['Id', 'Author', 'Title', 'Language', 'Link']  # column order of the result table
37 | 
38 |     rows = []  # to be filled with one list per book, in the same order as cols
39 | 
40 |     try:
41 |         # For loop to access all data in the response
42 | 
43 |         df = pd.DataFrame(rows, columns=cols)
44 | 
45 |         return df
46 |     except:
47 |         st.error("Error while parsing data")  # shown on any exception; the function then returns None
48 | 
49 | 
50 | if __name__ == "__main__":
51 |     st.title("📚 Search Project Gutenberg")
52 |     with st.form("search-form"):
53 |         col1, col2 = st.columns(2)
54 | 
55 |         with col1:
56 |             author = st.text_input("Author")
57 |         with col2:
58 |             title = st.text_input("Title")
59 | 
60 |         search = st.form_submit_button("Search", type='primary')
61 | 


--------------------------------------------------------------------------------
/32_authentication_basics/username.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | if all (key not in st.session_state.keys() for key in ('username', 'pwd', 'pwd_correct', 'form_submitted')):
 4 |     st.session_state['username'] = ""
 5 |     st.session_state['pwd'] = ""
 6 |     st.session_state['pwd_correct'] = False
 7 |     st.session_state['form_submitted'] = False
 8 | 
 9 | def check_login():
10 |     st.session_state['form_submitted'] = True
11 | 
12 |     if (
13 |         st.session_state["username"] in st.secrets["passwords"]
14 |         and 
15 |         st.session_state["pwd"] == st.secrets["passwords"][st.session_state["username"]]
16 |     ):
17 |         st.session_state['pwd_correct'] = True
18 |         st.session_state['pwd'] = ""
19 |         st.session_state['username'] = ""
20 |     else:
21 |         st.session_state['pwd_correct'] = False
22 | 
23 | def display_login_form():
24 |     with st.form("login_form"):
25 |         st.text_input("Username", key="username")
26 |         st.text_input("Password", type="password", key="pwd")
27 | 
28 |         st.form_submit_button("Login", on_click=check_login)
29 | 
30 | if (st.session_state['pwd_correct'] == False and st.session_state['form_submitted'] == False):
31 |     display_login_form()
32 | elif (st.session_state['pwd_correct'] == False and st.session_state["form_submitted"] == True):
33 |     display_login_form()
34 |     st.error("Invalid user/password")
35 | elif (st.session_state['pwd_correct'] == True and st.session_state["form_submitted"] == True):
36 |     st.write("User logged in")
37 | else:
38 |     display_login_form()
39 | 
40 | st.write(st.session_state)
41 | 


--------------------------------------------------------------------------------
/09_input_widgets_part2/main.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import pandas as pd
 3 | 
 4 | 
 5 | # Buttons: st.button returns True only on the rerun triggered by its click
 6 | primary_btn = st.button(label="Primary", type="primary")
 7 | secondary_btn = st.button(label="Secondary", type="secondary")
 8 | 
 9 | if primary_btn:
10 |     st.write("Hello from primary")
11 | 
12 | if secondary_btn:
13 |     st.write("Hello from secondary")
14 | 
15 | # Checkbox: returns its current checked state
16 | st.divider()
17 | 
18 | checkbox = st.checkbox("Remember me")
19 | 
20 | if checkbox:
21 |     st.write("I will remember you")
22 | else:
23 |     st.write("I will forget you")
24 | 
25 | # Radio buttons: one option per DataFrame column, skipping the first column
26 | st.divider()
27 | 
28 | df = pd.read_csv("data/sample.csv")
29 | 
30 | radio = st.radio("Choose a column", options=df.columns[1:], index=1, horizontal=True)
31 | st.write(radio)
32 | 
33 | # Selectbox: the same options as a dropdown, first one preselected
34 | st.divider()
35 | 
36 | select = st.selectbox("Choose a column", options=df.columns[1:], index=0)
37 | st.write(select)
38 | 
39 | # Multiselect: at most two columns can be picked (max_selections=2)
40 | st.divider()
41 | # NOTE(review): the default below assumes the CSV has a column named "col2"
42 | multiselect = st.multiselect("Choose as many columns as you want", options=df.columns[1:], default=["col2"], max_selections=2)
43 | st.write(multiselect)
44 | 
45 | # Slider: bounds, value and step are floats
46 | st.divider()
47 | 
48 | slider = st.slider("Pick a number", min_value=0.0, max_value=10.0, value=5.0, step=0.1)
49 | st.write(slider)
50 | 
51 | # Text input: the placeholder is only a hint shown while the box is empty
52 | st.divider()
53 | 
54 | text_input = st.text_input("What's your name?", placeholder="John Doe")
55 | st.write(f"Your name is {text_input}")
56 | 
57 | # Number input: bounds, value and step are integers
58 | st.divider()
59 | 
60 | num_input = st.number_input("Pick a number", min_value=0, max_value=10, value=0, step=1)
61 | st.write(f"You picked {num_input}")
62 | 
63 | # Text area: multi-line text box with an explicit height
64 | st.divider()
65 | 
66 | txt_area = st.text_area("What do you want to tell me?", height=500, placeholder="Write your message here")
67 | st.write(txt_area)


--------------------------------------------------------------------------------
/23_state_advanced/main.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | from datetime import datetime, timedelta
 3 | 
 4 | 
 5 | st.title("Advanced State Management")
 6 | 
 7 | # Store widget value in session state: `key` names the state entry that holds the widget's value
 8 | st.subheader("Store widget value in session state")
 9 | 
10 | st.slider("Select a number", 0, 10, key="slider")
11 | 
12 | st.write(st.session_state)
13 | 
14 | # Initialize widget value with session state
15 | # (the entry is set before the widget that uses the same key is created)
16 | st.subheader("Initialize widget value with session state")
17 | 
18 | if "num_input" not in st.session_state:
19 |     st.session_state['num_input'] = 5
20 | 
21 | st.number_input("Pick a number", 0, 10, key="num_input")
22 | 
23 | # Callbacks
24 | st.subheader("Use callbacks")
25 | 
26 | st.markdown("#### Select your time range")
27 | 
28 | def add_timedelta():
29 |     initial = st.session_state['start_date']
30 | 
31 |     if st.session_state['radio_range'] == "7 days":
32 |         st.session_state['end_date'] = initial + timedelta(days=7)
33 | 
34 |     elif st.session_state['radio_range'] == "28 days":
35 |         st.session_state['end_date'] = initial + timedelta(days=28)
36 |     
37 |     else:
38 |         pass
39 | 
40 | def subtract_timedelta():
41 |     final = st.session_state['end_date']
42 | 
43 |     if st.session_state['radio_range'] == "7 days":
44 |         st.session_state['start_date'] = final - timedelta(days=7)
45 |     
46 |     elif st.session_state['radio_range'] == "28 days":
47 |         st.session_state['start_date'] = final - timedelta(days=28)
48 |     
49 |     else:
50 |         pass
51 | 
52 | st.radio("Select a range", ["7 days", "28 days", "custom"], horizontal=True, key="radio_range", on_change=add_timedelta)
53 | # Changing the range or the start date recomputes the end date; changing the end date recomputes the start date
54 | col1, col2, col3 = st.columns(3)
55 | 
56 | col1.date_input("Start date", key="start_date", on_change=add_timedelta)
57 | col2.date_input("End date", key="end_date", on_change=subtract_timedelta)
58 | 


--------------------------------------------------------------------------------
/38_streamlit_api_calls/main.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import pandas as pd
 3 | import requests
 4 | 
 5 | 
 6 | @st.cache_data(show_spinner="Searching...")
 7 | def search_gutenberg(author, title):
 8 |     BASE_URL = "https://gutendex.com/books?search="
 9 |     author = author.replace(" ", "%20")
10 |     title = title.replace(" ", "%20")
11 | 
12 |     params_url = f"{author}%20{title}"
13 | 
14 |     search_url = f"{BASE_URL}{params_url}"
15 | 
16 |     try:
17 |         res = requests.get(search_url)
18 | 
19 |         json_res = res.json()
20 | 
21 |         if json_res["count"] == 0:
22 |             return False
23 |         else:
24 |             return json_res
25 |     except:
26 |         return False
27 | 
28 | 
29 | @st.cache_data
30 | def format_json_res(json_res):
31 |     cols = ['Id', 'Author', 'Title', 'Language', 'Link']
32 | 
33 |     rows = []
34 | 
35 |     try:
36 |         for result in json_res["results"]:
37 | 
38 |             id = result["id"]
39 |             author = result["authors"][0]["name"]
40 |             title = result["title"]
41 |             language = result["languages"][0]
42 |             link = f"https://www.gutenberg.org/ebooks/{id}"
43 | 
44 |             rows.append([id, author, title, language, link])
45 | 
46 |         df = pd.DataFrame(rows, columns=cols)
47 | 
48 |         return df
49 |     except:
50 |         st.error("Error while parsing data")
51 | 
52 | 
53 | if __name__ == "__main__":
54 |     st.title("📚 Search Project Gutenberg")
55 |     with st.form("search-form"):
56 |         col1, col2 = st.columns(2)
57 | 
58 |         with col1:
59 |             author = st.text_input("Author")
60 |         with col2:
61 |             title = st.text_input("Title")
62 | 
63 |         search = st.form_submit_button("Search", type='primary')
64 | 
65 |     if search:
66 |         json_res = search_gutenberg(author, title)
67 | 
68 |         if json_res:
69 |             df = format_json_res(json_res)
70 |             st.subheader("Results")
71 |             st.table(df)
72 |         else:
73 |             st.error("No results found")
74 | 


--------------------------------------------------------------------------------
/25_state_capstone/starter.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import numpy as np
 3 | from joblib import load
 4 | 
 5 | # Initialize the session state with an empty prediction
 6 | 
 7 | # Function to load the model (use caching)
 8 | 
 9 | # Callback function to make a prediction.
10 | # Use kwargs to pass the model as an argument
11 | # It updates the value of the prediction stored in the state
12 | 
13 | if __name__ == "__main__":
14 |     st.title("🍁Used car price calculator")
15 | 
16 |     # Load model
17 |     # Every widget below has a key, so its value is readable from st.session_state under that name
18 |     with st.form(key="form"):
19 |         col1, col2, col3 = st.columns(3)
20 | 
21 |         with col1:
22 |             st.number_input("Miles", value=86132.0, min_value=0.0, step=0.1, key="miles")
23 |             st.selectbox("Model", index=0, key="model", options=['Prius', 'Highlander', 'Civic', 'Accord', 'Corolla', 'Ridgeline',
24 |        'Odyssey', 'CR-V', 'Pilot', 'Camry Solara', 'Matrix', 'RAV4',
25 |        'Rav4', 'HR-V', 'Fit', 'Yaris', 'Yaris iA', 'Tacoma', 'Camry',
26 |        'Avalon', 'Venza', 'Sienna', 'Passport', 'Accord Crosstour',
27 |        'Crosstour', 'Element', 'Tundra', 'Sequoia', 'Corolla Hatchback',
28 |        '4Runner', 'Echo', 'Tercel', 'MR2 Spyder', 'FJ Cruiser',
29 |        'Corolla iM', 'C-HR', 'Civic Hatchback', '86', 'S2000', 'Supra',
30 |        'Insight', 'Clarity', 'CR-Z', 'Prius Prime', 'Prius Plug-In',
31 |        'Prius c', 'Prius C', 'Prius v'])
32 |         with col2:
33 |             st.number_input("Year", value=2001, min_value=1886, step=1, key="year")
34 |             st.number_input("Engine size (L)", value=1.5, key="engine_size", min_value=0.9, step=0.1)
35 |         with col3:
36 |             st.selectbox("Make", key="make", index=0, options=['toyota', 'honda'])
37 |             st.selectbox("Province", index=0, key="province", options=['NB', 'QC', 'BC', 'ON', 'AB', 'MB', 'SK', 'NS', 'PE', 'NL', 'YT', 'NC', 'OH','SC'])
38 |         # No callback is attached to the submit button yet
39 |         st.form_submit_button("Calculate", type="primary")
40 | 
41 |     # Display the prediction
42 |     # If the value is empty, display a message to click on the button
43 |     # Otherwise, display the prediction
44 |     
45 |     st.write(st.session_state)


--------------------------------------------------------------------------------
/30_multipage_capstone/pages/starter.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import pandas as pd
 3 | from sklearn.metrics import f1_score
 4 | from sklearn.datasets import load_wine
 5 | from sklearn.model_selection import train_test_split
 6 | from sklearn.dummy import DummyClassifier
 7 | from sklearn.tree import DecisionTreeClassifier
 8 | from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
 9 | from sklearn.feature_selection import SelectKBest, mutual_info_classif
10 | 
11 | # Set page config:
12 | # The title is "Experiment"
13 | # Choose an icon for the page
14 | # The layout is centered
15 | # The sidebar is set to "auto"
16 | 
17 | # Write a function to load the wine dataset from sklearn
18 | # Should you cache it?
19 | 
20 | # Run the function to load the data
21 | 
22 | # Write a function for train/test split.
23 | # Use stratification, and keep 30% of the data for the test set
24 | # Should you cache it?
25 | 
26 | # Run your train/test split function
27 | 
28 | # Write a function to select features using SelectKBest and mutual_info_classif
29 | # Should you cache it?
30 | 
31 | # Write a function that fits the selected model and computes the F1-score
32 | # The function must return the F1-Score
33 | # Inside this function, you must run feature selection
34 | # Should you cache it?
35 | 
36 | # Write a callback function that runs the model fitting and scoring function
37 | # The callback appends the model, number of features, and score to the state.
38 | # The callback takes 2 arguments: the model and the number of features to keep
39 | 
40 | if __name__ == "__main__":
41 |     
42 |     with st.container():
43 |         st.title("🧪 Experiments")
44 | 
45 |     col1, col2 = st.columns(2)
46 | 
47 |     with col1:
48 |         model = st.selectbox("Choose a model", ["Baseline", "Decision Tree", "Random Forest", "Gradient Boosted Classifier"])
49 |     with col2:
50 |         k = st.number_input("Choose the number of features to keep", 1, 13)
51 | 
52 |     # Plug in your callback and define the arguments
53 |     st.button("Train", type="primary")
54 | 
55 |     # Display the full dataset inside an expander
56 | 
57 |     if len(st.session_state['score']) != 0:
58 |         st.subheader(f"The model has an F1-Score of: {st.session_state['score'][-1]}")
59 |         
60 | 
61 | 
62 | 
63 | 
64 | 


--------------------------------------------------------------------------------
/24_state_exercise/main.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | st.title("Exercise: State Management")
 4 | 
 5 | st.subheader("Temperature conversion")
 6 | 
 7 | if "celsius" not in st.session_state:
 8 |     st.session_state['celsius'] = 0.00
 9 | 
10 | if "farenheit" not in st.session_state:
11 |     st.session_state['farenheit'] = 32.00
12 | 
13 | if "kelvin" not in st.session_state:
14 |     st.session_state['kelvin'] = 273.15
15 | 
16 | def celsius_conversion():
17 |     celsius = st.session_state['celsius']
18 |     
19 |     st.session_state['farenheit'] = (celsius * 9 / 5) + 32
20 |     st.session_state['kelvin'] = celsius + 273.15
21 | 
22 | def farenheit_conversion():
23 |     farenheit = st.session_state['farenheit']
24 | 
25 |     st.session_state['celsius'] = (farenheit - 32) * 5 / 9
26 |     st.session_state['kelvin'] = (farenheit - 32) * 5 / 9 + 273.15
27 | 
28 | def kelvin_conversion():
29 |     kelvin = st.session_state['kelvin']
30 | 
31 |     st.session_state['celsius'] = kelvin - 273.15
32 |     st.session_state['farenheit'] = (kelvin - 273.15) * 9 / 5 + 32
33 | 
34 | def add_to_celsius(num):
35 |     st.session_state['celsius'] += num
36 |     celsius_conversion()
37 | 
38 | def set_temperatures(celsius, farenheit, kelvin):
39 |     st.session_state['celsius'] = celsius
40 |     st.session_state['farenheit'] = farenheit
41 |     st.session_state['kelvin'] = kelvin
42 | 
43 | col1, col2, col3 = st.columns(3)
44 | 
45 | col1.number_input("Celsius", step=0.01, key="celsius", on_change=celsius_conversion)
46 | col2.number_input("Farenheit", step=0.01, key="farenheit", on_change=farenheit_conversion)
47 | col3.number_input("Kelvin", step=0.01, key="kelvin", on_change=kelvin_conversion)
48 | 
49 | col1, _, _ = st.columns(3)
50 | num = col1.number_input("Add to Celsius", step=1)
51 | col1.button("Add", type="primary", 
52 |             on_click=add_to_celsius, 
53 |             args=(num,))
54 | 
55 | col1, col2, col3 = st.columns(3)
56 | 
57 | col1.button('🧊 Freezing point of water', 
58 |             on_click=set_temperatures, 
59 |             kwargs=dict(celsius=0.00, farenheit=32.00, kelvin=273.15))
60 | col2.button('🔥 Boiling point of water',
61 |             on_click=set_temperatures,
62 |             kwargs=dict(celsius=100.00, farenheit=212.00, kelvin=373.15))
63 | col3.button('🥶 Absolute zero',
64 |             on_click=set_temperatures,
65 |             kwargs=dict(celsius=-273.15, farenheit=-459.67, kelvin=0.00))
66 | 
67 | st.write(st.session_state)


--------------------------------------------------------------------------------
/19_caching_capstone2/starter.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import numpy as np
 3 | from joblib import load
 4 | 
 5 | # Function to load model
 6 | 
 7 | # Function to make a prediction
 8 | 
 9 | if __name__ == "__main__":
10 |     st.title("Mushroom classifier 🍄")
11 |     
12 |     st.subheader("Step 1: Select the values for prediction")
13 |     # Nine select boxes laid out in three columns; each option reads "<code> - <description>"
14 |     col1, col2, col3 = st.columns(3)
15 | 
16 |     with col1:
17 |         odor = st.selectbox('Odor', ('a - almond', 'l - anisel', 'c - creosote', 'y - fishy', 'f - foul', 'm - musty', 'n - none', 'p - pungent', 's - spicy'))
18 |         stalk_surface_above_ring = st.selectbox('Stalk surface above ring', ('f - fibrous', 'y - scaly', 'k - silky', 's - smooth'))
19 |         stalk_color_below_ring = st.selectbox('Stalk color below ring', ('n - brown', 'b - buff', 'c - cinnamon', 'g - gray', 'o - orange', 'p - pink', 'e - red', 'w - white', 'y - yellow'))
20 |     with col2:
21 |         gill_size = st.selectbox('Gill size', ('b - broad', 'n - narrow'))
22 |         stalk_surface_below_ring = st.selectbox('Stalk surface below ring', ('f - fibrous', 'y - scaly', 'k - silky', 's - smooth'))
23 |         ring_type = st.selectbox('Ring type', ('e - evanescente', 'f - flaring', 'l - large', 'n - none', 'p - pendant', 's - sheathing', 'z - zone'))
24 |     with col3:
25 |         gill_color = st.selectbox('Gill color', ('k - black', 'n - brown', 'b - buff', 'h - chocolate', 'g - gray', 'r - green', 'o - orange', 'p - pink', 'u - purple', 'e - red', 'w - white', 'y - yellow'))
26 |         stalk_color_above_ring = st.selectbox('Stalk color above ring', ('n - brown', 'b - buff', 'c - cinnamon', 'g - gray', 'o - orange', 'p - pink', 'e - red', 'w - white', 'y - yellow'))
27 |         spore_print_color = st.selectbox('Spore print color', ('k - black', 'n - brown', 'b - buff', 'h - chocolate', 'r - green', 'o - orange', 'u - purple', 'w - white', 'y - yellow'))
28 | 
29 |     st.subheader("Step 2: Ask the model for a prediction")
30 | 
31 |     pred_btn = st.button("Predict", type="primary")
32 | 
33 |     if pred_btn:
34 |         # Load model
35 |         # NOTE(review): presumably this order must match the feature order used to train the model; confirm in model.py
36 |         x_pred = [odor, 
37 |                   gill_size, 
38 |                   gill_color, 
39 |                   stalk_surface_above_ring, 
40 |                   stalk_surface_below_ring, 
41 |                   stalk_color_above_ring, 
42 |                   stalk_color_below_ring, 
43 |                   ring_type, 
44 |                   spore_print_color]
45 |         
46 |         # Make a prediction
47 | 
48 |     
49 | 
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/25_state_capstone/main.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import numpy as np
 3 | from joblib import load
 4 | 
 5 | if "pred" not in st.session_state:  # only on the first run of a session
 6 |     st.session_state["pred"] = None  # None means "no prediction made yet"
 7 | 
 8 | @st.cache_resource(show_spinner="Loading model...")
 9 | def load_model():
10 |     pipe = load('model/model.joblib')
11 | 
12 |     return pipe
13 | 
14 | def make_prediction(pipe):
15 |     miles = st.session_state["miles"]
16 |     year = st.session_state["year"]
17 |     make = st.session_state["make"]
18 |     model = st.session_state["model"]
19 |     engine_size = st.session_state["engine_size"]
20 |     province = st.session_state["province"]
21 | 
22 |     X_pred = np.array([miles, year, make, model, engine_size, province]).reshape(1,-1)
23 | 
24 |     pred = pipe.predict(X_pred)
25 |     pred = round(pred[0], 2)
26 | 
27 |     st.session_state["pred"] = pred
28 | 
29 | if __name__ == "__main__":
30 |     st.title("🍁Used car price calculator")
31 | 
32 |     pipe = load_model()
33 |     # Every widget below has a key, so make_prediction can read its value from st.session_state
34 |     with st.form(key="form"):
35 |         col1, col2, col3 = st.columns(3)
36 | 
37 |         with col1:
38 |             st.number_input("Miles", value=86132.0, min_value=0.0, step=0.1, key="miles")
39 |             st.selectbox("Model", index=0, key="model", options=['Prius', 'Highlander', 'Civic', 'Accord', 'Corolla', 'Ridgeline',
40 |        'Odyssey', 'CR-V', 'Pilot', 'Camry Solara', 'Matrix', 'RAV4',
41 |        'Rav4', 'HR-V', 'Fit', 'Yaris', 'Yaris iA', 'Tacoma', 'Camry',
42 |        'Avalon', 'Venza', 'Sienna', 'Passport', 'Accord Crosstour',
43 |        'Crosstour', 'Element', 'Tundra', 'Sequoia', 'Corolla Hatchback',
44 |        '4Runner', 'Echo', 'Tercel', 'MR2 Spyder', 'FJ Cruiser',
45 |        'Corolla iM', 'C-HR', 'Civic Hatchback', '86', 'S2000', 'Supra',
46 |        'Insight', 'Clarity', 'CR-Z', 'Prius Prime', 'Prius Plug-In',
47 |        'Prius c', 'Prius C', 'Prius v'])
48 |         with col2:
49 |             st.number_input("Year", value=2001, min_value=1886, step=1, key="year")
50 |             st.number_input("Engine size (L)", value=1.5, key="engine_size", min_value=0.9, step=0.1)
51 |         with col3:
52 |             st.selectbox("Make", key="make", index=0, options=['toyota', 'honda'])
53 |             st.selectbox("Province", index=0, key="province", options=['NB', 'QC', 'BC', 'ON', 'AB', 'MB', 'SK', 'NS', 'PE', 'NL', 'YT', 'NC', 'OH','SC'])
54 |         # The loaded pipeline is handed to the callback as a keyword argument
55 |         st.form_submit_button("Calculate", type="primary", on_click=make_prediction, kwargs=dict(pipe=pipe))
56 | 
57 |     if st.session_state["pred"] is not None:
58 |         st.subheader(f"The estimated car price is {st.session_state.pred}$")
59 |     else:
60 |         st.write("Input information and click on Calculate to get an estimated price")
61 |     
62 |     st.write(st.session_state)


--------------------------------------------------------------------------------
/39_connect_data_capstone/starter.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | import pandas as pd
  3 | import matplotlib.pyplot as plt
  4 | from supabase import create_client, Client
  5 | from statsforecast import StatsForecast
  6 | from statsforecast.models import CrostonOptimized
  7 | 
  8 | # Initialize connection to db
  9 | 
 10 | 
@st.cache_resource
def init_connection():
    """Placeholder for the database connection (not implemented yet).

    Decorated with ``st.cache_resource``; currently does nothing and
    returns ``None``.
    """
    pass
 14 | 
 15 | # Run the function to make the connection
 16 | 
 17 | # Function to query the db
 18 | # Return all data
 19 | 
 20 | 
@st.cache_data(ttl=600)  # cache clears after 10 minutes
def run_query():
    """Placeholder for the query returning all data (not implemented yet).

    Currently does nothing and returns ``None``.
    """
    pass
 24 | 
 25 | # Function to create a Dataframe
 26 | # Make sure that volume is an integer
 27 | # Return dataframe
 28 | 
 29 | 
@st.cache_data(ttl=600)
def create_dataframe():
    """Placeholder for building the dataframe (not implemented yet).

    The rest of this script reads the columns ``parts_id``, ``date``,
    ``volume`` and ``id`` from the result. Currently returns ``None``.
    """
    pass
 33 | 
 34 | # Function to plot data
 35 | 
 36 | 
 37 | @st.cache_data
 38 | def plot_volume(ids):
 39 |     fig, ax = plt.subplots()
 40 | 
 41 |     df['volume'] = df['volume'].astype(int)
 42 | 
 43 |     x = df[df["parts_id"] == 2674]['date']
 44 | 
 45 |     for id in ids:
 46 |         ax.plot(x,
 47 |                 df[df['parts_id'] == id]['volume'], label=id)
 48 |     ax.xaxis.set_major_locator(plt.MaxNLocator(10))
 49 |     ax.legend(loc='best')
 50 |     fig.autofmt_xdate()
 51 | 
 52 |     st.pyplot(fig)
 53 | 
 54 | # Function to format the dataframe as expected
 55 | # by statsforecast
 56 | 
 57 | 
 58 | @st.cache_data
 59 | def format_dataset(ids):
 60 |     model_df = df[df['parts_id'].isin(ids)]
 61 |     model_df = model_df.drop(['id'], axis=1)
 62 |     model_df.rename({"parts_id": "unique_id", "date": "ds",
 63 |                     "volume": "y"}, axis=1, inplace=True)
 64 | 
 65 |     return model_df
 66 | 
 67 | # Create the statsforecast object to train the model
 68 | # Return the statsforecast object
 69 | 
 70 | 
@st.cache_resource
def create_sf_object(model_df):
    """Placeholder for creating the statsforecast object (not implemented yet).

    Args:
        model_df: Dataframe to build the object from.

    Currently does nothing and returns ``None``.
    """
    pass
 74 | 
 75 | # Function to make predictions
 76 | # Inputs: product_ids and horizon
 77 | # Returns a CSV
 78 | 
 79 | 
@st.cache_data(show_spinner="Making predictions...")
def make_predictions(ids, horizon):
    """Placeholder for the forecasting step (not implemented yet).

    Args:
        ids: Selected product ids.
        horizon: Forecast horizon chosen with the slider.

    Currently does nothing and returns ``None``.
    """
    pass
 83 | 
 84 | 
 85 | if __name__ == "__main__":
 86 |     st.title("Forecast product demand")
 87 | 
 88 |     df = create_dataframe()
 89 | 
 90 |     st.subheader("Select a product")
 91 |     product_ids = st.multiselect(
 92 |         "Select product ID", options=df['parts_id'].unique())
 93 | 
 94 |     plot_volume(product_ids)
 95 | 
 96 |     with st.expander("Forecast"):
 97 |         if len(product_ids) == 0:
 98 |             st.warning("Select at least one product ID to forecast")
 99 |         else:
100 |             horizon = st.slider("Horizon", 1, 12, step=1)
101 | 
102 |             forecast_btn = st.button("Forecast", type="primary")
103 | 
104 |             # Download CSV file if the forecast button is pressed
105 | 


--------------------------------------------------------------------------------
/19_caching_capstone2/main.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import numpy as np
 3 | from joblib import load
 4 | 
 5 | @st.cache_resource(show_spinner="Loading model...")
 6 | def load_model():
 7 |     pipe = load('model/pipe.joblib')
 8 | 
 9 |     return pipe
10 | 
@st.cache_data(show_spinner="Making a prediction...")
def make_prediction(_pipe, X_pred):
    """Return the pipeline's prediction for one set of selected options.

    Args:
        _pipe: Fitted pipeline exposing ``predict`` (the leading underscore
            keeps it out of the cache key).
        X_pred: Sequence of option strings; only the first character of each
            is passed to the pipeline.
    """
    codes = np.array([option[0] for option in X_pred]).reshape(1, -1)

    return _pipe.predict(codes)[0]
20 | 
if __name__ == "__main__":
    st.title("Mushroom classifier 🍄")

    st.subheader("Step 1: Select the values for prediction")

    # Option lists that are shared by more than one widget.
    stalk_surfaces = ('f - fibrous', 'y - scaly', 'k - silky', 's - smooth')
    stalk_colors = ('n - brown', 'b - buff', 'c - cinnamon', 'g - gray', 'o - orange', 'p - pink', 'e - red', 'w - white', 'y - yellow')

    left, middle, right = st.columns(3)

    with left:
        odor = st.selectbox('Odor', ('a - almond', 'l - anisel', 'c - creosote', 'y - fishy', 'f - foul', 'm - musty', 'n - none', 'p - pungent', 's - spicy'))
        stalk_surface_above_ring = st.selectbox('Stalk surface above ring', stalk_surfaces)
        stalk_color_below_ring = st.selectbox('Stalk color below ring', stalk_colors)
    with middle:
        gill_size = st.selectbox('Gill size', ('b - broad', 'n - narrow'))
        stalk_surface_below_ring = st.selectbox('Stalk surface below ring', stalk_surfaces)
        ring_type = st.selectbox('Ring type', ('e - evanescente', 'f - flaring', 'l - large', 'n - none', 'p - pendant', 's - sheathing', 'z - zone'))
    with right:
        gill_color = st.selectbox('Gill color', ('k - black', 'n - brown', 'b - buff', 'h - chocolate', 'g - gray', 'r - green', 'o - orange', 'p - pink', 'u - purple', 'e - red', 'w - white', 'y - yellow'))
        stalk_color_above_ring = st.selectbox('Stalk color above ring', stalk_colors)
        spore_print_color = st.selectbox('Spore print color', ('k - black', 'n - brown', 'b - buff', 'h - chocolate', 'r - green', 'o - orange', 'u - purple', 'w - white', 'y - yellow'))

    st.subheader("Step 2: Ask the model for a prediction")

    if st.button("Predict", type="primary"):
        pipe = load_model()

        # Same feature order as the original list passed to the pipeline.
        x_pred = [
            odor,
            gill_size,
            gill_color,
            stalk_surface_above_ring,
            stalk_surface_below_ring,
            stalk_color_above_ring,
            stalk_color_below_ring,
            ring_type,
            spore_print_color,
        ]

        pred = make_prediction(pipe, x_pred)

        if pred == 'p':
            nice_pred = "The mushroom is poisonous 🤢"
        else:
            nice_pred = "The mushroom is edible 🍴"

        st.write(nice_pred)
63 | 
64 |     
65 | 
66 | 
67 | 
68 | 


--------------------------------------------------------------------------------
/34_streamlit_authenticator/main.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import yaml
 3 | from yaml.loader import SafeLoader
 4 | import streamlit_authenticator as stauth
 5 | 
 6 | # Open YAML file
 7 | 
 8 | with open('config.yaml') as file:
 9 |     config = yaml.load(file, Loader=SafeLoader)
10 | 
11 | # Create authenticator object
12 | authenticator = stauth.Authenticate(
13 |     config['credentials'],
14 |     config['cookie']['name'],
15 |     config['cookie']['key'],
16 |     config['cookie']['expiry_days'],
17 |     config['preauthorized']
18 | )
19 | 
20 | # Render the login widget
21 | name, authentication_status, username = authenticator.login('Login', 'main')
22 | 
23 | # Authenticate users
24 | if st.session_state["authentication_status"]:
25 |     authenticator.logout('Logout', 'main', key='unique_key')
26 |     st.write(f'Welcome *{st.session_state["name"]}*')
27 |     st.title('Some content')
28 | elif st.session_state["authentication_status"] is False:
29 |     st.error('Username/password is incorrect')
30 | elif st.session_state["authentication_status"] is None:
31 |     st.warning('Please enter your username and password')
32 | 
33 | # Password reset widget
34 | if authentication_status:
35 |     try:
36 |         if authenticator.reset_password(username, 'Reset password'):
37 |             with open('config.yaml', 'w') as file:
38 |                 yaml.dump(config, file, default_flow_style=False)
39 |             st.success('Password modified successfully')
40 |     except Exception as e:
41 |         st.error(e)
42 | 
43 | # Register new user
44 | try:
45 |     if authenticator.register_user('Register user', preauthorization=False):
46 |         with open('config.yaml', 'w') as file:
47 |             yaml.dump(config, file, default_flow_style=False)
48 |         st.success('User registered successfully')
49 | except Exception as e:
50 |     st.error(e)
51 | 
52 | # Forgot password widget
53 | try:
54 |     username_of_forgotten_password, email_of_forgotten_password, new_random_password = authenticator.forgot_password(
55 |         'Forgot password')
56 |     if username_of_forgotten_password:
57 |         st.success('New password sent securely')
58 |         # Random password to be transferred to user securely
59 |     else:
60 |         st.error('Username not found')
61 | except Exception as e:
62 |     st.error(e)
63 | 
64 | # Forgot username
65 | try:
66 |     username_of_forgotten_username, email_of_forgotten_username = authenticator.forgot_username(
67 |         'Forgot username')
68 |     if username_of_forgotten_username:
69 |         st.success('Username sent securely')
70 |         # Username to be transferred to user securely
71 |     else:
72 |         st.error('Email not found')
73 | except Exception as e:
74 |     st.error(e)
75 | 
76 | # Update user details
77 | if authentication_status:
78 |     try:
79 |         if authenticator.update_user_details(username, 'Update user details'):
80 |             with open('config.yaml', 'w') as file:
81 |                 yaml.dump(config, file, default_flow_style=False)
82 |             st.success('Entries updated successfully')
83 |     except Exception as e:
84 |         st.error(e)
85 | 
86 | 
87 | st.write(st.session_state)
88 | # Hash passwords and store them in the YAML file. Only do this once
89 | # hasehd_pwd = stauth.Hasher(['123', '12345']).generate()
90 | 
91 | # st.write(hasehd_pwd)
92 | 


--------------------------------------------------------------------------------
/18_caching_capstone/starter.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import pandas as pd
 3 | import numpy as np
 4 | from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
 5 | from sklearn.ensemble import GradientBoostingClassifier
 6 | 
 7 | URL = "https://raw.githubusercontent.com/marcopeix/MachineLearningModelDeploymentwithStreamlit/master/17_caching_capstone/data/mushrooms.csv"
 8 | COLS = ['class', 'odor', 'gill-size', 'gill-color', 'stalk-surface-above-ring',
 9 |        'stalk-surface-below-ring', 'stalk-color-above-ring',
10 |        'stalk-color-below-ring', 'ring-type', 'spore-print-color']
11 | 
12 | # Function to read the data
13 | 
14 | # Function to fit the LabelEncoder
15 | 
16 | # Function to fit the OrdinalEncoder
17 | 
18 | # Function to encode data
19 | 
20 | # Function to train the model
21 | 
22 | # Function to make a prediction
23 | 
if __name__ == "__main__":
    st.title("Mushroom classifier 🍄")

    # Read the data

    st.subheader("Step 1: Select the values for prediction")

    # Option lists that are shared by more than one widget.
    stalk_surfaces = ('f - fibrous', 'y - scaly', 'k - silky', 's - smooth')
    stalk_colors = ('n - brown', 'b - buff', 'c - cinnamon', 'g - gray', 'o - orange', 'p - pink', 'e - red', 'w - white', 'y - yellow')

    col1, col2, col3 = st.columns(3)

    with col1:
        odor = st.selectbox('Odor', ('a - almond', 'l - anisel', 'c - creosote', 'y - fishy', 'f - foul', 'm - musty', 'n - none', 'p - pungent', 's - spicy'))
        stalk_surface_above_ring = st.selectbox('Stalk surface above ring', stalk_surfaces)
        stalk_color_below_ring = st.selectbox('Stalk color below ring', stalk_colors)
    with col2:
        gill_size = st.selectbox('Gill size', ('b - broad', 'n - narrow'))
        stalk_surface_below_ring = st.selectbox('Stalk surface below ring', stalk_surfaces)
        ring_type = st.selectbox('Ring type', ('e - evanescente', 'f - flaring', 'l - large', 'n - none', 'p - pendant', 's - sheathing', 'z - zone'))
    with col3:
        gill_color = st.selectbox('Gill color', ('k - black', 'n - brown', 'b - buff', 'h - chocolate', 'g - gray', 'r - green', 'o - orange', 'p - pink', 'u - purple', 'e - red', 'w - white', 'y - yellow'))
        stalk_color_above_ring = st.selectbox('Stalk color above ring', stalk_colors)
        spore_print_color = st.selectbox('Spore print color', ('k - black', 'n - brown', 'b - buff', 'h - chocolate', 'r - green', 'o - orange', 'u - purple', 'w - white', 'y - yellow'))

    st.subheader("Step 2: Ask the model for a prediction")

    pred_btn = st.button("Predict", type="primary")

    # If the button is clicked:
    # 1. Fit the LabelEncoder
    # 2. Fit the OrdinalEncoder
    # 3. Encode the data
    # 4. Train the model

    # Selected options, in the order of the feature columns in COLS.
    x_pred = [
        odor,
        gill_size,
        gill_color,
        stalk_surface_above_ring,
        stalk_surface_below_ring,
        stalk_color_above_ring,
        stalk_color_below_ring,
        ring_type,
        spore_print_color,
    ]

    # 5. Make a prediction
    # 6. Format the prediction to be a nice text
    # 7. Output it to the screen
69 |     
70 | 
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/39_connect_data_capstone/main.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | import pandas as pd
  3 | import matplotlib.pyplot as plt
  4 | from supabase import create_client, Client
  5 | from statsforecast import StatsForecast
  6 | from statsforecast.models import CrostonOptimized
  7 | 
  8 | # Initialize connection to db
  9 | 
 10 | 
 11 | @st.cache_resource
 12 | def init_connection():
 13 |     url: str = st.secrets['supabase_url']
 14 |     key: str = st.secrets['supabase_key']
 15 | 
 16 |     client: Client = create_client(url, key)
 17 | 
 18 |     return client
 19 | 
 20 | 
 21 | supabase = init_connection()
 22 | 
 23 | # Query the db
 24 | 
 25 | 
 26 | @st.cache_data(ttl=600)  # cache clears after 10 minutes
 27 | def run_query():
 28 |     # Return all data
 29 |     return supabase.table('car_parts_monthly_sales').select("*").execute()
 30 | 
 31 | 
 32 | @st.cache_data(ttl=600)
 33 | def create_dataframe():
 34 |     rows = run_query()
 35 |     df = pd.json_normalize(rows.data)
 36 |     df['volume'] = df['volume'].astype(int)
 37 | 
 38 |     return df
 39 | 
 40 | 
 41 | @st.cache_data
 42 | def plot_volume(ids):
 43 |     fig, ax = plt.subplots()
 44 | 
 45 |     df['volume'] = df['volume'].astype(int)
 46 | 
 47 |     x = df[df["parts_id"] == 2674]['date']
 48 | 
 49 |     for id in ids:
 50 |         ax.plot(x,
 51 |                 df[df['parts_id'] == id]['volume'], label=id)
 52 |     ax.xaxis.set_major_locator(plt.MaxNLocator(10))
 53 |     ax.legend(loc='best')
 54 |     fig.autofmt_xdate()
 55 | 
 56 |     st.pyplot(fig)
 57 | 
 58 | 
 59 | @st.cache_data
 60 | def format_dataset(ids):
 61 |     model_df = df[df['parts_id'].isin(ids)]
 62 |     model_df = model_df.drop(['id'], axis=1)
 63 |     model_df.rename({"parts_id": "unique_id", "date": "ds",
 64 |                     "volume": "y"}, axis=1, inplace=True)
 65 | 
 66 |     return model_df
 67 | 
 68 | 
 69 | @st.cache_resource
 70 | def create_sf_object(model_df):
 71 |     models = [CrostonOptimized()]
 72 | 
 73 |     sf = StatsForecast(
 74 |         df=model_df,
 75 |         models=models,
 76 |         freq='MS',
 77 |         n_jobs=-1
 78 |     )
 79 | 
 80 |     return sf
 81 | 
 82 | 
 83 | @st.cache_data(show_spinner="Making predictions...")
 84 | def make_predictions(ids, horizon):
 85 | 
 86 |     model_df = format_dataset(ids)
 87 | 
 88 |     sf = create_sf_object(model_df)
 89 | 
 90 |     forecast_df = sf.forecast(h=horizon)
 91 | 
 92 |     return forecast_df.to_csv(header=True)
 93 | 
 94 | 
 95 | if __name__ == "__main__":
 96 |     st.title("Forecast product demand")
 97 | 
 98 |     df = create_dataframe()
 99 | 
100 |     st.subheader("Select a product")
101 |     product_ids = st.multiselect(
102 |         "Select product ID", options=df['parts_id'].unique())
103 | 
104 |     plot_volume(product_ids)
105 | 
106 |     with st.expander("Forecast"):
107 |         if len(product_ids) == 0:
108 |             st.warning("Select at least one product ID to forecast")
109 |         else:
110 |             horizon = st.slider("Horizon", 1, 12, step=1)
111 | 
112 |             forecast_btn = st.button("Forecast", type="primary")
113 | 
114 |             if forecast_btn:
115 |                 csv_file = make_predictions(product_ids, horizon)
116 |                 st.download_button(
117 |                     label="Download predictions",
118 |                     data=csv_file,
119 |                     file_name="predictions.csv",
120 |                     mime="text/csv"
121 |                 )
122 | 


--------------------------------------------------------------------------------
/30_multipage_capstone/pages/experiment.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | import pandas as pd
  3 | from sklearn.metrics import f1_score
  4 | from sklearn.datasets import load_wine
  5 | from sklearn.model_selection import train_test_split
  6 | from sklearn.dummy import DummyClassifier
  7 | from sklearn.tree import DecisionTreeClassifier
  8 | from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
  9 | from sklearn.feature_selection import SelectKBest, mutual_info_classif
 10 | 
 11 | st.set_page_config(
 12 |     page_title="Experiment",
 13 |     page_icon="🧪",
 14 |     layout="centered",
 15 |     initial_sidebar_state="auto"
 16 | )
 17 | 
 18 | @st.cache_data(show_spinner="Loading data...")
 19 | def load_data():
 20 |     wine_data = load_wine()
 21 |     wine_df = pd.DataFrame(data=wine_data.data, columns=wine_data.feature_names)
 22 | 
 23 |     wine_df['target'] = wine_data.target
 24 | 
 25 |     return wine_df
 26 | 
 27 | wine_df = load_data()
 28 | 
 29 | @st.cache_data
 30 | def split_data(df):
 31 | 
 32 |     X = df.drop(['target'], axis=1)
 33 |     y = df['target']
 34 | 
 35 |     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True, stratify=y, random_state=42)
 36 | 
 37 |     return X_train, X_test, y_train, y_test
 38 | 
 39 | X_train, X_test, y_train, y_test = split_data(wine_df)
 40 | 
 41 | @st.cache_data()
 42 | def select_features(X_train, y_train, X_test, k):
 43 |     selector = SelectKBest(mutual_info_classif, k=k)
 44 |     selector.fit(X_train, y_train)
 45 | 
 46 |     sel_X_train = selector.transform(X_train)
 47 |     sel_X_test = selector.transform(X_test)
 48 | 
 49 |     return sel_X_train, sel_X_test
 50 | 
 51 | @st.cache_data(show_spinner="Training and evaluating model")
 52 | def fit_and_score(model, k):
 53 |     
 54 |     if model == "Baseline":
 55 |         clf = DummyClassifier(strategy="stratified", random_state=42)
 56 |     elif model == "Decision Tree":
 57 |         clf = DecisionTreeClassifier(random_state=42)
 58 |     elif model == "Random Forest":
 59 |         clf = RandomForestClassifier(random_state=42)
 60 |     else:
 61 |         clf = GradientBoostingClassifier(random_state=42)
 62 |     
 63 |     sel_X_train, sel_X_test = select_features(X_train, y_train, X_test, k)
 64 | 
 65 |     clf.fit(sel_X_train, y_train)
 66 | 
 67 |     preds = clf.predict(sel_X_test)
 68 | 
 69 |     score = round(f1_score(y_test, preds, average='weighted'),3)
 70 | 
 71 |     return score
 72 |     
 73 | def save_performance(model, k):
 74 | 
 75 |     score = fit_and_score(model, k)
 76 | 
 77 |     st.session_state['model'].append(model)
 78 |     st.session_state['num_features'].append(k)
 79 |     st.session_state['score'].append(score)
 80 | 
 81 | 
 82 | if __name__ == "__main__":
 83 |     
 84 |     with st.container():
 85 |         st.title("🧪 Experiments")
 86 | 
 87 |     col1, col2 = st.columns(2)
 88 | 
 89 |     with col1:
 90 |         model = st.selectbox("Choose a model", ["Baseline", "Decision Tree", "Random Forest", "Gradient Boosted Classifier"])
 91 |     with col2:
 92 |         k = st.number_input("Choose the number of features to keep", 1, 13)
 93 | 
 94 |     st.button("Train", type="primary", on_click=save_performance, args=((model, k)))
 95 | 
 96 |     with st.expander("See full dataset"):
 97 |         st.write(wine_df)
 98 | 
 99 |     if len(st.session_state['score']) != 0:
100 |         st.subheader(f"The model has an F1-Score of: {st.session_state['score'][-1]}")
101 |         
102 | 
103 | 
104 | 
105 | 
106 | 


--------------------------------------------------------------------------------
/35_authentication_capstone/starter.py:
--------------------------------------------------------------------------------
  1 | import yaml
  2 | import streamlit as st
  3 | from yaml.loader import SafeLoader
  4 | import streamlit_authenticator as stauth
  5 | 
  6 | import pandas as pd
  7 | import numpy as np
  8 | from sklearn.metrics import silhouette_score
  9 | from sklearn.cluster import KMeans, AgglomerativeClustering
 10 | 
 11 | # hashed_passwords = stauth.Hasher(['marketing', 'datascience']).generate()
 12 | # st.write(hashed_passwords)
 13 | 
 14 | # Read config file
 15 | 
 16 | # Initialize the authenticator
 17 | 
 18 | # Function to read the data
 19 | 
 20 | 
 21 | @st.cache_data
 22 | def read_data():
 23 |     df = pd.read_csv('data/segmentation data.csv')
 24 |     return df
 25 | 
 26 | # Read data
 27 | # Assign the features to the variable "X"
 28 | 
 29 | # Function to calculate the silhouette for each algo, for each number of cluster
 30 | # Returns a Dataframe with 3 columns [n_clusters, algo1, algo2]
 31 | 
 32 | 
@st.cache_data(show_spinner="Running experiment")
def run_experiment(X):
    """Placeholder for the silhouette experiment (not implemented yet).

    Args:
        X: Features to cluster.

    Currently does nothing and returns ``None``.
    """
    pass
 36 | 
 37 | 
 38 | @st.cache_data
 39 | def display_group_metrics(df, num_clusters):
 40 |     for i in range(num_clusters):
 41 |         f_df = df[df['labels'] == i]
 42 | 
 43 |         male_percentage = round(len(f_df[f_df['Sex'] == 0])/len(f_df), 2)*100
 44 |         female_percentage = 100 - male_percentage
 45 | 
 46 |         marital_percentage = round(
 47 |             len(f_df[f_df['Marital status'] == 0])/len(f_df), 2)*100
 48 | 
 49 |         mean_age = round(f_df['Age'].mean(), 0)
 50 |         min_age = f_df['Age'].min()
 51 |         max_age = f_df['Age'].max()
 52 | 
 53 |         high_school = round(len(f_df[f_df['Education'] == 1])/len(f_df), 2)*100
 54 |         university = round(
 55 |             len(f_df[(f_df["Education"] == 2) | (f_df["Education"] == 3)])/len(f_df), 2)*100
 56 | 
 57 |         mean_income = round(f_df['Income'].mean(), 0)
 58 |         max_income = f_df['Income'].max()
 59 |         min_income = f_df['Income'].min()
 60 | 
 61 |         employment_num = f_df['Occupation'].mode().values
 62 | 
 63 |         if employment_num == 0:
 64 |             employment = 'Unemployed'
 65 |         elif employment_num == 1:
 66 |             employment = 'Skilled employee'
 67 |         else:
 68 |             employment = 'Highly skilled employee'
 69 | 
 70 |         city_num = f_df['Settlement size'].mode().values
 71 | 
 72 |         if city_num == 0:
 73 |             city = 'Small city'
 74 |         elif city_num == 1:
 75 |             city = "Mid-sized city"
 76 |         else:
 77 |             city = "Large city"
 78 | 
 79 |         with st.container():
 80 |             st.header(f"Group {i+1}")
 81 | 
 82 |             st.subheader("Demographics")
 83 |             st.write(f"Percentage of men: {male_percentage}%")
 84 |             st.write(f"Percentage of female: {female_percentage}%")
 85 | 
 86 |             st.subheader("Marital status")
 87 |             st.write(f"Percentage of married clients: {marital_percentage}%")
 88 | 
 89 |             st.subheader("Age")
 90 |             st.write(f"Mean age: {mean_age}")
 91 |             st.write(f"Max age: {max_age}")
 92 |             st.write(f"Min age: {min_age}")
 93 | 
 94 |             st.subheader("Education")
 95 |             st.write(f"High school: {high_school}%")
 96 |             st.write(f"University: {university}%")
 97 | 
 98 |             st.subheader("Income")
 99 |             st.write(f"Mean income: {mean_income}$")
100 |             st.write(f"Max income: {max_income}$")
101 |             st.write(f"Min income: {min_income}$")
102 | 
103 |             st.subheader("Employment")
104 |             st.write(f"The majority is: {employment}")
105 | 
106 |             st.subheader("City size")
107 |             st.write(f"The majority is: {city}")
108 | 
109 | # Function to display content for DS
110 | 
111 | 
def display_ds_content():
    """Starter scaffold for the data-science content (incomplete).

    NOTE(review): ``exp_btn`` is not defined in the visible code; the
    placeholder comments below suggest it should be the button created here.
    """
    # Write the dataframe
    # Button to run the function run_experiment

    if exp_btn:
        # Run the experiment

        st.write("Silhouette scores")

        # Write the df of the results
122 | 
123 | 
def display_marketing_content():
    """Show the data, let the user choose a number of groups and, on request,
    cluster the module-level ``X`` with KMeans and describe every group.

    Reads the module-level ``df`` and ``X``.
    """
    st.write(df)

    num_clusters = st.slider("Number of groups", 2, 10)

    if not st.button("Generate groups"):
        return

    labelled = df.copy()

    kmeans = KMeans(n_clusters=num_clusters)
    kmeans.fit(X)
    labelled['labels'] = kmeans.predict(X)

    ordered = labelled.sort_values(by=['labels'], ascending=True)

    display_group_metrics(ordered, num_clusters)
142 | 
143 | # Logic to authenticate user
144 | # Put login in sidebar
145 | 


--------------------------------------------------------------------------------
/18_caching_capstone/main.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | import pandas as pd
  3 | import numpy as np
  4 | from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
  5 | from sklearn.ensemble import GradientBoostingClassifier
  6 | 
  7 | URL = "https://raw.githubusercontent.com/marcopeix/MachineLearningModelDeploymentwithStreamlit/master/17_caching_capstone/data/mushrooms.csv"
  8 | COLS = ['class', 'odor', 'gill-size', 'gill-color', 'stalk-surface-above-ring',
  9 |        'stalk-surface-below-ring', 'stalk-color-above-ring',
 10 |        'stalk-color-below-ring', 'ring-type', 'spore-print-color']
 11 | 
 12 | @st.cache_data(show_spinner="Fetching data...")
 13 | def read_data(url, cols):
 14 |     df = pd.read_csv(url)
 15 |     df = df[cols]
 16 | 
 17 |     return df
 18 | 
 19 | @st.cache_resource
 20 | def get_target_encoder(data):
 21 |     le = LabelEncoder()
 22 |     le.fit(data['class'])
 23 | 
 24 |     return le
 25 | 
 26 | @st.cache_resource
 27 | def get_features_encoder(data):
 28 |     oe = OrdinalEncoder()
 29 |     X_cols = data.columns[1:]
 30 |     oe.fit(data[X_cols])
 31 | 
 32 |     return oe
 33 | 
 34 | @st.cache_data(show_spinner="Encoding data...")
 35 | def encode_data(data, _X_encoder, _y_encoder):
 36 |     data['class'] = _y_encoder.transform(data['class'])
 37 | 
 38 |     X_cols = data.columns[1:]
 39 |     data[X_cols] = _X_encoder.transform(data[X_cols])
 40 | 
 41 |     return data
 42 | 
 43 | @st.cache_resource(show_spinner="Training model...")
 44 | def train_model(data):
 45 |     X = data.drop(['class'], axis=1)
 46 |     y = data['class']
 47 | 
 48 |     gbc = GradientBoostingClassifier(max_depth=5, random_state=42)
 49 | 
 50 |     gbc.fit(X, y)
 51 | 
 52 |     return gbc
 53 | 
 54 | @st.cache_data(show_spinner="Making a prediction...")
 55 | def make_prediction(_model, _X_encoder, X_pred):
 56 | 
 57 |     features = [each[0] for each in X_pred]
 58 |     features = np.array(features).reshape(1,-1)
 59 |     encoded_features = _X_encoder.transform(features)
 60 | 
 61 |     pred = _model.predict(encoded_features)
 62 | 
 63 |     return pred[0]
 64 | 
 65 | if __name__ == "__main__":
 66 |     st.title("Mushroom classifier 🍄")
 67 |     
 68 |     df = read_data(URL, COLS)
 69 |     
 70 |     st.subheader("Step 1: Select the values for prediction")
 71 | 
 72 |     col1, col2, col3 = st.columns(3)
 73 | 
 74 |     with col1:
 75 |         odor = st.selectbox('Odor', ('a - almond', 'l - anisel', 'c - creosote', 'y - fishy', 'f - foul', 'm - musty', 'n - none', 'p - pungent', 's - spicy'))
 76 |         stalk_surface_above_ring = st.selectbox('Stalk surface above ring', ('f - fibrous', 'y - scaly', 'k - silky', 's - smooth'))
 77 |         stalk_color_below_ring = st.selectbox('Stalk color below ring', ('n - brown', 'b - buff', 'c - cinnamon', 'g - gray', 'o - orange', 'p - pink', 'e - red', 'w - white', 'y - yellow'))
 78 |     with col2:
 79 |         gill_size = st.selectbox('Gill size', ('b - broad', 'n - narrow'))
 80 |         stalk_surface_below_ring = st.selectbox('Stalk surface below ring', ('f - fibrous', 'y - scaly', 'k - silky', 's - smooth'))
 81 |         ring_type = st.selectbox('Ring type', ('e - evanescente', 'f - flaring', 'l - large', 'n - none', 'p - pendant', 's - sheathing', 'z - zone'))
 82 |     with col3:
 83 |         gill_color = st.selectbox('Gill color', ('k - black', 'n - brown', 'b - buff', 'h - chocolate', 'g - gray', 'r - green', 'o - orange', 'p - pink', 'u - purple', 'e - red', 'w - white', 'y - yellow'))
 84 |         stalk_color_above_ring = st.selectbox('Stalk color above ring', ('n - brown', 'b - buff', 'c - cinnamon', 'g - gray', 'o - orange', 'p - pink', 'e - red', 'w - white', 'y - yellow'))
 85 |         spore_print_color = st.selectbox('Spore print color', ('k - black', 'n - brown', 'b - buff', 'h - chocolate', 'r - green', 'o - orange', 'u - purple', 'w - white', 'y - yellow'))
 86 | 
 87 |     st.subheader("Step 2: Ask the model for a prediction")
 88 | 
 89 |     pred_btn = st.button("Predict", type="primary")
 90 | 
 91 |     if pred_btn:
 92 |         le = get_target_encoder(df)
 93 |         oe = get_features_encoder(df)
 94 | 
 95 |         encoded_df = encode_data(df, oe, le)
 96 | 
 97 |         gbc = train_model(encoded_df)
 98 | 
 99 |         x_pred = [odor, 
100 |                   gill_size, 
101 |                   gill_color, 
102 |                   stalk_surface_above_ring, 
103 |                   stalk_surface_below_ring, 
104 |                   stalk_color_above_ring, 
105 |                   stalk_color_below_ring, 
106 |                   ring_type, 
107 |                   spore_print_color]
108 |         
109 |         pred = make_prediction(gbc, oe, x_pred)
110 | 
111 |         nice_pred = "The mushroom is poisonous 🤢" if pred == 1 else "The mushroom is edible 🍴"
112 | 
113 |         st.write(nice_pred)
114 | 
115 |     
116 | 
117 | 
118 | 
119 | 


--------------------------------------------------------------------------------
/16_caching_refactor/old.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | import pandas as pd
  3 | import numpy as np
  4 | import matplotlib.pyplot as plt
  5 | 
  6 | URL = "https://raw.githubusercontent.com/marcopeix/MachineLearningModelDeploymentwithStreamlit/master/12_dashboard_capstone/data/quarterly_canada_population.csv"
  7 | 
  8 | df = pd.read_csv(URL, dtype={'Quarter': str, 
  9 |                             'Canada': np.int32,
 10 |                             'Newfoundland and Labrador': np.int32,
 11 |                             'Prince Edward Island': np.int32,
 12 |                             'Nova Scotia': np.int32,
 13 |                             'New Brunswick': np.int32,
 14 |                             'Quebec': np.int32,
 15 |                             'Ontario': np.int32,
 16 |                             'Manitoba': np.int32,
 17 |                             'Saskatchewan': np.int32,
 18 |                             'Alberta': np.int32,
 19 |                             'British Columbia': np.int32,
 20 |                             'Yukon': np.int32,
 21 |                             'Northwest Territories': np.int32,
 22 |                             'Nunavut': np.int32})
 23 | 
 24 | st.title("Population of Canada")
 25 | st.markdown("Source table can be found [here](https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1710000901)")
 26 | 
 27 | with st.expander("See full data table"):
 28 |     st.write(df)
 29 | 
 30 | with st.form("population-form"):
 31 |     col1, col2, col3 = st.columns(3)
 32 | 
 33 |     with col1:
 34 |         st.write("Choose a starting date")
 35 |         start_quarter = st.selectbox("Quarter", options=["Q1", "Q2", "Q3", "Q4"], index=2, key="start_q")
 36 |         start_year = st.slider("Year", min_value=1991, max_value=2023, value=1991, step=1, key="start_y")
 37 | 
 38 |     with col2:
 39 |         st.write("Choose an end date")
 40 |         end_quarter = st.selectbox("Quarter", options=["Q1", "Q2", "Q3", "Q4"], index=0, key="end_q")
 41 |         end_year = st.slider("Year", min_value=1991, max_value=2023, value=2023, step=1, key="end_y")
 42 |         
 43 |     with col3:
 44 |         st.write("Choose a location")
 45 |         target = st.selectbox("Choose a location", options=df.columns[1:], index=0)
 46 | 
 47 |     submit_btn = st.form_submit_button("Analyze", type="primary")
 48 | 
 49 | start_date = f"{start_quarter} {start_year}"
 50 | end_date = f"{end_quarter} {end_year}"
 51 | 
 52 | def format_date_for_comparison(date):
 53 |     if date[1] == 2:
 54 |         return float(date[2:]) + 0.25
 55 |     elif date[1] == 3:
 56 |         return float(date[2:]) + 0.50
 57 |     elif date[1] == 4:
 58 |         return float(date[2:]) + 0.75
 59 |     else:
 60 |         return float(date[2:])
 61 | 
 62 | def end_before_start(start_date, end_date):
 63 |     num_start_date = format_date_for_comparison(start_date)
 64 |     num_end_date = format_date_for_comparison(end_date)
 65 | 
 66 |     if num_start_date > num_end_date:
 67 |         return True
 68 |     else:
 69 |         return False
 70 | 
 71 | def display_dashboard(start_date, end_date, target):
 72 |     tab1, tab2 = st.tabs(["Population change", "Compare"])
 73 |     
 74 |     with tab1:
 75 |         st.subheader(f"Population change from {start_date} to {end_date}")
 76 | 
 77 |         col1, col2 = st.columns(2)
 78 |         
 79 |         with col1:
 80 |             initial = df.loc[df['Quarter'] == start_date, target].item()
 81 |             final = df.loc[df['Quarter'] == end_date, target].item()
 82 | 
 83 |             percentage_diff = round((final - initial) / initial * 100, 2)
 84 |             delta = f"{percentage_diff}%"
 85 |             st.metric(start_date, value=initial)
 86 |             st.metric(end_date, value=final, delta=delta)
 87 |         
 88 |         with col2:
 89 |             start_idx = df.loc[df['Quarter'] == start_date].index.item()
 90 |             end_idx = df.loc[df['Quarter'] == end_date].index.item()
 91 |             filtered_df = df.iloc[start_idx: end_idx+1]
 92 | 
 93 |             fig, ax = plt.subplots()
 94 |             ax.plot(filtered_df['Quarter'], filtered_df[target])
 95 |             ax.set_xlabel('Time')
 96 |             ax.set_ylabel('Population')
 97 |             ax.set_xticks([filtered_df['Quarter'].iloc[0], filtered_df['Quarter'].iloc[-1]])
 98 |             fig.autofmt_xdate()
 99 |             st.pyplot(fig)
100 | 
101 |     with tab2:
102 |         st.subheader('Compare with other locations')
103 |         all_targets = st.multiselect("Choose other locations", options=filtered_df.columns[1:], default=[target])
104 |         
105 |         fig, ax = plt.subplots()
106 |         for each in all_targets:
107 |             ax.plot(filtered_df['Quarter'], filtered_df[each])
108 |         ax.set_xlabel('Time')
109 |         ax.set_ylabel('Population')
110 |         ax.set_xticks([filtered_df['Quarter'].iloc[0], filtered_df['Quarter'].iloc[-1]])
111 |         fig.autofmt_xdate()
112 |         st.pyplot(fig)
113 | 
# Validate the selection first: both labels must exist in the table and be in
# chronological order before the dashboard is drawn.
if not (start_date in df['Quarter'].tolist() and end_date in df['Quarter'].tolist()):
    st.error("No data available. Check your quarter and year selection")
elif end_before_start(start_date, end_date):
    st.error("Dates don't work. Start date must come before end date.")
else:
    display_dashboard(start_date, end_date, target)


--------------------------------------------------------------------------------
/12_dashboard_capstone/main.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | import pandas as pd
  3 | import numpy as np
  4 | import matplotlib.pyplot as plt
  5 | 
  6 | URL = "https://raw.githubusercontent.com/marcopeix/MachineLearningModelDeploymentwithStreamlit/master/12_dashboard_capstone/data/quarterly_canada_population.csv"
  7 | 
  8 | df = pd.read_csv(URL, dtype={'Quarter': str, 
  9 |                             'Canada': np.int32,
 10 |                             'Newfoundland and Labrador': np.int32,
 11 |                             'Prince Edward Island': np.int32,
 12 |                             'Nova Scotia': np.int32,
 13 |                             'New Brunswick': np.int32,
 14 |                             'Quebec': np.int32,
 15 |                             'Ontario': np.int32,
 16 |                             'Manitoba': np.int32,
 17 |                             'Saskatchewan': np.int32,
 18 |                             'Alberta': np.int32,
 19 |                             'British Columbia': np.int32,
 20 |                             'Yukon': np.int32,
 21 |                             'Northwest Territories': np.int32,
 22 |                             'Nunavut': np.int32})
 23 | 
 24 | st.title("Population of Canada")
 25 | st.markdown("Source table can be found [here](https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1710000901)")
 26 | 
 27 | with st.expander("See full data table"):
 28 |     st.write(df)
 29 | 
 30 | with st.form("population-form"):
 31 |     col1, col2, col3 = st.columns(3)
 32 | 
 33 |     with col1:
 34 |         st.write("Choose a starting date")
 35 |         start_quarter = st.selectbox("Quarter", options=["Q1", "Q2", "Q3", "Q4"], index=2, key="start_q")
 36 |         start_year = st.slider("Year", min_value=1991, max_value=2023, value=1991, step=1, key="start_y")
 37 | 
 38 |     with col2:
 39 |         st.write("Choose an end date")
 40 |         end_quarter = st.selectbox("Quarter", options=["Q1", "Q2", "Q3", "Q4"], index=0, key="end_q")
 41 |         end_year = st.slider("Year", min_value=1991, max_value=2023, value=2023, step=1, key="end_y")
 42 |         
 43 |     with col3:
 44 |         st.write("Choose a location")
 45 |         target = st.selectbox("Choose a location", options=df.columns[1:], index=0)
 46 | 
 47 |     submit_btn = st.form_submit_button("Analyze", type="primary")
 48 | 
 49 | start_date = f"{start_quarter} {start_year}"
 50 | end_date = f"{end_quarter} {end_year}"
 51 | 
 52 | def format_date_for_comparison(date):
 53 |     if date[1] == 2:
 54 |         return float(date[2:]) + 0.25
 55 |     elif date[1] == 3:
 56 |         return float(date[2:]) + 0.50
 57 |     elif date[1] == 4:
 58 |         return float(date[2:]) + 0.75
 59 |     else:
 60 |         return float(date[2:])
 61 | 
 62 | def end_before_start(start_date, end_date):
 63 |     num_start_date = format_date_for_comparison(start_date)
 64 |     num_end_date = format_date_for_comparison(end_date)
 65 | 
 66 |     if num_start_date > num_end_date:
 67 |         return True
 68 |     else:
 69 |         return False
 70 | 
 71 | def display_dashboard(start_date, end_date, target):
 72 |     tab1, tab2 = st.tabs(["Population change", "Compare"])
 73 |     
 74 |     with tab1:
 75 |         st.subheader(f"Population change from {start_date} to {end_date}")
 76 | 
 77 |         col1, col2 = st.columns(2)
 78 |         
 79 |         with col1:
 80 |             initial = df.loc[df['Quarter'] == start_date, target].item()
 81 |             final = df.loc[df['Quarter'] == end_date, target].item()
 82 | 
 83 |             percentage_diff = round((final - initial) / initial * 100, 2)
 84 |             delta = f"{percentage_diff}%"
 85 |             st.metric(start_date, value=initial)
 86 |             st.metric(end_date, value=final, delta=delta)
 87 |         
 88 |         with col2:
 89 |             start_idx = df.loc[df['Quarter'] == start_date].index.item()
 90 |             end_idx = df.loc[df['Quarter'] == end_date].index.item()
 91 |             filtered_df = df.iloc[start_idx: end_idx+1]
 92 | 
 93 |             fig, ax = plt.subplots()
 94 |             ax.plot(filtered_df['Quarter'], filtered_df[target])
 95 |             ax.set_xlabel('Time')
 96 |             ax.set_ylabel('Population')
 97 |             ax.set_xticks([filtered_df['Quarter'].iloc[0], filtered_df['Quarter'].iloc[-1]])
 98 |             fig.autofmt_xdate()
 99 |             st.pyplot(fig)
100 | 
101 |     with tab2:
102 |         st.subheader('Compare with other locations')
103 |         all_targets = st.multiselect("Choose other locations", options=filtered_df.columns[1:], default=[target])
104 |         
105 |         fig, ax = plt.subplots()
106 |         for each in all_targets:
107 |             ax.plot(filtered_df['Quarter'], filtered_df[each])
108 |         ax.set_xlabel('Time')
109 |         ax.set_ylabel('Population')
110 |         ax.set_xticks([filtered_df['Quarter'].iloc[0], filtered_df['Quarter'].iloc[-1]])
111 |         fig.autofmt_xdate()
112 |         st.pyplot(fig)
113 | 
# Validate the selection first: both labels must exist in the table and be in
# chronological order before the dashboard is drawn.
if not (start_date in df['Quarter'].tolist() and end_date in df['Quarter'].tolist()):
    st.error("No data available. Check your quarter and year selection")
elif end_before_start(start_date, end_date):
    st.error("Dates don't work. Start date must come before end date.")
else:
    display_dashboard(start_date, end_date, target)
120 | 
121 | 
122 |     


--------------------------------------------------------------------------------
/16_caching_refactor/main.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | import pandas as pd
  3 | import numpy as np
  4 | import matplotlib.pyplot as plt
  5 | 
  6 | URL = "https://raw.githubusercontent.com/marcopeix/MachineLearningModelDeploymentwithStreamlit/master/12_dashboard_capstone/data/quarterly_canada_population.csv"
  7 | 
  8 | @st.cache_data
  9 | def read_data():
 10 |     df = pd.read_csv(URL, dtype={'Quarter': str, 
 11 |                                 'Canada': np.int32,
 12 |                                 'Newfoundland and Labrador': np.int32,
 13 |                                 'Prince Edward Island': np.int32,
 14 |                                 'Nova Scotia': np.int32,
 15 |                                 'New Brunswick': np.int32,
 16 |                                 'Quebec': np.int32,
 17 |                                 'Ontario': np.int32,
 18 |                                 'Manitoba': np.int32,
 19 |                                 'Saskatchewan': np.int32,
 20 |                                 'Alberta': np.int32,
 21 |                                 'British Columbia': np.int32,
 22 |                                 'Yukon': np.int32,
 23 |                                 'Northwest Territories': np.int32,
 24 |                                 'Nunavut': np.int32})
 25 |     
 26 |     return df
 27 | 
 28 | @st.cache_data
 29 | def format_date_for_comparison(date):
 30 |     if date[1] == 2:
 31 |         return float(date[2:]) + 0.25
 32 |     elif date[1] == 3:
 33 |         return float(date[2:]) + 0.50
 34 |     elif date[1] == 4:
 35 |         return float(date[2:]) + 0.75
 36 |     else:
 37 |         return float(date[2:])
 38 | 
 39 | @st.cache_data
 40 | def end_before_start(start_date, end_date):
 41 |     num_start_date = format_date_for_comparison(start_date)
 42 |     num_end_date = format_date_for_comparison(end_date)
 43 | 
 44 |     if num_start_date > num_end_date:
 45 |         return True
 46 |     else:
 47 |         return False
 48 |     
 49 | def display_dashboard(start_date, end_date, target):
 50 |     tab1, tab2 = st.tabs(["Population change", "Compare"])
 51 |     
 52 |     with tab1:
 53 |         st.subheader(f"Population change from {start_date} to {end_date}")
 54 | 
 55 |         col1, col2 = st.columns(2)
 56 |         
 57 |         with col1:
 58 |             initial = df.loc[df['Quarter'] == start_date, target].item()
 59 |             final = df.loc[df['Quarter'] == end_date, target].item()
 60 | 
 61 |             percentage_diff = round((final - initial) / initial * 100, 2)
 62 |             delta = f"{percentage_diff}%"
 63 |             st.metric(start_date, value=initial)
 64 |             st.metric(end_date, value=final, delta=delta)
 65 |         
 66 |         with col2:
 67 |             start_idx = df.loc[df['Quarter'] == start_date].index.item()
 68 |             end_idx = df.loc[df['Quarter'] == end_date].index.item()
 69 |             filtered_df = df.iloc[start_idx: end_idx+1]
 70 | 
 71 |             fig, ax = plt.subplots()
 72 |             ax.plot(filtered_df['Quarter'], filtered_df[target])
 73 |             ax.set_xlabel('Time')
 74 |             ax.set_ylabel('Population')
 75 |             ax.set_xticks([filtered_df['Quarter'].iloc[0], filtered_df['Quarter'].iloc[-1]])
 76 |             fig.autofmt_xdate()
 77 |             st.pyplot(fig)
 78 | 
 79 |     with tab2:
 80 |         st.subheader('Compare with other locations')
 81 |         all_targets = st.multiselect("Choose other locations", options=filtered_df.columns[1:], default=[target])
 82 |         
 83 |         fig, ax = plt.subplots()
 84 |         for each in all_targets:
 85 |             ax.plot(filtered_df['Quarter'], filtered_df[each])
 86 |         ax.set_xlabel('Time')
 87 |         ax.set_ylabel('Population')
 88 |         ax.set_xticks([filtered_df['Quarter'].iloc[0], filtered_df['Quarter'].iloc[-1]])
 89 |         fig.autofmt_xdate()
 90 |         st.pyplot(fig)
 91 | 
 92 | if __name__ == "__main__":
 93 | 
 94 |     df = read_data()
 95 | 
 96 |     st.title("Population of Canada")
 97 |     st.markdown("Source table can be found [here](https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1710000901)")
 98 | 
 99 |     with st.expander("See full data table"):
100 |         st.write(df)
101 | 
102 |     with st.form("population-form"):
103 |         col1, col2, col3 = st.columns(3)
104 | 
105 |         with col1:
106 |             st.write("Choose a starting date")
107 |             start_quarter = st.selectbox("Quarter", options=["Q1", "Q2", "Q3", "Q4"], index=2, key="start_q")
108 |             start_year = st.slider("Year", min_value=1991, max_value=2023, value=1991, step=1, key="start_y")
109 | 
110 |         with col2:
111 |             st.write("Choose an end date")
112 |             end_quarter = st.selectbox("Quarter", options=["Q1", "Q2", "Q3", "Q4"], index=0, key="end_q")
113 |             end_year = st.slider("Year", min_value=1991, max_value=2023, value=2023, step=1, key="end_y")
114 |             
115 |         with col3:
116 |             st.write("Choose a location")
117 |             target = st.selectbox("Choose a location", options=df.columns[1:], index=0)
118 | 
119 |         submit_btn = st.form_submit_button("Analyze", type="primary")
120 |     
121 |     start_date = f"{start_quarter} {start_year}"
122 |     end_date = f"{end_quarter} {end_year}"
123 | 
124 |     if start_date not in df['Quarter'].tolist() or end_date not in df['Quarter'].tolist():
125 |         st.error("No data available. Check your quarter and year selection")
126 |     elif end_before_start(start_date, end_date):
127 |         st.error("Dates don't work. Start date must come before end date.")
128 |     else:
129 |         display_dashboard(start_date, end_date, target)


--------------------------------------------------------------------------------
/35_authentication_capstone/main.py:
--------------------------------------------------------------------------------
  1 | import yaml
  2 | import streamlit as st
  3 | from yaml.loader import SafeLoader
  4 | import streamlit_authenticator as stauth
  5 | 
  6 | import pandas as pd
  7 | import numpy as np
  8 | from sklearn.metrics import silhouette_score
  9 | from sklearn.cluster import KMeans, AgglomerativeClustering
 10 | 
 11 | # hashed_passwords = stauth.Hasher(['marketing', 'datascience']).generate()
 12 | # st.write(hashed_passwords)
 13 | 
 14 | with open('config.yaml') as file:
 15 |     config = yaml.load(file, Loader=SafeLoader)
 16 | 
 17 | authenticator = stauth.Authenticate(
 18 |     config['credentials'],
 19 |     config['cookie']['name'],
 20 |     config['cookie']['key'],
 21 |     config['cookie']['expiry_days'],
 22 | )
 23 | 
 24 | name, authentication_status, username = authenticator.login('Login', 'sidebar')
 25 | 
 26 | 
 27 | @st.cache_data
 28 | def read_data():
 29 |     df = pd.read_csv('data/segmentation data.csv')
 30 |     return df
 31 | 
 32 | 
 33 | df = read_data()
 34 | X = df[['Sex', 'Marital status', 'Age', 'Education',
 35 |         'Income', 'Occupation', 'Settlement size']]
 36 | 
 37 | 
 38 | @st.cache_data(show_spinner="Running experiment")
 39 | def run_experiment(X):
 40 | 
 41 |     k_silhouettes = []
 42 |     agglo_silhouettes = []
 43 | 
 44 |     for i in range(2, 11):
 45 |         kmeans = KMeans(n_clusters=i)
 46 |         agglo = AgglomerativeClustering(n_clusters=i)
 47 | 
 48 |         kmeans.fit(X)
 49 |         agglo.fit(X)
 50 | 
 51 |         k_silhouettes.append(silhouette_score(X, kmeans.labels_))
 52 |         agglo_silhouettes.append(silhouette_score(X, agglo.labels_))
 53 | 
 54 |     metrics_df = pd.DataFrame({
 55 |         "n_clusters": np.arange(2, 11),
 56 |         "KMeans": k_silhouettes,
 57 |         "Agglo": agglo_silhouettes
 58 |     })
 59 | 
 60 |     return metrics_df
 61 | 
 62 | 
 63 | @st.cache_data
 64 | def display_group_metrics(df, num_clusters):
 65 |     for i in range(num_clusters):
 66 |         f_df = df[df['labels'] == i]
 67 | 
 68 |         male_percentage = round(len(f_df[f_df['Sex'] == 0])/len(f_df), 2)*100
 69 |         female_percentage = 100 - male_percentage
 70 | 
 71 |         marital_percentage = round(
 72 |             len(f_df[f_df['Marital status'] == 0])/len(f_df), 2)*100
 73 | 
 74 |         mean_age = round(f_df['Age'].mean(), 0)
 75 |         min_age = f_df['Age'].min()
 76 |         max_age = f_df['Age'].max()
 77 | 
 78 |         high_school = round(len(f_df[f_df['Education'] == 1])/len(f_df), 2)*100
 79 |         university = round(
 80 |             len(f_df[(f_df["Education"] == 2) | (f_df["Education"] == 3)])/len(f_df), 2)*100
 81 | 
 82 |         mean_income = round(f_df['Income'].mean(), 0)
 83 |         max_income = f_df['Income'].max()
 84 |         min_income = f_df['Income'].min()
 85 | 
 86 |         employment_num = f_df['Occupation'].mode().values
 87 | 
 88 |         if employment_num == 0:
 89 |             employment = 'Unemployed'
 90 |         elif employment_num == 1:
 91 |             employment = 'Skilled employee'
 92 |         else:
 93 |             employment = 'Highly skilled employee'
 94 | 
 95 |         city_num = f_df['Settlement size'].mode().values
 96 | 
 97 |         if city_num == 0:
 98 |             city = 'Small city'
 99 |         elif city_num == 1:
100 |             city = "Mid-sized city"
101 |         else:
102 |             city = "Large city"
103 | 
104 |         with st.container():
105 |             st.header(f"Group {i+1}")
106 | 
107 |             st.subheader("Demographics")
108 |             st.write(f"Percentage of men: {male_percentage}%")
109 |             st.write(f"Percentage of female: {female_percentage}%")
110 | 
111 |             st.subheader("Marital status")
112 |             st.write(f"Percentage of married clients: {marital_percentage}%")
113 | 
114 |             st.subheader("Age")
115 |             st.write(f"Mean age: {mean_age}")
116 |             st.write(f"Max age: {max_age}")
117 |             st.write(f"Min age: {min_age}")
118 | 
119 |             st.subheader("Education")
120 |             st.write(f"High school: {high_school}%")
121 |             st.write(f"University: {university}%")
122 | 
123 |             st.subheader("Income")
124 |             st.write(f"Mean income: {mean_income}$")
125 |             st.write(f"Max income: {max_income}$")
126 |             st.write(f"Min income: {min_income}$")
127 | 
128 |             st.subheader("Employment")
129 |             st.write(f"The majority is: {employment}")
130 | 
131 |             st.subheader("City size")
132 |             st.write(f"The majority is: {city}")
133 | 
134 | 
def display_ds_content():
    """Show the raw table and, on request, the silhouette-score experiment.

    Writes the module-level ``df``, then renders a "Run experiment" button.
    When the button is pressed, ``run_experiment(X)`` is called and its
    result is written under a "Silhouette scores" caption.
    """
    st.write(df)

    # Nothing more to draw until the button is pressed.
    if not st.button("Run experiment"):
        return

    scores = run_experiment(X)
    st.write("Silhouette scores")
    st.write(scores)
143 | 
144 | 
def display_marketing_content():
    """Show the raw table and let the user generate customer groups.

    Writes the module-level ``df``, offers a slider for the number of groups
    (2 to 10) and a "Generate groups" button. When the button is pressed, a
    KMeans model is fitted on ``X``, its predictions are attached to a copy
    of ``df`` as a ``labels`` column, and the rows sorted by label are passed
    to ``display_group_metrics``.
    """
    st.write(df)

    num_clusters = st.slider("Number of groups", 2, 10)

    # Nothing more to draw until the button is pressed.
    if not st.button("Generate groups"):
        return

    model = KMeans(n_clusters=num_clusters)
    model.fit(X)

    # assign() works on a copy, so the shared `df` is left untouched.
    labelled = df.assign(labels=model.predict(X))
    labelled = labelled.sort_values(by=['labels'], ascending=True)

    display_group_metrics(labelled, num_clusters)
163 | 
164 | 
# Route on the login state held in session state (presumably populated by
# authenticator.login - confirm against streamlit_authenticator).
auth_state = st.session_state["authentication_status"]

if auth_state is None:
    st.warning('Please enter your username and password')
elif auth_state is False:
    st.error('Username/password is incorrect')
elif auth_state:
    authenticator.logout('Logout', 'sidebar', key='unique_key')

    # Page title and renderer per known username; other users see no content.
    role_pages = {
        'marketing': ('Marketing content', display_marketing_content),
        'datascience': ("Data science content", display_ds_content),
    }
    role_page = role_pages.get(username)

    if role_page is not None:
        page_title, render_page = role_page
        st.write(f'Welcome *{st.session_state["name"]}*')
        st.title(page_title)
        render_page()
182 | 


--------------------------------------------------------------------------------
/39_connect_data_capstone/data/car_parts_monthly_sales.csv:
--------------------------------------------------------------------------------
  1 | id,parts_id,date,volume
  2 | 1,2674,1/1/1998,0
  3 | 2,2674,2/1/1998,0
  4 | 3,2674,3/1/1998,0
  5 | 4,2674,4/1/1998,0
  6 | 5,2674,5/1/1998,2
  7 | 6,2674,6/1/1998,4
  8 | 7,2674,7/1/1998,4
  9 | 8,2674,8/1/1998,1
 10 | 9,2674,9/1/1998,4
 11 | 10,2674,10/1/1998,5
 12 | 11,2674,11/1/1998,4
 13 | 12,2674,12/1/1998,6
 14 | 13,2674,1/1/1999,2
 15 | 14,2674,2/1/1999,3
 16 | 15,2674,3/1/1999,6
 17 | 16,2674,4/1/1999,1
 18 | 17,2674,5/1/1999,3
 19 | 18,2674,6/1/1999,2
 20 | 19,2674,7/1/1999,2
 21 | 20,2674,8/1/1999,0
 22 | 21,2674,9/1/1999,5
 23 | 22,2674,10/1/1999,3
 24 | 23,2674,11/1/1999,1
 25 | 24,2674,12/1/1999,3
 26 | 25,2674,1/1/2000,4
 27 | 26,2674,2/1/2000,3
 28 | 27,2674,3/1/2000,0
 29 | 28,2674,4/1/2000,1
 30 | 29,2674,5/1/2000,1
 31 | 30,2674,6/1/2000,1
 32 | 31,2674,7/1/2000,1
 33 | 32,2674,8/1/2000,0
 34 | 33,2674,9/1/2000,0
 35 | 34,2674,10/1/2000,1
 36 | 35,2674,11/1/2000,2
 37 | 36,2674,12/1/2000,0
 38 | 37,2674,1/1/2001,2
 39 | 38,2674,2/1/2001,3
 40 | 39,2674,3/1/2001,0
 41 | 40,2674,4/1/2001,0
 42 | 41,2674,5/1/2001,1
 43 | 42,2674,6/1/2001,1
 44 | 43,2674,7/1/2001,0
 45 | 44,2674,8/1/2001,1
 46 | 45,2674,9/1/2001,0
 47 | 46,2674,10/1/2001,0
 48 | 47,2674,11/1/2001,2
 49 | 48,2674,12/1/2001,2
 50 | 49,2674,1/1/2002,0
 51 | 50,2674,2/1/2002,1
 52 | 51,2674,3/1/2002,1
 53 | 53,2673,1/1/1998,0
 54 | 54,2673,2/1/1998,0
 55 | 55,2673,3/1/1998,0
 56 | 56,2673,4/1/1998,2
 57 | 57,2673,5/1/1998,1
 58 | 58,2673,6/1/1998,0
 59 | 59,2673,7/1/1998,2
 60 | 60,2673,8/1/1998,4
 61 | 61,2673,9/1/1998,2
 62 | 62,2673,10/1/1998,2
 63 | 63,2673,11/1/1998,3
 64 | 64,2673,12/1/1998,0
 65 | 65,2673,1/1/1999,2
 66 | 66,2673,2/1/1999,2
 67 | 67,2673,3/1/1999,5
 68 | 68,2673,4/1/1999,5
 69 | 69,2673,5/1/1999,1
 70 | 70,2673,6/1/1999,3
 71 | 71,2673,7/1/1999,4
 72 | 72,2673,8/1/1999,4
 73 | 73,2673,9/1/1999,5
 74 | 74,2673,10/1/1999,0
 75 | 75,2673,11/1/1999,1
 76 | 76,2673,12/1/1999,3
 77 | 77,2673,1/1/2000,1
 78 | 78,2673,2/1/2000,0
 79 | 79,2673,3/1/2000,1
 80 | 80,2673,4/1/2000,4
 81 | 81,2673,5/1/2000,3
 82 | 82,2673,6/1/2000,3
 83 | 83,2673,7/1/2000,0
 84 | 84,2673,8/1/2000,0
 85 | 85,2673,9/1/2000,1
 86 | 86,2673,10/1/2000,2
 87 | 87,2673,11/1/2000,1
 88 | 88,2673,12/1/2000,0
 89 | 89,2673,1/1/2001,1
 90 | 90,2673,2/1/2001,1
 91 | 91,2673,3/1/2001,0
 92 | 92,2673,4/1/2001,0
 93 | 93,2673,5/1/2001,4
 94 | 94,2673,6/1/2001,0
 95 | 95,2673,7/1/2001,0
 96 | 96,2673,8/1/2001,4
 97 | 97,2673,9/1/2001,0
 98 | 98,2673,10/1/2001,1
 99 | 99,2673,11/1/2001,2
100 | 100,2673,12/1/2001,2
101 | 101,2673,1/1/2002,3
102 | 102,2673,2/1/2002,1
103 | 103,2673,3/1/2002,3
104 | 104,2672,1/1/1998,11
105 | 105,2672,2/1/1998,2
106 | 106,2672,3/1/1998,0
107 | 107,2672,4/1/1998,2
108 | 108,2672,5/1/1998,12
109 | 109,2672,6/1/1998,0
110 | 110,2672,7/1/1998,0
111 | 111,2672,8/1/1998,4
112 | 112,2672,9/1/1998,2
113 | 113,2672,10/1/1998,0
114 | 114,2672,11/1/1998,0
115 | 115,2672,12/1/1998,0
116 | 116,2672,1/1/1999,0
117 | 117,2672,2/1/1999,0
118 | 118,2672,3/1/1999,0
119 | 119,2672,4/1/1999,6
120 | 120,2672,5/1/1999,5
121 | 121,2672,6/1/1999,1
122 | 122,2672,7/1/1999,0
123 | 123,2672,8/1/1999,4
124 | 124,2672,9/1/1999,2
125 | 125,2672,10/1/1999,0
126 | 126,2672,11/1/1999,4
127 | 127,2672,12/1/1999,4
128 | 128,2672,1/1/2000,2
129 | 129,2672,2/1/2000,0
130 | 130,2672,3/1/2000,1
131 | 131,2672,4/1/2000,2
132 | 132,2672,5/1/2000,6
133 | 133,2672,6/1/2000,2
134 | 134,2672,7/1/2000,0
135 | 135,2672,8/1/2000,0
136 | 136,2672,9/1/2000,0
137 | 137,2672,10/1/2000,0
138 | 138,2672,11/1/2000,0
139 | 139,2672,12/1/2000,0
140 | 140,2672,1/1/2001,0
141 | 141,2672,2/1/2001,6
142 | 142,2672,3/1/2001,0
143 | 143,2672,4/1/2001,0
144 | 144,2672,5/1/2001,4
145 | 145,2672,6/1/2001,0
146 | 146,2672,7/1/2001,0
147 | 147,2672,8/1/2001,0
148 | 148,2672,9/1/2001,0
149 | 149,2672,10/1/2001,1
150 | 150,2672,11/1/2001,1
151 | 151,2672,12/1/2001,2
152 | 152,2672,1/1/2002,1
153 | 153,2672,2/1/2002,2
154 | 154,2672,3/1/2002,0
155 | 155,2671,1/1/1998,6
156 | 156,2671,2/1/1998,5
157 | 157,2671,3/1/1998,5
158 | 158,2671,4/1/1998,3
159 | 159,2671,5/1/1998,5
160 | 160,2671,6/1/1998,0
161 | 161,2671,7/1/1998,2
162 | 162,2671,8/1/1998,1
163 | 163,2671,9/1/1998,3
164 | 164,2671,10/1/1998,0
165 | 165,2671,11/1/1998,1
166 | 166,2671,12/1/1998,7
167 | 167,2671,1/1/1999,4
168 | 168,2671,2/1/1999,3
169 | 169,2671,3/1/1999,3
170 | 170,2671,4/1/1999,1
171 | 171,2671,5/1/1999,3
172 | 172,2671,6/1/1999,2
173 | 173,2671,7/1/1999,2
174 | 174,2671,8/1/1999,2
175 | 175,2671,9/1/1999,0
176 | 176,2671,10/1/1999,2
177 | 177,2671,11/1/1999,2
178 | 178,2671,12/1/1999,2
179 | 179,2671,1/1/2000,2
180 | 180,2671,2/1/2000,1
181 | 181,2671,3/1/2000,3
182 | 182,2671,4/1/2000,0
183 | 183,2671,5/1/2000,1
184 | 184,2671,6/1/2000,3
185 | 185,2671,7/1/2000,0
186 | 186,2671,8/1/2000,1
187 | 187,2671,9/1/2000,2
188 | 188,2671,10/1/2000,3
189 | 189,2671,11/1/2000,1
190 | 190,2671,12/1/2000,0
191 | 191,2671,1/1/2001,1
192 | 192,2671,2/1/2001,1
193 | 193,2671,3/1/2001,3
194 | 194,2671,4/1/2001,2
195 | 195,2671,5/1/2001,0
196 | 196,2671,6/1/2001,0
197 | 197,2671,7/1/2001,0
198 | 198,2671,8/1/2001,0
199 | 199,2671,9/1/2001,0
200 | 200,2671,10/1/2001,0
201 | 201,2671,11/1/2001,0
202 | 202,2671,12/1/2001,0
203 | 203,2671,1/1/2002,0
204 | 204,2671,2/1/2002,1
205 | 205,2671,3/1/2002,0
206 | 206,2670,1/1/1998,6
207 | 207,2670,2/1/1998,6
208 | 208,2670,3/1/1998,5
209 | 209,2670,4/1/1998,0
210 | 210,2670,5/1/1998,2
211 | 211,2670,6/1/1998,1
212 | 212,2670,7/1/1998,3
213 | 213,2670,8/1/1998,1
214 | 214,2670,9/1/1998,5
215 | 215,2670,10/1/1998,5
216 | 216,2670,11/1/1998,0
217 | 217,2670,12/1/1998,2
218 | 218,2670,1/1/1999,1
219 | 219,2670,2/1/1999,3
220 | 220,2670,3/1/1999,1
221 | 221,2670,4/1/1999,6
222 | 222,2670,5/1/1999,4
223 | 223,2670,6/1/1999,3
224 | 224,2670,7/1/1999,0
225 | 225,2670,8/1/1999,1
226 | 226,2670,9/1/1999,2
227 | 227,2670,10/1/1999,5
228 | 228,2670,11/1/1999,0
229 | 229,2670,12/1/1999,1
230 | 230,2670,1/1/2000,1
231 | 231,2670,2/1/2000,1
232 | 232,2670,3/1/2000,3
233 | 233,2670,4/1/2000,0
234 | 234,2670,5/1/2000,0
235 | 235,2670,6/1/2000,0
236 | 236,2670,7/1/2000,3
237 | 237,2670,8/1/2000,1
238 | 238,2670,9/1/2000,1
239 | 239,2670,10/1/2000,2
240 | 240,2670,11/1/2000,1
241 | 241,2670,12/1/2000,0
242 | 242,2670,1/1/2001,0
243 | 243,2670,2/1/2001,4
244 | 244,2670,3/1/2001,0
245 | 245,2670,4/1/2001,0
246 | 246,2670,5/1/2001,0
247 | 247,2670,6/1/2001,1
248 | 248,2670,7/1/2001,0
249 | 249,2670,8/1/2001,1
250 | 250,2670,9/1/2001,0
251 | 251,2670,10/1/2001,2
252 | 252,2670,11/1/2001,0
253 | 253,2670,12/1/2001,1
254 | 254,2670,1/1/2002,0
255 | 255,2670,2/1/2002,3
256 | 256,2670,3/1/2002,0
257 | 257,2669,1/1/1998,4
258 | 258,2669,2/1/1998,4
259 | 259,2669,3/1/1998,4
260 | 260,2669,4/1/1998,2
261 | 261,2669,5/1/1998,4
262 | 262,2669,6/1/1998,2
263 | 263,2669,7/1/1998,7
264 | 264,2669,8/1/1998,4
265 | 265,2669,9/1/1998,0
266 | 266,2669,10/1/1998,5
267 | 267,2669,11/1/1998,3
268 | 268,2669,12/1/1998,1
269 | 269,2669,1/1/1999,5
270 | 270,2669,2/1/1999,1
271 | 271,2669,3/1/1999,7
272 | 272,2669,4/1/1999,1
273 | 273,2669,5/1/1999,2
274 | 274,2669,6/1/1999,2
275 | 275,2669,7/1/1999,4
276 | 276,2669,8/1/1999,5
277 | 277,2669,9/1/1999,2
278 | 278,2669,10/1/1999,0
279 | 279,2669,11/1/1999,1
280 | 280,2669,12/1/1999,0
281 | 281,2669,1/1/2000,3
282 | 282,2669,2/1/2000,1
283 | 283,2669,3/1/2000,3
284 | 284,2669,4/1/2000,2
285 | 285,2669,5/1/2000,2
286 | 286,2669,6/1/2000,0
287 | 287,2669,7/1/2000,2
288 | 288,2669,8/1/2000,0
289 | 289,2669,9/1/2000,2
290 | 290,2669,10/1/2000,0
291 | 291,2669,11/1/2000,1
292 | 292,2669,12/1/2000,0
293 | 293,2669,1/1/2001,0
294 | 294,2669,2/1/2001,0
295 | 295,2669,3/1/2001,0
296 | 296,2669,4/1/2001,0
297 | 297,2669,5/1/2001,0
298 | 298,2669,6/1/2001,0
299 | 299,2669,7/1/2001,0
300 | 300,2669,8/1/2001,0
301 | 301,2669,9/1/2001,0
302 | 302,2669,10/1/2001,0
303 | 303,2669,11/1/2001,0
304 | 304,2669,12/1/2001,0
305 | 305,2669,1/1/2002,1
306 | 306,2669,2/1/2002,0
307 | 307,2669,3/1/2002,1
308 | 308,2668,1/1/1998,5
309 | 309,2668,2/1/1998,3
310 | 310,2668,3/1/1998,3
311 | 311,2668,4/1/1998,3
312 | 312,2668,5/1/1998,2
313 | 313,2668,6/1/1998,1
314 | 314,2668,7/1/1998,4
315 | 315,2668,8/1/1998,2
316 | 316,2668,9/1/1998,1
317 | 317,2668,10/1/1998,5
318 | 318,2668,11/1/1998,3
319 | 319,2668,12/1/1998,1
320 | 320,2668,1/1/1999,2
321 | 321,2668,2/1/1999,3
322 | 322,2668,3/1/1999,1
323 | 323,2668,4/1/1999,3
324 | 324,2668,5/1/1999,0
325 | 325,2668,6/1/1999,4
326 | 326,2668,7/1/1999,2
327 | 327,2668,8/1/1999,3
328 | 328,2668,9/1/1999,0
329 | 329,2668,10/1/1999,3
330 | 330,2668,11/1/1999,1
331 | 331,2668,12/1/1999,0
332 | 332,2668,1/1/2000,0
333 | 333,2668,2/1/2000,0
334 | 334,2668,3/1/2000,0
335 | 335,2668,4/1/2000,0
336 | 336,2668,5/1/2000,3
337 | 337,2668,6/1/2000,1
338 | 338,2668,7/1/2000,1
339 | 339,2668,8/1/2000,1
340 | 340,2668,9/1/2000,2
341 | 341,2668,10/1/2000,0
342 | 342,2668,11/1/2000,0
343 | 343,2668,12/1/2000,1
344 | 344,2668,1/1/2001,1
345 | 345,2668,2/1/2001,0
346 | 346,2668,3/1/2001,1
347 | 347,2668,4/1/2001,2
348 | 348,2668,5/1/2001,2
349 | 349,2668,6/1/2001,6
350 | 350,2668,7/1/2001,0
351 | 351,2668,8/1/2001,1
352 | 352,2668,9/1/2001,2
353 | 353,2668,10/1/2001,5
354 | 354,2668,11/1/2001,0
355 | 355,2668,12/1/2001,0
356 | 356,2668,1/1/2002,1
357 | 357,2668,2/1/2002,1
358 | 358,2668,3/1/2002,1
359 | 


--------------------------------------------------------------------------------
/37_streamlit_connect_database/data/car_parts_monthly_sales.csv:
--------------------------------------------------------------------------------
  1 | id,parts_id,date,volume
  2 | 1,2674,1/1/1998,0
  3 | 2,2674,2/1/1998,0
  4 | 3,2674,3/1/1998,0
  5 | 4,2674,4/1/1998,0
  6 | 5,2674,5/1/1998,2
  7 | 6,2674,6/1/1998,4
  8 | 7,2674,7/1/1998,4
  9 | 8,2674,8/1/1998,1
 10 | 9,2674,9/1/1998,4
 11 | 10,2674,10/1/1998,5
 12 | 11,2674,11/1/1998,4
 13 | 12,2674,12/1/1998,6
 14 | 13,2674,1/1/1999,2
 15 | 14,2674,2/1/1999,3
 16 | 15,2674,3/1/1999,6
 17 | 16,2674,4/1/1999,1
 18 | 17,2674,5/1/1999,3
 19 | 18,2674,6/1/1999,2
 20 | 19,2674,7/1/1999,2
 21 | 20,2674,8/1/1999,0
 22 | 21,2674,9/1/1999,5
 23 | 22,2674,10/1/1999,3
 24 | 23,2674,11/1/1999,1
 25 | 24,2674,12/1/1999,3
 26 | 25,2674,1/1/2000,4
 27 | 26,2674,2/1/2000,3
 28 | 27,2674,3/1/2000,0
 29 | 28,2674,4/1/2000,1
 30 | 29,2674,5/1/2000,1
 31 | 30,2674,6/1/2000,1
 32 | 31,2674,7/1/2000,1
 33 | 32,2674,8/1/2000,0
 34 | 33,2674,9/1/2000,0
 35 | 34,2674,10/1/2000,1
 36 | 35,2674,11/1/2000,2
 37 | 36,2674,12/1/2000,0
 38 | 37,2674,1/1/2001,2
 39 | 38,2674,2/1/2001,3
 40 | 39,2674,3/1/2001,0
 41 | 40,2674,4/1/2001,0
 42 | 41,2674,5/1/2001,1
 43 | 42,2674,6/1/2001,1
 44 | 43,2674,7/1/2001,0
 45 | 44,2674,8/1/2001,1
 46 | 45,2674,9/1/2001,0
 47 | 46,2674,10/1/2001,0
 48 | 47,2674,11/1/2001,2
 49 | 48,2674,12/1/2001,2
 50 | 49,2674,1/1/2002,0
 51 | 50,2674,2/1/2002,1
 52 | 51,2674,3/1/2002,1
 53 | 52,2674,4/1/2002,1
 54 | 53,2673,1/1/1998,0
 55 | 54,2673,2/1/1998,0
 56 | 55,2673,3/1/1998,0
 57 | 56,2673,4/1/1998,2
 58 | 57,2673,5/1/1998,1
 59 | 58,2673,6/1/1998,0
 60 | 59,2673,7/1/1998,2
 61 | 60,2673,8/1/1998,4
 62 | 61,2673,9/1/1998,2
 63 | 62,2673,10/1/1998,2
 64 | 63,2673,11/1/1998,3
 65 | 64,2673,12/1/1998,0
 66 | 65,2673,1/1/1999,2
 67 | 66,2673,2/1/1999,2
 68 | 67,2673,3/1/1999,5
 69 | 68,2673,4/1/1999,5
 70 | 69,2673,5/1/1999,1
 71 | 70,2673,6/1/1999,3
 72 | 71,2673,7/1/1999,4
 73 | 72,2673,8/1/1999,4
 74 | 73,2673,9/1/1999,5
 75 | 74,2673,10/1/1999,0
 76 | 75,2673,11/1/1999,1
 77 | 76,2673,12/1/1999,3
 78 | 77,2673,1/1/2000,1
 79 | 78,2673,2/1/2000,0
 80 | 79,2673,3/1/2000,1
 81 | 80,2673,4/1/2000,4
 82 | 81,2673,5/1/2000,3
 83 | 82,2673,6/1/2000,3
 84 | 83,2673,7/1/2000,0
 85 | 84,2673,8/1/2000,0
 86 | 85,2673,9/1/2000,1
 87 | 86,2673,10/1/2000,2
 88 | 87,2673,11/1/2000,1
 89 | 88,2673,12/1/2000,0
 90 | 89,2673,1/1/2001,1
 91 | 90,2673,2/1/2001,1
 92 | 91,2673,3/1/2001,0
 93 | 92,2673,4/1/2001,0
 94 | 93,2673,5/1/2001,4
 95 | 94,2673,6/1/2001,0
 96 | 95,2673,7/1/2001,0
 97 | 96,2673,8/1/2001,4
 98 | 97,2673,9/1/2001,0
 99 | 98,2673,10/1/2001,1
100 | 99,2673,11/1/2001,2
101 | 100,2673,12/1/2001,2
102 | 101,2673,1/1/2002,3
103 | 102,2673,2/1/2002,1
104 | 103,2673,3/1/2002,3
105 | 104,2672,1/1/1998,11
106 | 105,2672,2/1/1998,2
107 | 106,2672,3/1/1998,0
108 | 107,2672,4/1/1998,2
109 | 108,2672,5/1/1998,12
110 | 109,2672,6/1/1998,0
111 | 110,2672,7/1/1998,0
112 | 111,2672,8/1/1998,4
113 | 112,2672,9/1/1998,2
114 | 113,2672,10/1/1998,0
115 | 114,2672,11/1/1998,0
116 | 115,2672,12/1/1998,0
117 | 116,2672,1/1/1999,0
118 | 117,2672,2/1/1999,0
119 | 118,2672,3/1/1999,0
120 | 119,2672,4/1/1999,6
121 | 120,2672,5/1/1999,5
122 | 121,2672,6/1/1999,1
123 | 122,2672,7/1/1999,0
124 | 123,2672,8/1/1999,4
125 | 124,2672,9/1/1999,2
126 | 125,2672,10/1/1999,0
127 | 126,2672,11/1/1999,4
128 | 127,2672,12/1/1999,4
129 | 128,2672,1/1/2000,2
130 | 129,2672,2/1/2000,0
131 | 130,2672,3/1/2000,1
132 | 131,2672,4/1/2000,2
133 | 132,2672,5/1/2000,6
134 | 133,2672,6/1/2000,2
135 | 134,2672,7/1/2000,0
136 | 135,2672,8/1/2000,0
137 | 136,2672,9/1/2000,0
138 | 137,2672,10/1/2000,0
139 | 138,2672,11/1/2000,0
140 | 139,2672,12/1/2000,0
141 | 140,2672,1/1/2001,0
142 | 141,2672,2/1/2001,6
143 | 142,2672,3/1/2001,0
144 | 143,2672,4/1/2001,0
145 | 144,2672,5/1/2001,4
146 | 145,2672,6/1/2001,0
147 | 146,2672,7/1/2001,0
148 | 147,2672,8/1/2001,0
149 | 148,2672,9/1/2001,0
150 | 149,2672,10/1/2001,1
151 | 150,2672,11/1/2001,1
152 | 151,2672,12/1/2001,2
153 | 152,2672,1/1/2002,1
154 | 153,2672,2/1/2002,2
155 | 154,2672,3/1/2002,0
156 | 155,2671,1/1/1998,6
157 | 156,2671,2/1/1998,5
158 | 157,2671,3/1/1998,5
159 | 158,2671,4/1/1998,3
160 | 159,2671,5/1/1998,5
161 | 160,2671,6/1/1998,0
162 | 161,2671,7/1/1998,2
163 | 162,2671,8/1/1998,1
164 | 163,2671,9/1/1998,3
165 | 164,2671,10/1/1998,0
166 | 165,2671,11/1/1998,1
167 | 166,2671,12/1/1998,7
168 | 167,2671,1/1/1999,4
169 | 168,2671,2/1/1999,3
170 | 169,2671,3/1/1999,3
171 | 170,2671,4/1/1999,1
172 | 171,2671,5/1/1999,3
173 | 172,2671,6/1/1999,2
174 | 173,2671,7/1/1999,2
175 | 174,2671,8/1/1999,2
176 | 175,2671,9/1/1999,0
177 | 176,2671,10/1/1999,2
178 | 177,2671,11/1/1999,2
179 | 178,2671,12/1/1999,2
180 | 179,2671,1/1/2000,2
181 | 180,2671,2/1/2000,1
182 | 181,2671,3/1/2000,3
183 | 182,2671,4/1/2000,0
184 | 183,2671,5/1/2000,1
185 | 184,2671,6/1/2000,3
186 | 185,2671,7/1/2000,0
187 | 186,2671,8/1/2000,1
188 | 187,2671,9/1/2000,2
189 | 188,2671,10/1/2000,3
190 | 189,2671,11/1/2000,1
191 | 190,2671,12/1/2000,0
192 | 191,2671,1/1/2001,1
193 | 192,2671,2/1/2001,1
194 | 193,2671,3/1/2001,3
195 | 194,2671,4/1/2001,2
196 | 195,2671,5/1/2001,0
197 | 196,2671,6/1/2001,0
198 | 197,2671,7/1/2001,0
199 | 198,2671,8/1/2001,0
200 | 199,2671,9/1/2001,0
201 | 200,2671,10/1/2001,0
202 | 201,2671,11/1/2001,0
203 | 202,2671,12/1/2001,0
204 | 203,2671,1/1/2002,0
205 | 204,2671,2/1/2002,1
206 | 205,2671,3/1/2002,0
207 | 206,2670,1/1/1998,6
208 | 207,2670,2/1/1998,6
209 | 208,2670,3/1/1998,5
210 | 209,2670,4/1/1998,0
211 | 210,2670,5/1/1998,2
212 | 211,2670,6/1/1998,1
213 | 212,2670,7/1/1998,3
214 | 213,2670,8/1/1998,1
215 | 214,2670,9/1/1998,5
216 | 215,2670,10/1/1998,5
217 | 216,2670,11/1/1998,0
218 | 217,2670,12/1/1998,2
219 | 218,2670,1/1/1999,1
220 | 219,2670,2/1/1999,3
221 | 220,2670,3/1/1999,1
222 | 221,2670,4/1/1999,6
223 | 222,2670,5/1/1999,4
224 | 223,2670,6/1/1999,3
225 | 224,2670,7/1/1999,0
226 | 225,2670,8/1/1999,1
227 | 226,2670,9/1/1999,2
228 | 227,2670,10/1/1999,5
229 | 228,2670,11/1/1999,0
230 | 229,2670,12/1/1999,1
231 | 230,2670,1/1/2000,1
232 | 231,2670,2/1/2000,1
233 | 232,2670,3/1/2000,3
234 | 233,2670,4/1/2000,0
235 | 234,2670,5/1/2000,0
236 | 235,2670,6/1/2000,0
237 | 236,2670,7/1/2000,3
238 | 237,2670,8/1/2000,1
239 | 238,2670,9/1/2000,1
240 | 239,2670,10/1/2000,2
241 | 240,2670,11/1/2000,1
242 | 241,2670,12/1/2000,0
243 | 242,2670,1/1/2001,0
244 | 243,2670,2/1/2001,4
245 | 244,2670,3/1/2001,0
246 | 245,2670,4/1/2001,0
247 | 246,2670,5/1/2001,0
248 | 247,2670,6/1/2001,1
249 | 248,2670,7/1/2001,0
250 | 249,2670,8/1/2001,1
251 | 250,2670,9/1/2001,0
252 | 251,2670,10/1/2001,2
253 | 252,2670,11/1/2001,0
254 | 253,2670,12/1/2001,1
255 | 254,2670,1/1/2002,0
256 | 255,2670,2/1/2002,3
257 | 256,2670,3/1/2002,0
258 | 257,2669,1/1/1998,4
259 | 258,2669,2/1/1998,4
260 | 259,2669,3/1/1998,4
261 | 260,2669,4/1/1998,2
262 | 261,2669,5/1/1998,4
263 | 262,2669,6/1/1998,2
264 | 263,2669,7/1/1998,7
265 | 264,2669,8/1/1998,4
266 | 265,2669,9/1/1998,0
267 | 266,2669,10/1/1998,5
268 | 267,2669,11/1/1998,3
269 | 268,2669,12/1/1998,1
270 | 269,2669,1/1/1999,5
271 | 270,2669,2/1/1999,1
272 | 271,2669,3/1/1999,7
273 | 272,2669,4/1/1999,1
274 | 273,2669,5/1/1999,2
275 | 274,2669,6/1/1999,2
276 | 275,2669,7/1/1999,4
277 | 276,2669,8/1/1999,5
278 | 277,2669,9/1/1999,2
279 | 278,2669,10/1/1999,0
280 | 279,2669,11/1/1999,1
281 | 280,2669,12/1/1999,0
282 | 281,2669,1/1/2000,3
283 | 282,2669,2/1/2000,1
284 | 283,2669,3/1/2000,3
285 | 284,2669,4/1/2000,2
286 | 285,2669,5/1/2000,2
287 | 286,2669,6/1/2000,0
288 | 287,2669,7/1/2000,2
289 | 288,2669,8/1/2000,0
290 | 289,2669,9/1/2000,2
291 | 290,2669,10/1/2000,0
292 | 291,2669,11/1/2000,1
293 | 292,2669,12/1/2000,0
294 | 293,2669,1/1/2001,0
295 | 294,2669,2/1/2001,0
296 | 295,2669,3/1/2001,0
297 | 296,2669,4/1/2001,0
298 | 297,2669,5/1/2001,0
299 | 298,2669,6/1/2001,0
300 | 299,2669,7/1/2001,0
301 | 300,2669,8/1/2001,0
302 | 301,2669,9/1/2001,0
303 | 302,2669,10/1/2001,0
304 | 303,2669,11/1/2001,0
305 | 304,2669,12/1/2001,0
306 | 305,2669,1/1/2002,1
307 | 306,2669,2/1/2002,0
308 | 307,2669,3/1/2002,1
309 | 308,2668,1/1/1998,5
310 | 309,2668,2/1/1998,3
311 | 310,2668,3/1/1998,3
312 | 311,2668,4/1/1998,3
313 | 312,2668,5/1/1998,2
314 | 313,2668,6/1/1998,1
315 | 314,2668,7/1/1998,4
316 | 315,2668,8/1/1998,2
317 | 316,2668,9/1/1998,1
318 | 317,2668,10/1/1998,5
319 | 318,2668,11/1/1998,3
320 | 319,2668,12/1/1998,1
321 | 320,2668,1/1/1999,2
322 | 321,2668,2/1/1999,3
323 | 322,2668,3/1/1999,1
324 | 323,2668,4/1/1999,3
325 | 324,2668,5/1/1999,0
326 | 325,2668,6/1/1999,4
327 | 326,2668,7/1/1999,2
328 | 327,2668,8/1/1999,3
329 | 328,2668,9/1/1999,0
330 | 329,2668,10/1/1999,3
331 | 330,2668,11/1/1999,1
332 | 331,2668,12/1/1999,0
333 | 332,2668,1/1/2000,0
334 | 333,2668,2/1/2000,0
335 | 334,2668,3/1/2000,0
336 | 335,2668,4/1/2000,0
337 | 336,2668,5/1/2000,3
338 | 337,2668,6/1/2000,1
339 | 338,2668,7/1/2000,1
340 | 339,2668,8/1/2000,1
341 | 340,2668,9/1/2000,2
342 | 341,2668,10/1/2000,0
343 | 342,2668,11/1/2000,0
344 | 343,2668,12/1/2000,1
345 | 344,2668,1/1/2001,1
346 | 345,2668,2/1/2001,0
347 | 346,2668,3/1/2001,1
348 | 347,2668,4/1/2001,2
349 | 348,2668,5/1/2001,2
350 | 349,2668,6/1/2001,6
351 | 350,2668,7/1/2001,0
352 | 351,2668,8/1/2001,1
353 | 352,2668,9/1/2001,2
354 | 353,2668,10/1/2001,5
355 | 354,2668,11/1/2001,0
356 | 355,2668,12/1/2001,0
357 | 356,2668,1/1/2002,1
358 | 357,2668,2/1/2002,1
359 | 358,2668,3/1/2002,1
360 | 


--------------------------------------------------------------------------------
/16_caching_refactor/data/quarterly_canada_population.csv:
--------------------------------------------------------------------------------
  1 | Quarter,Canada,Newfoundland and Labrador,Prince Edward Island,Nova Scotia,New Brunswick,Quebec,Ontario,Manitoba,Saskatchewan,Alberta,British Columbia,Yukon,Northwest Territories,Nunavut
  2 | Q3 1991,28037420,579644,130369,914969,745567,7067396,10431316,1109604,1002713,2592306,3373787,28871,38724,22154
  3 | Q4 1991,28127327,579549,130360,916896,745886,7080336,10465562,1109400,1000942,2604031,3404049,29142,38854,22320
  4 | Q1 1992,28181477,579425,130604,917302,746571,7082645,10488022,1109978,1001136,2611786,3423217,29239,39055,22497
  5 | Q2 1992,28269699,579761,130812,917555,747232,7096705,10528346,1111050,1002831,2620771,3443237,29510,39230,22659
  6 | Q3 1992,28371264,580109,130827,919451,748121,7110010,10572205,1112689,1003995,2632672,3468802,30084,39416,22883
  7 | Q4 1992,28474177,579624,131201,921619,747746,7129062,10610665,1113209,1002960,2643421,3502209,30034,39422,23005
  8 | Q1 1993,28533602,580819,131564,922128,747892,7137514,10629994,1114180,1003443,2650886,3522509,29964,39538,23171
  9 | Q2 1993,28600864,580369,131833,922469,748320,7147047,10656924,1115554,1005037,2658293,3541936,30112,39649,23321
 10 | Q3 1993,28684764,579977,132177,923925,748812,7156537,10690038,1117618,1006900,2667292,3567772,30337,39820,23559
 11 | Q4 1993,28786619,578194,132467,925851,749454,7172016,10728737,1118603,1007016,2678623,3601746,30016,40095,23801
 12 | Q1 1994,28835772,577128,132752,925852,749531,7177923,10744762,1119284,1007554,2683346,3623717,29670,40273,23980
 13 | Q2 1994,28907448,575670,133030,926109,749473,7184599,10776819,1120863,1008540,2691443,3646632,29682,40383,24205
 14 | Q3 1994,29000663,574466,133437,926871,750185,7192403,10819146,1123230,1009575,2700606,3676075,29684,40578,24407
 15 | Q4 1994,29096372,572068,133800,927778,750670,7201106,10860406,1124263,1010784,2709011,3711468,29606,40875,24537
 16 | Q1 1995,29141902,570563,134060,927395,750819,7205074,10875308,1124947,1011784,2715701,3730564,29882,41114,24691
 17 | Q2 1995,29211541,569068,134175,927641,750692,7210305,10906895,1127234,1012795,2724946,3751377,30143,41407,24863
 18 | Q3 1995,29302311,567397,134415,928120,750943,7219219,10950119,1129150,1014187,2734519,3777390,30442,41432,24978
 19 | Q4 1995,29396274,565008,134971,930028,751174,7228600,10993416,1129547,1015349,2745255,3805461,30875,41470,25120
 20 | Q1 1996,29446857,563679,135119,929782,751581,7232952,11009307,1130288,1016198,2753463,3826739,30963,41495,25291
 21 | Q2 1996,29514217,561646,135311,929921,751872,7237307,11037392,1132025,1017872,2762903,3849847,31142,41516,25463
 22 | Q3 1996,29610218,559698,135737,931327,752268,7246897,11082903,1134196,1018945,2775133,3874317,31387,41741,25669
 23 | Q4 1996,29708206,557281,136004,932276,752526,7257616,11130574,1134619,1019408,2789691,3899256,31596,41611,25748
 24 | Q1 1997,29751536,555432,135935,932349,752334,7262945,11146270,1134997,1018326,2799561,3914490,31633,41528,25736
 25 | Q2 1997,29818012,553115,135931,931832,752447,7267820,11179959,1135846,1017683,2813157,3931056,31659,41668,25839
 26 | Q3 1997,29905948,550911,136095,932402,752511,7274611,11227651,1136128,1017902,2829848,3948583,31797,41625,25884
 27 | Q4 1997,29994790,547639,136165,932735,752248,7282871,11278893,1135649,1017426,2847526,3964677,31556,41411,25994
 28 | Q1 1998,30028506,545769,135938,932549,751969,7286008,11292059,1135770,1017504,2859305,3972821,31510,41222,26082
 29 | Q2 1998,30080180,542479,135635,932033,751080,7290497,11322038,1136177,1017105,2876753,3977912,31320,40968,26183
 30 | Q3 1998,30155173,539843,135804,931836,750530,7295935,11365901,1137489,1017332,2899066,3983113,31149,40802,26373
 31 | Q4 1998,30231639,537908,135908,932740,750708,7305302,11408804,1138328,1017748,2915781,3990451,30877,40635,26449
 32 | Q1 1999,30260117,536515,135994,932145,750127,7310238,11419589,1138946,1016883,2926079,3995643,30748,40635,26575
 33 | Q2 1999,30314696,534498,136025,932116,750075,7315053,11452857,1140502,1015716,2937393,4002433,30602,40705,26721
 34 | Q3 1999,30401286,533329,136281,933784,750601,7323250,11504759,1142448,1014524,2952692,4011375,30785,40638,26820
 35 | Q4 1999,30492106,532246,136424,935941,750643,7334722,11559464,1143618,1012582,2966682,4021600,30608,40579,26997
 36 | Q1 2000,30525872,531774,136442,934594,750786,7340269,11576994,1144428,1011141,2974517,4026664,30495,40623,27145
 37 | Q2 2000,30594030,529574,136289,934291,750543,7347179,11621255,1145873,1009177,2988465,4033319,30382,40455,27228
 38 | Q3 2000,30685730,527966,136470,933821,750517,7356951,11683290,1147313,1007565,3004198,4039230,30431,40480,27498
 39 | Q4 2000,30783969,526732,136400,934459,750252,7368772,11748348,1148183,1006027,3016947,4049297,30294,40575,27683
 40 | Q1 2001,30824441,525299,136377,933463,749715,7373978,11771945,1148456,1003469,3027941,4055229,30147,40624,27798
 41 | Q2 2001,30910996,523235,136499,932909,749789,7383844,11827345,1149684,1001643,3041238,4066132,30124,40625,27929
 42 | Q3 2001,31020902,522046,136665,932494,749820,7396456,11897534,1151454,1000239,3058108,4076950,30158,40845,28133
 43 | Q4 2001,31129119,521581,136876,933336,749295,7411504,11961789,1152150,999175,3078336,4085648,30076,41173,28180
 44 | Q1 2002,31169393,521399,136858,933773,748637,7417882,11980361,1152837,998681,3092107,4087191,30153,41176,28338
 45 | Q2 2002,31253382,520228,136892,934177,748777,7428016,12030408,1154737,997740,3108360,4094236,30142,41280,28389
 46 | Q3 2002,31360079,519481,136880,935179,749372,7441656,12094174,1156680,996807,3128429,4100564,30336,41699,28822
 47 | Q4 2002,31451764,519458,137078,935968,749408,7455892,12146053,1157298,996279,3144926,4108400,30266,41874,28864
 48 | Q1 2003,31480672,519223,137077,935509,749238,7461516,12156750,1158369,996241,3155103,4110242,30422,42006,28976
 49 | Q2 2003,31550768,518813,137120,935800,749265,7471775,12195501,1160863,995748,3169049,4114925,30573,42219,29117
 50 | Q3 2003,31644028,518459,137227,937717,749441,7485753,12245039,1163596,996386,3183065,4124482,30941,42600,29322
 51 | Q4 2003,31737869,518786,137425,938790,749463,7503709,12291669,1165530,996555,3197858,4134638,31218,42857,29371
 52 | Q1 2004,31777704,518739,137510,938997,749190,7510408,12305320,1167656,996558,3209372,4139946,31319,43171,29518
 53 | Q2 2004,31846669,517940,137629,938783,749336,7520262,12341656,1170347,997080,3223490,4145951,31401,43210,29584
 54 | Q3 2004,31940655,517423,137680,939664,749419,7535590,12391421,1173238,997283,3238668,4155651,31455,43306,29857
 55 | Q4 2004,32039959,516906,137695,939884,749243,7553651,12437664,1174675,996993,3260590,4167810,31689,43352,29807
 56 | Q1 2005,32076720,516794,137689,939215,749002,7559625,12446466,1176051,995900,3276797,4174018,31771,43442,29950
 57 | Q2 2005,32141943,515358,137788,937991,748693,7567307,12477967,1177285,994892,3296271,4182963,31862,43457,30109
 58 | Q3 2005,32243753,514332,138064,937926,748057,7581476,12528663,1178264,993500,3321768,4196062,31902,43400,30339
 59 | Q4 2005,32353968,513740,138130,938753,747672,7598887,12578931,1179086,993288,3346754,4212666,32128,43526,30407
 60 | Q1 2006,32395309,512564,137907,938117,746983,7604258,12587531,1179920,992237,3371823,4217962,32112,43318,30577
 61 | Q2 2006,32470303,510961,137846,937664,746337,7615505,12618444,1181635,991228,3396802,4227986,32152,43101,30642
 62 | Q3 2006,32571174,510592,137867,937882,745621,7631966,12661878,1183562,992314,3421434,4241794,32272,43179,30813
 63 | Q4 2006,32680712,510329,137898,937046,744975,7653092,12700994,1183981,993361,3453944,4258655,32361,43197,30879
 64 | Q1 2007,32717701,510335,137778,936203,744853,7661583,12702838,1184503,995174,3472771,4265321,32315,43074,30953
 65 | Q2 2007,32786014,509131,137708,934995,744938,7674385,12726728,1186430,997550,3490842,4276743,32273,43107,31184
 66 | Q3 2007,32889025,509055,137711,935115,745433,7692916,12764806,1189451,1002086,3514147,4290984,32554,43372,31395
 67 | Q4 2007,33002138,510262,138026,935379,746138,7713173,12807208,1191690,1007323,3533413,4312042,32662,43262,31560
 68 | Q1 2008,33050613,511008,138000,935475,746288,7725513,12814446,1193408,1010304,3548796,4319652,32751,43357,31615
 69 | Q2 2008,33127520,511253,138186,935635,746533,7740347,12840272,1194662,1013478,3567458,4331699,32954,43362,31681
 70 | Q3 2008,33247118,511581,138749,935897,746877,7761725,12883583,1197775,1017404,3595856,4349336,33083,43360,31892
 71 | Q4 2008,33372418,513437,139465,937045,747769,7786475,12927148,1199548,1022553,3619292,4371124,33306,43293,31963
 72 | Q1 2009,33427050,514223,139136,937382,748373,7799115,12932234,1201702,1025993,3638482,4381829,33404,43116,32061
 73 | Q2 2009,33511275,514719,139206,937429,749161,7817022,12956332,1204014,1029901,3659228,4395190,33582,43106,32385
 74 | Q3 2009,33628895,516751,139891,938208,749956,7843383,12998345,1208556,1034819,3678996,4410506,33731,43156,32597
 75 | Q4 2009,33757077,518985,140611,940501,751066,7871361,13047485,1211875,1039554,3694303,4431576,34025,42924,32811
 76 | Q1 2010,33807529,520283,140612,940823,751281,7885717,13058435,1213933,1042952,3702266,4441329,34007,42970,32921
 77 | Q2 2010,33889236,521204,140990,940919,752021,7903339,13088106,1216610,1046799,3714974,4453744,34241,43152,33137
 78 | Q3 2010,34004889,522009,141654,942107,753035,7929222,13135778,1220780,1051443,3732082,4465546,34596,43285,33352
 79 | Q4 2010,34131683,522865,142732,944179,754287,7955007,13189339,1224662,1056115,3747226,4483609,34789,43303,33570
 80 | Q1 2011,34166099,523726,143020,944068,754794,7967185,13198650,1227212,1058963,3754424,4482260,34801,43263,33733
 81 | Q2 2011,34230378,524239,143265,943918,754841,7982119,13221999,1229063,1061484,3768773,4488508,34983,43263,33923
 82 | Q3 2011,34339328,524999,143963,944274,755705,8005090,13261381,1233649,1066026,3789030,4502104,35411,43504,34192
 83 | Q4 2011,34457998,525936,144378,945016,756420,8021860,13308659,1238019,1070687,3807726,4525684,35668,43593,34352
 84 | Q1 2012,34516352,526215,144310,944307,757278,8029498,13323332,1242141,1074265,3825072,4536084,35782,43648,34420
 85 | Q2 2012,34592779,525655,144472,943746,757270,8039492,13348263,1245692,1077892,3847722,4548438,35843,43701,34593
 86 | Q3 2012,34714222,526345,144530,943635,758378,8061101,13390632,1249975,1083755,3874548,4566769,36234,43648,34672
 87 | Q4 2012,34836008,527180,144520,943341,758673,8078388,13434943,1254178,1088959,3900333,4590545,36425,43681,34842
 88 | Q1 2013,34883119,527248,144175,942598,758298,8082771,13444704,1257940,1091547,3920499,4598255,36373,43737,34974
 89 | Q2 2013,34958216,527313,143948,941230,758132,8090916,13467829,1260834,1094390,3946798,4611648,36313,43867,34998
 90 | Q3 2013,35082954,527114,144094,940434,758544,8110880,13510781,1264620,1099736,3981011,4630077,36521,43805,35337
 91 | Q4 2013,35211866,528117,144103,940105,758670,8124660,13558528,1268428,1103730,4010903,4658875,36448,43756,35543
 92 | Q1 2014,35249639,528242,143996,939867,759018,8125478,13562583,1271562,1106374,4029951,4666788,36393,43800,35587
 93 | Q2 2014,35323533,527498,143984,938913,758876,8132425,13582747,1274407,1109631,4054280,4684490,36682,43794,35806
 94 | Q3 2014,35437435,528159,144283,938545,758976,8150183,13617553,1279014,1112979,4083648,4707103,37137,43884,35971
 95 | Q4 2014,35559047,528461,144538,938972,759524,8165447,13661314,1283366,1116203,4106056,4737725,37272,44064,36105
 96 | Q1 2015,35575187,528361,144368,938296,759617,8163132,13657737,1286453,1118181,4116047,4745468,37168,44140,36219
 97 | Q2 2015,35611271,528063,144259,936271,758948,8163063,13669860,1288094,1118806,4128300,4757759,37372,44193,36283
 98 | Q3 2015,35702908,528117,144546,936525,758842,8175272,13707118,1292227,1120967,4144491,4776388,37690,44237,36488
 99 | Q4 2015,35822894,528696,144894,938320,759778,8190014,13757688,1297329,1124919,4159519,4802955,37751,44428,36603
100 | Q1 2016,35871136,528800,145202,938690,760580,8193624,13773629,1302544,1127773,4169830,4811465,37770,44507,36722
101 | Q2 2016,35970303,529063,145784,940382,762289,8204085,13816545,1307689,1131147,4181765,4832155,38042,44590,36767
102 | Q3 2016,36109487,529426,146969,942790,763350,8225950,13875394,1314139,1135987,4196061,4859250,38547,44649,36975
103 | Q4 2016,36258726,529900,147936,946030,764844,8249692,13946431,1319911,1140690,4208958,4884002,38750,44542,37040
104 | Q1 2017,36314099,529347,148181,945943,764617,8257450,13971785,1324129,1143313,4217855,4890848,38848,44607,37176
105 | Q2 2017,36398013,528544,148860,947384,764813,8272894,14006386,1328412,1146218,4227969,4905210,39175,44813,37335
106 | Q3 2017,36545236,528249,150402,950108,766621,8302063,14070141,1334790,1150331,4241100,4929384,39610,44891,37546
107 | Q4 2017,36721223,528288,151376,953683,767935,8336460,14150783,1340565,1154755,4258195,4956550,39804,45126,37703
108 | Q1 2018,36798408,527544,151516,953838,768005,8350601,14186830,1344418,1156659,4268853,4967421,39972,45066,37685
109 | Q2 2018,36898431,526395,152039,954618,768522,8367551,14235643,1347055,1158451,4281706,4983183,40141,45320,37807
110 | Q3 2018,37065084,525560,153396,958406,770301,8401738,14308697,1352825,1161767,4298275,5010476,40519,44981,38143
111 | Q4 2018,37250385,525729,155152,963447,772597,8434705,14392903,1357968,1165929,4317665,5040353,40679,44970,38288
112 | Q1 2019,37325235,524803,155254,964073,772828,8447803,14426822,1361820,1168447,4330833,5048144,40854,45202,38352
113 | Q2 2019,37422946,524131,155916,965553,774035,8466531,14467552,1364667,1169397,4344454,5066120,40932,45137,38521
114 | Q3 2019,37601230,523427,157419,970243,777128,8503483,14544701,1369954,1172479,4362576,5094796,41362,45070,38592
115 | Q4 2019,37810038,523889,158760,976483,780597,8542964,14636131,1373845,1176282,4384848,5130780,41648,45186,38625
116 | Q1 2020,37909001,523345,159369,977498,780677,8559876,14686092,1377248,1179135,4401362,5138710,41699,45270,38720
117 | Q2 2020,37986182,522505,160390,979075,782092,8574709,14718133,1379138,1179409,4414332,5150199,42015,45351,38834
118 | Q3 2020,38007166,521359,161305,981691,782996,8576595,14726022,1379888,1178467,4416682,5155495,42163,45346,39157
119 | Q4 2020,38007048,520203,161455,981266,783354,8578733,14721852,1381572,1177980,4421857,5151920,42327,45275,39254
120 | Q1 2021,38043450,519736,161662,981793,784472,8579010,14740102,1384025,1179119,4429077,5157293,42401,45355,39405
121 | Q2 2021,38124373,519694,162866,985238,786529,8585460,14773618,1388726,1181040,4436944,5176360,42719,45660,39519
122 | Q3 2021,38226498,520452,164758,991117,790398,8602335,14809257,1391979,1181493,4443773,5202378,43250,45597,39711
123 | Q4 2021,38426473,521854,166435,998387,795851,8630187,14901607,1395430,1182984,4466124,5238713,43373,45607,39921
124 | Q1 2022,38516138,522501,166858,1002441,799245,8637650,14940912,1398303,1185311,4482385,5251578,43241,45710,40003
125 | Q2 2022,38644920,523043,168358,1007360,802862,8650692,14996014,1401967,1188338,4502858,5273809,43518,45698,40403
126 | Q3 2022,38929902,525972,170688,1019725,812061,8695659,15109416,1409223,1194803,4543111,5319324,43789,45605,40526
127 | Q4 2022,39292355,528818,172707,1030953,820786,8751352,15262660,1420228,1205119,4601314,5368266,43964,45602,40586
128 | Q1 2023,39566248,531948,173954,1037782,825474,8787554,15386407,1431792,1214618,4647178,5399118,44238,45493,40692
129 | 


--------------------------------------------------------------------------------
/12_dashboard_capstone/data/quarterly_canada_population.csv:
--------------------------------------------------------------------------------
  1 | Quarter,Canada,Newfoundland and Labrador,Prince Edward Island,Nova Scotia,New Brunswick,Quebec,Ontario,Manitoba,Saskatchewan,Alberta,British Columbia,Yukon,Northwest Territories,Nunavut
  2 | Q3 1991,28037420,579644,130369,914969,745567,7067396,10431316,1109604,1002713,2592306,3373787,28871,38724,22154
  3 | Q4 1991,28127327,579549,130360,916896,745886,7080336,10465562,1109400,1000942,2604031,3404049,29142,38854,22320
  4 | Q1 1992,28181477,579425,130604,917302,746571,7082645,10488022,1109978,1001136,2611786,3423217,29239,39055,22497
  5 | Q2 1992,28269699,579761,130812,917555,747232,7096705,10528346,1111050,1002831,2620771,3443237,29510,39230,22659
  6 | Q3 1992,28371264,580109,130827,919451,748121,7110010,10572205,1112689,1003995,2632672,3468802,30084,39416,22883
  7 | Q4 1992,28474177,579624,131201,921619,747746,7129062,10610665,1113209,1002960,2643421,3502209,30034,39422,23005
  8 | Q1 1993,28533602,580819,131564,922128,747892,7137514,10629994,1114180,1003443,2650886,3522509,29964,39538,23171
  9 | Q2 1993,28600864,580369,131833,922469,748320,7147047,10656924,1115554,1005037,2658293,3541936,30112,39649,23321
 10 | Q3 1993,28684764,579977,132177,923925,748812,7156537,10690038,1117618,1006900,2667292,3567772,30337,39820,23559
 11 | Q4 1993,28786619,578194,132467,925851,749454,7172016,10728737,1118603,1007016,2678623,3601746,30016,40095,23801
 12 | Q1 1994,28835772,577128,132752,925852,749531,7177923,10744762,1119284,1007554,2683346,3623717,29670,40273,23980
 13 | Q2 1994,28907448,575670,133030,926109,749473,7184599,10776819,1120863,1008540,2691443,3646632,29682,40383,24205
 14 | Q3 1994,29000663,574466,133437,926871,750185,7192403,10819146,1123230,1009575,2700606,3676075,29684,40578,24407
 15 | Q4 1994,29096372,572068,133800,927778,750670,7201106,10860406,1124263,1010784,2709011,3711468,29606,40875,24537
 16 | Q1 1995,29141902,570563,134060,927395,750819,7205074,10875308,1124947,1011784,2715701,3730564,29882,41114,24691
 17 | Q2 1995,29211541,569068,134175,927641,750692,7210305,10906895,1127234,1012795,2724946,3751377,30143,41407,24863
 18 | Q3 1995,29302311,567397,134415,928120,750943,7219219,10950119,1129150,1014187,2734519,3777390,30442,41432,24978
 19 | Q4 1995,29396274,565008,134971,930028,751174,7228600,10993416,1129547,1015349,2745255,3805461,30875,41470,25120
 20 | Q1 1996,29446857,563679,135119,929782,751581,7232952,11009307,1130288,1016198,2753463,3826739,30963,41495,25291
 21 | Q2 1996,29514217,561646,135311,929921,751872,7237307,11037392,1132025,1017872,2762903,3849847,31142,41516,25463
 22 | Q3 1996,29610218,559698,135737,931327,752268,7246897,11082903,1134196,1018945,2775133,3874317,31387,41741,25669
 23 | Q4 1996,29708206,557281,136004,932276,752526,7257616,11130574,1134619,1019408,2789691,3899256,31596,41611,25748
 24 | Q1 1997,29751536,555432,135935,932349,752334,7262945,11146270,1134997,1018326,2799561,3914490,31633,41528,25736
 25 | Q2 1997,29818012,553115,135931,931832,752447,7267820,11179959,1135846,1017683,2813157,3931056,31659,41668,25839
 26 | Q3 1997,29905948,550911,136095,932402,752511,7274611,11227651,1136128,1017902,2829848,3948583,31797,41625,25884
 27 | Q4 1997,29994790,547639,136165,932735,752248,7282871,11278893,1135649,1017426,2847526,3964677,31556,41411,25994
 28 | Q1 1998,30028506,545769,135938,932549,751969,7286008,11292059,1135770,1017504,2859305,3972821,31510,41222,26082
 29 | Q2 1998,30080180,542479,135635,932033,751080,7290497,11322038,1136177,1017105,2876753,3977912,31320,40968,26183
 30 | Q3 1998,30155173,539843,135804,931836,750530,7295935,11365901,1137489,1017332,2899066,3983113,31149,40802,26373
 31 | Q4 1998,30231639,537908,135908,932740,750708,7305302,11408804,1138328,1017748,2915781,3990451,30877,40635,26449
 32 | Q1 1999,30260117,536515,135994,932145,750127,7310238,11419589,1138946,1016883,2926079,3995643,30748,40635,26575
 33 | Q2 1999,30314696,534498,136025,932116,750075,7315053,11452857,1140502,1015716,2937393,4002433,30602,40705,26721
 34 | Q3 1999,30401286,533329,136281,933784,750601,7323250,11504759,1142448,1014524,2952692,4011375,30785,40638,26820
 35 | Q4 1999,30492106,532246,136424,935941,750643,7334722,11559464,1143618,1012582,2966682,4021600,30608,40579,26997
 36 | Q1 2000,30525872,531774,136442,934594,750786,7340269,11576994,1144428,1011141,2974517,4026664,30495,40623,27145
 37 | Q2 2000,30594030,529574,136289,934291,750543,7347179,11621255,1145873,1009177,2988465,4033319,30382,40455,27228
 38 | Q3 2000,30685730,527966,136470,933821,750517,7356951,11683290,1147313,1007565,3004198,4039230,30431,40480,27498
 39 | Q4 2000,30783969,526732,136400,934459,750252,7368772,11748348,1148183,1006027,3016947,4049297,30294,40575,27683
 40 | Q1 2001,30824441,525299,136377,933463,749715,7373978,11771945,1148456,1003469,3027941,4055229,30147,40624,27798
 41 | Q2 2001,30910996,523235,136499,932909,749789,7383844,11827345,1149684,1001643,3041238,4066132,30124,40625,27929
 42 | Q3 2001,31020902,522046,136665,932494,749820,7396456,11897534,1151454,1000239,3058108,4076950,30158,40845,28133
 43 | Q4 2001,31129119,521581,136876,933336,749295,7411504,11961789,1152150,999175,3078336,4085648,30076,41173,28180
 44 | Q1 2002,31169393,521399,136858,933773,748637,7417882,11980361,1152837,998681,3092107,4087191,30153,41176,28338
 45 | Q2 2002,31253382,520228,136892,934177,748777,7428016,12030408,1154737,997740,3108360,4094236,30142,41280,28389
 46 | Q3 2002,31360079,519481,136880,935179,749372,7441656,12094174,1156680,996807,3128429,4100564,30336,41699,28822
 47 | Q4 2002,31451764,519458,137078,935968,749408,7455892,12146053,1157298,996279,3144926,4108400,30266,41874,28864
 48 | Q1 2003,31480672,519223,137077,935509,749238,7461516,12156750,1158369,996241,3155103,4110242,30422,42006,28976
 49 | Q2 2003,31550768,518813,137120,935800,749265,7471775,12195501,1160863,995748,3169049,4114925,30573,42219,29117
 50 | Q3 2003,31644028,518459,137227,937717,749441,7485753,12245039,1163596,996386,3183065,4124482,30941,42600,29322
 51 | Q4 2003,31737869,518786,137425,938790,749463,7503709,12291669,1165530,996555,3197858,4134638,31218,42857,29371
 52 | Q1 2004,31777704,518739,137510,938997,749190,7510408,12305320,1167656,996558,3209372,4139946,31319,43171,29518
 53 | Q2 2004,31846669,517940,137629,938783,749336,7520262,12341656,1170347,997080,3223490,4145951,31401,43210,29584
 54 | Q3 2004,31940655,517423,137680,939664,749419,7535590,12391421,1173238,997283,3238668,4155651,31455,43306,29857
 55 | Q4 2004,32039959,516906,137695,939884,749243,7553651,12437664,1174675,996993,3260590,4167810,31689,43352,29807
 56 | Q1 2005,32076720,516794,137689,939215,749002,7559625,12446466,1176051,995900,3276797,4174018,31771,43442,29950
 57 | Q2 2005,32141943,515358,137788,937991,748693,7567307,12477967,1177285,994892,3296271,4182963,31862,43457,30109
 58 | Q3 2005,32243753,514332,138064,937926,748057,7581476,12528663,1178264,993500,3321768,4196062,31902,43400,30339
 59 | Q4 2005,32353968,513740,138130,938753,747672,7598887,12578931,1179086,993288,3346754,4212666,32128,43526,30407
 60 | Q1 2006,32395309,512564,137907,938117,746983,7604258,12587531,1179920,992237,3371823,4217962,32112,43318,30577
 61 | Q2 2006,32470303,510961,137846,937664,746337,7615505,12618444,1181635,991228,3396802,4227986,32152,43101,30642
 62 | Q3 2006,32571174,510592,137867,937882,745621,7631966,12661878,1183562,992314,3421434,4241794,32272,43179,30813
 63 | Q4 2006,32680712,510329,137898,937046,744975,7653092,12700994,1183981,993361,3453944,4258655,32361,43197,30879
 64 | Q1 2007,32717701,510335,137778,936203,744853,7661583,12702838,1184503,995174,3472771,4265321,32315,43074,30953
 65 | Q2 2007,32786014,509131,137708,934995,744938,7674385,12726728,1186430,997550,3490842,4276743,32273,43107,31184
 66 | Q3 2007,32889025,509055,137711,935115,745433,7692916,12764806,1189451,1002086,3514147,4290984,32554,43372,31395
 67 | Q4 2007,33002138,510262,138026,935379,746138,7713173,12807208,1191690,1007323,3533413,4312042,32662,43262,31560
 68 | Q1 2008,33050613,511008,138000,935475,746288,7725513,12814446,1193408,1010304,3548796,4319652,32751,43357,31615
 69 | Q2 2008,33127520,511253,138186,935635,746533,7740347,12840272,1194662,1013478,3567458,4331699,32954,43362,31681
 70 | Q3 2008,33247118,511581,138749,935897,746877,7761725,12883583,1197775,1017404,3595856,4349336,33083,43360,31892
 71 | Q4 2008,33372418,513437,139465,937045,747769,7786475,12927148,1199548,1022553,3619292,4371124,33306,43293,31963
 72 | Q1 2009,33427050,514223,139136,937382,748373,7799115,12932234,1201702,1025993,3638482,4381829,33404,43116,32061
 73 | Q2 2009,33511275,514719,139206,937429,749161,7817022,12956332,1204014,1029901,3659228,4395190,33582,43106,32385
 74 | Q3 2009,33628895,516751,139891,938208,749956,7843383,12998345,1208556,1034819,3678996,4410506,33731,43156,32597
 75 | Q4 2009,33757077,518985,140611,940501,751066,7871361,13047485,1211875,1039554,3694303,4431576,34025,42924,32811
 76 | Q1 2010,33807529,520283,140612,940823,751281,7885717,13058435,1213933,1042952,3702266,4441329,34007,42970,32921
 77 | Q2 2010,33889236,521204,140990,940919,752021,7903339,13088106,1216610,1046799,3714974,4453744,34241,43152,33137
 78 | Q3 2010,34004889,522009,141654,942107,753035,7929222,13135778,1220780,1051443,3732082,4465546,34596,43285,33352
 79 | Q4 2010,34131683,522865,142732,944179,754287,7955007,13189339,1224662,1056115,3747226,4483609,34789,43303,33570
 80 | Q1 2011,34166099,523726,143020,944068,754794,7967185,13198650,1227212,1058963,3754424,4482260,34801,43263,33733
 81 | Q2 2011,34230378,524239,143265,943918,754841,7982119,13221999,1229063,1061484,3768773,4488508,34983,43263,33923
 82 | Q3 2011,34339328,524999,143963,944274,755705,8005090,13261381,1233649,1066026,3789030,4502104,35411,43504,34192
 83 | Q4 2011,34457998,525936,144378,945016,756420,8021860,13308659,1238019,1070687,3807726,4525684,35668,43593,34352
 84 | Q1 2012,34516352,526215,144310,944307,757278,8029498,13323332,1242141,1074265,3825072,4536084,35782,43648,34420
 85 | Q2 2012,34592779,525655,144472,943746,757270,8039492,13348263,1245692,1077892,3847722,4548438,35843,43701,34593
 86 | Q3 2012,34714222,526345,144530,943635,758378,8061101,13390632,1249975,1083755,3874548,4566769,36234,43648,34672
 87 | Q4 2012,34836008,527180,144520,943341,758673,8078388,13434943,1254178,1088959,3900333,4590545,36425,43681,34842
 88 | Q1 2013,34883119,527248,144175,942598,758298,8082771,13444704,1257940,1091547,3920499,4598255,36373,43737,34974
 89 | Q2 2013,34958216,527313,143948,941230,758132,8090916,13467829,1260834,1094390,3946798,4611648,36313,43867,34998
 90 | Q3 2013,35082954,527114,144094,940434,758544,8110880,13510781,1264620,1099736,3981011,4630077,36521,43805,35337
 91 | Q4 2013,35211866,528117,144103,940105,758670,8124660,13558528,1268428,1103730,4010903,4658875,36448,43756,35543
 92 | Q1 2014,35249639,528242,143996,939867,759018,8125478,13562583,1271562,1106374,4029951,4666788,36393,43800,35587
 93 | Q2 2014,35323533,527498,143984,938913,758876,8132425,13582747,1274407,1109631,4054280,4684490,36682,43794,35806
 94 | Q3 2014,35437435,528159,144283,938545,758976,8150183,13617553,1279014,1112979,4083648,4707103,37137,43884,35971
 95 | Q4 2014,35559047,528461,144538,938972,759524,8165447,13661314,1283366,1116203,4106056,4737725,37272,44064,36105
 96 | Q1 2015,35575187,528361,144368,938296,759617,8163132,13657737,1286453,1118181,4116047,4745468,37168,44140,36219
 97 | Q2 2015,35611271,528063,144259,936271,758948,8163063,13669860,1288094,1118806,4128300,4757759,37372,44193,36283
 98 | Q3 2015,35702908,528117,144546,936525,758842,8175272,13707118,1292227,1120967,4144491,4776388,37690,44237,36488
 99 | Q4 2015,35822894,528696,144894,938320,759778,8190014,13757688,1297329,1124919,4159519,4802955,37751,44428,36603
100 | Q1 2016,35871136,528800,145202,938690,760580,8193624,13773629,1302544,1127773,4169830,4811465,37770,44507,36722
101 | Q2 2016,35970303,529063,145784,940382,762289,8204085,13816545,1307689,1131147,4181765,4832155,38042,44590,36767
102 | Q3 2016,36109487,529426,146969,942790,763350,8225950,13875394,1314139,1135987,4196061,4859250,38547,44649,36975
103 | Q4 2016,36258726,529900,147936,946030,764844,8249692,13946431,1319911,1140690,4208958,4884002,38750,44542,37040
104 | Q1 2017,36314099,529347,148181,945943,764617,8257450,13971785,1324129,1143313,4217855,4890848,38848,44607,37176
105 | Q2 2017,36398013,528544,148860,947384,764813,8272894,14006386,1328412,1146218,4227969,4905210,39175,44813,37335
106 | Q3 2017,36545236,528249,150402,950108,766621,8302063,14070141,1334790,1150331,4241100,4929384,39610,44891,37546
107 | Q4 2017,36721223,528288,151376,953683,767935,8336460,14150783,1340565,1154755,4258195,4956550,39804,45126,37703
108 | Q1 2018,36798408,527544,151516,953838,768005,8350601,14186830,1344418,1156659,4268853,4967421,39972,45066,37685
109 | Q2 2018,36898431,526395,152039,954618,768522,8367551,14235643,1347055,1158451,4281706,4983183,40141,45320,37807
110 | Q3 2018,37065084,525560,153396,958406,770301,8401738,14308697,1352825,1161767,4298275,5010476,40519,44981,38143
111 | Q4 2018,37250385,525729,155152,963447,772597,8434705,14392903,1357968,1165929,4317665,5040353,40679,44970,38288
112 | Q1 2019,37325235,524803,155254,964073,772828,8447803,14426822,1361820,1168447,4330833,5048144,40854,45202,38352
113 | Q2 2019,37422946,524131,155916,965553,774035,8466531,14467552,1364667,1169397,4344454,5066120,40932,45137,38521
114 | Q3 2019,37601230,523427,157419,970243,777128,8503483,14544701,1369954,1172479,4362576,5094796,41362,45070,38592
115 | Q4 2019,37810038,523889,158760,976483,780597,8542964,14636131,1373845,1176282,4384848,5130780,41648,45186,38625
116 | Q1 2020,37909001,523345,159369,977498,780677,8559876,14686092,1377248,1179135,4401362,5138710,41699,45270,38720
117 | Q2 2020,37986182,522505,160390,979075,782092,8574709,14718133,1379138,1179409,4414332,5150199,42015,45351,38834
118 | Q3 2020,38007166,521359,161305,981691,782996,8576595,14726022,1379888,1178467,4416682,5155495,42163,45346,39157
119 | Q4 2020,38007048,520203,161455,981266,783354,8578733,14721852,1381572,1177980,4421857,5151920,42327,45275,39254
120 | Q1 2021,38043450,519736,161662,981793,784472,8579010,14740102,1384025,1179119,4429077,5157293,42401,45355,39405
121 | Q2 2021,38124373,519694,162866,985238,786529,8585460,14773618,1388726,1181040,4436944,5176360,42719,45660,39519
122 | Q3 2021,38226498,520452,164758,991117,790398,8602335,14809257,1391979,1181493,4443773,5202378,43250,45597,39711
123 | Q4 2021,38426473,521854,166435,998387,795851,8630187,14901607,1395430,1182984,4466124,5238713,43373,45607,39921
124 | Q1 2022,38516138,522501,166858,1002441,799245,8637650,14940912,1398303,1185311,4482385,5251578,43241,45710,40003
125 | Q2 2022,38644920,523043,168358,1007360,802862,8650692,14996014,1401967,1188338,4502858,5273809,43518,45698,40403
126 | Q3 2022,38929902,525972,170688,1019725,812061,8695659,15109416,1409223,1194803,4543111,5319324,43789,45605,40526
127 | Q4 2022,39292355,528818,172707,1030953,820786,8751352,15262660,1420228,1205119,4601314,5368266,43964,45602,40586
128 | Q1 2023,39566248,531948,173954,1037782,825474,8787554,15386407,1431792,1214618,4647178,5399118,44238,45493,40692
129 | 


--------------------------------------------------------------------------------
/25_state_capstone/model.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "id": "b82c2262",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "import pandas as pd\n",
 11 |     "import numpy as np"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 2,
 17 |    "id": "3927599a",
 18 |    "metadata": {},
 19 |    "outputs": [
 20 |     {
 21 |      "data": {
 22 |       "text/html": [
 23 |        "<div>\n",
 24 |        "<style scoped>\n",
 25 |        "    .dataframe tbody tr th:only-of-type {\n",
 26 |        "        vertical-align: middle;\n",
 27 |        "    }\n",
 28 |        "\n",
 29 |        "    .dataframe tbody tr th {\n",
 30 |        "        vertical-align: top;\n",
 31 |        "    }\n",
 32 |        "\n",
 33 |        "    .dataframe thead th {\n",
 34 |        "        text-align: right;\n",
 35 |        "    }\n",
 36 |        "</style>\n",
 37 |        "<table border=\"1\" class=\"dataframe\">\n",
 38 |        "  <thead>\n",
 39 |        "    <tr style=\"text-align: right;\">\n",
 40 |        "      <th></th>\n",
 41 |        "      <th>price</th>\n",
 42 |        "      <th>miles</th>\n",
 43 |        "      <th>year</th>\n",
 44 |        "      <th>make</th>\n",
 45 |        "      <th>model</th>\n",
 46 |        "      <th>body_type</th>\n",
 47 |        "      <th>vehicle_type</th>\n",
 48 |        "      <th>drivetrain</th>\n",
 49 |        "      <th>transmission</th>\n",
 50 |        "      <th>fuel_type</th>\n",
 51 |        "      <th>engine_size</th>\n",
 52 |        "      <th>engine_block</th>\n",
 53 |        "      <th>state</th>\n",
 54 |        "    </tr>\n",
 55 |        "  </thead>\n",
 56 |        "  <tbody>\n",
 57 |        "    <tr>\n",
 58 |        "      <th>0</th>\n",
 59 |        "      <td>179999.0</td>\n",
 60 |        "      <td>9966.0</td>\n",
 61 |        "      <td>2017.0</td>\n",
 62 |        "      <td>acura</td>\n",
 63 |        "      <td>NSX</td>\n",
 64 |        "      <td>coupe</td>\n",
 65 |        "      <td>Car</td>\n",
 66 |        "      <td>4WD</td>\n",
 67 |        "      <td>Automatic</td>\n",
 68 |        "      <td>hyrid</td>\n",
 69 |        "      <td>3.5</td>\n",
 70 |        "      <td>V</td>\n",
 71 |        "      <td>NB</td>\n",
 72 |        "    </tr>\n",
 73 |        "    <tr>\n",
 74 |        "      <th>1</th>\n",
 75 |        "      <td>179995.0</td>\n",
 76 |        "      <td>5988.0</td>\n",
 77 |        "      <td>2017.0</td>\n",
 78 |        "      <td>acura</td>\n",
 79 |        "      <td>NSX</td>\n",
 80 |        "      <td>coupe</td>\n",
 81 |        "      <td>Car</td>\n",
 82 |        "      <td>4WD</td>\n",
 83 |        "      <td>Automatic</td>\n",
 84 |        "      <td>hyrid</td>\n",
 85 |        "      <td>3.5</td>\n",
 86 |        "      <td>V</td>\n",
 87 |        "      <td>QC</td>\n",
 88 |        "    </tr>\n",
 89 |        "    <tr>\n",
 90 |        "      <th>2</th>\n",
 91 |        "      <td>168528.0</td>\n",
 92 |        "      <td>24242.0</td>\n",
 93 |        "      <td>2017.0</td>\n",
 94 |        "      <td>acura</td>\n",
 95 |        "      <td>NSX</td>\n",
 96 |        "      <td>coupe</td>\n",
 97 |        "      <td>Car</td>\n",
 98 |        "      <td>4WD</td>\n",
 99 |        "      <td>Automatic</td>\n",
100 |        "      <td>hyrid</td>\n",
101 |        "      <td>3.5</td>\n",
102 |        "      <td>V</td>\n",
103 |        "      <td>BC</td>\n",
104 |        "    </tr>\n",
105 |        "    <tr>\n",
106 |        "      <th>3</th>\n",
107 |        "      <td>220000.0</td>\n",
108 |        "      <td>6637.0</td>\n",
109 |        "      <td>2020.0</td>\n",
110 |        "      <td>acura</td>\n",
111 |        "      <td>NSX</td>\n",
112 |        "      <td>coupe</td>\n",
113 |        "      <td>Car</td>\n",
114 |        "      <td>4WD</td>\n",
115 |        "      <td>Automatic</td>\n",
116 |        "      <td>hyrid</td>\n",
117 |        "      <td>3.5</td>\n",
118 |        "      <td>V</td>\n",
119 |        "      <td>ON</td>\n",
120 |        "    </tr>\n",
121 |        "    <tr>\n",
122 |        "      <th>4</th>\n",
123 |        "      <td>220000.0</td>\n",
124 |        "      <td>6637.0</td>\n",
125 |        "      <td>2020.0</td>\n",
126 |        "      <td>acura</td>\n",
127 |        "      <td>NSX</td>\n",
128 |        "      <td>coupe</td>\n",
129 |        "      <td>Car</td>\n",
130 |        "      <td>4WD</td>\n",
131 |        "      <td>Automatic</td>\n",
132 |        "      <td>hyrid</td>\n",
133 |        "      <td>3.5</td>\n",
134 |        "      <td>V</td>\n",
135 |        "      <td>ON</td>\n",
136 |        "    </tr>\n",
137 |        "  </tbody>\n",
138 |        "</table>\n",
139 |        "</div>"
140 |       ],
141 |       "text/plain": [
142 |        "      price    miles    year   make model body_type vehicle_type drivetrain  \\\n",
143 |        "0  179999.0   9966.0  2017.0  acura   NSX     coupe          Car        4WD   \n",
144 |        "1  179995.0   5988.0  2017.0  acura   NSX     coupe          Car        4WD   \n",
145 |        "2  168528.0  24242.0  2017.0  acura   NSX     coupe          Car        4WD   \n",
146 |        "3  220000.0   6637.0  2020.0  acura   NSX     coupe          Car        4WD   \n",
147 |        "4  220000.0   6637.0  2020.0  acura   NSX     coupe          Car        4WD   \n",
148 |        "\n",
149 |        "  transmission fuel_type  engine_size engine_block state  \n",
150 |        "0    Automatic     hyrid          3.5            V    NB  \n",
151 |        "1    Automatic     hyrid          3.5            V    QC  \n",
152 |        "2    Automatic     hyrid          3.5            V    BC  \n",
153 |        "3    Automatic     hyrid          3.5            V    ON  \n",
154 |        "4    Automatic     hyrid          3.5            V    ON  "
155 |       ]
156 |      },
157 |      "execution_count": 2,
158 |      "metadata": {},
159 |      "output_type": "execute_result"
160 |     }
161 |    ],
162 |    "source": [
163 |     "df = pd.read_csv('data/used_car_canada_clean.csv')\n",
164 |     "df.head()"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": 3,
170 |    "id": "a0a9b482",
171 |    "metadata": {},
172 |    "outputs": [],
173 |    "source": [
174 |     "cols_to_drop = ['body_type', 'vehicle_type', 'drivetrain', 'transmission', 'fuel_type', 'engine_block']\n",
175 |     "df = df.drop(cols_to_drop, axis=1)"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "code",
180 |    "execution_count": 4,
181 |    "id": "64e04ab5",
182 |    "metadata": {},
183 |    "outputs": [
184 |     {
185 |      "data": {
186 |       "text/html": [
187 |        "<div>\n",
188 |        "<style scoped>\n",
189 |        "    .dataframe tbody tr th:only-of-type {\n",
190 |        "        vertical-align: middle;\n",
191 |        "    }\n",
192 |        "\n",
193 |        "    .dataframe tbody tr th {\n",
194 |        "        vertical-align: top;\n",
195 |        "    }\n",
196 |        "\n",
197 |        "    .dataframe thead th {\n",
198 |        "        text-align: right;\n",
199 |        "    }\n",
200 |        "</style>\n",
201 |        "<table border=\"1\" class=\"dataframe\">\n",
202 |        "  <thead>\n",
203 |        "    <tr style=\"text-align: right;\">\n",
204 |        "      <th></th>\n",
205 |        "      <th>price</th>\n",
206 |        "      <th>miles</th>\n",
207 |        "      <th>year</th>\n",
208 |        "      <th>make</th>\n",
209 |        "      <th>model</th>\n",
210 |        "      <th>engine_size</th>\n",
211 |        "      <th>state</th>\n",
212 |        "    </tr>\n",
213 |        "  </thead>\n",
214 |        "  <tbody>\n",
215 |        "    <tr>\n",
216 |        "      <th>0</th>\n",
217 |        "      <td>179999.0</td>\n",
218 |        "      <td>9966.0</td>\n",
219 |        "      <td>2017.0</td>\n",
220 |        "      <td>acura</td>\n",
221 |        "      <td>NSX</td>\n",
222 |        "      <td>3.5</td>\n",
223 |        "      <td>NB</td>\n",
224 |        "    </tr>\n",
225 |        "    <tr>\n",
226 |        "      <th>1</th>\n",
227 |        "      <td>179995.0</td>\n",
228 |        "      <td>5988.0</td>\n",
229 |        "      <td>2017.0</td>\n",
230 |        "      <td>acura</td>\n",
231 |        "      <td>NSX</td>\n",
232 |        "      <td>3.5</td>\n",
233 |        "      <td>QC</td>\n",
234 |        "    </tr>\n",
235 |        "    <tr>\n",
236 |        "      <th>2</th>\n",
237 |        "      <td>168528.0</td>\n",
238 |        "      <td>24242.0</td>\n",
239 |        "      <td>2017.0</td>\n",
240 |        "      <td>acura</td>\n",
241 |        "      <td>NSX</td>\n",
242 |        "      <td>3.5</td>\n",
243 |        "      <td>BC</td>\n",
244 |        "    </tr>\n",
245 |        "    <tr>\n",
246 |        "      <th>3</th>\n",
247 |        "      <td>220000.0</td>\n",
248 |        "      <td>6637.0</td>\n",
249 |        "      <td>2020.0</td>\n",
250 |        "      <td>acura</td>\n",
251 |        "      <td>NSX</td>\n",
252 |        "      <td>3.5</td>\n",
253 |        "      <td>ON</td>\n",
254 |        "    </tr>\n",
255 |        "    <tr>\n",
256 |        "      <th>4</th>\n",
257 |        "      <td>220000.0</td>\n",
258 |        "      <td>6637.0</td>\n",
259 |        "      <td>2020.0</td>\n",
260 |        "      <td>acura</td>\n",
261 |        "      <td>NSX</td>\n",
262 |        "      <td>3.5</td>\n",
263 |        "      <td>ON</td>\n",
264 |        "    </tr>\n",
265 |        "  </tbody>\n",
266 |        "</table>\n",
267 |        "</div>"
268 |       ],
269 |       "text/plain": [
270 |        "      price    miles    year   make model  engine_size state\n",
271 |        "0  179999.0   9966.0  2017.0  acura   NSX          3.5    NB\n",
272 |        "1  179995.0   5988.0  2017.0  acura   NSX          3.5    QC\n",
273 |        "2  168528.0  24242.0  2017.0  acura   NSX          3.5    BC\n",
274 |        "3  220000.0   6637.0  2020.0  acura   NSX          3.5    ON\n",
275 |        "4  220000.0   6637.0  2020.0  acura   NSX          3.5    ON"
276 |       ]
277 |      },
278 |      "execution_count": 4,
279 |      "metadata": {},
280 |      "output_type": "execute_result"
281 |     }
282 |    ],
283 |    "source": [
284 |     "df.head()"
285 |    ]
286 |   },
287 |   {
288 |    "cell_type": "code",
289 |    "execution_count": 6,
290 |    "id": "50211b4e",
291 |    "metadata": {},
292 |    "outputs": [],
293 |    "source": [
294 |     "df_toyota_honda = df.loc[(df['make'] == 'honda') | (df['make'] == 'toyota')]"
295 |    ]
296 |   },
297 |   {
298 |    "cell_type": "code",
299 |    "execution_count": 8,
300 |    "id": "2587f01f",
301 |    "metadata": {},
302 |    "outputs": [],
303 |    "source": [
304 |     "df_toyota_honda.to_csv('data/honda_toyota_ca.csv', index=False, header=True)"
305 |    ]
306 |   },
307 |   {
308 |    "cell_type": "markdown",
309 |    "id": "90e413a4",
310 |    "metadata": {},
311 |    "source": [
312 |     "## Model "
313 |    ]
314 |   },
315 |   {
316 |    "cell_type": "code",
317 |    "execution_count": 9,
318 |    "id": "0973175a",
319 |    "metadata": {},
320 |    "outputs": [
321 |     {
322 |      "data": {
323 |       "text/html": [
324 |        "<div>\n",
325 |        "<style scoped>\n",
326 |        "    .dataframe tbody tr th:only-of-type {\n",
327 |        "        vertical-align: middle;\n",
328 |        "    }\n",
329 |        "\n",
330 |        "    .dataframe tbody tr th {\n",
331 |        "        vertical-align: top;\n",
332 |        "    }\n",
333 |        "\n",
334 |        "    .dataframe thead th {\n",
335 |        "        text-align: right;\n",
336 |        "    }\n",
337 |        "</style>\n",
338 |        "<table border=\"1\" class=\"dataframe\">\n",
339 |        "  <thead>\n",
340 |        "    <tr style=\"text-align: right;\">\n",
341 |        "      <th></th>\n",
342 |        "      <th>price</th>\n",
343 |        "      <th>miles</th>\n",
344 |        "      <th>year</th>\n",
345 |        "      <th>make</th>\n",
346 |        "      <th>model</th>\n",
347 |        "      <th>engine_size</th>\n",
348 |        "      <th>state</th>\n",
349 |        "    </tr>\n",
350 |        "  </thead>\n",
351 |        "  <tbody>\n",
352 |        "    <tr>\n",
353 |        "      <th>0</th>\n",
354 |        "      <td>4980.0</td>\n",
355 |        "      <td>86132.0</td>\n",
356 |        "      <td>2001.0</td>\n",
357 |        "      <td>toyota</td>\n",
358 |        "      <td>Prius</td>\n",
359 |        "      <td>1.5</td>\n",
360 |        "      <td>BC</td>\n",
361 |        "    </tr>\n",
362 |        "    <tr>\n",
363 |        "      <th>1</th>\n",
364 |        "      <td>18926.0</td>\n",
365 |        "      <td>80516.0</td>\n",
366 |        "      <td>2017.0</td>\n",
367 |        "      <td>toyota</td>\n",
368 |        "      <td>Prius</td>\n",
369 |        "      <td>1.8</td>\n",
370 |        "      <td>ON</td>\n",
371 |        "    </tr>\n",
372 |        "    <tr>\n",
373 |        "      <th>2</th>\n",
374 |        "      <td>23900.0</td>\n",
375 |        "      <td>29295.0</td>\n",
376 |        "      <td>2018.0</td>\n",
377 |        "      <td>toyota</td>\n",
378 |        "      <td>Prius</td>\n",
379 |        "      <td>1.8</td>\n",
380 |        "      <td>ON</td>\n",
381 |        "    </tr>\n",
382 |        "    <tr>\n",
383 |        "      <th>3</th>\n",
384 |        "      <td>27980.0</td>\n",
385 |        "      <td>57894.0</td>\n",
386 |        "      <td>2018.0</td>\n",
387 |        "      <td>toyota</td>\n",
388 |        "      <td>Prius</td>\n",
389 |        "      <td>1.8</td>\n",
390 |        "      <td>BC</td>\n",
391 |        "    </tr>\n",
392 |        "    <tr>\n",
393 |        "      <th>4</th>\n",
394 |        "      <td>22887.0</td>\n",
395 |        "      <td>95106.0</td>\n",
396 |        "      <td>2016.0</td>\n",
397 |        "      <td>toyota</td>\n",
398 |        "      <td>Prius</td>\n",
399 |        "      <td>1.8</td>\n",
400 |        "      <td>AB</td>\n",
401 |        "    </tr>\n",
402 |        "  </tbody>\n",
403 |        "</table>\n",
404 |        "</div>"
405 |       ],
406 |       "text/plain": [
407 |        "     price    miles    year    make  model  engine_size state\n",
408 |        "0   4980.0  86132.0  2001.0  toyota  Prius          1.5    BC\n",
409 |        "1  18926.0  80516.0  2017.0  toyota  Prius          1.8    ON\n",
410 |        "2  23900.0  29295.0  2018.0  toyota  Prius          1.8    ON\n",
411 |        "3  27980.0  57894.0  2018.0  toyota  Prius          1.8    BC\n",
412 |        "4  22887.0  95106.0  2016.0  toyota  Prius          1.8    AB"
413 |       ]
414 |      },
415 |      "execution_count": 9,
416 |      "metadata": {},
417 |      "output_type": "execute_result"
418 |     }
419 |    ],
420 |    "source": [
421 |     "df = pd.read_csv('data/honda_toyota_ca.csv')\n",
422 |     "df.head()"
423 |    ]
424 |   },
425 |   {
426 |    "cell_type": "code",
427 |    "execution_count": 10,
428 |    "id": "8bffb126",
429 |    "metadata": {},
430 |    "outputs": [],
431 |    "source": [
432 |     "from sklearn.model_selection import train_test_split\n",
433 |     "\n",
434 |     "X = df.drop(['price'], axis=1)\n",
435 |     "y = df['price']\n",
436 |     "\n",
437 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=df[['make', 'model']], test_size=0.2, shuffle=True, random_state=42)"
438 |    ]
439 |   },
440 |   {
441 |    "cell_type": "code",
442 |    "execution_count": 13,
443 |    "id": "9e35e3a0",
444 |    "metadata": {},
445 |    "outputs": [],
446 |    "source": [
447 |     "from sklearn.compose import ColumnTransformer\n",
448 |     "from sklearn.pipeline import Pipeline\n",
449 |     "from sklearn.preprocessing import OneHotEncoder\n",
450 |     "from sklearn.ensemble import GradientBoostingRegressor\n",
451 |     "\n",
452 |     "cat_index = [2,3,5]\n",
453 |     "\n",
454 |     "cat_features_transformer = Pipeline(\n",
455 |     "    steps=[\n",
456 |     "        (\"encoder\", OneHotEncoder()),\n",
457 |     "    ]\n",
458 |     ")\n",
459 |     "\n",
460 |     "preprocessor = ColumnTransformer(\n",
461 |     "    transformers=[\n",
462 |     "        (\"cat\", cat_features_transformer, cat_index)\n",
463 |     "    ]\n",
464 |     ")\n",
465 |     "\n",
466 |     "\n",
467 |     "model = Pipeline(\n",
468 |     "    steps=[\n",
469 |     "        (\"preprocessor\", preprocessor),\n",
470 |     "        (\"regressor\", GradientBoostingRegressor(random_state=42))\n",
471 |     "    ]\n",
472 |     ")"
473 |    ]
474 |   },
475 |   {
476 |    "cell_type": "code",
477 |    "execution_count": 14,
478 |    "id": "de4b7b62",
479 |    "metadata": {},
480 |    "outputs": [
481 |     {
482 |      "data": {
483 |       "text/html": [
484 |        "<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 
div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;preprocessor&#x27;,\n",
485 |        "                 ColumnTransformer(transformers=[(&#x27;cat&#x27;,\n",
486 |        "                                                  Pipeline(steps=[(&#x27;encoder&#x27;,\n",
487 |        "                                                                   OneHotEncoder())]),\n",
488 |        "                                                  [2, 3, 5])])),\n",
489 |        "                (&#x27;regressor&#x27;, GradientBoostingRegressor(random_state=42))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;preprocessor&#x27;,\n",
490 |        "                 ColumnTransformer(transformers=[(&#x27;cat&#x27;,\n",
491 |        "                                                  Pipeline(steps=[(&#x27;encoder&#x27;,\n",
492 |        "                                                                   OneHotEncoder())]),\n",
493 |        "                                                  [2, 3, 5])])),\n",
494 |        "                (&#x27;regressor&#x27;, GradientBoostingRegressor(random_state=42))])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">preprocessor: ColumnTransformer</label><div class=\"sk-toggleable__content\"><pre>ColumnTransformer(transformers=[(&#x27;cat&#x27;,\n",
495 |        "                                 Pipeline(steps=[(&#x27;encoder&#x27;, OneHotEncoder())]),\n",
496 |        "                                 [2, 3, 5])])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">cat</label><div class=\"sk-toggleable__content\"><pre>[2, 3, 5]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder()</pre></div></div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">GradientBoostingRegressor</label><div class=\"sk-toggleable__content\"><pre>GradientBoostingRegressor(random_state=42)</pre></div></div></div></div></div></div></div>"
497 |       ],
498 |       "text/plain": [
499 |        "Pipeline(steps=[('preprocessor',\n",
500 |        "                 ColumnTransformer(transformers=[('cat',\n",
501 |        "                                                  Pipeline(steps=[('encoder',\n",
502 |        "                                                                   OneHotEncoder())]),\n",
503 |        "                                                  [2, 3, 5])])),\n",
504 |        "                ('regressor', GradientBoostingRegressor(random_state=42))])"
505 |       ]
506 |      },
507 |      "execution_count": 14,
508 |      "metadata": {},
509 |      "output_type": "execute_result"
510 |     }
511 |    ],
512 |    "source": [
513 |     "model.fit(X_train, y_train)"
514 |    ]
515 |   },
516 |   {
517 |    "cell_type": "code",
518 |    "execution_count": 15,
519 |    "id": "fd936fd5",
520 |    "metadata": {},
521 |    "outputs": [
522 |     {
523 |      "data": {
524 |       "text/plain": [
525 |        "0.4865534075230413"
526 |       ]
527 |      },
528 |      "execution_count": 15,
529 |      "metadata": {},
530 |      "output_type": "execute_result"
531 |     }
532 |    ],
533 |    "source": [
534 |     "model.score(X_test, y_test)"
535 |    ]
536 |   },
537 |   {
538 |    "cell_type": "code",
539 |    "execution_count": 16,
540 |    "id": "3ada59d9",
541 |    "metadata": {},
542 |    "outputs": [
543 |     {
544 |      "data": {
545 |       "text/plain": [
546 |        "['model/model.joblib']"
547 |       ]
548 |      },
549 |      "execution_count": 16,
550 |      "metadata": {},
551 |      "output_type": "execute_result"
552 |     }
553 |    ],
554 |    "source": [
555 |     "from joblib import dump\n",
556 |     "\n",
557 |     "dump(model, 'model/model.joblib')"
558 |    ]
559 |   },
560 |   {
561 |    "cell_type": "code",
562 |    "execution_count": null,
563 |    "id": "64146686",
564 |    "metadata": {},
565 |    "outputs": [],
566 |    "source": []
567 |   }
568 |  ],
569 |  "metadata": {
570 |   "kernelspec": {
571 |    "display_name": "Python 3",
572 |    "language": "python",
573 |    "name": "python3"
574 |   },
575 |   "language_info": {
576 |    "codemirror_mode": {
577 |     "name": "ipython",
578 |     "version": 3
579 |    },
580 |    "file_extension": ".py",
581 |    "mimetype": "text/x-python",
582 |    "name": "python",
583 |    "nbconvert_exporter": "python",
584 |    "pygments_lexer": "ipython3",
585 |    "version": "3.10.9"
586 |   }
587 |  },
588 |  "nbformat": 4,
589 |  "nbformat_minor": 5
590 | }
591 | 


--------------------------------------------------------------------------------