├── README.md
├── app_10_regression_bioinformatics_solubility
├── solubility-app.py
├── solubility-logo.jpg
├── solubility-web-app.ipynb
└── solubility_model.pkl
├── app_1_simple_stock_price
├── myapp.py
└── myapp2.py
├── app_2_simple_bioinformatics_dna
├── dna-app.py
└── dna-logo.jpg
├── app_3_eda_basketball
└── basketball_app.py
├── app_4_eda_football
└── football_app.py
├── app_5_eda_sp500_stock
└── sp500-app.py
├── app_6_eda_cryptocurrency
├── crypto-price-app.py
└── logo.jpg
├── app_7_classification_iris
└── iris-ml-app.py
├── app_8_classification_penguins
├── penguins-app.py
├── penguins-model-building.py
├── penguins_cleaned.csv
├── penguins_clf.pkl
└── penguins_example.csv
└── app_9_regression_boston_housing
└── boston-house-ml-app.py
/README.md:
--------------------------------------------------------------------------------
1 | # streamlit_freecodecamp
2 | ## Build 12 Data Apps in Python with Streamlit
3 |
4 | Learn how to build interactive and data-driven web apps in Python using the Streamlit library.
5 |
6 | Video | Title
7 | ---|---
8 | [Watch on YouTube](https://youtu.be/JwSS70SZdyM) | [Build 12 Data Science Apps with Python and Streamlit - Full Course](https://youtu.be/JwSS70SZdyM)
9 |
10 |
--------------------------------------------------------------------------------
/app_10_regression_bioinformatics_solubility/solubility-app.py:
--------------------------------------------------------------------------------
1 | ######################
2 | # Import libraries
3 | ######################
4 | import numpy as np
5 | import pandas as pd
6 | import streamlit as st
7 | import pickle
8 | from PIL import Image
9 | from rdkit import Chem
10 | from rdkit.Chem import Descriptors
11 |
12 | ######################
13 | # Custom function
14 | ######################
15 | ## Calculate molecular descriptors
def AromaticProportion(m):
    """Return the fraction of heavy atoms in ``m`` that are aromatic.

    Args:
        m: An RDKit molecule.

    Returns:
        The number of aromatic atoms divided by the heavy-atom count, or
        ``0.0`` when the molecule has no heavy atoms.
    """
    aromatic_atoms = sum(1 for atom in m.GetAtoms() if atom.GetIsAromatic())
    heavy_atoms = Descriptors.HeavyAtomCount(m)
    if heavy_atoms == 0:
        # Nothing to divide by (e.g. an atom-less molecule); report no
        # aromatic content instead of raising ZeroDivisionError.
        return 0.0
    return aromatic_atoms / heavy_atoms
26 |
def generate(smiles, verbose=False):
    """Compute the model's four molecular descriptors for each SMILES string.

    Args:
        smiles: Iterable of SMILES strings.
        verbose: Unused; kept so existing callers keep working.

    Returns:
        A DataFrame with one row per input molecule and the float columns
        ``MolLogP``, ``MolWt``, ``NumRotatableBonds`` and
        ``AromaticProportion``. Empty input yields an empty DataFrame with
        the same columns.

    Raises:
        ValueError: If a SMILES string cannot be parsed into a molecule.
    """
    columnNames = ["MolLogP", "MolWt", "NumRotatableBonds", "AromaticProportion"]

    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        if mol is None:
            # Fail with the offending input instead of an opaque error from
            # the descriptor functions.
            raise ValueError(f"Could not parse SMILES string: {elem!r}")
        rows.append([
            Descriptors.MolLogP(mol),
            Descriptors.MolWt(mol),
            Descriptors.NumRotatableBonds(mol),
            AromaticProportion(mol),
        ])

    # Always build a 2-D (n, 4) float array so that zero or one molecule
    # produces a well-formed frame, not only two or more.
    baseData = np.array(rows, dtype=float).reshape(-1, len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)
58 |
59 | ######################
60 | # Page Title
61 | ######################
62 |
63 | image = Image.open('solubility-logo.jpg')
64 |
65 | st.image(image, use_column_width=True)
66 |
67 | st.write("""
68 | # Molecular Solubility Prediction Web App
69 |
70 | This app predicts the **Solubility (LogS)** values of molecules!
71 |
72 | Data obtained from the John S. Delaney. [ESOL: Estimating Aqueous Solubility Directly from Molecular Structure](https://pubs.acs.org/doi/10.1021/ci034243x). ***J. Chem. Inf. Comput. Sci.*** 2004, 44, 3, 1000-1005.
73 | ***
74 | """)
75 |
76 |
######################
# Input molecules (Side Panel)
######################

st.sidebar.header('User Input Features')

## Read SMILES input
SMILES_input = "NCCCC\nCCC\nCN"

SMILES = st.sidebar.text_area("SMILES input", SMILES_input)
SMILES = "C\n" + SMILES #Adds C as a dummy, first item
# Blank lines (e.g. a trailing newline in the text area) are not molecules,
# so drop them before computing descriptors.
SMILES = [line.strip() for line in SMILES.split('\n') if line.strip()]

if len(SMILES) == 1:
    # Only the dummy entry is left: the user supplied no molecules.
    st.warning('Please enter at least one SMILES string.')
else:
    st.header('Input SMILES')
    st.write(SMILES[1:]) # Skips the dummy first item

    ## Calculate molecular descriptors
    st.header('Computed molecular descriptors')
    X = generate(SMILES)
    st.write(X[1:]) # Skips the dummy first item

    ######################
    # Pre-built model
    ######################

    # Reads in saved model; the context manager closes the file handle.
    # NOTE: pickle must only be used on trusted files such as this bundled model.
    with open('solubility_model.pkl', 'rb') as model_file:
        load_model = pickle.load(model_file)

    # Apply model to make predictions
    prediction = load_model.predict(X)

    st.header('Predicted LogS values')
    st.write(prediction[1:]) # Skips the dummy first item
111 |
--------------------------------------------------------------------------------
/app_10_regression_bioinformatics_solubility/solubility-logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dataprofessor/streamlit_freecodecamp/d44c4c1320f8417b3d5494902e5366b715314528/app_10_regression_bioinformatics_solubility/solubility-logo.jpg
--------------------------------------------------------------------------------
/app_10_regression_bioinformatics_solubility/solubility-web-app.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "solubility-web-app.ipynb",
7 | "provenance": [],
8 | "toc_visible": true
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | }
14 | },
15 | "cells": [
16 | {
17 | "cell_type": "markdown",
18 | "metadata": {
19 | "id": "QQHZHevuXdEy",
20 | "colab_type": "text"
21 | },
22 | "source": [
23 | "# **Model Building for Solubility Dataset**\n",
24 | "\n",
25 | "Chanin Nantasenamat\n",
26 | "\n",
27 | "*Data Professor YouTube channel, http://youtube.com/dataprofessor*"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {
33 | "id": "g1qtHa0zXfWM",
34 | "colab_type": "text"
35 | },
36 | "source": [
37 | "# Read in data"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "metadata": {
43 | "id": "9MdfbvFKXtXq",
44 | "colab_type": "code",
45 | "colab": {}
46 | },
47 | "source": [
48 | "import pandas as pd"
49 | ],
50 | "execution_count": 1,
51 | "outputs": []
52 | },
53 | {
54 | "cell_type": "code",
55 | "metadata": {
56 | "id": "nerGP0fCXfgP",
57 | "colab_type": "code",
58 | "colab": {
59 | "base_uri": "https://localhost:8080/",
60 | "height": 419
61 | },
62 | "outputId": "2bb155a6-2710-4461-accb-df64045ba70d"
63 | },
64 | "source": [
65 | "delaney_with_descriptors_url = 'https://raw.githubusercontent.com/dataprofessor/data/master/delaney_solubility_with_descriptors.csv'\n",
66 | "dataset = pd.read_csv(delaney_with_descriptors_url)\n",
67 | "dataset"
68 | ],
69 | "execution_count": 2,
70 | "outputs": [
71 | {
72 | "output_type": "execute_result",
73 | "data": {
74 | "text/html": [
75 | "
\n",
76 | "\n",
89 | "
\n",
90 | " \n",
91 | " \n",
92 | " | \n",
93 | " MolLogP | \n",
94 | " MolWt | \n",
95 | " NumRotatableBonds | \n",
96 | " AromaticProportion | \n",
97 | " logS | \n",
98 | "
\n",
99 | " \n",
100 | " \n",
101 | " \n",
102 | " 0 | \n",
103 | " 2.59540 | \n",
104 | " 167.850 | \n",
105 | " 0.0 | \n",
106 | " 0.000000 | \n",
107 | " -2.180 | \n",
108 | "
\n",
109 | " \n",
110 | " 1 | \n",
111 | " 2.37650 | \n",
112 | " 133.405 | \n",
113 | " 0.0 | \n",
114 | " 0.000000 | \n",
115 | " -2.000 | \n",
116 | "
\n",
117 | " \n",
118 | " 2 | \n",
119 | " 2.59380 | \n",
120 | " 167.850 | \n",
121 | " 1.0 | \n",
122 | " 0.000000 | \n",
123 | " -1.740 | \n",
124 | "
\n",
125 | " \n",
126 | " 3 | \n",
127 | " 2.02890 | \n",
128 | " 133.405 | \n",
129 | " 1.0 | \n",
130 | " 0.000000 | \n",
131 | " -1.480 | \n",
132 | "
\n",
133 | " \n",
134 | " 4 | \n",
135 | " 2.91890 | \n",
136 | " 187.375 | \n",
137 | " 1.0 | \n",
138 | " 0.000000 | \n",
139 | " -3.040 | \n",
140 | "
\n",
141 | " \n",
142 | " ... | \n",
143 | " ... | \n",
144 | " ... | \n",
145 | " ... | \n",
146 | " ... | \n",
147 | " ... | \n",
148 | "
\n",
149 | " \n",
150 | " 1139 | \n",
151 | " 1.98820 | \n",
152 | " 287.343 | \n",
153 | " 8.0 | \n",
154 | " 0.000000 | \n",
155 | " 1.144 | \n",
156 | "
\n",
157 | " \n",
158 | " 1140 | \n",
159 | " 3.42130 | \n",
160 | " 286.114 | \n",
161 | " 2.0 | \n",
162 | " 0.333333 | \n",
163 | " -4.925 | \n",
164 | "
\n",
165 | " \n",
166 | " 1141 | \n",
167 | " 3.60960 | \n",
168 | " 308.333 | \n",
169 | " 4.0 | \n",
170 | " 0.695652 | \n",
171 | " -3.893 | \n",
172 | "
\n",
173 | " \n",
174 | " 1142 | \n",
175 | " 2.56214 | \n",
176 | " 354.815 | \n",
177 | " 3.0 | \n",
178 | " 0.521739 | \n",
179 | " -3.790 | \n",
180 | "
\n",
181 | " \n",
182 | " 1143 | \n",
183 | " 2.02164 | \n",
184 | " 179.219 | \n",
185 | " 1.0 | \n",
186 | " 0.461538 | \n",
187 | " -2.581 | \n",
188 | "
\n",
189 | " \n",
190 | "
\n",
191 | "
1144 rows × 5 columns
\n",
192 | "
"
193 | ],
194 | "text/plain": [
195 | " MolLogP MolWt NumRotatableBonds AromaticProportion logS\n",
196 | "0 2.59540 167.850 0.0 0.000000 -2.180\n",
197 | "1 2.37650 133.405 0.0 0.000000 -2.000\n",
198 | "2 2.59380 167.850 1.0 0.000000 -1.740\n",
199 | "3 2.02890 133.405 1.0 0.000000 -1.480\n",
200 | "4 2.91890 187.375 1.0 0.000000 -3.040\n",
201 | "... ... ... ... ... ...\n",
202 | "1139 1.98820 287.343 8.0 0.000000 1.144\n",
203 | "1140 3.42130 286.114 2.0 0.333333 -4.925\n",
204 | "1141 3.60960 308.333 4.0 0.695652 -3.893\n",
205 | "1142 2.56214 354.815 3.0 0.521739 -3.790\n",
206 | "1143 2.02164 179.219 1.0 0.461538 -2.581\n",
207 | "\n",
208 | "[1144 rows x 5 columns]"
209 | ]
210 | },
211 | "metadata": {
212 | "tags": []
213 | },
214 | "execution_count": 2
215 | }
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "metadata": {
221 | "id": "tgFxx8m_YEUy",
222 | "colab_type": "code",
223 | "colab": {
224 | "base_uri": "https://localhost:8080/",
225 | "height": 419
226 | },
227 | "outputId": "fd6feedd-253b-4189-d400-a8d3f5bf1f25"
228 | },
229 | "source": [
230 | "X = dataset.drop(['logS'], axis=1)\n",
231 | "X"
232 | ],
233 | "execution_count": 5,
234 | "outputs": [
235 | {
236 | "output_type": "execute_result",
237 | "data": {
238 | "text/html": [
239 | "\n",
240 | "\n",
253 | "
\n",
254 | " \n",
255 | " \n",
256 | " | \n",
257 | " MolLogP | \n",
258 | " MolWt | \n",
259 | " NumRotatableBonds | \n",
260 | " AromaticProportion | \n",
261 | "
\n",
262 | " \n",
263 | " \n",
264 | " \n",
265 | " 0 | \n",
266 | " 2.59540 | \n",
267 | " 167.850 | \n",
268 | " 0.0 | \n",
269 | " 0.000000 | \n",
270 | "
\n",
271 | " \n",
272 | " 1 | \n",
273 | " 2.37650 | \n",
274 | " 133.405 | \n",
275 | " 0.0 | \n",
276 | " 0.000000 | \n",
277 | "
\n",
278 | " \n",
279 | " 2 | \n",
280 | " 2.59380 | \n",
281 | " 167.850 | \n",
282 | " 1.0 | \n",
283 | " 0.000000 | \n",
284 | "
\n",
285 | " \n",
286 | " 3 | \n",
287 | " 2.02890 | \n",
288 | " 133.405 | \n",
289 | " 1.0 | \n",
290 | " 0.000000 | \n",
291 | "
\n",
292 | " \n",
293 | " 4 | \n",
294 | " 2.91890 | \n",
295 | " 187.375 | \n",
296 | " 1.0 | \n",
297 | " 0.000000 | \n",
298 | "
\n",
299 | " \n",
300 | " ... | \n",
301 | " ... | \n",
302 | " ... | \n",
303 | " ... | \n",
304 | " ... | \n",
305 | "
\n",
306 | " \n",
307 | " 1139 | \n",
308 | " 1.98820 | \n",
309 | " 287.343 | \n",
310 | " 8.0 | \n",
311 | " 0.000000 | \n",
312 | "
\n",
313 | " \n",
314 | " 1140 | \n",
315 | " 3.42130 | \n",
316 | " 286.114 | \n",
317 | " 2.0 | \n",
318 | " 0.333333 | \n",
319 | "
\n",
320 | " \n",
321 | " 1141 | \n",
322 | " 3.60960 | \n",
323 | " 308.333 | \n",
324 | " 4.0 | \n",
325 | " 0.695652 | \n",
326 | "
\n",
327 | " \n",
328 | " 1142 | \n",
329 | " 2.56214 | \n",
330 | " 354.815 | \n",
331 | " 3.0 | \n",
332 | " 0.521739 | \n",
333 | "
\n",
334 | " \n",
335 | " 1143 | \n",
336 | " 2.02164 | \n",
337 | " 179.219 | \n",
338 | " 1.0 | \n",
339 | " 0.461538 | \n",
340 | "
\n",
341 | " \n",
342 | "
\n",
343 | "
1144 rows × 4 columns
\n",
344 | "
"
345 | ],
346 | "text/plain": [
347 | " MolLogP MolWt NumRotatableBonds AromaticProportion\n",
348 | "0 2.59540 167.850 0.0 0.000000\n",
349 | "1 2.37650 133.405 0.0 0.000000\n",
350 | "2 2.59380 167.850 1.0 0.000000\n",
351 | "3 2.02890 133.405 1.0 0.000000\n",
352 | "4 2.91890 187.375 1.0 0.000000\n",
353 | "... ... ... ... ...\n",
354 | "1139 1.98820 287.343 8.0 0.000000\n",
355 | "1140 3.42130 286.114 2.0 0.333333\n",
356 | "1141 3.60960 308.333 4.0 0.695652\n",
357 | "1142 2.56214 354.815 3.0 0.521739\n",
358 | "1143 2.02164 179.219 1.0 0.461538\n",
359 | "\n",
360 | "[1144 rows x 4 columns]"
361 | ]
362 | },
363 | "metadata": {
364 | "tags": []
365 | },
366 | "execution_count": 5
367 | }
368 | ]
369 | },
370 | {
371 | "cell_type": "code",
372 | "metadata": {
373 | "id": "JDwxgKHqYmD4",
374 | "colab_type": "code",
375 | "colab": {
376 | "base_uri": "https://localhost:8080/",
377 | "height": 221
378 | },
379 | "outputId": "a725d7b7-baad-4a99-9686-4dfe1d852c22"
380 | },
381 | "source": [
382 | "Y = dataset.iloc[:,-1]\n",
383 | "Y"
384 | ],
385 | "execution_count": 6,
386 | "outputs": [
387 | {
388 | "output_type": "execute_result",
389 | "data": {
390 | "text/plain": [
391 | "0 -2.180\n",
392 | "1 -2.000\n",
393 | "2 -1.740\n",
394 | "3 -1.480\n",
395 | "4 -3.040\n",
396 | " ... \n",
397 | "1139 1.144\n",
398 | "1140 -4.925\n",
399 | "1141 -3.893\n",
400 | "1142 -3.790\n",
401 | "1143 -2.581\n",
402 | "Name: logS, Length: 1144, dtype: float64"
403 | ]
404 | },
405 | "metadata": {
406 | "tags": []
407 | },
408 | "execution_count": 6
409 | }
410 | ]
411 | },
412 | {
413 | "cell_type": "markdown",
414 | "metadata": {
415 | "id": "LNohCdqQY5VZ",
416 | "colab_type": "text"
417 | },
418 | "source": [
419 | "# Linear Regression Model"
420 | ]
421 | },
422 | {
423 | "cell_type": "code",
424 | "metadata": {
425 | "id": "EanoyG2eX9cV",
426 | "colab_type": "code",
427 | "colab": {}
428 | },
429 | "source": [
430 | "from sklearn import linear_model\n",
431 | "from sklearn.metrics import mean_squared_error, r2_score"
432 | ],
433 | "execution_count": 3,
434 | "outputs": []
435 | },
436 | {
437 | "cell_type": "code",
438 | "metadata": {
439 | "id": "mLQJ2KLLY_9a",
440 | "colab_type": "code",
441 | "colab": {
442 | "base_uri": "https://localhost:8080/",
443 | "height": 34
444 | },
445 | "outputId": "6349fa74-f087-4d81-916e-294789c6455c"
446 | },
447 | "source": [
448 | "model = linear_model.LinearRegression()\n",
449 | "model.fit(X, Y)"
450 | ],
451 | "execution_count": 7,
452 | "outputs": [
453 | {
454 | "output_type": "execute_result",
455 | "data": {
456 | "text/plain": [
457 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
458 | ]
459 | },
460 | "metadata": {
461 | "tags": []
462 | },
463 | "execution_count": 7
464 | }
465 | ]
466 | },
467 | {
468 | "cell_type": "markdown",
469 | "metadata": {
470 | "id": "F5f8KGWjZRSc",
471 | "colab_type": "text"
472 | },
473 | "source": [
474 | "## Model Prediction"
475 | ]
476 | },
477 | {
478 | "cell_type": "code",
479 | "metadata": {
480 | "id": "MI3c8LB2ZCYW",
481 | "colab_type": "code",
482 | "colab": {
483 | "base_uri": "https://localhost:8080/",
484 | "height": 51
485 | },
486 | "outputId": "19b50c6a-7d1c-4bfd-8789-d5884b42d594"
487 | },
488 | "source": [
489 | "Y_pred = model.predict(X)\n",
490 | "Y_pred"
491 | ],
492 | "execution_count": 8,
493 | "outputs": [
494 | {
495 | "output_type": "execute_result",
496 | "data": {
497 | "text/plain": [
498 | "array([-2.77628837, -2.38661054, -2.77190108, ..., -4.73721496,\n",
499 | " -4.19663007, -2.61784284])"
500 | ]
501 | },
502 | "metadata": {
503 | "tags": []
504 | },
505 | "execution_count": 8
506 | }
507 | ]
508 | },
509 | {
510 | "cell_type": "markdown",
511 | "metadata": {
512 | "id": "fXv7bcolZqa-",
513 | "colab_type": "text"
514 | },
515 | "source": [
516 | "## Model Performance"
517 | ]
518 | },
519 | {
520 | "cell_type": "code",
521 | "metadata": {
522 | "id": "6f13gYleZVKy",
523 | "colab_type": "code",
524 | "colab": {
525 | "base_uri": "https://localhost:8080/",
526 | "height": 85
527 | },
528 | "outputId": "99894d58-83b4-4b64-f54c-848e8430cd5e"
529 | },
530 | "source": [
531 | "print('Coefficients:', model.coef_)\n",
532 | "print('Intercept:', model.intercept_)\n",
533 | "print('Mean squared error (MSE): %.2f'\n",
534 | " % mean_squared_error(Y, Y_pred))\n",
535 | "print('Coefficient of determination (R^2): %.2f'\n",
536 | " % r2_score(Y, Y_pred))"
537 | ],
538 | "execution_count": 9,
539 | "outputs": [
540 | {
541 | "output_type": "stream",
542 | "text": [
543 | "Coefficients: [-0.74173609 -0.00659927 0.00320051 -0.42316387]\n",
544 | "Intercept: 0.2565006830997194\n",
545 | "Mean squared error (MSE): 1.01\n",
546 | "Coefficient of determination (R^2): 0.77\n"
547 | ],
548 | "name": "stdout"
549 | }
550 | ]
551 | },
552 | {
553 | "cell_type": "markdown",
554 | "metadata": {
555 | "id": "Yhuc402dZsk3",
556 | "colab_type": "text"
557 | },
558 | "source": [
559 | "## Model Equation"
560 | ]
561 | },
562 | {
563 | "cell_type": "code",
564 | "metadata": {
565 | "id": "QnoUESmXZcMo",
566 | "colab_type": "code",
567 | "colab": {
568 | "base_uri": "https://localhost:8080/",
569 | "height": 34
570 | },
571 | "outputId": "c2e3b76f-4d9a-425c-99e4-5793dc6c1620"
572 | },
573 | "source": [
574 | "print('LogS = %.2f %.2f LogP %.4f MW + %.4f RB %.2f AP' % (model.intercept_, model.coef_[0], model.coef_[1], model.coef_[2], model.coef_[3] ) )"
575 | ],
576 | "execution_count": 10,
577 | "outputs": [
578 | {
579 | "output_type": "stream",
580 | "text": [
581 | "LogS = 0.26 -0.74 LogP -0.0066 MW + 0.0032 RB -0.42 AP\n"
582 | ],
583 | "name": "stdout"
584 | }
585 | ]
586 | },
587 | {
588 | "cell_type": "markdown",
589 | "metadata": {
590 | "id": "uWvxj1iSaL3n",
591 | "colab_type": "text"
592 | },
593 | "source": [
594 | "# Data Visualization (Experimental vs Predicted LogS for Training Data)"
595 | ]
596 | },
597 | {
598 | "cell_type": "code",
599 | "metadata": {
600 | "id": "iPcFF0MjZlh8",
601 | "colab_type": "code",
602 | "colab": {}
603 | },
604 | "source": [
605 | "import matplotlib.pyplot as plt\n",
606 | "import numpy as np"
607 | ],
608 | "execution_count": 11,
609 | "outputs": []
610 | },
611 | {
612 | "cell_type": "code",
613 | "metadata": {
614 | "id": "QRNyIlGAaQQI",
615 | "colab_type": "code",
616 | "colab": {
617 | "base_uri": "https://localhost:8080/",
618 | "height": 351
619 | },
620 | "outputId": "949bd284-5952-496f-a57e-47a1333fd50b"
621 | },
622 | "source": [
623 | "plt.figure(figsize=(5,5))\n",
624 | "plt.scatter(x=Y, y=Y_pred, c=\"#7CAE00\", alpha=0.3)\n",
625 | "\n",
626 | "# Add trendline\n",
627 | "# https://stackoverflow.com/questions/26447191/how-to-add-trendline-in-python-matplotlib-dot-scatter-graphs\n",
628 | "z = np.polyfit(Y, Y_pred, 1)\n",
629 | "p = np.poly1d(z)\n",
630 | "\n",
631 | "plt.plot(Y,p(Y),\"#F8766D\")\n",
632 | "plt.ylabel('Predicted LogS')\n",
633 | "plt.xlabel('Experimental LogS')"
634 | ],
635 | "execution_count": 17,
636 | "outputs": [
637 | {
638 | "output_type": "execute_result",
639 | "data": {
640 | "text/plain": [
641 | "Text(0.5, 0, 'Experimental LogS')"
642 | ]
643 | },
644 | "metadata": {
645 | "tags": []
646 | },
647 | "execution_count": 17
648 | },
649 | {
650 | "output_type": "display_data",
651 | "data": {
652 | "image/png": "\n",
653 | "text/plain": [
654 | ""
655 | ]
656 | },
657 | "metadata": {
658 | "tags": [],
659 | "needs_background": "light"
660 | }
661 | }
662 | ]
663 | },
664 | {
665 | "cell_type": "markdown",
666 | "metadata": {
667 | "id": "YzKTmvZrbFVI",
668 | "colab_type": "text"
669 | },
670 | "source": [
671 | "# Save Model as Pickle Object"
672 | ]
673 | },
674 | {
675 | "cell_type": "code",
676 | "metadata": {
677 | "id": "DzjpPyVyb8XO",
678 | "colab_type": "code",
679 | "colab": {}
680 | },
681 | "source": [
682 | "import pickle"
683 | ],
684 | "execution_count": 18,
685 | "outputs": []
686 | },
687 | {
688 | "cell_type": "code",
689 | "metadata": {
690 | "id": "b2K9ajBaaYUk",
691 | "colab_type": "code",
692 | "colab": {}
693 | },
694 | "source": [
695 | "pickle.dump(model, open('solubility_model.pkl', 'wb'))"
696 | ],
697 | "execution_count": 19,
698 | "outputs": []
699 | },
700 | {
701 | "cell_type": "code",
702 | "metadata": {
703 | "id": "ef4fyvrEb-NC",
704 | "colab_type": "code",
705 | "colab": {}
706 | },
707 | "source": [
708 | ""
709 | ],
710 | "execution_count": null,
711 | "outputs": []
712 | }
713 | ]
714 | }
715 |
--------------------------------------------------------------------------------
/app_10_regression_bioinformatics_solubility/solubility_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dataprofessor/streamlit_freecodecamp/d44c4c1320f8417b3d5494902e5366b715314528/app_10_regression_bioinformatics_solubility/solubility_model.pkl
--------------------------------------------------------------------------------
/app_1_simple_stock_price/myapp.py:
--------------------------------------------------------------------------------
1 | import yfinance as yf
2 | import streamlit as st
3 |
4 | st.write("""
5 | # Simple Stock Price App
6 |
7 | Shown are the stock closing price and volume of Google!
8 |
9 | """)
10 |
# https://towardsdatascience.com/how-to-get-stock-data-using-python-c0de1df17e75
# Define the ticker symbol
tickerSymbol = 'GOOGL'
# Get data on this ticker
tickerData = yf.Ticker(tickerSymbol)
# Get the historical daily prices for this ticker. The date range is fixed by
# start/end, so '1d' is the bar size (interval), not a look-back period.
tickerDf = tickerData.history(interval='1d', start='2010-5-31', end='2020-5-31')
# Open	High	Low	Close	Volume	Dividends	Stock Splits

st.line_chart(tickerDf.Close)
st.line_chart(tickerDf.Volume)
22 |
--------------------------------------------------------------------------------
/app_1_simple_stock_price/myapp2.py:
--------------------------------------------------------------------------------
1 | import yfinance as yf
2 | import streamlit as st
3 |
4 | st.write("""
5 | # Simple Stock Price App
6 |
7 | Shown are the stock **closing price** and ***volume*** of Google!
8 |
9 | """)
10 |
# https://towardsdatascience.com/how-to-get-stock-data-using-python-c0de1df17e75
# Define the ticker symbol
tickerSymbol = 'GOOGL'
# Get data on this ticker
tickerData = yf.Ticker(tickerSymbol)
# Get the historical daily prices for this ticker. The date range is fixed by
# start/end, so '1d' is the bar size (interval), not a look-back period.
tickerDf = tickerData.history(interval='1d', start='2010-5-31', end='2020-5-31')
# Open	High	Low	Close	Volume	Dividends	Stock Splits

st.write("""
## Closing Price
""")
st.line_chart(tickerDf.Close)
# The second chart plots traded volume, not a price.
st.write("""
## Volume
""")
st.line_chart(tickerDf.Volume)
28 |
--------------------------------------------------------------------------------
/app_2_simple_bioinformatics_dna/dna-app.py:
--------------------------------------------------------------------------------
1 | ######################
2 | # Import libraries
3 | ######################
4 |
5 | import pandas as pd
6 | import streamlit as st
7 | import altair as alt
8 | from PIL import Image
9 |
10 | ######################
11 | # Page Title
12 | ######################
13 |
14 | image = Image.open('dna-logo.jpg')
15 |
16 | st.image(image, use_column_width=True)
17 |
18 | st.write("""
19 | # DNA Nucleotide Count Web App
20 |
21 | This app counts the nucleotide composition of query DNA!
22 |
23 | ***
24 | """)
25 |
26 |
27 | ######################
28 | # Input Text Box
29 | ######################
30 |
31 | #st.sidebar.header('Enter DNA sequence')
32 | st.header('Enter DNA sequence')
33 |
34 | sequence_input = ">DNA Query 2\nGAACACGTGGAGGCAAACAGGAAGGTGAAGAAGAACTTATCCTATCAGGACGGAAGGTCCTGTGCTCGGG\nATCTTCCAGACGTCGCGACTCTAAATTGCCCCCTCTGAGGTCAAGGAACACAAGATGGTTTTGGAAATGC\nTGAACCCGATACATTATAACATCACCAGCATCGTGCCTGAAGCCATGCCTGCTGCCACCATGCCAGTCCT"
35 |
36 | #sequence = st.sidebar.text_area("Sequence input", sequence_input, height=250)
sequence = st.text_area("Sequence input", sequence_input, height=250)
# Drop FASTA header lines (those starting with '>') instead of blindly
# discarding the first line, so a bare sequence pasted without a header keeps
# its first line. The remaining lines are concatenated into one string.
sequence = ''.join(
    line.strip()
    for line in sequence.splitlines()
    if not line.lstrip().startswith('>')
)
41 |
42 | st.write("""
43 | ***
44 | """)
45 |
46 | ## Prints the input DNA sequence
47 | st.header('INPUT (DNA Query)')
48 | sequence
49 |
50 | ## DNA nucleotide count
51 | st.header('OUTPUT (DNA Nucleotide Count)')
52 |
53 | ### 1. Print dictionary
54 | st.subheader('1. Print dictionary')
def DNA_nucleotide_count(seq):
    """Count the A, T, G and C nucleotides in a DNA sequence.

    Counting is case-insensitive, so lower-case sequences are counted the
    same as upper-case ones. Any other characters are ignored.

    Args:
        seq: The DNA sequence as a string.

    Returns:
        A dict with the keys ``'A'``, ``'T'``, ``'G'`` and ``'C'`` (in that
        order) mapped to their occurrence counts.
    """
    seq = seq.upper()
    return {base: seq.count(base) for base in 'ATGC'}
63 |
64 | X = DNA_nucleotide_count(sequence)
65 |
66 | #X_label = list(X)
67 | #X_values = list(X.values())
68 |
69 | X
70 |
71 | ### 2. Print text
72 | st.subheader('2. Print text')
73 | st.write('There are ' + str(X['A']) + ' adenine (A)')
74 | st.write('There are ' + str(X['T']) + ' thymine (T)')
75 | st.write('There are ' + str(X['G']) + ' guanine (G)')
76 | st.write('There are ' + str(X['C']) + ' cytosine (C)')
77 |
78 | ### 3. Display DataFrame
79 | st.subheader('3. Display DataFrame')
80 | df = pd.DataFrame.from_dict(X, orient='index')
81 | df = df.rename({0: 'count'}, axis='columns')
82 | df.reset_index(inplace=True)
83 | df = df.rename(columns = {'index':'nucleotide'})
84 | st.write(df)
85 |
86 | ### 4. Display Bar Chart using Altair
87 | st.subheader('4. Display Bar chart')
88 | p = alt.Chart(df).mark_bar().encode(
89 | x='nucleotide',
90 | y='count'
91 | )
92 | p = p.properties(
93 | width=alt.Step(80) # controls width of bar.
94 | )
95 | st.write(p)
96 |
--------------------------------------------------------------------------------
/app_2_simple_bioinformatics_dna/dna-logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dataprofessor/streamlit_freecodecamp/d44c4c1320f8417b3d5494902e5366b715314528/app_2_simple_bioinformatics_dna/dna-logo.jpg
--------------------------------------------------------------------------------
/app_3_eda_basketball/basketball_app.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import pandas as pd
3 | import base64
4 | import matplotlib.pyplot as plt
5 | import seaborn as sns
6 | import numpy as np
7 |
8 | st.title('NBA Player Stats Explorer')
9 |
10 | st.markdown("""
11 | This app performs simple webscraping of NBA player stats data!
12 | * **Python libraries:** base64, pandas, streamlit
13 | * **Data source:** [Basketball-reference.com](https://www.basketball-reference.com/).
14 | """)
15 |
16 | st.sidebar.header('User Input Features')
17 | selected_year = st.sidebar.selectbox('Year', list(reversed(range(1950,2020))))
18 |
19 | # Web scraping of NBA player stats
@st.cache
def load_data(year):
    """Scrape the per-game NBA player stats table for one season.

    Reads the first HTML table on the Basketball-Reference per-game page for
    ``year``, removes the header rows repeated inside the table body, fills
    missing values with 0 and drops the ``Rk`` column.
    """
    url = "https://www.basketball-reference.com/leagues/NBA_" + str(year) + "_per_game.html"
    tables = pd.read_html(url, header=0)
    first_table = tables[0]
    # Rows whose Age cell is the literal text 'Age' are repeated headers.
    repeated_headers = first_table.index[first_table.Age == 'Age']
    cleaned = first_table.drop(repeated_headers).fillna(0)
    return cleaned.drop(columns=['Rk'])
29 | playerstats = load_data(selected_year)
30 |
31 | # Sidebar - Team selection
32 | sorted_unique_team = sorted(playerstats.Tm.unique())
33 | selected_team = st.sidebar.multiselect('Team', sorted_unique_team, sorted_unique_team)
34 |
35 | # Sidebar - Position selection
36 | unique_pos = ['C','PF','SF','PG','SG']
37 | selected_pos = st.sidebar.multiselect('Position', unique_pos, unique_pos)
38 |
39 | # Filtering data
40 | df_selected_team = playerstats[(playerstats.Tm.isin(selected_team)) & (playerstats.Pos.isin(selected_pos))]
41 |
42 | st.header('Display Player Stats of Selected Team(s)')
43 | st.write('Data Dimension: ' + str(df_selected_team.shape[0]) + ' rows and ' + str(df_selected_team.shape[1]) + ' columns.')
44 | st.dataframe(df_selected_team)
45 |
46 | # Download NBA player stats data
47 | # https://discuss.streamlit.io/t/how-to-download-file-in-streamlit/1806
def filedownload(df):
    """Build an HTML download link that embeds ``df`` as a base64 CSV.

    Args:
        df: DataFrame to export; the index is not written.

    Returns:
        An ``<a>`` tag whose ``href`` is a ``data:`` URI holding the CSV, to
        be rendered with ``st.markdown(..., unsafe_allow_html=True)``.
    """
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()  # strings <-> bytes conversions
    # The encoded CSV must be embedded in the anchor; plain text alone gives
    # the user nothing to click.
    href = f'<a href="data:file/csv;base64,{b64}" download="playerstats.csv">Download CSV File</a>'
    return href
53 |
54 | st.markdown(filedownload(df_selected_team), unsafe_allow_html=True)
55 |
# Heatmap
if st.button('Intercorrelation Heatmap'):
    st.header('Intercorrelation Matrix Heatmap')
    # Round-trip through CSV, presumably so pandas re-infers column dtypes
    # for the scraped table.
    df_selected_team.to_csv('output.csv',index=False)
    df = pd.read_csv('output.csv')

    # Correlate numeric columns only: text columns such as player or team
    # names cannot be correlated.
    corr = df.select_dtypes(include=['number', 'bool']).corr()
    # Hide the upper triangle (including the diagonal) of the symmetric matrix.
    mask = np.zeros_like(corr)
    mask[np.triu_indices_from(mask)] = True
    with sns.axes_style("white"):
        f, ax = plt.subplots(figsize=(7, 5))
        ax = sns.heatmap(corr, mask=mask, vmax=1, square=True)
    # Pass the figure explicitly instead of relying on global pyplot state.
    st.pyplot(f)
69 |
--------------------------------------------------------------------------------
/app_4_eda_football/football_app.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import pandas as pd
3 | import base64
4 | import matplotlib.pyplot as plt
5 | import seaborn as sns
6 | import numpy as np
7 |
8 | st.title('NFL Football Stats (Rushing) Explorer')
9 |
10 | st.markdown("""
11 | This app performs simple webscraping of NFL Football player stats data (focusing on Rushing)!
12 | * **Python libraries:** base64, pandas, streamlit, numpy, matplotlib, seaborn
13 | * **Data source:** [pro-football-reference.com](https://www.pro-football-reference.com/).
14 | """)
15 |
16 | st.sidebar.header('User Input Features')
17 | selected_year = st.sidebar.selectbox('Year', list(reversed(range(1990,2020))))
18 |
19 | # Web scraping of NFL player stats
20 | # https://www.pro-football-reference.com/years/2019/rushing.htm
@st.cache
def load_data(year):
    """Scrape the NFL rushing stats table for one season.

    Reads the first HTML table on the Pro-Football-Reference rushing page for
    ``year`` (using the second header row as column names), removes the header
    rows repeated inside the table body, fills missing values with 0 and drops
    the ``Rk`` column.
    """
    url = "https://www.pro-football-reference.com/years/" + str(year) + "/rushing.htm"
    tables = pd.read_html(url, header=1)
    first_table = tables[0]
    # Rows whose Age cell is the literal text 'Age' are repeated headers.
    repeated_headers = first_table.index[first_table.Age == 'Age']
    cleaned = first_table.drop(repeated_headers).fillna(0)
    return cleaned.drop(columns=['Rk'])
30 | playerstats = load_data(selected_year)
31 |
32 | # Sidebar - Team selection
33 | sorted_unique_team = sorted(playerstats.Tm.unique())
34 | selected_team = st.sidebar.multiselect('Team', sorted_unique_team, sorted_unique_team)
35 |
36 | # Sidebar - Position selection
37 | unique_pos = ['RB','QB','WR','FB','TE']
38 | selected_pos = st.sidebar.multiselect('Position', unique_pos, unique_pos)
39 |
40 | # Filtering data
41 | df_selected_team = playerstats[(playerstats.Tm.isin(selected_team)) & (playerstats.Pos.isin(selected_pos))]
42 |
43 | st.header('Display Player Stats of Selected Team(s)')
44 | st.write('Data Dimension: ' + str(df_selected_team.shape[0]) + ' rows and ' + str(df_selected_team.shape[1]) + ' columns.')
45 | st.dataframe(df_selected_team)
46 |
47 | # Download NFL player stats data
48 | # https://discuss.streamlit.io/t/how-to-download-file-in-streamlit/1806
def filedownload(df):
    """Build an HTML download link that embeds ``df`` as a base64 CSV.

    Args:
        df: DataFrame to export; the index is not written.

    Returns:
        An ``<a>`` tag whose ``href`` is a ``data:`` URI holding the CSV, to
        be rendered with ``st.markdown(..., unsafe_allow_html=True)``.
    """
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()  # strings <-> bytes conversions
    # The encoded CSV must be embedded in the anchor; plain text alone gives
    # the user nothing to click.
    href = f'<a href="data:file/csv;base64,{b64}" download="playerstats.csv">Download CSV File</a>'
    return href
54 |
55 | st.markdown(filedownload(df_selected_team), unsafe_allow_html=True)
56 |
# Heatmap
if st.button('Intercorrelation Heatmap'):
    st.header('Intercorrelation Matrix Heatmap')
    # Round-trip through CSV, presumably so pandas re-infers column dtypes
    # for the scraped table.
    df_selected_team.to_csv('output.csv',index=False)
    df = pd.read_csv('output.csv')

    # Correlate numeric columns only: text columns such as player or team
    # names cannot be correlated.
    corr = df.select_dtypes(include=['number', 'bool']).corr()
    # Hide the upper triangle (including the diagonal) of the symmetric matrix.
    mask = np.zeros_like(corr)
    mask[np.triu_indices_from(mask)] = True
    with sns.axes_style("white"):
        f, ax = plt.subplots(figsize=(7, 5))
        ax = sns.heatmap(corr, mask=mask, vmax=1, square=True)
    # Pass the figure explicitly instead of relying on global pyplot state.
    st.pyplot(f)
70 |
--------------------------------------------------------------------------------
/app_5_eda_sp500_stock/sp500-app.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import pandas as pd
3 | import base64
4 | import matplotlib.pyplot as plt
5 | import seaborn as sns
6 | import numpy as np
7 | import yfinance as yf
8 |
9 | st.title('S&P 500 App')
10 |
11 | st.markdown("""
12 | This app retrieves the list of the **S&P 500** (from Wikipedia) and its corresponding **stock closing price** (year-to-date)!
13 | * **Python libraries:** base64, pandas, streamlit, numpy, matplotlib, seaborn
14 | * **Data source:** [Wikipedia](https://en.wikipedia.org/wiki/List_of_S%26P_500_companies).
15 | """)
16 |
17 | st.sidebar.header('User Input Features')
18 |
# Scrape the S&P 500 constituents table from Wikipedia
@st.cache
def load_data():
    """Fetch the list of S&P 500 companies.

    Parses the HTML tables on the Wikipedia page (first row used as the
    header) and returns the first table found.
    """
    wiki_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    tables = pd.read_html(wiki_url, header=0)
    return tables[0]
27 |
28 | df = load_data()
29 | sector = df.groupby('GICS Sector')
30 |
31 | # Sidebar - Sector selection
32 | sorted_sector_unique = sorted( df['GICS Sector'].unique() )
33 | selected_sector = st.sidebar.multiselect('Sector', sorted_sector_unique, sorted_sector_unique)
34 |
35 | # Filtering data
36 | df_selected_sector = df[ (df['GICS Sector'].isin(selected_sector)) ]
37 |
38 | st.header('Display Companies in Selected Sector')
39 | st.write('Data Dimension: ' + str(df_selected_sector.shape[0]) + ' rows and ' + str(df_selected_sector.shape[1]) + ' columns.')
40 | st.dataframe(df_selected_sector)
41 |
# Download S&P500 data
# https://discuss.streamlit.io/t/how-to-download-file-in-streamlit/1806
def filedownload(df):
    """Return an HTML download link that carries ``df`` as a CSV file.

    The CSV text is base64-encoded and embedded in a ``data:`` URI, so no
    file has to be written on the server. The returned markup must be
    rendered as raw HTML to be clickable.

    Args:
        df: DataFrame to export; the index is not written.

    Returns:
        str: an ``<a>`` element whose ``href`` holds the encoded CSV.
    """
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()  # str -> bytes -> base64 str
    href = f'<a href="data:file/csv;base64,{b64}" download="SP500.csv">Download CSV File</a>'
    return href
49 |
50 | st.markdown(filedownload(df_selected_sector), unsafe_allow_html=True)
51 |
52 | # https://pypi.org/project/yfinance/
53 |
54 | data = yf.download(
55 | tickers = list(df_selected_sector[:10].Symbol),
56 | period = "ytd",
57 | interval = "1d",
58 | group_by = 'ticker',
59 | auto_adjust = True,
60 | prepost = True,
61 | threads = True,
62 | proxy = None
63 | )
64 |
# Plot Closing Price of Query Symbol
def price_plot(symbol):
    """Render a filled line chart of one ticker's closing price.

    Args:
        symbol: Ticker used to select that ticker's columns from the
            module-level ``data`` frame.

    Returns:
        The value returned by ``st.pyplot`` for the rendered figure.
    """
    df = pd.DataFrame(data[symbol].Close)
    df['Date'] = df.index
    # Draw on a dedicated figure rather than the implicit global pyplot
    # figure, so each call produces an independent chart.
    fig, ax = plt.subplots()
    ax.fill_between(df.Date, df.Close, color='skyblue', alpha=0.3)
    ax.plot(df.Date, df.Close, color='skyblue', alpha=0.8)
    ax.tick_params(axis='x', labelrotation=90)
    ax.set_title(symbol, fontweight='bold')
    ax.set_xlabel('Date', fontweight='bold')
    ax.set_ylabel('Closing Price', fontweight='bold')
    rendered = st.pyplot(fig)
    # Release the figure so repeated reruns do not accumulate open figures.
    plt.close(fig)
    return rendered
76 |
77 | num_company = st.sidebar.slider('Number of Companies', 1, 5)
78 |
79 | if st.button('Show Plots'):
80 | st.header('Stock Closing Price')
81 | for i in list(df_selected_sector.Symbol)[:num_company]:
82 | price_plot(i)
83 |
--------------------------------------------------------------------------------
/app_6_eda_cryptocurrency/crypto-price-app.py:
--------------------------------------------------------------------------------
1 | # This app is for educational purpose only. Insights gained is not financial advice. Use at your own risk!
2 | import streamlit as st
3 | from PIL import Image
4 | import pandas as pd
5 | import base64
6 | import matplotlib.pyplot as plt
7 | from bs4 import BeautifulSoup
8 | import requests
9 | import json
10 | import time
11 | #---------------------------------#
12 | # New feature (make sure to upgrade your streamlit library)
13 | # pip install --upgrade streamlit
14 |
15 | #---------------------------------#
16 | # Page layout
17 | ## Page expands to full width
18 | st.set_page_config(layout="wide")
19 | #---------------------------------#
20 | # Title
21 |
22 | image = Image.open('logo.jpg')
23 |
24 | st.image(image, width = 500)
25 |
26 | st.title('Crypto Price App')
27 | st.markdown("""
28 | This app retrieves cryptocurrency prices for the top 100 cryptocurrency from the **CoinMarketCap**!
29 |
30 | """)
31 | #---------------------------------#
32 | # About
33 | expander_bar = st.beta_expander("About")
34 | expander_bar.markdown("""
35 | * **Python libraries:** base64, pandas, streamlit, numpy, matplotlib, seaborn, BeautifulSoup, requests, json, time
36 | * **Data source:** [CoinMarketCap](http://coinmarketcap.com).
37 | * **Credit:** Web scraper adapted from the Medium article *[Web Scraping Crypto Prices With Python](https://towardsdatascience.com/web-scraping-crypto-prices-with-python-41072ea5b5bf)* written by [Bryan Feng](https://medium.com/@bryanf).
38 | """)
39 |
40 |
41 | #---------------------------------#
42 | # Page layout (continued)
43 | ## Divide page to 3 columns (col1 = sidebar, col2 and col3 = page contents)
44 | col1 = st.sidebar
45 | col2, col3 = st.beta_columns((2,1))
46 |
47 | #---------------------------------#
48 | # Sidebar + Main panel
49 | col1.header('Input Options')
50 |
51 | ## Sidebar - Currency price unit
52 | currency_price_unit = col1.selectbox('Select currency for price', ('USD', 'BTC', 'ETH'))
53 |
# Web scraping of CoinMarketCap data
@st.cache
def load_data():
    """Scrape the CoinMarketCap front page into a DataFrame of coin quotes.

    The listing data is read from the JSON embedded in the page's
    ``__NEXT_DATA__`` script tag. Each listing's quote is taken in the
    currency held by the module-level ``currency_price_unit``.

    Returns:
        pandas.DataFrame: one row per listing with the columns
        ``coin_name``, ``coin_symbol``, ``market_cap``,
        ``percent_change_1h``, ``percent_change_24h``,
        ``percent_change_7d``, ``price`` and ``volume_24h``.

    Raises:
        RuntimeError: if the page does not contain the expected JSON tag.
    """
    # A timeout keeps a stalled connection from hanging the app forever.
    cmc = requests.get('https://coinmarketcap.com', timeout=30)
    soup = BeautifulSoup(cmc.content, 'html.parser')

    data = soup.find('script', id='__NEXT_DATA__', type='application/json')
    if data is None:
        raise RuntimeError('CoinMarketCap page has no __NEXT_DATA__ JSON script tag')
    coin_data = json.loads(data.contents[0])
    listings = coin_data['props']['initialState']['cryptocurrency']['listingLatest']['data']

    columns = ['coin_name', 'coin_symbol', 'market_cap', 'percent_change_1h',
               'percent_change_24h', 'percent_change_7d', 'price', 'volume_24h']
    quote_fields = columns[2:]  # every column after name/symbol comes from the quote

    rows = []
    for listing in listings:
        quote = listing['quote'][currency_price_unit]
        row = {'coin_name': listing['slug'], 'coin_symbol': listing['symbol']}
        row.update({field: quote[field] for field in quote_fields})
        rows.append(row)

    # Passing ``columns`` fixes the column order and keeps the headers even
    # when there are no listings.
    return pd.DataFrame(rows, columns=columns)
96 |
97 | df = load_data()
98 |
99 | ## Sidebar - Cryptocurrency selections
100 | sorted_coin = sorted( df['coin_symbol'] )
101 | selected_coin = col1.multiselect('Cryptocurrency', sorted_coin, sorted_coin)
102 |
103 | df_selected_coin = df[ (df['coin_symbol'].isin(selected_coin)) ] # Filtering data
104 |
105 | ## Sidebar - Number of coins to display
106 | num_coin = col1.slider('Display Top N Coins', 1, 100, 100)
107 | df_coins = df_selected_coin[:num_coin]
108 |
109 | ## Sidebar - Percent change timeframe
110 | percent_timeframe = col1.selectbox('Percent change time frame',
111 | ['7d','24h', '1h'])
112 | percent_dict = {"7d":'percent_change_7d',"24h":'percent_change_24h',"1h":'percent_change_1h'}
113 | selected_percent_timeframe = percent_dict[percent_timeframe]
114 |
115 | ## Sidebar - Sorting values
116 | sort_values = col1.selectbox('Sort values?', ['Yes', 'No'])
117 |
118 | col2.subheader('Price Data of Selected Cryptocurrency')
119 | col2.write('Data Dimension: ' + str(df_selected_coin.shape[0]) + ' rows and ' + str(df_selected_coin.shape[1]) + ' columns.')
120 |
121 | col2.dataframe(df_coins)
122 |
# Download CSV data
# https://discuss.streamlit.io/t/how-to-download-file-in-streamlit/1806
def filedownload(df):
    """Return an HTML download link that carries ``df`` as a CSV file.

    The CSV text is base64-encoded and embedded in a ``data:`` URI, so no
    file has to be written on the server. The returned markup must be
    rendered as raw HTML to be clickable.

    Args:
        df: DataFrame to export; the index is not written.

    Returns:
        str: an ``<a>`` element whose ``href`` holds the encoded CSV.
    """
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()  # str -> bytes -> base64 str
    href = f'<a href="data:file/csv;base64,{b64}" download="crypto.csv">Download CSV File</a>'
    return href
130 |
131 | col2.markdown(filedownload(df_selected_coin), unsafe_allow_html=True)
132 |
133 | #---------------------------------#
134 | # Preparing data for Bar plot of % Price change
135 | col2.subheader('Table of % Price Change')
136 | df_change = pd.concat([df_coins.coin_symbol, df_coins.percent_change_1h, df_coins.percent_change_24h, df_coins.percent_change_7d], axis=1)
137 | df_change = df_change.set_index('coin_symbol')
138 | df_change['positive_percent_change_1h'] = df_change['percent_change_1h'] > 0
139 | df_change['positive_percent_change_24h'] = df_change['percent_change_24h'] > 0
140 | df_change['positive_percent_change_7d'] = df_change['percent_change_7d'] > 0
141 | col2.dataframe(df_change)
142 |
# Conditional creation of Bar plot (time frame)
col3.subheader('Bar plot of % Price Change')

# The three time frames differ only in the column they plot and the caption
# shown, so one code path parameterised by ``percent_timeframe`` serves all.
period_captions = {
    '7d': '*7 days period*',
    '24h': '*24 hour period*',
    '1h': '*1 hour period*',
}
change_column = 'percent_change_' + percent_timeframe
if sort_values == 'Yes':
    df_change = df_change.sort_values(by=[change_column])
col3.write(period_captions[percent_timeframe])
fig = plt.figure(figsize=(5, 25))
plt.subplots_adjust(top=1, bottom=0)
# Green bars for gains, red bars for losses.
bar_colors = df_change['positive_' + change_column].map({True: 'g', False: 'r'})
df_change[change_column].plot(kind='barh', color=bar_colors)
# Pass the Figure itself rather than the pyplot module.
col3.pyplot(fig)
170 |
--------------------------------------------------------------------------------
/app_6_eda_cryptocurrency/logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dataprofessor/streamlit_freecodecamp/d44c4c1320f8417b3d5494902e5366b715314528/app_6_eda_cryptocurrency/logo.jpg
--------------------------------------------------------------------------------
/app_7_classification_iris/iris-ml-app.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import pandas as pd
3 | from sklearn import datasets
4 | from sklearn.ensemble import RandomForestClassifier
5 |
6 | st.write("""
7 | # Simple Iris Flower Prediction App
8 |
9 | This app predicts the **Iris flower** type!
10 | """)
11 |
12 | st.sidebar.header('User Input Parameters')
13 |
def user_input_features():
    """Read the four iris measurements from sidebar sliders.

    Returns:
        A single-row DataFrame (index 0) with the columns ``sepal_length``,
        ``sepal_width``, ``petal_length`` and ``petal_width``.
    """
    # (column name, slider label, minimum, maximum, initial value)
    slider_specs = (
        ('sepal_length', 'Sepal length', 4.3, 7.9, 5.4),
        ('sepal_width', 'Sepal width', 2.0, 4.4, 3.4),
        ('petal_length', 'Petal length', 1.0, 6.9, 1.3),
        ('petal_width', 'Petal width', 0.1, 2.5, 0.2),
    )
    data = {
        column: st.sidebar.slider(label, low, high, initial)
        for column, label, low, high, initial in slider_specs
    }
    return pd.DataFrame(data, index=[0])
25 |
26 | df = user_input_features()
27 |
28 | st.subheader('User Input parameters')
29 | st.write(df)
30 |
31 | iris = datasets.load_iris()
32 | X = iris.data
33 | Y = iris.target
34 |
35 | clf = RandomForestClassifier()
36 | clf.fit(X, Y)
37 |
38 | prediction = clf.predict(df)
39 | prediction_proba = clf.predict_proba(df)
40 |
41 | st.subheader('Class labels and their corresponding index number')
42 | st.write(iris.target_names)
43 |
44 | st.subheader('Prediction')
45 | st.write(iris.target_names[prediction])
46 | #st.write(prediction)
47 |
48 | st.subheader('Prediction Probability')
49 | st.write(prediction_proba)
50 |
--------------------------------------------------------------------------------
/app_8_classification_penguins/penguins-app.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import pandas as pd
3 | import numpy as np
4 | import pickle
5 | from sklearn.ensemble import RandomForestClassifier
6 |
7 | st.write("""
8 | # Penguin Prediction App
9 |
10 | This app predicts the **Palmer Penguin** species!
11 |
12 | Data obtained from the [palmerpenguins library](https://github.com/allisonhorst/palmerpenguins) in R by Allison Horst.
13 | """)
14 |
15 | st.sidebar.header('User Input Features')
16 |
17 | st.sidebar.markdown("""
18 | [Example CSV input file](https://raw.githubusercontent.com/dataprofessor/data/master/penguins_example.csv)
19 | """)
20 |
# Collects user input features into dataframe:
# an uploaded CSV takes precedence, otherwise the sidebar widgets are used.
uploaded_file = st.sidebar.file_uploader("Upload your input CSV file", type=["csv"])
if uploaded_file is None:
    def user_input_features():
        """Build a one-row DataFrame from the sidebar widgets."""
        sidebar = st.sidebar
        island = sidebar.selectbox('Island', ('Biscoe', 'Dream', 'Torgersen'))
        sex = sidebar.selectbox('Sex', ('male', 'female'))
        bill_length_mm = sidebar.slider('Bill length (mm)', 32.1, 59.6, 43.9)
        bill_depth_mm = sidebar.slider('Bill depth (mm)', 13.1, 21.5, 17.2)
        flipper_length_mm = sidebar.slider('Flipper length (mm)', 172.0, 231.0, 201.0)
        body_mass_g = sidebar.slider('Body mass (g)', 2700.0, 6300.0, 4207.0)
        return pd.DataFrame(
            {
                'island': island,
                'bill_length_mm': bill_length_mm,
                'bill_depth_mm': bill_depth_mm,
                'flipper_length_mm': flipper_length_mm,
                'body_mass_g': body_mass_g,
                'sex': sex,
            },
            index=[0],
        )
    input_df = user_input_features()
else:
    input_df = pd.read_csv(uploaded_file)
42 |
# Combines user input features with entire penguins dataset
# This will be useful for the encoding phase: the full dataset guarantees that
# every category of 'sex' and 'island' gets its own dummy column.
penguins_raw = pd.read_csv('penguins_cleaned.csv')
penguins = penguins_raw.drop(columns=['species'])
df = pd.concat([input_df,penguins],axis=0)

# Encoding of ordinal features
# https://www.kaggle.com/pratik1120/penguin-dataset-eda-classification-and-clustering
encode = ['sex','island']
for col in encode:
    dummy = pd.get_dummies(df[col], prefix=col)
    df = pd.concat([df,dummy], axis=1)
    del df[col]
# Keep only the leading user-input rows. An uploaded CSV may contain more
# than one row, so slice by the input length instead of a fixed 1.
df = df[:len(input_df)]
57 |
58 | # Displays the user input features
59 | st.subheader('User Input features')
60 |
61 | if uploaded_file is not None:
62 | st.write(df)
63 | else:
64 | st.write('Awaiting CSV file to be uploaded. Currently using example input parameters (shown below).')
65 | st.write(df)
66 |
# Reads in saved classification model
# NOTE: unpickling can execute arbitrary code; only load model files you trust.
# The context manager closes the file handle once the model is loaded.
with open('penguins_clf.pkl', 'rb') as model_file:
    load_clf = pickle.load(model_file)
69 |
70 | # Apply model to make predictions
71 | prediction = load_clf.predict(df)
72 | prediction_proba = load_clf.predict_proba(df)
73 |
74 |
75 | st.subheader('Prediction')
76 | penguins_species = np.array(['Adelie','Chinstrap','Gentoo'])
77 | st.write(penguins_species[prediction])
78 |
79 | st.subheader('Prediction Probability')
80 | st.write(prediction_proba)
81 |
--------------------------------------------------------------------------------
/app_8_classification_penguins/penguins-model-building.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | penguins = pd.read_csv('penguins_cleaned.csv')
3 |
4 | # Ordinal feature encoding
5 | # https://www.kaggle.com/pratik1120/penguin-dataset-eda-classification-and-clustering
6 | df = penguins.copy()
7 | target = 'species'
8 | encode = ['sex','island']
9 |
10 | for col in encode:
11 | dummy = pd.get_dummies(df[col], prefix=col)
12 | df = pd.concat([df,dummy], axis=1)
13 | del df[col]
14 |
# Integer class label assigned to each penguin species.
target_mapper = dict(zip(('Adelie', 'Chinstrap', 'Gentoo'), range(3)))


def target_encode(val):
    """Return the integer label that ``target_mapper`` holds for ``val``.

    Raises:
        KeyError: if ``val`` is not a key of ``target_mapper``.
    """
    label = target_mapper[val]
    return label
18 |
19 | df['species'] = df['species'].apply(target_encode)
20 |
21 | # Separating X and y
22 | X = df.drop('species', axis=1)
23 | Y = df['species']
24 |
25 | # Build random forest model
26 | from sklearn.ensemble import RandomForestClassifier
27 | clf = RandomForestClassifier()
28 | clf.fit(X, Y)
29 |
# Saving the model
import pickle
# The context manager flushes and closes the file as soon as the dump
# finishes instead of leaving that to garbage collection.
with open('penguins_clf.pkl', 'wb') as model_file:
    pickle.dump(clf, model_file)
33 |
--------------------------------------------------------------------------------
/app_8_classification_penguins/penguins_cleaned.csv:
--------------------------------------------------------------------------------
1 | "species","island","bill_length_mm","bill_depth_mm","flipper_length_mm","body_mass_g","sex"
2 | "Adelie","Torgersen",39.1,18.7,181,3750,"male"
3 | "Adelie","Torgersen",39.5,17.4,186,3800,"female"
4 | "Adelie","Torgersen",40.3,18,195,3250,"female"
5 | "Adelie","Torgersen",36.7,19.3,193,3450,"female"
6 | "Adelie","Torgersen",39.3,20.6,190,3650,"male"
7 | "Adelie","Torgersen",38.9,17.8,181,3625,"female"
8 | "Adelie","Torgersen",39.2,19.6,195,4675,"male"
9 | "Adelie","Torgersen",41.1,17.6,182,3200,"female"
10 | "Adelie","Torgersen",38.6,21.2,191,3800,"male"
11 | "Adelie","Torgersen",34.6,21.1,198,4400,"male"
12 | "Adelie","Torgersen",36.6,17.8,185,3700,"female"
13 | "Adelie","Torgersen",38.7,19,195,3450,"female"
14 | "Adelie","Torgersen",42.5,20.7,197,4500,"male"
15 | "Adelie","Torgersen",34.4,18.4,184,3325,"female"
16 | "Adelie","Torgersen",46,21.5,194,4200,"male"
17 | "Adelie","Biscoe",37.8,18.3,174,3400,"female"
18 | "Adelie","Biscoe",37.7,18.7,180,3600,"male"
19 | "Adelie","Biscoe",35.9,19.2,189,3800,"female"
20 | "Adelie","Biscoe",38.2,18.1,185,3950,"male"
21 | "Adelie","Biscoe",38.8,17.2,180,3800,"male"
22 | "Adelie","Biscoe",35.3,18.9,187,3800,"female"
23 | "Adelie","Biscoe",40.6,18.6,183,3550,"male"
24 | "Adelie","Biscoe",40.5,17.9,187,3200,"female"
25 | "Adelie","Biscoe",37.9,18.6,172,3150,"female"
26 | "Adelie","Biscoe",40.5,18.9,180,3950,"male"
27 | "Adelie","Dream",39.5,16.7,178,3250,"female"
28 | "Adelie","Dream",37.2,18.1,178,3900,"male"
29 | "Adelie","Dream",39.5,17.8,188,3300,"female"
30 | "Adelie","Dream",40.9,18.9,184,3900,"male"
31 | "Adelie","Dream",36.4,17,195,3325,"female"
32 | "Adelie","Dream",39.2,21.1,196,4150,"male"
33 | "Adelie","Dream",38.8,20,190,3950,"male"
34 | "Adelie","Dream",42.2,18.5,180,3550,"female"
35 | "Adelie","Dream",37.6,19.3,181,3300,"female"
36 | "Adelie","Dream",39.8,19.1,184,4650,"male"
37 | "Adelie","Dream",36.5,18,182,3150,"female"
38 | "Adelie","Dream",40.8,18.4,195,3900,"male"
39 | "Adelie","Dream",36,18.5,186,3100,"female"
40 | "Adelie","Dream",44.1,19.7,196,4400,"male"
41 | "Adelie","Dream",37,16.9,185,3000,"female"
42 | "Adelie","Dream",39.6,18.8,190,4600,"male"
43 | "Adelie","Dream",41.1,19,182,3425,"male"
44 | "Adelie","Dream",36,17.9,190,3450,"female"
45 | "Adelie","Dream",42.3,21.2,191,4150,"male"
46 | "Adelie","Biscoe",39.6,17.7,186,3500,"female"
47 | "Adelie","Biscoe",40.1,18.9,188,4300,"male"
48 | "Adelie","Biscoe",35,17.9,190,3450,"female"
49 | "Adelie","Biscoe",42,19.5,200,4050,"male"
50 | "Adelie","Biscoe",34.5,18.1,187,2900,"female"
51 | "Adelie","Biscoe",41.4,18.6,191,3700,"male"
52 | "Adelie","Biscoe",39,17.5,186,3550,"female"
53 | "Adelie","Biscoe",40.6,18.8,193,3800,"male"
54 | "Adelie","Biscoe",36.5,16.6,181,2850,"female"
55 | "Adelie","Biscoe",37.6,19.1,194,3750,"male"
56 | "Adelie","Biscoe",35.7,16.9,185,3150,"female"
57 | "Adelie","Biscoe",41.3,21.1,195,4400,"male"
58 | "Adelie","Biscoe",37.6,17,185,3600,"female"
59 | "Adelie","Biscoe",41.1,18.2,192,4050,"male"
60 | "Adelie","Biscoe",36.4,17.1,184,2850,"female"
61 | "Adelie","Biscoe",41.6,18,192,3950,"male"
62 | "Adelie","Biscoe",35.5,16.2,195,3350,"female"
63 | "Adelie","Biscoe",41.1,19.1,188,4100,"male"
64 | "Adelie","Torgersen",35.9,16.6,190,3050,"female"
65 | "Adelie","Torgersen",41.8,19.4,198,4450,"male"
66 | "Adelie","Torgersen",33.5,19,190,3600,"female"
67 | "Adelie","Torgersen",39.7,18.4,190,3900,"male"
68 | "Adelie","Torgersen",39.6,17.2,196,3550,"female"
69 | "Adelie","Torgersen",45.8,18.9,197,4150,"male"
70 | "Adelie","Torgersen",35.5,17.5,190,3700,"female"
71 | "Adelie","Torgersen",42.8,18.5,195,4250,"male"
72 | "Adelie","Torgersen",40.9,16.8,191,3700,"female"
73 | "Adelie","Torgersen",37.2,19.4,184,3900,"male"
74 | "Adelie","Torgersen",36.2,16.1,187,3550,"female"
75 | "Adelie","Torgersen",42.1,19.1,195,4000,"male"
76 | "Adelie","Torgersen",34.6,17.2,189,3200,"female"
77 | "Adelie","Torgersen",42.9,17.6,196,4700,"male"
78 | "Adelie","Torgersen",36.7,18.8,187,3800,"female"
79 | "Adelie","Torgersen",35.1,19.4,193,4200,"male"
80 | "Adelie","Dream",37.3,17.8,191,3350,"female"
81 | "Adelie","Dream",41.3,20.3,194,3550,"male"
82 | "Adelie","Dream",36.3,19.5,190,3800,"male"
83 | "Adelie","Dream",36.9,18.6,189,3500,"female"
84 | "Adelie","Dream",38.3,19.2,189,3950,"male"
85 | "Adelie","Dream",38.9,18.8,190,3600,"female"
86 | "Adelie","Dream",35.7,18,202,3550,"female"
87 | "Adelie","Dream",41.1,18.1,205,4300,"male"
88 | "Adelie","Dream",34,17.1,185,3400,"female"
89 | "Adelie","Dream",39.6,18.1,186,4450,"male"
90 | "Adelie","Dream",36.2,17.3,187,3300,"female"
91 | "Adelie","Dream",40.8,18.9,208,4300,"male"
92 | "Adelie","Dream",38.1,18.6,190,3700,"female"
93 | "Adelie","Dream",40.3,18.5,196,4350,"male"
94 | "Adelie","Dream",33.1,16.1,178,2900,"female"
95 | "Adelie","Dream",43.2,18.5,192,4100,"male"
96 | "Adelie","Biscoe",35,17.9,192,3725,"female"
97 | "Adelie","Biscoe",41,20,203,4725,"male"
98 | "Adelie","Biscoe",37.7,16,183,3075,"female"
99 | "Adelie","Biscoe",37.8,20,190,4250,"male"
100 | "Adelie","Biscoe",37.9,18.6,193,2925,"female"
101 | "Adelie","Biscoe",39.7,18.9,184,3550,"male"
102 | "Adelie","Biscoe",38.6,17.2,199,3750,"female"
103 | "Adelie","Biscoe",38.2,20,190,3900,"male"
104 | "Adelie","Biscoe",38.1,17,181,3175,"female"
105 | "Adelie","Biscoe",43.2,19,197,4775,"male"
106 | "Adelie","Biscoe",38.1,16.5,198,3825,"female"
107 | "Adelie","Biscoe",45.6,20.3,191,4600,"male"
108 | "Adelie","Biscoe",39.7,17.7,193,3200,"female"
109 | "Adelie","Biscoe",42.2,19.5,197,4275,"male"
110 | "Adelie","Biscoe",39.6,20.7,191,3900,"female"
111 | "Adelie","Biscoe",42.7,18.3,196,4075,"male"
112 | "Adelie","Torgersen",38.6,17,188,2900,"female"
113 | "Adelie","Torgersen",37.3,20.5,199,3775,"male"
114 | "Adelie","Torgersen",35.7,17,189,3350,"female"
115 | "Adelie","Torgersen",41.1,18.6,189,3325,"male"
116 | "Adelie","Torgersen",36.2,17.2,187,3150,"female"
117 | "Adelie","Torgersen",37.7,19.8,198,3500,"male"
118 | "Adelie","Torgersen",40.2,17,176,3450,"female"
119 | "Adelie","Torgersen",41.4,18.5,202,3875,"male"
120 | "Adelie","Torgersen",35.2,15.9,186,3050,"female"
121 | "Adelie","Torgersen",40.6,19,199,4000,"male"
122 | "Adelie","Torgersen",38.8,17.6,191,3275,"female"
123 | "Adelie","Torgersen",41.5,18.3,195,4300,"male"
124 | "Adelie","Torgersen",39,17.1,191,3050,"female"
125 | "Adelie","Torgersen",44.1,18,210,4000,"male"
126 | "Adelie","Torgersen",38.5,17.9,190,3325,"female"
127 | "Adelie","Torgersen",43.1,19.2,197,3500,"male"
128 | "Adelie","Dream",36.8,18.5,193,3500,"female"
129 | "Adelie","Dream",37.5,18.5,199,4475,"male"
130 | "Adelie","Dream",38.1,17.6,187,3425,"female"
131 | "Adelie","Dream",41.1,17.5,190,3900,"male"
132 | "Adelie","Dream",35.6,17.5,191,3175,"female"
133 | "Adelie","Dream",40.2,20.1,200,3975,"male"
134 | "Adelie","Dream",37,16.5,185,3400,"female"
135 | "Adelie","Dream",39.7,17.9,193,4250,"male"
136 | "Adelie","Dream",40.2,17.1,193,3400,"female"
137 | "Adelie","Dream",40.6,17.2,187,3475,"male"
138 | "Adelie","Dream",32.1,15.5,188,3050,"female"
139 | "Adelie","Dream",40.7,17,190,3725,"male"
140 | "Adelie","Dream",37.3,16.8,192,3000,"female"
141 | "Adelie","Dream",39,18.7,185,3650,"male"
142 | "Adelie","Dream",39.2,18.6,190,4250,"male"
143 | "Adelie","Dream",36.6,18.4,184,3475,"female"
144 | "Adelie","Dream",36,17.8,195,3450,"female"
145 | "Adelie","Dream",37.8,18.1,193,3750,"male"
146 | "Adelie","Dream",36,17.1,187,3700,"female"
147 | "Adelie","Dream",41.5,18.5,201,4000,"male"
148 | "Gentoo","Biscoe",46.1,13.2,211,4500,"female"
149 | "Gentoo","Biscoe",50,16.3,230,5700,"male"
150 | "Gentoo","Biscoe",48.7,14.1,210,4450,"female"
151 | "Gentoo","Biscoe",50,15.2,218,5700,"male"
152 | "Gentoo","Biscoe",47.6,14.5,215,5400,"male"
153 | "Gentoo","Biscoe",46.5,13.5,210,4550,"female"
154 | "Gentoo","Biscoe",45.4,14.6,211,4800,"female"
155 | "Gentoo","Biscoe",46.7,15.3,219,5200,"male"
156 | "Gentoo","Biscoe",43.3,13.4,209,4400,"female"
157 | "Gentoo","Biscoe",46.8,15.4,215,5150,"male"
158 | "Gentoo","Biscoe",40.9,13.7,214,4650,"female"
159 | "Gentoo","Biscoe",49,16.1,216,5550,"male"
160 | "Gentoo","Biscoe",45.5,13.7,214,4650,"female"
161 | "Gentoo","Biscoe",48.4,14.6,213,5850,"male"
162 | "Gentoo","Biscoe",45.8,14.6,210,4200,"female"
163 | "Gentoo","Biscoe",49.3,15.7,217,5850,"male"
164 | "Gentoo","Biscoe",42,13.5,210,4150,"female"
165 | "Gentoo","Biscoe",49.2,15.2,221,6300,"male"
166 | "Gentoo","Biscoe",46.2,14.5,209,4800,"female"
167 | "Gentoo","Biscoe",48.7,15.1,222,5350,"male"
168 | "Gentoo","Biscoe",50.2,14.3,218,5700,"male"
169 | "Gentoo","Biscoe",45.1,14.5,215,5000,"female"
170 | "Gentoo","Biscoe",46.5,14.5,213,4400,"female"
171 | "Gentoo","Biscoe",46.3,15.8,215,5050,"male"
172 | "Gentoo","Biscoe",42.9,13.1,215,5000,"female"
173 | "Gentoo","Biscoe",46.1,15.1,215,5100,"male"
174 | "Gentoo","Biscoe",47.8,15,215,5650,"male"
175 | "Gentoo","Biscoe",48.2,14.3,210,4600,"female"
176 | "Gentoo","Biscoe",50,15.3,220,5550,"male"
177 | "Gentoo","Biscoe",47.3,15.3,222,5250,"male"
178 | "Gentoo","Biscoe",42.8,14.2,209,4700,"female"
179 | "Gentoo","Biscoe",45.1,14.5,207,5050,"female"
180 | "Gentoo","Biscoe",59.6,17,230,6050,"male"
181 | "Gentoo","Biscoe",49.1,14.8,220,5150,"female"
182 | "Gentoo","Biscoe",48.4,16.3,220,5400,"male"
183 | "Gentoo","Biscoe",42.6,13.7,213,4950,"female"
184 | "Gentoo","Biscoe",44.4,17.3,219,5250,"male"
185 | "Gentoo","Biscoe",44,13.6,208,4350,"female"
186 | "Gentoo","Biscoe",48.7,15.7,208,5350,"male"
187 | "Gentoo","Biscoe",42.7,13.7,208,3950,"female"
188 | "Gentoo","Biscoe",49.6,16,225,5700,"male"
189 | "Gentoo","Biscoe",45.3,13.7,210,4300,"female"
190 | "Gentoo","Biscoe",49.6,15,216,4750,"male"
191 | "Gentoo","Biscoe",50.5,15.9,222,5550,"male"
192 | "Gentoo","Biscoe",43.6,13.9,217,4900,"female"
193 | "Gentoo","Biscoe",45.5,13.9,210,4200,"female"
194 | "Gentoo","Biscoe",50.5,15.9,225,5400,"male"
195 | "Gentoo","Biscoe",44.9,13.3,213,5100,"female"
196 | "Gentoo","Biscoe",45.2,15.8,215,5300,"male"
197 | "Gentoo","Biscoe",46.6,14.2,210,4850,"female"
198 | "Gentoo","Biscoe",48.5,14.1,220,5300,"male"
199 | "Gentoo","Biscoe",45.1,14.4,210,4400,"female"
200 | "Gentoo","Biscoe",50.1,15,225,5000,"male"
201 | "Gentoo","Biscoe",46.5,14.4,217,4900,"female"
202 | "Gentoo","Biscoe",45,15.4,220,5050,"male"
203 | "Gentoo","Biscoe",43.8,13.9,208,4300,"female"
204 | "Gentoo","Biscoe",45.5,15,220,5000,"male"
205 | "Gentoo","Biscoe",43.2,14.5,208,4450,"female"
206 | "Gentoo","Biscoe",50.4,15.3,224,5550,"male"
207 | "Gentoo","Biscoe",45.3,13.8,208,4200,"female"
208 | "Gentoo","Biscoe",46.2,14.9,221,5300,"male"
209 | "Gentoo","Biscoe",45.7,13.9,214,4400,"female"
210 | "Gentoo","Biscoe",54.3,15.7,231,5650,"male"
211 | "Gentoo","Biscoe",45.8,14.2,219,4700,"female"
212 | "Gentoo","Biscoe",49.8,16.8,230,5700,"male"
213 | "Gentoo","Biscoe",49.5,16.2,229,5800,"male"
214 | "Gentoo","Biscoe",43.5,14.2,220,4700,"female"
215 | "Gentoo","Biscoe",50.7,15,223,5550,"male"
216 | "Gentoo","Biscoe",47.7,15,216,4750,"female"
217 | "Gentoo","Biscoe",46.4,15.6,221,5000,"male"
218 | "Gentoo","Biscoe",48.2,15.6,221,5100,"male"
219 | "Gentoo","Biscoe",46.5,14.8,217,5200,"female"
220 | "Gentoo","Biscoe",46.4,15,216,4700,"female"
221 | "Gentoo","Biscoe",48.6,16,230,5800,"male"
222 | "Gentoo","Biscoe",47.5,14.2,209,4600,"female"
223 | "Gentoo","Biscoe",51.1,16.3,220,6000,"male"
224 | "Gentoo","Biscoe",45.2,13.8,215,4750,"female"
225 | "Gentoo","Biscoe",45.2,16.4,223,5950,"male"
226 | "Gentoo","Biscoe",49.1,14.5,212,4625,"female"
227 | "Gentoo","Biscoe",52.5,15.6,221,5450,"male"
228 | "Gentoo","Biscoe",47.4,14.6,212,4725,"female"
229 | "Gentoo","Biscoe",50,15.9,224,5350,"male"
230 | "Gentoo","Biscoe",44.9,13.8,212,4750,"female"
231 | "Gentoo","Biscoe",50.8,17.3,228,5600,"male"
232 | "Gentoo","Biscoe",43.4,14.4,218,4600,"female"
233 | "Gentoo","Biscoe",51.3,14.2,218,5300,"male"
234 | "Gentoo","Biscoe",47.5,14,212,4875,"female"
235 | "Gentoo","Biscoe",52.1,17,230,5550,"male"
236 | "Gentoo","Biscoe",47.5,15,218,4950,"female"
237 | "Gentoo","Biscoe",52.2,17.1,228,5400,"male"
238 | "Gentoo","Biscoe",45.5,14.5,212,4750,"female"
239 | "Gentoo","Biscoe",49.5,16.1,224,5650,"male"
240 | "Gentoo","Biscoe",44.5,14.7,214,4850,"female"
241 | "Gentoo","Biscoe",50.8,15.7,226,5200,"male"
242 | "Gentoo","Biscoe",49.4,15.8,216,4925,"male"
243 | "Gentoo","Biscoe",46.9,14.6,222,4875,"female"
244 | "Gentoo","Biscoe",48.4,14.4,203,4625,"female"
245 | "Gentoo","Biscoe",51.1,16.5,225,5250,"male"
246 | "Gentoo","Biscoe",48.5,15,219,4850,"female"
247 | "Gentoo","Biscoe",55.9,17,228,5600,"male"
248 | "Gentoo","Biscoe",47.2,15.5,215,4975,"female"
249 | "Gentoo","Biscoe",49.1,15,228,5500,"male"
250 | "Gentoo","Biscoe",46.8,16.1,215,5500,"male"
251 | "Gentoo","Biscoe",41.7,14.7,210,4700,"female"
252 | "Gentoo","Biscoe",53.4,15.8,219,5500,"male"
253 | "Gentoo","Biscoe",43.3,14,208,4575,"female"
254 | "Gentoo","Biscoe",48.1,15.1,209,5500,"male"
255 | "Gentoo","Biscoe",50.5,15.2,216,5000,"female"
256 | "Gentoo","Biscoe",49.8,15.9,229,5950,"male"
257 | "Gentoo","Biscoe",43.5,15.2,213,4650,"female"
258 | "Gentoo","Biscoe",51.5,16.3,230,5500,"male"
259 | "Gentoo","Biscoe",46.2,14.1,217,4375,"female"
260 | "Gentoo","Biscoe",55.1,16,230,5850,"male"
261 | "Gentoo","Biscoe",48.8,16.2,222,6000,"male"
262 | "Gentoo","Biscoe",47.2,13.7,214,4925,"female"
263 | "Gentoo","Biscoe",46.8,14.3,215,4850,"female"
264 | "Gentoo","Biscoe",50.4,15.7,222,5750,"male"
265 | "Gentoo","Biscoe",45.2,14.8,212,5200,"female"
266 | "Gentoo","Biscoe",49.9,16.1,213,5400,"male"
267 | "Chinstrap","Dream",46.5,17.9,192,3500,"female"
268 | "Chinstrap","Dream",50,19.5,196,3900,"male"
269 | "Chinstrap","Dream",51.3,19.2,193,3650,"male"
270 | "Chinstrap","Dream",45.4,18.7,188,3525,"female"
271 | "Chinstrap","Dream",52.7,19.8,197,3725,"male"
272 | "Chinstrap","Dream",45.2,17.8,198,3950,"female"
273 | "Chinstrap","Dream",46.1,18.2,178,3250,"female"
274 | "Chinstrap","Dream",51.3,18.2,197,3750,"male"
275 | "Chinstrap","Dream",46,18.9,195,4150,"female"
276 | "Chinstrap","Dream",51.3,19.9,198,3700,"male"
277 | "Chinstrap","Dream",46.6,17.8,193,3800,"female"
278 | "Chinstrap","Dream",51.7,20.3,194,3775,"male"
279 | "Chinstrap","Dream",47,17.3,185,3700,"female"
280 | "Chinstrap","Dream",52,18.1,201,4050,"male"
281 | "Chinstrap","Dream",45.9,17.1,190,3575,"female"
282 | "Chinstrap","Dream",50.5,19.6,201,4050,"male"
283 | "Chinstrap","Dream",50.3,20,197,3300,"male"
284 | "Chinstrap","Dream",58,17.8,181,3700,"female"
285 | "Chinstrap","Dream",46.4,18.6,190,3450,"female"
286 | "Chinstrap","Dream",49.2,18.2,195,4400,"male"
287 | "Chinstrap","Dream",42.4,17.3,181,3600,"female"
288 | "Chinstrap","Dream",48.5,17.5,191,3400,"male"
289 | "Chinstrap","Dream",43.2,16.6,187,2900,"female"
290 | "Chinstrap","Dream",50.6,19.4,193,3800,"male"
291 | "Chinstrap","Dream",46.7,17.9,195,3300,"female"
292 | "Chinstrap","Dream",52,19,197,4150,"male"
293 | "Chinstrap","Dream",50.5,18.4,200,3400,"female"
294 | "Chinstrap","Dream",49.5,19,200,3800,"male"
295 | "Chinstrap","Dream",46.4,17.8,191,3700,"female"
296 | "Chinstrap","Dream",52.8,20,205,4550,"male"
297 | "Chinstrap","Dream",40.9,16.6,187,3200,"female"
298 | "Chinstrap","Dream",54.2,20.8,201,4300,"male"
299 | "Chinstrap","Dream",42.5,16.7,187,3350,"female"
300 | "Chinstrap","Dream",51,18.8,203,4100,"male"
301 | "Chinstrap","Dream",49.7,18.6,195,3600,"male"
302 | "Chinstrap","Dream",47.5,16.8,199,3900,"female"
303 | "Chinstrap","Dream",47.6,18.3,195,3850,"female"
304 | "Chinstrap","Dream",52,20.7,210,4800,"male"
305 | "Chinstrap","Dream",46.9,16.6,192,2700,"female"
306 | "Chinstrap","Dream",53.5,19.9,205,4500,"male"
307 | "Chinstrap","Dream",49,19.5,210,3950,"male"
308 | "Chinstrap","Dream",46.2,17.5,187,3650,"female"
309 | "Chinstrap","Dream",50.9,19.1,196,3550,"male"
310 | "Chinstrap","Dream",45.5,17,196,3500,"female"
311 | "Chinstrap","Dream",50.9,17.9,196,3675,"female"
312 | "Chinstrap","Dream",50.8,18.5,201,4450,"male"
313 | "Chinstrap","Dream",50.1,17.9,190,3400,"female"
314 | "Chinstrap","Dream",49,19.6,212,4300,"male"
315 | "Chinstrap","Dream",51.5,18.7,187,3250,"male"
316 | "Chinstrap","Dream",49.8,17.3,198,3675,"female"
317 | "Chinstrap","Dream",48.1,16.4,199,3325,"female"
318 | "Chinstrap","Dream",51.4,19,201,3950,"male"
319 | "Chinstrap","Dream",45.7,17.3,193,3600,"female"
320 | "Chinstrap","Dream",50.7,19.7,203,4050,"male"
321 | "Chinstrap","Dream",42.5,17.3,187,3350,"female"
322 | "Chinstrap","Dream",52.2,18.8,197,3450,"male"
323 | "Chinstrap","Dream",45.2,16.6,191,3250,"female"
324 | "Chinstrap","Dream",49.3,19.9,203,4050,"male"
325 | "Chinstrap","Dream",50.2,18.8,202,3800,"male"
326 | "Chinstrap","Dream",45.6,19.4,194,3525,"female"
327 | "Chinstrap","Dream",51.9,19.5,206,3950,"male"
328 | "Chinstrap","Dream",46.8,16.5,189,3650,"female"
329 | "Chinstrap","Dream",45.7,17,195,3650,"female"
330 | "Chinstrap","Dream",55.8,19.8,207,4000,"male"
331 | "Chinstrap","Dream",43.5,18.1,202,3400,"female"
332 | "Chinstrap","Dream",49.6,18.2,193,3775,"male"
333 | "Chinstrap","Dream",50.8,19,210,4100,"male"
334 | "Chinstrap","Dream",50.2,18.7,198,3775,"female"
335 |
--------------------------------------------------------------------------------
/app_8_classification_penguins/penguins_clf.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dataprofessor/streamlit_freecodecamp/d44c4c1320f8417b3d5494902e5366b715314528/app_8_classification_penguins/penguins_clf.pkl
--------------------------------------------------------------------------------
/app_8_classification_penguins/penguins_example.csv:
--------------------------------------------------------------------------------
1 | island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
2 | Biscoe,43.9,17.2,201.0,4207.0,male
3 |
--------------------------------------------------------------------------------
/app_9_regression_boston_housing/boston-house-ml-app.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import pandas as pd
3 | import shap
4 | import matplotlib.pyplot as plt
5 | from sklearn import datasets
6 | from sklearn.ensemble import RandomForestRegressor
7 |
# Page title and a one-line description, rendered as Markdown.
st.write("""
# Boston House Price Prediction App

This app predicts the **Boston House Price**!
""")
st.write('---')

# Loads the Boston House Price Dataset into X (13 features) and Y (MEDV).
#
# `datasets.load_boston` was deprecated in scikit-learn 1.0 and removed in
# 1.2, where accessing it raises ImportError. When it is unavailable, rebuild
# the same two frames from the original data file, following the recipe given
# in scikit-learn's own removal message.
try:
    boston = datasets.load_boston()
    X = pd.DataFrame(boston.data, columns=boston.feature_names)
    Y = pd.DataFrame(boston.target, columns=["MEDV"])
except (ImportError, AttributeError):
    # NOTE(review): this fallback needs network access and, as written, runs
    # on every script rerun -- consider caching or vendoring the file.
    boston_feature_names = [
        'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
        'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
    ]
    raw_boston = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,
        header=None,
    )
    # Each record spans two physical lines: the first holds 11 features, the
    # second holds the remaining 2 features followed by the MEDV target.
    first_lines = raw_boston.iloc[::2].reset_index(drop=True)
    second_lines = raw_boston.iloc[1::2].reset_index(drop=True)
    X = pd.concat([first_lines, second_lines.iloc[:, :2]], axis=1)
    X.columns = boston_feature_names
    X = X.astype(float)
    Y = pd.DataFrame(
        second_lines.iloc[:, 2].to_numpy(dtype=float), columns=["MEDV"]
    )

# Sidebar
# Header for the input-parameter controls
st.sidebar.header('Specify Input Parameters')
23 |
def user_input_features():
    """Collect one value per feature from the sidebar sliders.

    A slider is created for every feature column of the module-level ``X``,
    labelled with the column name, ranging from the column's minimum to its
    maximum and initialised at the column's mean.

    Returns:
        A single-row ``pandas.DataFrame`` (index ``0``) whose columns are the
        feature names, in the same order as the sliders, and whose values are
        the current slider selections.
    """
    slider_order = (
        'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
        'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
    )
    chosen = {}
    for feature in slider_order:
        column = X[feature]
        chosen[feature] = st.sidebar.slider(
            feature, column.min(), column.max(), column.mean()
        )
    return pd.DataFrame(chosen, index=[0])
53 |
# Single-row frame holding the values currently selected in the sidebar.
df = user_input_features()

# Main Panel

# Print specified input parameters
st.header('Specified Input parameters')
st.write(df)
st.write('---')

# Build Regression Model
model = RandomForestRegressor()
# Y is a one-column DataFrame; scikit-learn expects a 1-D target and emits a
# DataConversionWarning for column vectors, so flatten it before fitting.
model.fit(X, Y.values.ravel())
# Apply Model to Make Prediction
prediction = model.predict(df)

st.header('Prediction of MEDV')
st.write(prediction)
st.write('---')
72 |
# Explaining the model's predictions using SHAP values
# https://github.com/slundberg/shap
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)

# Both plots below are drawn on the current pyplot figure. That figure is
# passed to st.pyplot explicitly, because calling st.pyplot() with no figure
# relies on the deprecated global-figure behaviour. `clear_figure=True` wipes
# the figure after rendering so the second plot starts from a clean canvas,
# which matches what the implicit form did. `show=False` stops SHAP from
# calling plt.show() before Streamlit has rendered the figure.
st.header('Feature Importance')
plt.title('Feature importance based on SHAP values')
shap.summary_plot(shap_values, X, show=False)
st.pyplot(plt.gcf(), clear_figure=True, bbox_inches='tight')
st.write('---')

plt.title('Feature importance based on SHAP values (Bar)')
shap.summary_plot(shap_values, X, plot_type="bar", show=False)
st.pyplot(plt.gcf(), clear_figure=True, bbox_inches='tight')
87 |
--------------------------------------------------------------------------------