├── README.md ├── app_10_regression_bioinformatics_solubility ├── solubility-app.py ├── solubility-logo.jpg ├── solubility-web-app.ipynb └── solubility_model.pkl ├── app_1_simple_stock_price ├── myapp.py └── myapp2.py ├── app_2_simple_bioinformatics_dna ├── dna-app.py └── dna-logo.jpg ├── app_3_eda_basketball └── basketball_app.py ├── app_4_eda_football └── football_app.py ├── app_5_eda_sp500_stock └── sp500-app.py ├── app_6_eda_cryptocurrency ├── crypto-price-app.py └── logo.jpg ├── app_7_classification_iris └── iris-ml-app.py ├── app_8_classification_penguins ├── penguins-app.py ├── penguins-model-building.py ├── penguins_cleaned.csv ├── penguins_clf.pkl └── penguins_example.csv └── app_9_regression_boston_housing └── boston-house-ml-app.py /README.md: -------------------------------------------------------------------------------- 1 | # streamlit_freecodecamp 2 | ## Build 12 Data Apps in Python with Streamlit 3 | 4 | Learn how to build interactive and data-driven web apps in Python using the Streamlit library. 5 | 6 | Video | Title 7 | ---|--- 8 | Build 12 Data Science Apps with Python and Streamlit - Full Course | [Build 12 Data Science Apps with Python and Streamlit - Full Course](https://youtu.be/JwSS70SZdyM) 9 | 10 | -------------------------------------------------------------------------------- /app_10_regression_bioinformatics_solubility/solubility-app.py: -------------------------------------------------------------------------------- 1 | ###################### 2 | # Import libraries 3 | ###################### 4 | import numpy as np 5 | import pandas as pd 6 | import streamlit as st 7 | import pickle 8 | from PIL import Image 9 | from rdkit import Chem 10 | from rdkit.Chem import Descriptors 11 | 12 | ###################### 13 | # Custom function 14 | ###################### 15 | ## Calculate molecular descriptors 16 | def AromaticProportion(m): 17 | aromatic_atoms = [m.GetAtomWithIdx(i).GetIsAromatic() for i in range(m.GetNumAtoms())] 18 | aa_count = [] 19 | for i in aromatic_atoms: 20 | if i==True: 21 | aa_count.append(1) 22 | AromaticAtom = sum(aa_count) 23 | HeavyAtom = Descriptors.HeavyAtomCount(m) 24 | AR = AromaticAtom/HeavyAtom 25 | return AR 26 | 27 | def generate(smiles, verbose=False): 28 | 29 | moldata= [] 30 | for elem in smiles: 31 | mol=Chem.MolFromSmiles(elem) 32 | moldata.append(mol) 33 | 34 | baseData= np.arange(1,1) 35 | i=0 36 | for mol in moldata: 37 | 38 | desc_MolLogP = Descriptors.MolLogP(mol) 39 | desc_MolWt = Descriptors.MolWt(mol) 40 | desc_NumRotatableBonds = Descriptors.NumRotatableBonds(mol) 41 | desc_AromaticProportion = AromaticProportion(mol) 42 | 43 | row = np.array([desc_MolLogP, 44 | desc_MolWt, 45 | desc_NumRotatableBonds, 46 | desc_AromaticProportion]) 47 | 48 | if(i==0): 49 | baseData=row 50 | else: 51 | baseData=np.vstack([baseData, row]) 52 | i=i+1 53 | 54 | columnNames=["MolLogP","MolWt","NumRotatableBonds","AromaticProportion"] 55 | descriptors = pd.DataFrame(data=baseData,columns=columnNames) 56 | 57 | return descriptors 58 | 59 | ###################### 60 | # Page Title 61 | ###################### 62 | 63 | image = Image.open('solubility-logo.jpg') 64 | 65 | st.image(image, use_column_width=True) 66 | 67 | st.write(""" 68 | # Molecular Solubility Prediction Web App 69 | 70 | This app predicts the **Solubility (LogS)** values of molecules! 71 | 72 | Data obtained from the John S. Delaney. [ESOL:  Estimating Aqueous Solubility Directly from Molecular Structure](https://pubs.acs.org/doi/10.1021/ci034243x). ***J. Chem. Inf. Comput. Sci.*** 2004, 44, 3, 1000-1005. 73 | *** 74 | """) 75 | 76 | 77 | ###################### 78 | # Input molecules (Side Panel) 79 | ###################### 80 | 81 | st.sidebar.header('User Input Features') 82 | 83 | ## Read SMILES input 84 | SMILES_input = "NCCCC\nCCC\nCN" 85 | 86 | SMILES = st.sidebar.text_area("SMILES input", SMILES_input) 87 | SMILES = "C\n" + SMILES #Adds C as a dummy, first item 88 | SMILES = SMILES.split('\n') 89 | 90 | st.header('Input SMILES') 91 | SMILES[1:] # Skips the dummy first item 92 | 93 | ## Calculate molecular descriptors 94 | st.header('Computed molecular descriptors') 95 | X = generate(SMILES) 96 | X[1:] # Skips the dummy first item 97 | 98 | ###################### 99 | # Pre-built model 100 | ###################### 101 | 102 | # Reads in saved model 103 | load_model = pickle.load(open('solubility_model.pkl', 'rb')) 104 | 105 | # Apply model to make predictions 106 | prediction = load_model.predict(X) 107 | #prediction_proba = load_model.predict_proba(X) 108 | 109 | st.header('Predicted LogS values') 110 | prediction[1:] # Skips the dummy first item 111 | -------------------------------------------------------------------------------- /app_10_regression_bioinformatics_solubility/solubility-logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataprofessor/streamlit_freecodecamp/d44c4c1320f8417b3d5494902e5366b715314528/app_10_regression_bioinformatics_solubility/solubility-logo.jpg -------------------------------------------------------------------------------- /app_10_regression_bioinformatics_solubility/solubility-web-app.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "solubility-web-app.ipynb", 7 | "provenance": [], 8 | "toc_visible": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "markdown", 18 | "metadata": { 19 | "id": "QQHZHevuXdEy", 20 | "colab_type": "text" 21 | }, 22 | "source": [ 23 | "# **Model Building for Solubility Dataset**\n", 24 | "\n", 25 | "Chanin Nantasenamat\n", 26 | "\n", 27 | "*Data Professor YouTube channel, http://youtube.com/dataprofessor*" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": { 33 | "id": "g1qtHa0zXfWM", 34 | "colab_type": "text" 35 | }, 36 | "source": [ 37 | "# Read in data" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "metadata": { 43 | "id": "9MdfbvFKXtXq", 44 | "colab_type": "code", 45 | "colab": {} 46 | }, 47 | "source": [ 48 | "import pandas as pd" 49 | ], 50 | "execution_count": 1, 51 | "outputs": [] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "metadata": { 56 | "id": "nerGP0fCXfgP", 57 | "colab_type": "code", 58 | "colab": { 59 | "base_uri": "https://localhost:8080/", 60 | "height": 419 61 | }, 62 | "outputId": "2bb155a6-2710-4461-accb-df64045ba70d" 63 | }, 64 | "source": [ 65 | "delaney_with_descriptors_url = 'https://raw.githubusercontent.com/dataprofessor/data/master/delaney_solubility_with_descriptors.csv'\n", 66 | "dataset = pd.read_csv(delaney_with_descriptors_url)\n", 67 | "dataset" 68 | ], 69 | "execution_count": 2, 70 | "outputs": [ 71 | { 72 | "output_type": "execute_result", 73 | "data": { 74 | "text/html": [ 75 | "
\n", 76 | "\n", 89 | "\n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | "
MolLogPMolWtNumRotatableBondsAromaticProportionlogS
02.59540167.8500.00.000000-2.180
12.37650133.4050.00.000000-2.000
22.59380167.8501.00.000000-1.740
32.02890133.4051.00.000000-1.480
42.91890187.3751.00.000000-3.040
..................
11391.98820287.3438.00.0000001.144
11403.42130286.1142.00.333333-4.925
11413.60960308.3334.00.695652-3.893
11422.56214354.8153.00.521739-3.790
11432.02164179.2191.00.461538-2.581
\n", 191 | "

1144 rows × 5 columns

\n", 192 | "
" 193 | ], 194 | "text/plain": [ 195 | " MolLogP MolWt NumRotatableBonds AromaticProportion logS\n", 196 | "0 2.59540 167.850 0.0 0.000000 -2.180\n", 197 | "1 2.37650 133.405 0.0 0.000000 -2.000\n", 198 | "2 2.59380 167.850 1.0 0.000000 -1.740\n", 199 | "3 2.02890 133.405 1.0 0.000000 -1.480\n", 200 | "4 2.91890 187.375 1.0 0.000000 -3.040\n", 201 | "... ... ... ... ... ...\n", 202 | "1139 1.98820 287.343 8.0 0.000000 1.144\n", 203 | "1140 3.42130 286.114 2.0 0.333333 -4.925\n", 204 | "1141 3.60960 308.333 4.0 0.695652 -3.893\n", 205 | "1142 2.56214 354.815 3.0 0.521739 -3.790\n", 206 | "1143 2.02164 179.219 1.0 0.461538 -2.581\n", 207 | "\n", 208 | "[1144 rows x 5 columns]" 209 | ] 210 | }, 211 | "metadata": { 212 | "tags": [] 213 | }, 214 | "execution_count": 2 215 | } 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "metadata": { 221 | "id": "tgFxx8m_YEUy", 222 | "colab_type": "code", 223 | "colab": { 224 | "base_uri": "https://localhost:8080/", 225 | "height": 419 226 | }, 227 | "outputId": "fd6feedd-253b-4189-d400-a8d3f5bf1f25" 228 | }, 229 | "source": [ 230 | "X = dataset.drop(['logS'], axis=1)\n", 231 | "X" 232 | ], 233 | "execution_count": 5, 234 | "outputs": [ 235 | { 236 | "output_type": "execute_result", 237 | "data": { 238 | "text/html": [ 239 | "
\n", 240 | "\n", 253 | "\n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | "
MolLogPMolWtNumRotatableBondsAromaticProportion
02.59540167.8500.00.000000
12.37650133.4050.00.000000
22.59380167.8501.00.000000
32.02890133.4051.00.000000
42.91890187.3751.00.000000
...............
11391.98820287.3438.00.000000
11403.42130286.1142.00.333333
11413.60960308.3334.00.695652
11422.56214354.8153.00.521739
11432.02164179.2191.00.461538
\n", 343 | "

1144 rows × 4 columns

\n", 344 | "
" 345 | ], 346 | "text/plain": [ 347 | " MolLogP MolWt NumRotatableBonds AromaticProportion\n", 348 | "0 2.59540 167.850 0.0 0.000000\n", 349 | "1 2.37650 133.405 0.0 0.000000\n", 350 | "2 2.59380 167.850 1.0 0.000000\n", 351 | "3 2.02890 133.405 1.0 0.000000\n", 352 | "4 2.91890 187.375 1.0 0.000000\n", 353 | "... ... ... ... ...\n", 354 | "1139 1.98820 287.343 8.0 0.000000\n", 355 | "1140 3.42130 286.114 2.0 0.333333\n", 356 | "1141 3.60960 308.333 4.0 0.695652\n", 357 | "1142 2.56214 354.815 3.0 0.521739\n", 358 | "1143 2.02164 179.219 1.0 0.461538\n", 359 | "\n", 360 | "[1144 rows x 4 columns]" 361 | ] 362 | }, 363 | "metadata": { 364 | "tags": [] 365 | }, 366 | "execution_count": 5 367 | } 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "metadata": { 373 | "id": "JDwxgKHqYmD4", 374 | "colab_type": "code", 375 | "colab": { 376 | "base_uri": "https://localhost:8080/", 377 | "height": 221 378 | }, 379 | "outputId": "a725d7b7-baad-4a99-9686-4dfe1d852c22" 380 | }, 381 | "source": [ 382 | "Y = dataset.iloc[:,-1]\n", 383 | "Y" 384 | ], 385 | "execution_count": 6, 386 | "outputs": [ 387 | { 388 | "output_type": "execute_result", 389 | "data": { 390 | "text/plain": [ 391 | "0 -2.180\n", 392 | "1 -2.000\n", 393 | "2 -1.740\n", 394 | "3 -1.480\n", 395 | "4 -3.040\n", 396 | " ... \n", 397 | "1139 1.144\n", 398 | "1140 -4.925\n", 399 | "1141 -3.893\n", 400 | "1142 -3.790\n", 401 | "1143 -2.581\n", 402 | "Name: logS, Length: 1144, dtype: float64" 403 | ] 404 | }, 405 | "metadata": { 406 | "tags": [] 407 | }, 408 | "execution_count": 6 409 | } 410 | ] 411 | }, 412 | { 413 | "cell_type": "markdown", 414 | "metadata": { 415 | "id": "LNohCdqQY5VZ", 416 | "colab_type": "text" 417 | }, 418 | "source": [ 419 | "# Linear Regression Model" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "metadata": { 425 | "id": "EanoyG2eX9cV", 426 | "colab_type": "code", 427 | "colab": {} 428 | }, 429 | "source": [ 430 | "from sklearn import linear_model\n", 431 | "from sklearn.metrics import mean_squared_error, r2_score" 432 | ], 433 | "execution_count": 3, 434 | "outputs": [] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "metadata": { 439 | "id": "mLQJ2KLLY_9a", 440 | "colab_type": "code", 441 | "colab": { 442 | "base_uri": "https://localhost:8080/", 443 | "height": 34 444 | }, 445 | "outputId": "6349fa74-f087-4d81-916e-294789c6455c" 446 | }, 447 | "source": [ 448 | "model = linear_model.LinearRegression()\n", 449 | "model.fit(X, Y)" 450 | ], 451 | "execution_count": 7, 452 | "outputs": [ 453 | { 454 | "output_type": "execute_result", 455 | "data": { 456 | "text/plain": [ 457 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" 458 | ] 459 | }, 460 | "metadata": { 461 | "tags": [] 462 | }, 463 | "execution_count": 7 464 | } 465 | ] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "metadata": { 470 | "id": "F5f8KGWjZRSc", 471 | "colab_type": "text" 472 | }, 473 | "source": [ 474 | "## Model Prediction" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "metadata": { 480 | "id": "MI3c8LB2ZCYW", 481 | "colab_type": "code", 482 | "colab": { 483 | "base_uri": "https://localhost:8080/", 484 | "height": 51 485 | }, 486 | "outputId": "19b50c6a-7d1c-4bfd-8789-d5884b42d594" 487 | }, 488 | "source": [ 489 | "Y_pred = model.predict(X)\n", 490 | "Y_pred" 491 | ], 492 | "execution_count": 8, 493 | "outputs": [ 494 | { 495 | "output_type": "execute_result", 496 | "data": { 497 | "text/plain": [ 498 | "array([-2.77628837, -2.38661054, -2.77190108, ..., -4.73721496,\n", 499 | " -4.19663007, -2.61784284])" 500 | ] 501 | }, 502 | "metadata": { 503 | "tags": [] 504 | }, 505 | "execution_count": 8 506 | } 507 | ] 508 | }, 509 | { 510 | "cell_type": "markdown", 511 | "metadata": { 512 | "id": "fXv7bcolZqa-", 513 | "colab_type": "text" 514 | }, 515 | "source": [ 516 | "## Model Performance" 517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "metadata": { 522 | "id": "6f13gYleZVKy", 523 | "colab_type": "code", 524 | "colab": { 525 | "base_uri": "https://localhost:8080/", 526 | "height": 85 527 | }, 528 | "outputId": "99894d58-83b4-4b64-f54c-848e8430cd5e" 529 | }, 530 | "source": [ 531 | "print('Coefficients:', model.coef_)\n", 532 | "print('Intercept:', model.intercept_)\n", 533 | "print('Mean squared error (MSE): %.2f'\n", 534 | " % mean_squared_error(Y, Y_pred))\n", 535 | "print('Coefficient of determination (R^2): %.2f'\n", 536 | " % r2_score(Y, Y_pred))" 537 | ], 538 | "execution_count": 9, 539 | "outputs": [ 540 | { 541 | "output_type": "stream", 542 | "text": [ 543 | "Coefficients: [-0.74173609 -0.00659927 0.00320051 -0.42316387]\n", 544 | "Intercept: 0.2565006830997194\n", 545 | "Mean squared error (MSE): 1.01\n", 546 | "Coefficient of determination (R^2): 0.77\n" 547 | ], 548 | "name": "stdout" 549 | } 550 | ] 551 | }, 552 | { 553 | "cell_type": "markdown", 554 | "metadata": { 555 | "id": "Yhuc402dZsk3", 556 | "colab_type": "text" 557 | }, 558 | "source": [ 559 | "## Model Equation" 560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "metadata": { 565 | "id": "QnoUESmXZcMo", 566 | "colab_type": "code", 567 | "colab": { 568 | "base_uri": "https://localhost:8080/", 569 | "height": 34 570 | }, 571 | "outputId": "c2e3b76f-4d9a-425c-99e4-5793dc6c1620" 572 | }, 573 | "source": [ 574 | "print('LogS = %.2f %.2f LogP %.4f MW + %.4f RB %.2f AP' % (model.intercept_, model.coef_[0], model.coef_[1], model.coef_[2], model.coef_[3] ) )" 575 | ], 576 | "execution_count": 10, 577 | "outputs": [ 578 | { 579 | "output_type": "stream", 580 | "text": [ 581 | "LogS = 0.26 -0.74 LogP -0.0066 MW + 0.0032 RB -0.42 AP\n" 582 | ], 583 | "name": "stdout" 584 | } 585 | ] 586 | }, 587 | { 588 | "cell_type": "markdown", 589 | "metadata": { 590 | "id": "uWvxj1iSaL3n", 591 | "colab_type": "text" 592 | }, 593 | "source": [ 594 | "# Data Visualization (Experimental vs Predicted LogS for Training Data)" 595 | ] 596 | }, 597 | { 598 | "cell_type": "code", 599 | "metadata": { 600 | "id": "iPcFF0MjZlh8", 601 | "colab_type": "code", 602 | "colab": {} 603 | }, 604 | "source": [ 605 | "import matplotlib.pyplot as plt\n", 606 | "import numpy as np" 607 | ], 608 | "execution_count": 11, 609 | "outputs": [] 610 | }, 611 | { 612 | "cell_type": "code", 613 | "metadata": { 614 | "id": "QRNyIlGAaQQI", 615 | "colab_type": "code", 616 | "colab": { 617 | "base_uri": "https://localhost:8080/", 618 | "height": 351 619 | }, 620 | "outputId": "949bd284-5952-496f-a57e-47a1333fd50b" 621 | }, 622 | "source": [ 623 | "plt.figure(figsize=(5,5))\n", 624 | "plt.scatter(x=Y, y=Y_pred, c=\"#7CAE00\", alpha=0.3)\n", 625 | "\n", 626 | "# Add trendline\n", 627 | "# https://stackoverflow.com/questions/26447191/how-to-add-trendline-in-python-matplotlib-dot-scatter-graphs\n", 628 | "z = np.polyfit(Y, Y_pred, 1)\n", 629 | "p = np.poly1d(z)\n", 630 | "\n", 631 | "plt.plot(Y,p(Y),\"#F8766D\")\n", 632 | "plt.ylabel('Predicted LogS')\n", 633 | "plt.xlabel('Experimental LogS')" 634 | ], 635 | "execution_count": 17, 636 | "outputs": [ 637 | { 638 | "output_type": "execute_result", 639 | "data": { 640 | "text/plain": [ 641 | "Text(0.5, 0, 'Experimental LogS')" 642 | ] 643 | }, 644 | "metadata": { 645 | "tags": [] 646 | }, 647 | "execution_count": 17 648 | }, 649 | { 650 | "output_type": "display_data", 651 | "data": { 652 | "image/png": "\n", 653 | "text/plain": [ 654 | "
" 655 | ] 656 | }, 657 | "metadata": { 658 | "tags": [], 659 | "needs_background": "light" 660 | } 661 | } 662 | ] 663 | }, 664 | { 665 | "cell_type": "markdown", 666 | "metadata": { 667 | "id": "YzKTmvZrbFVI", 668 | "colab_type": "text" 669 | }, 670 | "source": [ 671 | "# Save Model as Pickle Object" 672 | ] 673 | }, 674 | { 675 | "cell_type": "code", 676 | "metadata": { 677 | "id": "DzjpPyVyb8XO", 678 | "colab_type": "code", 679 | "colab": {} 680 | }, 681 | "source": [ 682 | "import pickle" 683 | ], 684 | "execution_count": 18, 685 | "outputs": [] 686 | }, 687 | { 688 | "cell_type": "code", 689 | "metadata": { 690 | "id": "b2K9ajBaaYUk", 691 | "colab_type": "code", 692 | "colab": {} 693 | }, 694 | "source": [ 695 | "pickle.dump(model, open('solubility_model.pkl', 'wb'))" 696 | ], 697 | "execution_count": 19, 698 | "outputs": [] 699 | }, 700 | { 701 | "cell_type": "code", 702 | "metadata": { 703 | "id": "ef4fyvrEb-NC", 704 | "colab_type": "code", 705 | "colab": {} 706 | }, 707 | "source": [ 708 | "" 709 | ], 710 | "execution_count": null, 711 | "outputs": [] 712 | } 713 | ] 714 | } 715 | -------------------------------------------------------------------------------- /app_10_regression_bioinformatics_solubility/solubility_model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataprofessor/streamlit_freecodecamp/d44c4c1320f8417b3d5494902e5366b715314528/app_10_regression_bioinformatics_solubility/solubility_model.pkl -------------------------------------------------------------------------------- /app_1_simple_stock_price/myapp.py: -------------------------------------------------------------------------------- 1 | import yfinance as yf 2 | import streamlit as st 3 | 4 | st.write(""" 5 | # Simple Stock Price App 6 | 7 | Shown are the stock closing price and volume of Google! 8 | 9 | """) 10 | 11 | # https://towardsdatascience.com/how-to-get-stock-data-using-python-c0de1df17e75 12 | #define the ticker symbol 13 | tickerSymbol = 'GOOGL' 14 | #get data on this ticker 15 | tickerData = yf.Ticker(tickerSymbol) 16 | #get the historical prices for this ticker 17 | tickerDf = tickerData.history(period='1d', start='2010-5-31', end='2020-5-31') 18 | # Open High Low Close Volume Dividends Stock Splits 19 | 20 | st.line_chart(tickerDf.Close) 21 | st.line_chart(tickerDf.Volume) 22 | -------------------------------------------------------------------------------- /app_1_simple_stock_price/myapp2.py: -------------------------------------------------------------------------------- 1 | import yfinance as yf 2 | import streamlit as st 3 | 4 | st.write(""" 5 | # Simple Stock Price App 6 | 7 | Shown are the stock **closing price** and ***volume*** of Google! 8 | 9 | """) 10 | 11 | # https://towardsdatascience.com/how-to-get-stock-data-using-python-c0de1df17e75 12 | #define the ticker symbol 13 | tickerSymbol = 'GOOGL' 14 | #get data on this ticker 15 | tickerData = yf.Ticker(tickerSymbol) 16 | #get the historical prices for this ticker 17 | tickerDf = tickerData.history(period='1d', start='2010-5-31', end='2020-5-31') 18 | # Open High Low Close Volume Dividends Stock Splits 19 | 20 | st.write(""" 21 | ## Closing Price 22 | """) 23 | st.line_chart(tickerDf.Close) 24 | st.write(""" 25 | ## Volume Price 26 | """) 27 | st.line_chart(tickerDf.Volume) 28 | -------------------------------------------------------------------------------- /app_2_simple_bioinformatics_dna/dna-app.py: -------------------------------------------------------------------------------- 1 | ###################### 2 | # Import libraries 3 | ###################### 4 | 5 | import pandas as pd 6 | import streamlit as st 7 | import altair as alt 8 | from PIL import Image 9 | 10 | ###################### 11 | # Page Title 12 | ###################### 13 | 14 | image = Image.open('dna-logo.jpg') 15 | 16 | st.image(image, use_column_width=True) 17 | 18 | st.write(""" 19 | # DNA Nucleotide Count Web App 20 | 21 | This app counts the nucleotide composition of query DNA! 22 | 23 | *** 24 | """) 25 | 26 | 27 | ###################### 28 | # Input Text Box 29 | ###################### 30 | 31 | #st.sidebar.header('Enter DNA sequence') 32 | st.header('Enter DNA sequence') 33 | 34 | sequence_input = ">DNA Query 2\nGAACACGTGGAGGCAAACAGGAAGGTGAAGAAGAACTTATCCTATCAGGACGGAAGGTCCTGTGCTCGGG\nATCTTCCAGACGTCGCGACTCTAAATTGCCCCCTCTGAGGTCAAGGAACACAAGATGGTTTTGGAAATGC\nTGAACCCGATACATTATAACATCACCAGCATCGTGCCTGAAGCCATGCCTGCTGCCACCATGCCAGTCCT" 35 | 36 | #sequence = st.sidebar.text_area("Sequence input", sequence_input, height=250) 37 | sequence = st.text_area("Sequence input", sequence_input, height=250) 38 | sequence = sequence.splitlines() 39 | sequence = sequence[1:] # Skips the sequence name (first line) 40 | sequence = ''.join(sequence) # Concatenates list to string 41 | 42 | st.write(""" 43 | *** 44 | """) 45 | 46 | ## Prints the input DNA sequence 47 | st.header('INPUT (DNA Query)') 48 | sequence 49 | 50 | ## DNA nucleotide count 51 | st.header('OUTPUT (DNA Nucleotide Count)') 52 | 53 | ### 1. Print dictionary 54 | st.subheader('1. Print dictionary') 55 | def DNA_nucleotide_count(seq): 56 | d = dict([ 57 | ('A',seq.count('A')), 58 | ('T',seq.count('T')), 59 | ('G',seq.count('G')), 60 | ('C',seq.count('C')) 61 | ]) 62 | return d 63 | 64 | X = DNA_nucleotide_count(sequence) 65 | 66 | #X_label = list(X) 67 | #X_values = list(X.values()) 68 | 69 | X 70 | 71 | ### 2. Print text 72 | st.subheader('2. Print text') 73 | st.write('There are ' + str(X['A']) + ' adenine (A)') 74 | st.write('There are ' + str(X['T']) + ' thymine (T)') 75 | st.write('There are ' + str(X['G']) + ' guanine (G)') 76 | st.write('There are ' + str(X['C']) + ' cytosine (C)') 77 | 78 | ### 3. Display DataFrame 79 | st.subheader('3. Display DataFrame') 80 | df = pd.DataFrame.from_dict(X, orient='index') 81 | df = df.rename({0: 'count'}, axis='columns') 82 | df.reset_index(inplace=True) 83 | df = df.rename(columns = {'index':'nucleotide'}) 84 | st.write(df) 85 | 86 | ### 4. Display Bar Chart using Altair 87 | st.subheader('4. Display Bar chart') 88 | p = alt.Chart(df).mark_bar().encode( 89 | x='nucleotide', 90 | y='count' 91 | ) 92 | p = p.properties( 93 | width=alt.Step(80) # controls width of bar. 94 | ) 95 | st.write(p) 96 | -------------------------------------------------------------------------------- /app_2_simple_bioinformatics_dna/dna-logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataprofessor/streamlit_freecodecamp/d44c4c1320f8417b3d5494902e5366b715314528/app_2_simple_bioinformatics_dna/dna-logo.jpg -------------------------------------------------------------------------------- /app_3_eda_basketball/basketball_app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import pandas as pd 3 | import base64 4 | import matplotlib.pyplot as plt 5 | import seaborn as sns 6 | import numpy as np 7 | 8 | st.title('NBA Player Stats Explorer') 9 | 10 | st.markdown(""" 11 | This app performs simple webscraping of NBA player stats data! 12 | * **Python libraries:** base64, pandas, streamlit 13 | * **Data source:** [Basketball-reference.com](https://www.basketball-reference.com/). 14 | """) 15 | 16 | st.sidebar.header('User Input Features') 17 | selected_year = st.sidebar.selectbox('Year', list(reversed(range(1950,2020)))) 18 | 19 | # Web scraping of NBA player stats 20 | @st.cache 21 | def load_data(year): 22 | url = "https://www.basketball-reference.com/leagues/NBA_" + str(year) + "_per_game.html" 23 | html = pd.read_html(url, header = 0) 24 | df = html[0] 25 | raw = df.drop(df[df.Age == 'Age'].index) # Deletes repeating headers in content 26 | raw = raw.fillna(0) 27 | playerstats = raw.drop(['Rk'], axis=1) 28 | return playerstats 29 | playerstats = load_data(selected_year) 30 | 31 | # Sidebar - Team selection 32 | sorted_unique_team = sorted(playerstats.Tm.unique()) 33 | selected_team = st.sidebar.multiselect('Team', sorted_unique_team, sorted_unique_team) 34 | 35 | # Sidebar - Position selection 36 | unique_pos = ['C','PF','SF','PG','SG'] 37 | selected_pos = st.sidebar.multiselect('Position', unique_pos, unique_pos) 38 | 39 | # Filtering data 40 | df_selected_team = playerstats[(playerstats.Tm.isin(selected_team)) & (playerstats.Pos.isin(selected_pos))] 41 | 42 | st.header('Display Player Stats of Selected Team(s)') 43 | st.write('Data Dimension: ' + str(df_selected_team.shape[0]) + ' rows and ' + str(df_selected_team.shape[1]) + ' columns.') 44 | st.dataframe(df_selected_team) 45 | 46 | # Download NBA player stats data 47 | # https://discuss.streamlit.io/t/how-to-download-file-in-streamlit/1806 48 | def filedownload(df): 49 | csv = df.to_csv(index=False) 50 | b64 = base64.b64encode(csv.encode()).decode() # strings <-> bytes conversions 51 | href = f'Download CSV File' 52 | return href 53 | 54 | st.markdown(filedownload(df_selected_team), unsafe_allow_html=True) 55 | 56 | # Heatmap 57 | if st.button('Intercorrelation Heatmap'): 58 | st.header('Intercorrelation Matrix Heatmap') 59 | df_selected_team.to_csv('output.csv',index=False) 60 | df = pd.read_csv('output.csv') 61 | 62 | corr = df.corr() 63 | mask = np.zeros_like(corr) 64 | mask[np.triu_indices_from(mask)] = True 65 | with sns.axes_style("white"): 66 | f, ax = plt.subplots(figsize=(7, 5)) 67 | ax = sns.heatmap(corr, mask=mask, vmax=1, square=True) 68 | st.pyplot() 69 | -------------------------------------------------------------------------------- /app_4_eda_football/football_app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import pandas as pd 3 | import base64 4 | import matplotlib.pyplot as plt 5 | import seaborn as sns 6 | import numpy as np 7 | 8 | st.title('NFL Football Stats (Rushing) Explorer') 9 | 10 | st.markdown(""" 11 | This app performs simple webscraping of NFL Football player stats data (focusing on Rushing)! 12 | * **Python libraries:** base64, pandas, streamlit, numpy, matplotlib, seaborn 13 | * **Data source:** [pro-football-reference.com](https://www.pro-football-reference.com/). 14 | """) 15 | 16 | st.sidebar.header('User Input Features') 17 | selected_year = st.sidebar.selectbox('Year', list(reversed(range(1990,2020)))) 18 | 19 | # Web scraping of NFL player stats 20 | # https://www.pro-football-reference.com/years/2019/rushing.htm 21 | @st.cache 22 | def load_data(year): 23 | url = "https://www.pro-football-reference.com/years/" + str(year) + "/rushing.htm" 24 | html = pd.read_html(url, header = 1) 25 | df = html[0] 26 | raw = df.drop(df[df.Age == 'Age'].index) # Deletes repeating headers in content 27 | raw = raw.fillna(0) 28 | playerstats = raw.drop(['Rk'], axis=1) 29 | return playerstats 30 | playerstats = load_data(selected_year) 31 | 32 | # Sidebar - Team selection 33 | sorted_unique_team = sorted(playerstats.Tm.unique()) 34 | selected_team = st.sidebar.multiselect('Team', sorted_unique_team, sorted_unique_team) 35 | 36 | # Sidebar - Position selection 37 | unique_pos = ['RB','QB','WR','FB','TE'] 38 | selected_pos = st.sidebar.multiselect('Position', unique_pos, unique_pos) 39 | 40 | # Filtering data 41 | df_selected_team = playerstats[(playerstats.Tm.isin(selected_team)) & (playerstats.Pos.isin(selected_pos))] 42 | 43 | st.header('Display Player Stats of Selected Team(s)') 44 | st.write('Data Dimension: ' + str(df_selected_team.shape[0]) + ' rows and ' + str(df_selected_team.shape[1]) + ' columns.') 45 | st.dataframe(df_selected_team) 46 | 47 | # Download NBA player stats data 48 | # https://discuss.streamlit.io/t/how-to-download-file-in-streamlit/1806 49 | def filedownload(df): 50 | csv = df.to_csv(index=False) 51 | b64 = base64.b64encode(csv.encode()).decode() # strings <-> bytes conversions 52 | href = f'Download CSV File' 53 | return href 54 | 55 | st.markdown(filedownload(df_selected_team), unsafe_allow_html=True) 56 | 57 | # Heatmap 58 | if st.button('Intercorrelation Heatmap'): 59 | st.header('Intercorrelation Matrix Heatmap') 60 | df_selected_team.to_csv('output.csv',index=False) 61 | df = pd.read_csv('output.csv') 62 | 63 | corr = df.corr() 64 | mask = np.zeros_like(corr) 65 | mask[np.triu_indices_from(mask)] = True 66 | with sns.axes_style("white"): 67 | f, ax = plt.subplots(figsize=(7, 5)) 68 | ax = sns.heatmap(corr, mask=mask, vmax=1, square=True) 69 | st.pyplot() 70 | -------------------------------------------------------------------------------- /app_5_eda_sp500_stock/sp500-app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import pandas as pd 3 | import base64 4 | import matplotlib.pyplot as plt 5 | import seaborn as sns 6 | import numpy as np 7 | import yfinance as yf 8 | 9 | st.title('S&P 500 App') 10 | 11 | st.markdown(""" 12 | This app retrieves the list of the **S&P 500** (from Wikipedia) and its corresponding **stock closing price** (year-to-date)! 13 | * **Python libraries:** base64, pandas, streamlit, numpy, matplotlib, seaborn 14 | * **Data source:** [Wikipedia](https://en.wikipedia.org/wiki/List_of_S%26P_500_companies). 15 | """) 16 | 17 | st.sidebar.header('User Input Features') 18 | 19 | # Web scraping of S&P 500 data 20 | # 21 | @st.cache 22 | def load_data(): 23 | url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies' 24 | html = pd.read_html(url, header = 0) 25 | df = html[0] 26 | return df 27 | 28 | df = load_data() 29 | sector = df.groupby('GICS Sector') 30 | 31 | # Sidebar - Sector selection 32 | sorted_sector_unique = sorted( df['GICS Sector'].unique() ) 33 | selected_sector = st.sidebar.multiselect('Sector', sorted_sector_unique, sorted_sector_unique) 34 | 35 | # Filtering data 36 | df_selected_sector = df[ (df['GICS Sector'].isin(selected_sector)) ] 37 | 38 | st.header('Display Companies in Selected Sector') 39 | st.write('Data Dimension: ' + str(df_selected_sector.shape[0]) + ' rows and ' + str(df_selected_sector.shape[1]) + ' columns.') 40 | st.dataframe(df_selected_sector) 41 | 42 | # Download S&P500 data 43 | # https://discuss.streamlit.io/t/how-to-download-file-in-streamlit/1806 44 | def filedownload(df): 45 | csv = df.to_csv(index=False) 46 | b64 = base64.b64encode(csv.encode()).decode() # strings <-> bytes conversions 47 | href = f'Download CSV File' 48 | return href 49 | 50 | st.markdown(filedownload(df_selected_sector), unsafe_allow_html=True) 51 | 52 | # https://pypi.org/project/yfinance/ 53 | 54 | data = yf.download( 55 | tickers = list(df_selected_sector[:10].Symbol), 56 | period = "ytd", 57 | interval = "1d", 58 | group_by = 'ticker', 59 | auto_adjust = True, 60 | prepost = True, 61 | threads = True, 62 | proxy = None 63 | ) 64 | 65 | # Plot Closing Price of Query Symbol 66 | def price_plot(symbol): 67 | df = pd.DataFrame(data[symbol].Close) 68 | df['Date'] = df.index 69 | plt.fill_between(df.Date, df.Close, color='skyblue', alpha=0.3) 70 | plt.plot(df.Date, df.Close, color='skyblue', alpha=0.8) 71 | plt.xticks(rotation=90) 72 | plt.title(symbol, fontweight='bold') 73 | plt.xlabel('Date', fontweight='bold') 74 | plt.ylabel('Closing Price', fontweight='bold') 75 | return st.pyplot() 76 | 77 | num_company = st.sidebar.slider('Number of Companies', 1, 5) 78 | 79 | if st.button('Show Plots'): 80 | st.header('Stock Closing Price') 81 | for i in list(df_selected_sector.Symbol)[:num_company]: 82 | price_plot(i) 83 | -------------------------------------------------------------------------------- /app_6_eda_cryptocurrency/crypto-price-app.py: -------------------------------------------------------------------------------- 1 | # This app is for educational purpose only. Insights gained is not financial advice. Use at your own risk! 2 | import streamlit as st 3 | from PIL import Image 4 | import pandas as pd 5 | import base64 6 | import matplotlib.pyplot as plt 7 | from bs4 import BeautifulSoup 8 | import requests 9 | import json 10 | import time 11 | #---------------------------------# 12 | # New feature (make sure to upgrade your streamlit library) 13 | # pip install --upgrade streamlit 14 | 15 | #---------------------------------# 16 | # Page layout 17 | ## Page expands to full width 18 | st.set_page_config(layout="wide") 19 | #---------------------------------# 20 | # Title 21 | 22 | image = Image.open('logo.jpg') 23 | 24 | st.image(image, width = 500) 25 | 26 | st.title('Crypto Price App') 27 | st.markdown(""" 28 | This app retrieves cryptocurrency prices for the top 100 cryptocurrency from the **CoinMarketCap**! 29 | 30 | """) 31 | #---------------------------------# 32 | # About 33 | expander_bar = st.beta_expander("About") 34 | expander_bar.markdown(""" 35 | * **Python libraries:** base64, pandas, streamlit, numpy, matplotlib, seaborn, BeautifulSoup, requests, json, time 36 | * **Data source:** [CoinMarketCap](http://coinmarketcap.com). 37 | * **Credit:** Web scraper adapted from the Medium article *[Web Scraping Crypto Prices With Python](https://towardsdatascience.com/web-scraping-crypto-prices-with-python-41072ea5b5bf)* written by [Bryan Feng](https://medium.com/@bryanf). 38 | """) 39 | 40 | 41 | #---------------------------------# 42 | # Page layout (continued) 43 | ## Divide page to 3 columns (col1 = sidebar, col2 and col3 = page contents) 44 | col1 = st.sidebar 45 | col2, col3 = st.beta_columns((2,1)) 46 | 47 | #---------------------------------# 48 | # Sidebar + Main panel 49 | col1.header('Input Options') 50 | 51 | ## Sidebar - Currency price unit 52 | currency_price_unit = col1.selectbox('Select currency for price', ('USD', 'BTC', 'ETH')) 53 | 54 | # Web scraping of CoinMarketCap data 55 | @st.cache 56 | def load_data(): 57 | cmc = requests.get('https://coinmarketcap.com') 58 | soup = BeautifulSoup(cmc.content, 'html.parser') 59 | 60 | data = soup.find('script', id='__NEXT_DATA__', type='application/json') 61 | coins = {} 62 | coin_data = json.loads(data.contents[0]) 63 | listings = coin_data['props']['initialState']['cryptocurrency']['listingLatest']['data'] 64 | for i in listings: 65 | coins[str(i['id'])] = i['slug'] 66 | 67 | coin_name = [] 68 | coin_symbol = [] 69 | market_cap = [] 70 | percent_change_1h = [] 71 | percent_change_24h = [] 72 | percent_change_7d = [] 73 | price = [] 74 | volume_24h = [] 75 | 76 | for i in listings: 77 | coin_name.append(i['slug']) 78 | coin_symbol.append(i['symbol']) 79 | price.append(i['quote'][currency_price_unit]['price']) 80 | percent_change_1h.append(i['quote'][currency_price_unit]['percent_change_1h']) 81 | percent_change_24h.append(i['quote'][currency_price_unit]['percent_change_24h']) 82 | percent_change_7d.append(i['quote'][currency_price_unit]['percent_change_7d']) 83 | market_cap.append(i['quote'][currency_price_unit]['market_cap']) 84 | volume_24h.append(i['quote'][currency_price_unit]['volume_24h']) 85 | 86 | df = pd.DataFrame(columns=['coin_name', 'coin_symbol', 'market_cap', 'percent_change_1h', 'percent_change_24h', 'percent_change_7d', 'price', 'volume_24h']) 87 | df['coin_name'] = coin_name 88 | df['coin_symbol'] = coin_symbol 89 | df['price'] = price 90 | df['percent_change_1h'] = percent_change_1h 91 | df['percent_change_24h'] = percent_change_24h 92 | df['percent_change_7d'] = percent_change_7d 93 | df['market_cap'] = market_cap 94 | df['volume_24h'] = volume_24h 95 | return df 96 | 97 | df = load_data() 98 | 99 | ## Sidebar - Cryptocurrency selections 100 | sorted_coin = sorted( df['coin_symbol'] ) 101 | selected_coin = col1.multiselect('Cryptocurrency', sorted_coin, sorted_coin) 102 | 103 | df_selected_coin = df[ (df['coin_symbol'].isin(selected_coin)) ] # Filtering data 104 | 105 | ## Sidebar - Number of coins to display 106 | num_coin = col1.slider('Display Top N Coins', 1, 100, 100) 107 | df_coins = df_selected_coin[:num_coin] 108 | 109 | ## Sidebar - Percent change timeframe 110 | percent_timeframe = col1.selectbox('Percent change time frame', 111 | ['7d','24h', '1h']) 112 | percent_dict = {"7d":'percent_change_7d',"24h":'percent_change_24h',"1h":'percent_change_1h'} 113 | selected_percent_timeframe = percent_dict[percent_timeframe] 114 | 115 | ## Sidebar - Sorting values 116 | sort_values = col1.selectbox('Sort values?', ['Yes', 'No']) 117 | 118 | col2.subheader('Price Data of Selected Cryptocurrency') 119 | col2.write('Data Dimension: ' + str(df_selected_coin.shape[0]) + ' rows and ' + str(df_selected_coin.shape[1]) + ' columns.') 120 | 121 | col2.dataframe(df_coins) 122 | 123 | # Download CSV data 124 | # https://discuss.streamlit.io/t/how-to-download-file-in-streamlit/1806 125 | def filedownload(df): 126 | csv = df.to_csv(index=False) 127 | b64 = base64.b64encode(csv.encode()).decode() # strings <-> bytes conversions 128 | href = f'Download CSV File' 129 | return href 130 | 131 | col2.markdown(filedownload(df_selected_coin), unsafe_allow_html=True) 132 | 133 | #---------------------------------# 134 | # Preparing data for Bar plot of % Price change 135 | col2.subheader('Table of % Price Change') 136 | df_change = pd.concat([df_coins.coin_symbol, df_coins.percent_change_1h, df_coins.percent_change_24h, df_coins.percent_change_7d], axis=1) 137 | df_change = df_change.set_index('coin_symbol') 138 | df_change['positive_percent_change_1h'] = df_change['percent_change_1h'] > 0 139 | df_change['positive_percent_change_24h'] = df_change['percent_change_24h'] > 0 140 | df_change['positive_percent_change_7d'] = df_change['percent_change_7d'] > 0 141 | col2.dataframe(df_change) 142 | 143 | # Conditional creation of Bar plot (time frame) 144 | col3.subheader('Bar plot of % Price Change') 145 | 146 | if percent_timeframe == '7d': 147 | if sort_values == 'Yes': 148 | df_change = df_change.sort_values(by=['percent_change_7d']) 149 | col3.write('*7 days period*') 150 | plt.figure(figsize=(5,25)) 151 | plt.subplots_adjust(top = 1, bottom = 0) 152 | df_change['percent_change_7d'].plot(kind='barh', color=df_change.positive_percent_change_7d.map({True: 'g', False: 'r'})) 153 | col3.pyplot(plt) 154 | elif percent_timeframe == '24h': 155 | if sort_values == 'Yes': 156 | df_change = df_change.sort_values(by=['percent_change_24h']) 157 | col3.write('*24 hour period*') 158 | plt.figure(figsize=(5,25)) 159 | plt.subplots_adjust(top = 1, bottom = 0) 160 | df_change['percent_change_24h'].plot(kind='barh', color=df_change.positive_percent_change_24h.map({True: 'g', False: 'r'})) 161 | col3.pyplot(plt) 162 | else: 163 | if sort_values == 'Yes': 164 | df_change = df_change.sort_values(by=['percent_change_1h']) 165 | col3.write('*1 hour period*') 166 | plt.figure(figsize=(5,25)) 167 | plt.subplots_adjust(top = 1, bottom = 0) 168 | df_change['percent_change_1h'].plot(kind='barh', color=df_change.positive_percent_change_1h.map({True: 'g', False: 'r'})) 169 | col3.pyplot(plt) 170 | -------------------------------------------------------------------------------- /app_6_eda_cryptocurrency/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataprofessor/streamlit_freecodecamp/d44c4c1320f8417b3d5494902e5366b715314528/app_6_eda_cryptocurrency/logo.jpg -------------------------------------------------------------------------------- /app_7_classification_iris/iris-ml-app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import pandas as pd 3 | from sklearn import datasets 4 | from sklearn.ensemble import RandomForestClassifier 5 | 6 | st.write(""" 7 | # Simple Iris Flower Prediction App 8 | 9 | This app predicts the **Iris flower** type! 10 | """) 11 | 12 | st.sidebar.header('User Input Parameters') 13 | 14 | def user_input_features(): 15 | sepal_length = st.sidebar.slider('Sepal length', 4.3, 7.9, 5.4) 16 | sepal_width = st.sidebar.slider('Sepal width', 2.0, 4.4, 3.4) 17 | petal_length = st.sidebar.slider('Petal length', 1.0, 6.9, 1.3) 18 | petal_width = st.sidebar.slider('Petal width', 0.1, 2.5, 0.2) 19 | data = {'sepal_length': sepal_length, 20 | 'sepal_width': sepal_width, 21 | 'petal_length': petal_length, 22 | 'petal_width': petal_width} 23 | features = pd.DataFrame(data, index=[0]) 24 | return features 25 | 26 | df = user_input_features() 27 | 28 | st.subheader('User Input parameters') 29 | st.write(df) 30 | 31 | iris = datasets.load_iris() 32 | X = iris.data 33 | Y = iris.target 34 | 35 | clf = RandomForestClassifier() 36 | clf.fit(X, Y) 37 | 38 | prediction = clf.predict(df) 39 | prediction_proba = clf.predict_proba(df) 40 | 41 | st.subheader('Class labels and their corresponding index number') 42 | st.write(iris.target_names) 43 | 44 | st.subheader('Prediction') 45 | st.write(iris.target_names[prediction]) 46 | #st.write(prediction) 47 | 48 | st.subheader('Prediction Probability') 49 | st.write(prediction_proba) 50 | -------------------------------------------------------------------------------- /app_8_classification_penguins/penguins-app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import pandas as pd 3 | import numpy as np 4 | import pickle 5 | from sklearn.ensemble import RandomForestClassifier 6 | 7 | st.write(""" 8 | # Penguin Prediction App 9 | 10 | This app predicts the **Palmer Penguin** species! 11 | 12 | Data obtained from the [palmerpenguins library](https://github.com/allisonhorst/palmerpenguins) in R by Allison Horst. 13 | """) 14 | 15 | st.sidebar.header('User Input Features') 16 | 17 | st.sidebar.markdown(""" 18 | [Example CSV input file](https://raw.githubusercontent.com/dataprofessor/data/master/penguins_example.csv) 19 | """) 20 | 21 | # Collects user input features into dataframe 22 | uploaded_file = st.sidebar.file_uploader("Upload your input CSV file", type=["csv"]) 23 | if uploaded_file is not None: 24 | input_df = pd.read_csv(uploaded_file) 25 | else: 26 | def user_input_features(): 27 | island = st.sidebar.selectbox('Island',('Biscoe','Dream','Torgersen')) 28 | sex = st.sidebar.selectbox('Sex',('male','female')) 29 | bill_length_mm = st.sidebar.slider('Bill length (mm)', 32.1,59.6,43.9) 30 | bill_depth_mm = st.sidebar.slider('Bill depth (mm)', 13.1,21.5,17.2) 31 | flipper_length_mm = st.sidebar.slider('Flipper length (mm)', 172.0,231.0,201.0) 32 | body_mass_g = st.sidebar.slider('Body mass (g)', 2700.0,6300.0,4207.0) 33 | data = {'island': island, 34 | 'bill_length_mm': bill_length_mm, 35 | 'bill_depth_mm': bill_depth_mm, 36 | 'flipper_length_mm': flipper_length_mm, 37 | 'body_mass_g': body_mass_g, 38 | 'sex': sex} 39 | features = pd.DataFrame(data, index=[0]) 40 | return features 41 | input_df = user_input_features() 42 | 43 | # Combines user input features with entire penguins dataset 44 | # This will be useful for the encoding phase 45 | penguins_raw = pd.read_csv('penguins_cleaned.csv') 46 | penguins = penguins_raw.drop(columns=['species']) 47 | df = pd.concat([input_df,penguins],axis=0) 48 | 49 | # Encoding of ordinal features 50 | # https://www.kaggle.com/pratik1120/penguin-dataset-eda-classification-and-clustering 51 | encode = ['sex','island'] 52 | for col in encode: 53 | dummy = pd.get_dummies(df[col], prefix=col) 54 | df = pd.concat([df,dummy], axis=1) 55 | del df[col] 56 | df = df[:1] # Selects only the first row (the user input data) 57 | 58 | # Displays the user input features 59 | st.subheader('User Input features') 60 | 61 | if uploaded_file is not None: 62 | st.write(df) 63 | else: 64 | st.write('Awaiting CSV file to be uploaded. Currently using example input parameters (shown below).') 65 | st.write(df) 66 | 67 | # Reads in saved classification model 68 | load_clf = pickle.load(open('penguins_clf.pkl', 'rb')) 69 | 70 | # Apply model to make predictions 71 | prediction = load_clf.predict(df) 72 | prediction_proba = load_clf.predict_proba(df) 73 | 74 | 75 | st.subheader('Prediction') 76 | penguins_species = np.array(['Adelie','Chinstrap','Gentoo']) 77 | st.write(penguins_species[prediction]) 78 | 79 | st.subheader('Prediction Probability') 80 | st.write(prediction_proba) 81 | -------------------------------------------------------------------------------- /app_8_classification_penguins/penguins-model-building.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | penguins = pd.read_csv('penguins_cleaned.csv') 3 | 4 | # Ordinal feature encoding 5 | # https://www.kaggle.com/pratik1120/penguin-dataset-eda-classification-and-clustering 6 | df = penguins.copy() 7 | target = 'species' 8 | encode = ['sex','island'] 9 | 10 | for col in encode: 11 | dummy = pd.get_dummies(df[col], prefix=col) 12 | df = pd.concat([df,dummy], axis=1) 13 | del df[col] 14 | 15 | target_mapper = {'Adelie':0, 'Chinstrap':1, 'Gentoo':2} 16 | def target_encode(val): 17 | return target_mapper[val] 18 | 19 | df['species'] = df['species'].apply(target_encode) 20 | 21 | # Separating X and y 22 | X = df.drop('species', axis=1) 23 | Y = df['species'] 24 | 25 | # Build random forest model 26 | from sklearn.ensemble import RandomForestClassifier 27 | clf = RandomForestClassifier() 28 | clf.fit(X, Y) 29 | 30 | # Saving the model 31 | import pickle 32 | pickle.dump(clf, open('penguins_clf.pkl', 'wb')) 33 | -------------------------------------------------------------------------------- /app_8_classification_penguins/penguins_cleaned.csv: -------------------------------------------------------------------------------- 1 | "species","island","bill_length_mm","bill_depth_mm","flipper_length_mm","body_mass_g","sex" 2 | "Adelie","Torgersen",39.1,18.7,181,3750,"male" 3 | "Adelie","Torgersen",39.5,17.4,186,3800,"female" 4 | "Adelie","Torgersen",40.3,18,195,3250,"female" 5 | "Adelie","Torgersen",36.7,19.3,193,3450,"female" 6 | "Adelie","Torgersen",39.3,20.6,190,3650,"male" 7 | "Adelie","Torgersen",38.9,17.8,181,3625,"female" 8 | "Adelie","Torgersen",39.2,19.6,195,4675,"male" 9 | "Adelie","Torgersen",41.1,17.6,182,3200,"female" 10 | "Adelie","Torgersen",38.6,21.2,191,3800,"male" 11 | "Adelie","Torgersen",34.6,21.1,198,4400,"male" 12 | "Adelie","Torgersen",36.6,17.8,185,3700,"female" 13 | "Adelie","Torgersen",38.7,19,195,3450,"female" 14 | "Adelie","Torgersen",42.5,20.7,197,4500,"male" 15 | "Adelie","Torgersen",34.4,18.4,184,3325,"female" 16 | "Adelie","Torgersen",46,21.5,194,4200,"male" 17 | "Adelie","Biscoe",37.8,18.3,174,3400,"female" 18 | "Adelie","Biscoe",37.7,18.7,180,3600,"male" 19 | "Adelie","Biscoe",35.9,19.2,189,3800,"female" 20 | "Adelie","Biscoe",38.2,18.1,185,3950,"male" 21 | "Adelie","Biscoe",38.8,17.2,180,3800,"male" 22 | "Adelie","Biscoe",35.3,18.9,187,3800,"female" 23 | "Adelie","Biscoe",40.6,18.6,183,3550,"male" 24 | "Adelie","Biscoe",40.5,17.9,187,3200,"female" 25 | "Adelie","Biscoe",37.9,18.6,172,3150,"female" 26 | "Adelie","Biscoe",40.5,18.9,180,3950,"male" 27 | "Adelie","Dream",39.5,16.7,178,3250,"female" 28 | "Adelie","Dream",37.2,18.1,178,3900,"male" 29 | "Adelie","Dream",39.5,17.8,188,3300,"female" 30 | "Adelie","Dream",40.9,18.9,184,3900,"male" 31 | "Adelie","Dream",36.4,17,195,3325,"female" 32 | "Adelie","Dream",39.2,21.1,196,4150,"male" 33 | "Adelie","Dream",38.8,20,190,3950,"male" 34 | "Adelie","Dream",42.2,18.5,180,3550,"female" 35 | "Adelie","Dream",37.6,19.3,181,3300,"female" 36 | "Adelie","Dream",39.8,19.1,184,4650,"male" 37 | "Adelie","Dream",36.5,18,182,3150,"female" 38 | "Adelie","Dream",40.8,18.4,195,3900,"male" 39 | "Adelie","Dream",36,18.5,186,3100,"female" 40 | "Adelie","Dream",44.1,19.7,196,4400,"male" 41 | "Adelie","Dream",37,16.9,185,3000,"female" 42 | "Adelie","Dream",39.6,18.8,190,4600,"male" 43 | "Adelie","Dream",41.1,19,182,3425,"male" 44 | "Adelie","Dream",36,17.9,190,3450,"female" 45 | "Adelie","Dream",42.3,21.2,191,4150,"male" 46 | "Adelie","Biscoe",39.6,17.7,186,3500,"female" 47 | "Adelie","Biscoe",40.1,18.9,188,4300,"male" 48 | "Adelie","Biscoe",35,17.9,190,3450,"female" 49 | "Adelie","Biscoe",42,19.5,200,4050,"male" 50 | "Adelie","Biscoe",34.5,18.1,187,2900,"female" 51 | "Adelie","Biscoe",41.4,18.6,191,3700,"male" 52 | "Adelie","Biscoe",39,17.5,186,3550,"female" 53 | "Adelie","Biscoe",40.6,18.8,193,3800,"male" 54 | "Adelie","Biscoe",36.5,16.6,181,2850,"female" 55 | "Adelie","Biscoe",37.6,19.1,194,3750,"male" 56 | "Adelie","Biscoe",35.7,16.9,185,3150,"female" 57 | "Adelie","Biscoe",41.3,21.1,195,4400,"male" 58 | "Adelie","Biscoe",37.6,17,185,3600,"female" 59 | "Adelie","Biscoe",41.1,18.2,192,4050,"male" 60 | "Adelie","Biscoe",36.4,17.1,184,2850,"female" 61 | "Adelie","Biscoe",41.6,18,192,3950,"male" 62 | "Adelie","Biscoe",35.5,16.2,195,3350,"female" 63 | "Adelie","Biscoe",41.1,19.1,188,4100,"male" 64 | "Adelie","Torgersen",35.9,16.6,190,3050,"female" 65 | "Adelie","Torgersen",41.8,19.4,198,4450,"male" 66 | "Adelie","Torgersen",33.5,19,190,3600,"female" 67 | "Adelie","Torgersen",39.7,18.4,190,3900,"male" 68 | "Adelie","Torgersen",39.6,17.2,196,3550,"female" 69 | "Adelie","Torgersen",45.8,18.9,197,4150,"male" 70 | "Adelie","Torgersen",35.5,17.5,190,3700,"female" 71 | "Adelie","Torgersen",42.8,18.5,195,4250,"male" 72 | "Adelie","Torgersen",40.9,16.8,191,3700,"female" 73 | "Adelie","Torgersen",37.2,19.4,184,3900,"male" 74 | "Adelie","Torgersen",36.2,16.1,187,3550,"female" 75 | "Adelie","Torgersen",42.1,19.1,195,4000,"male" 76 | "Adelie","Torgersen",34.6,17.2,189,3200,"female" 77 | "Adelie","Torgersen",42.9,17.6,196,4700,"male" 78 | "Adelie","Torgersen",36.7,18.8,187,3800,"female" 79 | "Adelie","Torgersen",35.1,19.4,193,4200,"male" 80 | "Adelie","Dream",37.3,17.8,191,3350,"female" 81 | "Adelie","Dream",41.3,20.3,194,3550,"male" 82 | "Adelie","Dream",36.3,19.5,190,3800,"male" 83 | "Adelie","Dream",36.9,18.6,189,3500,"female" 84 | "Adelie","Dream",38.3,19.2,189,3950,"male" 85 | "Adelie","Dream",38.9,18.8,190,3600,"female" 86 | "Adelie","Dream",35.7,18,202,3550,"female" 87 | "Adelie","Dream",41.1,18.1,205,4300,"male" 88 | "Adelie","Dream",34,17.1,185,3400,"female" 89 | "Adelie","Dream",39.6,18.1,186,4450,"male" 90 | "Adelie","Dream",36.2,17.3,187,3300,"female" 91 | "Adelie","Dream",40.8,18.9,208,4300,"male" 92 | "Adelie","Dream",38.1,18.6,190,3700,"female" 93 | "Adelie","Dream",40.3,18.5,196,4350,"male" 94 | "Adelie","Dream",33.1,16.1,178,2900,"female" 95 | "Adelie","Dream",43.2,18.5,192,4100,"male" 96 | "Adelie","Biscoe",35,17.9,192,3725,"female" 97 | "Adelie","Biscoe",41,20,203,4725,"male" 98 | "Adelie","Biscoe",37.7,16,183,3075,"female" 99 | "Adelie","Biscoe",37.8,20,190,4250,"male" 100 | "Adelie","Biscoe",37.9,18.6,193,2925,"female" 101 | "Adelie","Biscoe",39.7,18.9,184,3550,"male" 102 | "Adelie","Biscoe",38.6,17.2,199,3750,"female" 103 | "Adelie","Biscoe",38.2,20,190,3900,"male" 104 | "Adelie","Biscoe",38.1,17,181,3175,"female" 105 | "Adelie","Biscoe",43.2,19,197,4775,"male" 106 | "Adelie","Biscoe",38.1,16.5,198,3825,"female" 107 | "Adelie","Biscoe",45.6,20.3,191,4600,"male" 108 | "Adelie","Biscoe",39.7,17.7,193,3200,"female" 109 | "Adelie","Biscoe",42.2,19.5,197,4275,"male" 110 | "Adelie","Biscoe",39.6,20.7,191,3900,"female" 111 | "Adelie","Biscoe",42.7,18.3,196,4075,"male" 112 | "Adelie","Torgersen",38.6,17,188,2900,"female" 113 | "Adelie","Torgersen",37.3,20.5,199,3775,"male" 114 | "Adelie","Torgersen",35.7,17,189,3350,"female" 115 | "Adelie","Torgersen",41.1,18.6,189,3325,"male" 116 | "Adelie","Torgersen",36.2,17.2,187,3150,"female" 117 | "Adelie","Torgersen",37.7,19.8,198,3500,"male" 118 | "Adelie","Torgersen",40.2,17,176,3450,"female" 119 | "Adelie","Torgersen",41.4,18.5,202,3875,"male" 120 | "Adelie","Torgersen",35.2,15.9,186,3050,"female" 121 | "Adelie","Torgersen",40.6,19,199,4000,"male" 122 | "Adelie","Torgersen",38.8,17.6,191,3275,"female" 123 | "Adelie","Torgersen",41.5,18.3,195,4300,"male" 124 | "Adelie","Torgersen",39,17.1,191,3050,"female" 125 | "Adelie","Torgersen",44.1,18,210,4000,"male" 126 | "Adelie","Torgersen",38.5,17.9,190,3325,"female" 127 | "Adelie","Torgersen",43.1,19.2,197,3500,"male" 128 | "Adelie","Dream",36.8,18.5,193,3500,"female" 129 | "Adelie","Dream",37.5,18.5,199,4475,"male" 130 | "Adelie","Dream",38.1,17.6,187,3425,"female" 131 | "Adelie","Dream",41.1,17.5,190,3900,"male" 132 | "Adelie","Dream",35.6,17.5,191,3175,"female" 133 | "Adelie","Dream",40.2,20.1,200,3975,"male" 134 | "Adelie","Dream",37,16.5,185,3400,"female" 135 | "Adelie","Dream",39.7,17.9,193,4250,"male" 136 | "Adelie","Dream",40.2,17.1,193,3400,"female" 137 | "Adelie","Dream",40.6,17.2,187,3475,"male" 138 | "Adelie","Dream",32.1,15.5,188,3050,"female" 139 | "Adelie","Dream",40.7,17,190,3725,"male" 140 | "Adelie","Dream",37.3,16.8,192,3000,"female" 141 | "Adelie","Dream",39,18.7,185,3650,"male" 142 | "Adelie","Dream",39.2,18.6,190,4250,"male" 143 | "Adelie","Dream",36.6,18.4,184,3475,"female" 144 | "Adelie","Dream",36,17.8,195,3450,"female" 145 | "Adelie","Dream",37.8,18.1,193,3750,"male" 146 | "Adelie","Dream",36,17.1,187,3700,"female" 147 | "Adelie","Dream",41.5,18.5,201,4000,"male" 148 | "Gentoo","Biscoe",46.1,13.2,211,4500,"female" 149 | "Gentoo","Biscoe",50,16.3,230,5700,"male" 150 | "Gentoo","Biscoe",48.7,14.1,210,4450,"female" 151 | "Gentoo","Biscoe",50,15.2,218,5700,"male" 152 | "Gentoo","Biscoe",47.6,14.5,215,5400,"male" 153 | "Gentoo","Biscoe",46.5,13.5,210,4550,"female" 154 | "Gentoo","Biscoe",45.4,14.6,211,4800,"female" 155 | "Gentoo","Biscoe",46.7,15.3,219,5200,"male" 156 | "Gentoo","Biscoe",43.3,13.4,209,4400,"female" 157 | "Gentoo","Biscoe",46.8,15.4,215,5150,"male" 158 | "Gentoo","Biscoe",40.9,13.7,214,4650,"female" 159 | "Gentoo","Biscoe",49,16.1,216,5550,"male" 160 | "Gentoo","Biscoe",45.5,13.7,214,4650,"female" 161 | "Gentoo","Biscoe",48.4,14.6,213,5850,"male" 162 | "Gentoo","Biscoe",45.8,14.6,210,4200,"female" 163 | "Gentoo","Biscoe",49.3,15.7,217,5850,"male" 164 | "Gentoo","Biscoe",42,13.5,210,4150,"female" 165 | "Gentoo","Biscoe",49.2,15.2,221,6300,"male" 166 | "Gentoo","Biscoe",46.2,14.5,209,4800,"female" 167 | "Gentoo","Biscoe",48.7,15.1,222,5350,"male" 168 | "Gentoo","Biscoe",50.2,14.3,218,5700,"male" 169 | "Gentoo","Biscoe",45.1,14.5,215,5000,"female" 170 | "Gentoo","Biscoe",46.5,14.5,213,4400,"female" 171 | "Gentoo","Biscoe",46.3,15.8,215,5050,"male" 172 | "Gentoo","Biscoe",42.9,13.1,215,5000,"female" 173 | "Gentoo","Biscoe",46.1,15.1,215,5100,"male" 174 | "Gentoo","Biscoe",47.8,15,215,5650,"male" 175 | "Gentoo","Biscoe",48.2,14.3,210,4600,"female" 176 | "Gentoo","Biscoe",50,15.3,220,5550,"male" 177 | "Gentoo","Biscoe",47.3,15.3,222,5250,"male" 178 | "Gentoo","Biscoe",42.8,14.2,209,4700,"female" 179 | "Gentoo","Biscoe",45.1,14.5,207,5050,"female" 180 | "Gentoo","Biscoe",59.6,17,230,6050,"male" 181 | "Gentoo","Biscoe",49.1,14.8,220,5150,"female" 182 | "Gentoo","Biscoe",48.4,16.3,220,5400,"male" 183 | "Gentoo","Biscoe",42.6,13.7,213,4950,"female" 184 | "Gentoo","Biscoe",44.4,17.3,219,5250,"male" 185 | "Gentoo","Biscoe",44,13.6,208,4350,"female" 186 | "Gentoo","Biscoe",48.7,15.7,208,5350,"male" 187 | "Gentoo","Biscoe",42.7,13.7,208,3950,"female" 188 | "Gentoo","Biscoe",49.6,16,225,5700,"male" 189 | "Gentoo","Biscoe",45.3,13.7,210,4300,"female" 190 | "Gentoo","Biscoe",49.6,15,216,4750,"male" 191 | "Gentoo","Biscoe",50.5,15.9,222,5550,"male" 192 | "Gentoo","Biscoe",43.6,13.9,217,4900,"female" 193 | "Gentoo","Biscoe",45.5,13.9,210,4200,"female" 194 | "Gentoo","Biscoe",50.5,15.9,225,5400,"male" 195 | "Gentoo","Biscoe",44.9,13.3,213,5100,"female" 196 | "Gentoo","Biscoe",45.2,15.8,215,5300,"male" 197 | "Gentoo","Biscoe",46.6,14.2,210,4850,"female" 198 | "Gentoo","Biscoe",48.5,14.1,220,5300,"male" 199 | "Gentoo","Biscoe",45.1,14.4,210,4400,"female" 200 | "Gentoo","Biscoe",50.1,15,225,5000,"male" 201 | "Gentoo","Biscoe",46.5,14.4,217,4900,"female" 202 | "Gentoo","Biscoe",45,15.4,220,5050,"male" 203 | "Gentoo","Biscoe",43.8,13.9,208,4300,"female" 204 | "Gentoo","Biscoe",45.5,15,220,5000,"male" 205 | "Gentoo","Biscoe",43.2,14.5,208,4450,"female" 206 | "Gentoo","Biscoe",50.4,15.3,224,5550,"male" 207 | "Gentoo","Biscoe",45.3,13.8,208,4200,"female" 208 | "Gentoo","Biscoe",46.2,14.9,221,5300,"male" 209 | "Gentoo","Biscoe",45.7,13.9,214,4400,"female" 210 | "Gentoo","Biscoe",54.3,15.7,231,5650,"male" 211 | "Gentoo","Biscoe",45.8,14.2,219,4700,"female" 212 | "Gentoo","Biscoe",49.8,16.8,230,5700,"male" 213 | "Gentoo","Biscoe",49.5,16.2,229,5800,"male" 214 | "Gentoo","Biscoe",43.5,14.2,220,4700,"female" 215 | "Gentoo","Biscoe",50.7,15,223,5550,"male" 216 | "Gentoo","Biscoe",47.7,15,216,4750,"female" 217 | "Gentoo","Biscoe",46.4,15.6,221,5000,"male" 218 | "Gentoo","Biscoe",48.2,15.6,221,5100,"male" 219 | "Gentoo","Biscoe",46.5,14.8,217,5200,"female" 220 | "Gentoo","Biscoe",46.4,15,216,4700,"female" 221 | "Gentoo","Biscoe",48.6,16,230,5800,"male" 222 | "Gentoo","Biscoe",47.5,14.2,209,4600,"female" 223 | "Gentoo","Biscoe",51.1,16.3,220,6000,"male" 224 | "Gentoo","Biscoe",45.2,13.8,215,4750,"female" 225 | "Gentoo","Biscoe",45.2,16.4,223,5950,"male" 226 | "Gentoo","Biscoe",49.1,14.5,212,4625,"female" 227 | "Gentoo","Biscoe",52.5,15.6,221,5450,"male" 228 | "Gentoo","Biscoe",47.4,14.6,212,4725,"female" 229 | "Gentoo","Biscoe",50,15.9,224,5350,"male" 230 | "Gentoo","Biscoe",44.9,13.8,212,4750,"female" 231 | "Gentoo","Biscoe",50.8,17.3,228,5600,"male" 232 | "Gentoo","Biscoe",43.4,14.4,218,4600,"female" 233 | "Gentoo","Biscoe",51.3,14.2,218,5300,"male" 234 | "Gentoo","Biscoe",47.5,14,212,4875,"female" 235 | "Gentoo","Biscoe",52.1,17,230,5550,"male" 236 | "Gentoo","Biscoe",47.5,15,218,4950,"female" 237 | "Gentoo","Biscoe",52.2,17.1,228,5400,"male" 238 | "Gentoo","Biscoe",45.5,14.5,212,4750,"female" 239 | "Gentoo","Biscoe",49.5,16.1,224,5650,"male" 240 | "Gentoo","Biscoe",44.5,14.7,214,4850,"female" 241 | "Gentoo","Biscoe",50.8,15.7,226,5200,"male" 242 | "Gentoo","Biscoe",49.4,15.8,216,4925,"male" 243 | "Gentoo","Biscoe",46.9,14.6,222,4875,"female" 244 | "Gentoo","Biscoe",48.4,14.4,203,4625,"female" 245 | "Gentoo","Biscoe",51.1,16.5,225,5250,"male" 246 | "Gentoo","Biscoe",48.5,15,219,4850,"female" 247 | "Gentoo","Biscoe",55.9,17,228,5600,"male" 248 | "Gentoo","Biscoe",47.2,15.5,215,4975,"female" 249 | "Gentoo","Biscoe",49.1,15,228,5500,"male" 250 | "Gentoo","Biscoe",46.8,16.1,215,5500,"male" 251 | "Gentoo","Biscoe",41.7,14.7,210,4700,"female" 252 | "Gentoo","Biscoe",53.4,15.8,219,5500,"male" 253 | "Gentoo","Biscoe",43.3,14,208,4575,"female" 254 | "Gentoo","Biscoe",48.1,15.1,209,5500,"male" 255 | "Gentoo","Biscoe",50.5,15.2,216,5000,"female" 256 | "Gentoo","Biscoe",49.8,15.9,229,5950,"male" 257 | "Gentoo","Biscoe",43.5,15.2,213,4650,"female" 258 | "Gentoo","Biscoe",51.5,16.3,230,5500,"male" 259 | "Gentoo","Biscoe",46.2,14.1,217,4375,"female" 260 | "Gentoo","Biscoe",55.1,16,230,5850,"male" 261 | "Gentoo","Biscoe",48.8,16.2,222,6000,"male" 262 | "Gentoo","Biscoe",47.2,13.7,214,4925,"female" 263 | "Gentoo","Biscoe",46.8,14.3,215,4850,"female" 264 | "Gentoo","Biscoe",50.4,15.7,222,5750,"male" 265 | "Gentoo","Biscoe",45.2,14.8,212,5200,"female" 266 | "Gentoo","Biscoe",49.9,16.1,213,5400,"male" 267 | "Chinstrap","Dream",46.5,17.9,192,3500,"female" 268 | "Chinstrap","Dream",50,19.5,196,3900,"male" 269 | "Chinstrap","Dream",51.3,19.2,193,3650,"male" 270 | "Chinstrap","Dream",45.4,18.7,188,3525,"female" 271 | "Chinstrap","Dream",52.7,19.8,197,3725,"male" 272 | "Chinstrap","Dream",45.2,17.8,198,3950,"female" 273 | "Chinstrap","Dream",46.1,18.2,178,3250,"female" 274 | "Chinstrap","Dream",51.3,18.2,197,3750,"male" 275 | "Chinstrap","Dream",46,18.9,195,4150,"female" 276 | "Chinstrap","Dream",51.3,19.9,198,3700,"male" 277 | "Chinstrap","Dream",46.6,17.8,193,3800,"female" 278 | "Chinstrap","Dream",51.7,20.3,194,3775,"male" 279 | "Chinstrap","Dream",47,17.3,185,3700,"female" 280 | "Chinstrap","Dream",52,18.1,201,4050,"male" 281 | "Chinstrap","Dream",45.9,17.1,190,3575,"female" 282 | "Chinstrap","Dream",50.5,19.6,201,4050,"male" 283 | "Chinstrap","Dream",50.3,20,197,3300,"male" 284 | "Chinstrap","Dream",58,17.8,181,3700,"female" 285 | "Chinstrap","Dream",46.4,18.6,190,3450,"female" 286 | "Chinstrap","Dream",49.2,18.2,195,4400,"male" 287 | "Chinstrap","Dream",42.4,17.3,181,3600,"female" 288 | "Chinstrap","Dream",48.5,17.5,191,3400,"male" 289 | "Chinstrap","Dream",43.2,16.6,187,2900,"female" 290 | "Chinstrap","Dream",50.6,19.4,193,3800,"male" 291 | "Chinstrap","Dream",46.7,17.9,195,3300,"female" 292 | "Chinstrap","Dream",52,19,197,4150,"male" 293 | "Chinstrap","Dream",50.5,18.4,200,3400,"female" 294 | "Chinstrap","Dream",49.5,19,200,3800,"male" 295 | "Chinstrap","Dream",46.4,17.8,191,3700,"female" 296 | "Chinstrap","Dream",52.8,20,205,4550,"male" 297 | "Chinstrap","Dream",40.9,16.6,187,3200,"female" 298 | "Chinstrap","Dream",54.2,20.8,201,4300,"male" 299 | "Chinstrap","Dream",42.5,16.7,187,3350,"female" 300 | "Chinstrap","Dream",51,18.8,203,4100,"male" 301 | "Chinstrap","Dream",49.7,18.6,195,3600,"male" 302 | "Chinstrap","Dream",47.5,16.8,199,3900,"female" 303 | "Chinstrap","Dream",47.6,18.3,195,3850,"female" 304 | "Chinstrap","Dream",52,20.7,210,4800,"male" 305 | "Chinstrap","Dream",46.9,16.6,192,2700,"female" 306 | "Chinstrap","Dream",53.5,19.9,205,4500,"male" 307 | "Chinstrap","Dream",49,19.5,210,3950,"male" 308 | "Chinstrap","Dream",46.2,17.5,187,3650,"female" 309 | "Chinstrap","Dream",50.9,19.1,196,3550,"male" 310 | "Chinstrap","Dream",45.5,17,196,3500,"female" 311 | "Chinstrap","Dream",50.9,17.9,196,3675,"female" 312 | "Chinstrap","Dream",50.8,18.5,201,4450,"male" 313 | "Chinstrap","Dream",50.1,17.9,190,3400,"female" 314 | "Chinstrap","Dream",49,19.6,212,4300,"male" 315 | "Chinstrap","Dream",51.5,18.7,187,3250,"male" 316 | "Chinstrap","Dream",49.8,17.3,198,3675,"female" 317 | "Chinstrap","Dream",48.1,16.4,199,3325,"female" 318 | "Chinstrap","Dream",51.4,19,201,3950,"male" 319 | "Chinstrap","Dream",45.7,17.3,193,3600,"female" 320 | "Chinstrap","Dream",50.7,19.7,203,4050,"male" 321 | "Chinstrap","Dream",42.5,17.3,187,3350,"female" 322 | "Chinstrap","Dream",52.2,18.8,197,3450,"male" 323 | "Chinstrap","Dream",45.2,16.6,191,3250,"female" 324 | "Chinstrap","Dream",49.3,19.9,203,4050,"male" 325 | "Chinstrap","Dream",50.2,18.8,202,3800,"male" 326 | "Chinstrap","Dream",45.6,19.4,194,3525,"female" 327 | "Chinstrap","Dream",51.9,19.5,206,3950,"male" 328 | "Chinstrap","Dream",46.8,16.5,189,3650,"female" 329 | "Chinstrap","Dream",45.7,17,195,3650,"female" 330 | "Chinstrap","Dream",55.8,19.8,207,4000,"male" 331 | "Chinstrap","Dream",43.5,18.1,202,3400,"female" 332 | "Chinstrap","Dream",49.6,18.2,193,3775,"male" 333 | "Chinstrap","Dream",50.8,19,210,4100,"male" 334 | "Chinstrap","Dream",50.2,18.7,198,3775,"female" 335 | -------------------------------------------------------------------------------- /app_8_classification_penguins/penguins_clf.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dataprofessor/streamlit_freecodecamp/d44c4c1320f8417b3d5494902e5366b715314528/app_8_classification_penguins/penguins_clf.pkl -------------------------------------------------------------------------------- /app_8_classification_penguins/penguins_example.csv: -------------------------------------------------------------------------------- 1 | island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex 2 | Biscoe,43.9,17.2,201.0,4207.0,male 3 | -------------------------------------------------------------------------------- /app_9_regression_boston_housing/boston-house-ml-app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import pandas as pd 3 | import shap 4 | import matplotlib.pyplot as plt 5 | from sklearn import datasets 6 | from sklearn.ensemble import RandomForestRegressor 7 | 8 | st.write(""" 9 | # Boston House Price Prediction App 10 | 11 | This app predicts the **Boston House Price**! 12 | """) 13 | st.write('---') 14 | 15 | # Loads the Boston House Price Dataset 16 | boston = datasets.load_boston() 17 | X = pd.DataFrame(boston.data, columns=boston.feature_names) 18 | Y = pd.DataFrame(boston.target, columns=["MEDV"]) 19 | 20 | # Sidebar 21 | # Header of Specify Input Parameters 22 | st.sidebar.header('Specify Input Parameters') 23 | 24 | def user_input_features(): 25 | CRIM = st.sidebar.slider('CRIM', X.CRIM.min(), X.CRIM.max(), X.CRIM.mean()) 26 | ZN = st.sidebar.slider('ZN', X.ZN.min(), X.ZN.max(), X.ZN.mean()) 27 | INDUS = st.sidebar.slider('INDUS', X.INDUS.min(), X.INDUS.max(), X.INDUS.mean()) 28 | CHAS = st.sidebar.slider('CHAS', X.CHAS.min(), X.CHAS.max(), X.CHAS.mean()) 29 | NOX = st.sidebar.slider('NOX', X.NOX.min(), X.NOX.max(), X.NOX.mean()) 30 | RM = st.sidebar.slider('RM', X.RM.min(), X.RM.max(), X.RM.mean()) 31 | AGE = st.sidebar.slider('AGE', X.AGE.min(), X.AGE.max(), X.AGE.mean()) 32 | DIS = st.sidebar.slider('DIS', X.DIS.min(), X.DIS.max(), X.DIS.mean()) 33 | RAD = st.sidebar.slider('RAD', X.RAD.min(), X.RAD.max(), X.RAD.mean()) 34 | TAX = st.sidebar.slider('TAX', X.TAX.min(), X.TAX.max(), X.TAX.mean()) 35 | PTRATIO = st.sidebar.slider('PTRATIO', X.PTRATIO.min(), X.PTRATIO.max(), X.PTRATIO.mean()) 36 | B = st.sidebar.slider('B', X.B.min(), X.B.max(), X.B.mean()) 37 | LSTAT = st.sidebar.slider('LSTAT', X.LSTAT.min(), X.LSTAT.max(), X.LSTAT.mean()) 38 | data = {'CRIM': CRIM, 39 | 'ZN': ZN, 40 | 'INDUS': INDUS, 41 | 'CHAS': CHAS, 42 | 'NOX': NOX, 43 | 'RM': RM, 44 | 'AGE': AGE, 45 | 'DIS': DIS, 46 | 'RAD': RAD, 47 | 'TAX': TAX, 48 | 'PTRATIO': PTRATIO, 49 | 'B': B, 50 | 'LSTAT': LSTAT} 51 | features = pd.DataFrame(data, index=[0]) 52 | return features 53 | 54 | df = user_input_features() 55 | 56 | # Main Panel 57 | 58 | # Print specified input parameters 59 | st.header('Specified Input parameters') 60 | st.write(df) 61 | st.write('---') 62 | 63 | # Build Regression Model 64 | model = RandomForestRegressor() 65 | model.fit(X, Y) 66 | # Apply Model to Make Prediction 67 | prediction = model.predict(df) 68 | 69 | st.header('Prediction of MEDV') 70 | st.write(prediction) 71 | st.write('---') 72 | 73 | # Explaining the model's predictions using SHAP values 74 | # https://github.com/slundberg/shap 75 | explainer = shap.TreeExplainer(model) 76 | shap_values = explainer.shap_values(X) 77 | 78 | st.header('Feature Importance') 79 | plt.title('Feature importance based on SHAP values') 80 | shap.summary_plot(shap_values, X) 81 | st.pyplot(bbox_inches='tight') 82 | st.write('---') 83 | 84 | plt.title('Feature importance based on SHAP values (Bar)') 85 | shap.summary_plot(shap_values, X, plot_type="bar") 86 | st.pyplot(bbox_inches='tight') 87 | --------------------------------------------------------------------------------