├── .gitignore ├── .gitmodules ├── Conditionals ├── Conditions.ipynb └── README.md ├── Exercise ├── Exercise_oct2023.ipynb ├── Exercise_oct2023_solution.ipynb ├── README.md └── datasets │ ├── cirrhosis.csv │ └── healthcare-dataset-stroke-data.csv ├── Extra ├── List_comprehension.ipynb └── README.md ├── Iterables ├── README.md └── iterables.ipynb ├── LICENSE ├── Loops ├── Loops.ipynb └── README.md ├── Pandas ├── IO_Pandas.ipynb ├── Pandas.ipynb └── README.md ├── README.md ├── Recap ├── README.md └── recap.ipynb ├── Variables_data_types ├── README.md ├── Variables_slides_Oct22.pdf └── variables.ipynb ├── Visualizations ├── PlotlyExpress_ComprehensiveGuide.ipynb ├── README.md ├── plotly.ipynb └── plotly_extra_material.ipynb ├── cheat_sheets ├── Bokeh_Cheat_Sheet.pdf ├── Importing_Data_Cheat_sheet.pdf ├── Jupyter_Notebook_Cheat_Sheet.pdf ├── Numpy_Python_Cheat_Sheet.pdf ├── Pandas_Cheat_Sheet.pdf ├── Plotly_Cheat_Sheet.pdf ├── Python_Matplotlib_Cheat_Sheet.pdf ├── Scikit-learn_Cheat_Sheet.pdf ├── Scipy-LinearAlgebra_Cheat_Sheet.pdf ├── Seaborn_Cheat_Sheet.pdf └── cheat_sheet_day0.pdf ├── environment.yml ├── figures ├── HeaDS_logo_large_withTitle.png ├── Program_October_2021.png ├── colab_restart_runtime_after_install.png ├── colab_save_in_drive.png ├── colab_save_in_drive_2.png ├── colab_toc.png ├── df_loc.png ├── df_loc_condition.png ├── github_raw_file_view.png ├── long_format.png ├── matplotlib │ ├── fig_axes_axis.png │ └── handout-beginner.png ├── pandas_dataframe.png ├── pandas_indexing.png ├── program.PNG ├── program_june2022.png ├── program_march2023.png ├── program_oct_screen_GR.png ├── program_spring_2022.PNG ├── quartile-percentile.jpg ├── tsunami_logo.PNG └── wide_format.png ├── slides ├── Python Tsunami Local Installations vs code.pdf └── Python Tsunami intro.pdf └── solutions ├── conditions_solutions.ipynb ├── functions_solutions.ipynb ├── iterables_wSolutions.ipynb ├── loops_solutions.ipynb ├── pandas_solutions.ipynb ├── plotly_solutions.ipynb ├── 
recap_solutions.ipynb └── variables_solutions.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | Untitled.ipynb 131 | .DS_Store 132 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "data/covid-19"] 2 | path = data/covid-19 3 | url = https://github.com/datasets/covid-19.git 4 | -------------------------------------------------------------------------------- /Conditionals/README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 |

5 | 6 | 7 | notebook | content 8 | ---- | ------ 9 | [Conditions.ipynb](Conditions.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Conditionals/Conditions.ipynb) | Conditionals 10 | -------------------------------------------------------------------------------- /Exercise/Exercise_oct2023.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "id": "3YLOMOoHRwRR" 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "import plotly.express as px" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": { 18 | "id": "k9Nxy_ic9GgE" 19 | }, 20 | "source": [ 21 | "# Exercise\n", 22 | "\n", 23 | "Now that we have learned some of the basics of python, we should practice how to use this new superpower. We have here prepared a loosely guided exercise that focusses on data exploration and visualization on two example datasets, one on strokes and one on cirrhosis. You can also explore a dataset of your choosing, though the questions are prepared with the example datasets in mind.\n", 24 | "\n", 25 | "Here you can see the [metadata](https://www.kaggle.com/datasets/fedesoriano/cirrhosis-prediction-dataset) for the cirrhosis dataset which describes what each of the columns are. For the stroke data the meaning of the columns is more straightforward." 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "id": "XVWSFVPK9TCe" 32 | }, 33 | "source": [ 34 | "## 1. Data Loading\n", 35 | "\n", 36 | "We will start with the **stroke** dataset. You can find it on the GitHub repository under Exercise/datasets. Load the stroke data into colab by using one of the two approaches detailed below and assign it to variable name ```data```." 
37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": { 42 | "id": "SmuuZeHpx3S_" 43 | }, 44 | "source": [ 45 | "### 1.1 Loading the data\n", 46 | "\n", 47 | "**Option 1:**\n", 48 | "\n", 49 | "Use the pandas csv reader with a link to the data on GitHub. To do this, go to the github repository, find the stroke dataset and click on the 'raw' button. Copy that link and enter it as the file path in pandas csv reader." 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "id": "vR5EptJg0G1J" 57 | }, 58 | "outputs": [], 59 | "source": [] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": { 64 | "id": "MXMXD20CxXHT" 65 | }, 66 | "source": [ 67 | "... or\n", 68 | "\n", 69 | "**Option 2**\n", 70 | "\n", 71 | "Manually load the dataset into colab and then read it with the pandas csv reader. See steps below:\n", 72 | "\n", 73 | "1. go to the left side bar and click on the folder icon\n", 74 | "2. click on data upload\n", 75 | "3. select dataset from your computer\n", 76 | "4. call pandas csv reader with the name of the dataset" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "id": "LWfDOeGB9Uw2" 84 | }, 85 | "outputs": [], 86 | "source": [] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": { 91 | "id": "Gf5b1jRGxVOO" 92 | }, 93 | "source": [ 94 | "### 1.2 First look\n", 95 | "\n", 96 | "Have a first look at the data. There are some neat built-in pandas functions to get an initial understanding of the data, e.g. the info function `df.info()` or the pandas `df.describe()` function.\n", 97 | "\n", 98 | "Questions you might want to answer here:\n", 99 | "- What different types of columns do you have?\n", 100 | "- Is there a column that describes a variable that can be understood as an 'outcome' ? Which one?\n", 101 | "- How many values does each variable, i.e. 
column, have and what are some preliminary statistics of the features? (tip: use pandas `describe` function)\n", 102 | "\n" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": { 109 | "id": "-U3eWqWn9fCX" 110 | }, 111 | "outputs": [], 112 | "source": [] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": { 117 | "id": "4simFEW-teti" 118 | }, 119 | "source": [ 120 | "It helps to know which column is the outcome variable. In the stroke dataset (and many others!) the outcome variable is coded as a numerical variable. However, during analysis it should be interpreted as categorical.\n", 121 | "\n", 122 | "Identify the column of the outcome variable and change its type to \"category\" by using `astype()`. You can see an example in the [API reference on categorical data](https://pandas.pydata.org/pandas-docs/stable/user_guide/categorical.html). Remember to save your changes!\n", 123 | "\n", 124 | "Then, use `info()` on the dataframe again. Has it changed?" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": { 131 | "id": "YDLT6I4gtetj" 132 | }, 133 | "outputs": [], 134 | "source": [] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": { 139 | "id": "ScZ9eNl99zEN" 140 | }, 141 | "source": [ 142 | "## 2. Exploratory analysis\n", 143 | "\n", 144 | "Get to know your data better. If you want to first visually inspect the data it can help to explore with some plots.\n" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": { 150 | "id": "3O904uWZtetj" 151 | }, 152 | "source": [ 153 | "### 2.1 Violin plots and histograms\n", 154 | "\n", 155 | "Consider your dataframe columns that are not the outcome variable. How are the measurements distributed?\n", 156 | "\n", 157 | "To study the distributions we want to make **violin plots** of variables, i.e. 
data columns, that are numeric and **histograms** of the variables that are strings/categorical.\n", 158 | "\n", 159 | "To check for the data type of a column, have a look at how data types are specified in `dataframe.dtypes`. Then check the data type of each column. Remember a column is a **pandas `Series`**, so it has a `dtype` attribute instead (only dataframes have `dtypes`!).\n", 160 | "\n", 161 | "You can start by figuring out how to make a plot of the data in one column. Once you have that, make one plot for each column that is numeric or a string (except the outcome). This is a repetitive task, so it is ideally suited for a loop. Remember to use `fig.show()` to actually display your plots during the loop.\n", 162 | "\n", 163 | "**Pro version**: Some columns are not actually explanatory variables, such as the ID column. You can identify these columns e.g. by checking that each of their values is unique (this would be very unlikely for a measured variable). Skip them when making the plots.\n" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": { 170 | "id": "llINtmry-NqA" 171 | }, 172 | "outputs": [], 173 | "source": [] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": { 179 | "id": "aEIouSF6tetj" 180 | }, 181 | "outputs": [], 182 | "source": [] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": { 188 | "id": "9XT_fy_utetk" 189 | }, 190 | "outputs": [], 191 | "source": [] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": { 196 | "id": "-d-4HSAay-xc" 197 | }, 198 | "source": [ 199 | "### 2.2 Correlation coefficients\n", 200 | "\n", 201 | "Plot the correlation coefficients of all numerical features:\n", 202 | "\n", 203 | "1. Use the method `corr()` on the dataframe. What is the result?\n", 204 | "\n", 205 | "2. Now use a heatmap to show the correlation coefficients graphically.\n", 206 | "\n", 207 | "3. 
Try some different options to make your heatmap look nicer." 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": { 214 | "id": "yc3GuLRe-U6l" 215 | }, 216 | "outputs": [], 217 | "source": [] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": { 223 | "id": "uQQfqB9atetk" 224 | }, 225 | "outputs": [], 226 | "source": [] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": { 232 | "id": "TOdMk4Qrtetk" 233 | }, 234 | "outputs": [], 235 | "source": [] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": { 240 | "id": "v9Ez8XDCzCGW" 241 | }, 242 | "source": [ 243 | "### 2.3 Scatter plot\n", 244 | "\n", 245 | "Make a scatter plot of the two variables with the highest correlation. Divide the plots by the outcome variable and add marginal plots and a trendline:\n", 246 | "\n", 247 | "1. Find the pair of variables that has the highest correlation with each other and make a scatter plot of them.\n", 248 | "\n", 249 | "2. Divide the scatter plot into two by the outcome variable. Have a look at ``facet`` and the visualization lecture if you have trouble.\n", 250 | "\n", 251 | "3. Add marginal distributions on one of the axes and a trendline. " 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": { 258 | "id": "5xNHY4LwAJx8" 259 | }, 260 | "outputs": [], 261 | "source": [] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": { 267 | "id": "a1kHLkWbtetl" 268 | }, 269 | "outputs": [], 270 | "source": [] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": { 276 | "id": "vyf61JtAtetm" 277 | }, 278 | "outputs": [], 279 | "source": [] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": { 284 | "id": "XzWC3UdNAhdt" 285 | }, 286 | "source": [ 287 | "## 3. 
Data cleaning\n", 288 | "\n", 289 | "Now, we switch to the [cirrhosis dataset](https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/spring2022/Exercise/datasets/cirrhosis.csv).\n", 290 | "\n", 291 | "We will investigate what data is missing and try to impute it.\n", 292 | "\n", 293 | "A word of caution:\n", 294 | "\n", 295 | "Note that imputation is a __complex subject__ and whether it makes sense to do it and the method used highly depend on the data set. Sometimes, the mean of a value across all non-missing observations is a good approximation for the missing value. On the other hand, if you have a column that says whether or not the person was treated with the drug or the placebo we have no good way to guess which treatment the person received. Replacing missing values in this column with the most common value (which is that they did get the drug) will produce extremely __wrong data__ and lead you to __wrong conclusions__. Do not do that!\n" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": { 301 | "id": "uyHh_Y1Qtetm" 302 | }, 303 | "source": [ 304 | "### 3.0 Load the data\n", 305 | "\n", 306 | "Load in the cirrhosis dataset using one of the two methods you used earlier for the stroke data. Also change the outcome variable to type \"category\"." 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": { 313 | "id": "0UKiUqBNtetm" 314 | }, 315 | "outputs": [], 316 | "source": [] 317 | }, 318 | { 319 | "cell_type": "markdown", 320 | "metadata": { 321 | "id": "giu-oZSQtetm" 322 | }, 323 | "source": [ 324 | "### 3.1 Missing data\n", 325 | "\n", 326 | "1. Use the pandas method `isnull`.\n", 327 | "\n", 328 | "2. Get the number of missing values per column by calling `sum()` on the result of `isnull`. Which features, i.e. columns, have missing values?\n", 329 | "\n", 330 | "3. 
Make a barplot that shows the number of missing values per column.\n" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": { 337 | "id": "KB_RPg-U9i8J" 338 | }, 339 | "outputs": [], 340 | "source": [] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": null, 345 | "metadata": { 346 | "id": "XEbMbPHqAj2X" 347 | }, 348 | "outputs": [], 349 | "source": [] 350 | }, 351 | { 352 | "cell_type": "markdown", 353 | "metadata": { 354 | "id": "3qBOePAlzIrv" 355 | }, 356 | "source": [ 357 | "### 3.2 Omitting observations with missing values\n", 358 | "\n", 359 | "1. Create a subset in which you omit all patients, i.e. rows, which have missing values in any column. Take care to not overwrite the original dataframe. If you did, you can re-import it.\n", 360 | "\n", 361 | "2. How many observations, i.e. patients, would you be left with if you removed all missing values?\n", 362 | "\n", 363 | "3. How many if you only omit patients where the outcome is missing?\n" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": null, 369 | "metadata": { 370 | "id": "j3rEUU7iA1La" 371 | }, 372 | "outputs": [], 373 | "source": [] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": null, 378 | "metadata": { 379 | "id": "0SbRFgmGChph" 380 | }, 381 | "outputs": [], 382 | "source": [] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": { 387 | "id": "t1_fL5hEC2LN" 388 | }, 389 | "source": [ 390 | "### 3.3 Effects of removing data\n", 391 | "\n", 392 | "We can now have a look at how removing NaNs affects the data.\n", 393 | "\n", 394 | "\n", 395 | "1. First, plot the correlation coefficient between all numerical columns in the original cirrhosis dataframe. (Analogous to 2.2).\n", 396 | "\n", 397 | "2. Now, remake the plot for the subset where you have removed all rows with any missing data. Have the correlations changed?" 
398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": null, 403 | "metadata": { 404 | "id": "r5G9AqzzDSBJ" 405 | }, 406 | "outputs": [], 407 | "source": [] 408 | }, 409 | { 410 | "cell_type": "markdown", 411 | "metadata": { 412 | "id": "9b40PElUzT-2" 413 | }, 414 | "source": [ 415 | "### 3.4 Imputation\n", 416 | "\n", 417 | "Use the method `fillna()` to impute missing values in the columns **where it makes sense**. Have a look at the documentation: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.fillna.html\n", 418 | "\n", 419 | "1. A good way to impute numerical data can be i.e. the mean or median. Calculate the mean for all numerical columns.\n", 420 | "\n", 421 | "2. Perform the imputation.\n", 422 | "\n", 423 | "3. Re-make the barplot from 3.1. to check that it worked.\n", 424 | "\n", 425 | "4. Recalculate correlation coefficients between all numerical columns and show it in a heatmap.\n" 426 | ] 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": null, 431 | "metadata": { 432 | "id": "SF8PoOvOcp7n" 433 | }, 434 | "outputs": [], 435 | "source": [] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": null, 440 | "metadata": { 441 | "id": "-KAI_fEIteto" 442 | }, 443 | "outputs": [], 444 | "source": [] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": null, 449 | "metadata": { 450 | "id": "m5Wq7uL3teto" 451 | }, 452 | "outputs": [], 453 | "source": [] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": null, 458 | "metadata": { 459 | "id": "fSFkAHeoteto" 460 | }, 461 | "outputs": [], 462 | "source": [] 463 | } 464 | ], 465 | "metadata": { 466 | "colab": { 467 | "collapsed_sections": [ 468 | "Hto297viwyzY", 469 | "Gf5b1jRGxVOO", 470 | "TC6ewdf5qFze", 471 | "-d-4HSAay-xc", 472 | "v9Ez8XDCzCGW", 473 | "FxHy9gKJqtNE", 474 | "3qBOePAlzIrv", 475 | "nKknLSw1zOqi", 476 | "9b40PElUzT-2", 477 | "_CwrlBmMOzif", 478 | "ATnfchJIyGKj", 479 | "SS-KMKN7zeNw", 
480 | "feYvDsddzhUl", 481 | "f8vhdvqVzjXi" 482 | ], 483 | "provenance": [] 484 | }, 485 | "kernelspec": { 486 | "display_name": "Python 3", 487 | "language": "python", 488 | "name": "python3" 489 | }, 490 | "language_info": { 491 | "codemirror_mode": { 492 | "name": "ipython", 493 | "version": 3 494 | }, 495 | "file_extension": ".py", 496 | "mimetype": "text/x-python", 497 | "name": "python", 498 | "nbconvert_exporter": "python", 499 | "pygments_lexer": "ipython3", 500 | "version": "3.12.6" 501 | } 502 | }, 503 | "nbformat": 4, 504 | "nbformat_minor": 0 505 | } 506 | -------------------------------------------------------------------------------- /Exercise/README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 |

5 | 6 | 7 | notebook | content 8 | ---- | ------ 9 | [Exercise_oct2023.ipynb](Exercise_oct2023.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Exercise/Exercise_oct2023.ipynb) | Exercise 10 | 11 | -------------------------------------------------------------------------------- /Extra/List_comprehension.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# List comprehensions" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "A list comprehension is an implicit for loop where we want to do *something* to every element of an existing list and create a new list.\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## General syntax\n", 22 | "\n", 23 | "The general syntax is:\n", 24 | "\n", 25 | "```python\n", 26 | "new_list = [expression for iterator in old_list]\n", 27 | "```" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 5, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | "[1, 4, 9, 16, 25, 36]\n" 40 | ] 41 | } 42 | ], 43 | "source": [ 44 | "#an example: square a list of numbers\n", 45 | "\n", 46 | "numbers = [1,2,3,4,5,6]\n", 47 | "squares = []\n", 48 | "\n", 49 | "for x in numbers:\n", 50 | " squares.append(x**2)\n", 51 | "\n", 52 | "print(squares)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "We can do this in one line instead with a list comprehension:" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 6, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | "[1, 4, 9, 16, 25, 
36]\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "squares = [x**2 for x in numbers]\n", 77 | "print(squares)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "You could also skip defining the old list, `numbers`, and straight-up use an iterator like `range`:" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 7, 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "name": "stdout", 94 | "output_type": "stream", 95 | "text": [ 96 | "[1, 4, 9, 16, 25, 36]\n" 97 | ] 98 | } 99 | ], 100 | "source": [ 101 | "#remember, we want to start from 1 and go up to 6 so we need range(1,7)\n", 102 | "squares = [x**2 for x in range(1,7)]\n", 103 | "print(squares)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "So in terms of syntax we mean:\n", 111 | "\n", 112 | "```python\n", 113 | "new_list = [expression for iterator in old_list]\n", 114 | "```\n", 115 | "\n", 116 | "where:\n", 117 | "* `old_list` - the name of the list we want to iterate over\n", 118 | "* `iterator` - what we want to call the entry of `old_list` we are currently looking at, i.e. 
`x` or `i`\n", 119 | "* `expression` - what we want to do to each entry of `old_list`\n" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "## Conditional comprehensions\n", 127 | "\n", 128 | "Where list comprehensions really shine is when we want to do *something* to only *some* entries of a list, depending on a condition.\n", 129 | "\n", 130 | "Here we will square only even numbers:" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 8, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "[4, 16, 36]\n" 143 | ] 144 | } 145 | ], 146 | "source": [ 147 | "numbers = [1,2,3,4,5,6]\n", 148 | "even_squares = []\n", 149 | "\n", 150 | "for x in numbers: #go through all entries in numbers\n", 151 | " if x % 2 == 0: #if the entry is even, square it and append it to the new list\n", 152 | " even_squares.append(x**2)\n", 153 | "\n", 154 | "print(even_squares) " 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "We can do the same in a list comprehension:" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 9, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/plain": [ 172 | "[4, 16, 36]" 173 | ] 174 | }, 175 | "execution_count": 9, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "even_squares = [x**2 for x in numbers if x % 2 == 0]\n", 182 | "even_squares" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "If we instead frontload the condition we can also add an `else`.\n", 190 | "\n", 191 | "Here we square the even numbers and add the odd numbers to themselves:" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 12, 197 | "metadata": {}, 198 | "outputs": [ 199 | { 200 | "name": "stdout", 201 | 
"output_type": "stream", 202 | "text": [ 203 | "[1, 2, 3, 4, 5, 6]\n", 204 | "[2, 4, 6, 16, 10, 36]\n" 205 | ] 206 | } 207 | ], 208 | "source": [ 209 | "print(numbers)\n", 210 | "even_square_odd_one = [x**2 if x % 2 == 0 else x+x for x in numbers]\n", 211 | "print(even_square_odd_one)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "## Exercise\n", 219 | "\n", 220 | "1. Write a list comprehension that adds 5 to every number in `numbers`." 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "numbers = [0, 91, 69, -15, -54, 58, -58, 62, 4, 54, 53, -43, -87, 28, 23, -21, 69, -17, -60, 21]" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "#your code here" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "\n", 246 | "\n", 247 | "2. Write a list comprehension that goes through the list `numbers` and replaces every negative number with 0. The result should be saved in a new list called `positive_numbers`." 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "#your code here" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "We can use `random.randint()` to generate a pseudo random inter in python. 
It works like this:" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 56, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "import random" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 61, 278 | "metadata": {}, 279 | "outputs": [ 280 | { 281 | "data": { 282 | "text/plain": [ 283 | "-24" 284 | ] 285 | }, 286 | "execution_count": 61, 287 | "metadata": {}, 288 | "output_type": "execute_result" 289 | } 290 | ], 291 | "source": [ 292 | "#run this cell a couple of times to create some different random numbers\n", 293 | "random.randint(-100,100)" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "3. Write a list comprehension that creates 20 random integers. You can use `range` to define how many times you want the implicit loop to run." 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 58, 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "name": "stdout", 310 | "output_type": "stream", 311 | "text": [ 312 | "[0, 91, 69, -15, -54, 58, -58, 62, 4, 54, 53, -43, -87, 28, 23, -21, 69, -17, -60, 21]\n" 313 | ] 314 | } 315 | ], 316 | "source": [ 317 | "#your code here" 318 | ] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "metadata": {}, 323 | "source": [ 324 | "## Practical applications\n", 325 | "\n", 326 | "### Finding all occurrences of a certain item in a list.\n", 327 | "\n", 328 | "I want to know all the indices of 'red' in the 'colors' list." 
329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 19, 334 | "metadata": {}, 335 | "outputs": [], 336 | "source": [ 337 | "colors = ['red', 'green', 'orange', 'yellow', 'black', 'green', 'red', 'blue', 'purple', 'yellow', 'red']" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": 20, 343 | "metadata": {}, 344 | "outputs": [ 345 | { 346 | "data": { 347 | "text/plain": [ 348 | "[0, 6, 10]" 349 | ] 350 | }, 351 | "execution_count": 20, 352 | "metadata": {}, 353 | "output_type": "execute_result" 354 | } 355 | ], 356 | "source": [ 357 | "indices = [i for i, x in enumerate(colors) if x == \"red\"]\n", 358 | "indices" 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": {}, 364 | "source": [ 365 | "### Tallying list items\n", 366 | "\n", 367 | "Finding out how many times each item in a list occurs is also referred to as tallying. \n", 368 | "\n", 369 | "We want to know how many times each of the colors occurs in 'colors'. Or in other words, we want to use `.count()` for each unique item in the list (how many times do we see 'red', 'yellow', 'purple', etc.)." 
370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": 41, 375 | "metadata": {}, 376 | "outputs": [ 377 | { 378 | "data": { 379 | "text/plain": [ 380 | "[('green', 2),\n", 381 | " ('black', 1),\n", 382 | " ('purple', 1),\n", 383 | " ('red', 3),\n", 384 | " ('yellow', 2),\n", 385 | " ('orange', 1),\n", 386 | " ('blue', 1)]" 387 | ] 388 | }, 389 | "execution_count": 41, 390 | "metadata": {}, 391 | "output_type": "execute_result" 392 | } 393 | ], 394 | "source": [ 395 | "color_counts = [(x,colors.count(x)) for x in set(colors)]\n", 396 | "color_counts" 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "metadata": {}, 402 | "source": [ 403 | "We can then reverse the order to have the count first:" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 42, 409 | "metadata": {}, 410 | "outputs": [ 411 | { 412 | "data": { 413 | "text/plain": [ 414 | "[(2, 'green'),\n", 415 | " (1, 'black'),\n", 416 | " (1, 'purple'),\n", 417 | " (3, 'red'),\n", 418 | " (2, 'yellow'),\n", 419 | " (1, 'orange'),\n", 420 | " (1, 'blue')]" 421 | ] 422 | }, 423 | "execution_count": 42, 424 | "metadata": {}, 425 | "output_type": "execute_result" 426 | } 427 | ], 428 | "source": [ 429 | "color_counts = [(colors.count(x),x) for x in set(colors)]\n", 430 | "color_counts" 431 | ] 432 | }, 433 | { 434 | "cell_type": "markdown", 435 | "metadata": {}, 436 | "source": [ 437 | "Which will help us with imposing `sorted` of the list of tuples to get the most common entry on top:" 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "execution_count": 43, 443 | "metadata": {}, 444 | "outputs": [ 445 | { 446 | "data": { 447 | "text/plain": [ 448 | "[(3, 'red'),\n", 449 | " (2, 'yellow'),\n", 450 | " (2, 'green'),\n", 451 | " (1, 'purple'),\n", 452 | " (1, 'orange'),\n", 453 | " (1, 'blue'),\n", 454 | " (1, 'black')]" 455 | ] 456 | }, 457 | "execution_count": 43, 458 | "metadata": {}, 459 | "output_type": "execute_result" 460 | } 461 | ], 462 | 
"source": [ 463 | "color_counts = sorted([(colors.count(x),x) for x in set(colors)], reverse=True)\n", 464 | "color_counts" 465 | ] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "metadata": {}, 470 | "source": [ 471 | "### Creating dictionaries from existing lists" 472 | ] 473 | }, 474 | { 475 | "cell_type": "markdown", 476 | "metadata": {}, 477 | "source": [ 478 | "I have these two lists and I want to make them into a dictionary where one list is the key (the city name) and the other is the value (population). But I don't want to type out everything again." 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": 52, 484 | "metadata": {}, 485 | "outputs": [], 486 | "source": [ 487 | "cities = ['Tokyo', 'Berlin', 'New York', 'Copenhagen', 'Los Angeles']\n", 488 | "population = [37115035, 3576873, 8260000, 1391205, 3895836]" 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": 53, 494 | "metadata": {}, 495 | "outputs": [ 496 | { 497 | "data": { 498 | "text/plain": [ 499 | "[{'Tokyo': 37115035},\n", 500 | " {'Berlin': 3576873},\n", 501 | " {'New York': 8260000},\n", 502 | " {'Copenhagen': 1391205},\n", 503 | " {'Los Angeles': 3895836}]" 504 | ] 505 | }, 506 | "execution_count": 53, 507 | "metadata": {}, 508 | "output_type": "execute_result" 509 | } 510 | ], 511 | "source": [ 512 | "city_population = [{city : pop} for city, pop in zip(cities, population)]\n", 513 | "\n", 514 | "city_population" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": 54, 520 | "metadata": {}, 521 | "outputs": [ 522 | { 523 | "data": { 524 | "text/plain": [ 525 | "[('Tokyo', 37115035),\n", 526 | " ('Berlin', 3576873),\n", 527 | " ('New York', 8260000),\n", 528 | " ('Copenhagen', 1391205),\n", 529 | " ('Los Angeles', 3895836)]" 530 | ] 531 | }, 532 | "execution_count": 54, 533 | "metadata": {}, 534 | "output_type": "execute_result" 535 | } 536 | ], 537 | "source": [ 538 | "bla = list(zip(cities,population))\n", 539 
| "bla" 540 | ] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": 55, 545 | "metadata": {}, 546 | "outputs": [ 547 | { 548 | "name": "stdout", 549 | "output_type": "stream", 550 | "text": [ 551 | "Tokyo : 37115035\n", 552 | "Berlin : 3576873\n", 553 | "New York : 8260000\n", 554 | "Copenhagen : 1391205\n", 555 | "Los Angeles : 3895836\n" 556 | ] 557 | } 558 | ], 559 | "source": [ 560 | "for city, pop in zip(cities,population):\n", 561 | " print(city, \":\", pop)" 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | "execution_count": null, 567 | "metadata": {}, 568 | "outputs": [], 569 | "source": [] 570 | } 571 | ], 572 | "metadata": { 573 | "kernelspec": { 574 | "display_name": "Python 3", 575 | "language": "python", 576 | "name": "python3" 577 | }, 578 | "language_info": { 579 | "codemirror_mode": { 580 | "name": "ipython", 581 | "version": 3 582 | }, 583 | "file_extension": ".py", 584 | "mimetype": "text/x-python", 585 | "name": "python", 586 | "nbconvert_exporter": "python", 587 | "pygments_lexer": "ipython3", 588 | "version": "3.11.1" 589 | } 590 | }, 591 | "nbformat": 4, 592 | "nbformat_minor": 2 593 | } 594 | -------------------------------------------------------------------------------- /Extra/README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 |

5 | 6 | 7 | notebook | content 8 | ---- | ------ 9 | [List_comprehension.ipynb](List_comprehension.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Extra/List_comprehension.ipynb) | List Comprehension 10 | -------------------------------------------------------------------------------- /Iterables/README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 |

5 | 6 | 7 | notebook | content 8 | ---- | ------ 9 | [iterables.ipynb](iterables.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Iterables/iterables.ipynb) | Data structures 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 pythontsunami 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Loops/Loops.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "KFib138AzHLW" 7 | }, 8 | "source": [ 9 | "\n", 10 | "\n", 11 | "\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "id": "jzzrpQS-zxnF" 18 | }, 19 | "source": [ 20 | "# Loops" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "id": "HrDeygtfzx7P" 27 | }, 28 | "source": [ 29 | "Consider the code below: It prints the numbers 1 through 10 using what we've learned so far. \n", 30 | "This notebook is about how to do the same task less tediously. **Loops** are a way to repeatedly execute some code, in a simple and succinct way." 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "id": "tjj39-AazwAb" 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "print(1)\n", 42 | "print(2)\n", 43 | "print(3)\n", 44 | "print(4)\n", 45 | "print(5)\n", 46 | "print(6)\n", 47 | "print(7)\n", 48 | "print(8)\n", 49 | "print(9)\n", 50 | "print(10)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": { 56 | "id": "VBLCVfQoseEh" 57 | }, 58 | "source": [ 59 | "Indeed, we can use a for loop to do the same in only two lines:\n", 60 | "\n", 61 | "```python\n", 62 | "#pseudo code\n", 63 | "for number in number_list:\n", 64 | " print number\n", 65 | "```\n" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "id": "aCmVJXXBzHLn" 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "number_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n", 77 | "\n", 78 | "#the variable number is initialized inside the for statement.\n", 79 | "#You do not need to declare it before. 
It will continue to exist after the loop.\n", 80 | "for number in number_list:\n", 81 | " print(number)\n", 82 | "\n", 83 | "print(\"Now we are done.\")\n", 84 | "print(\"What is number now?\", number)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": { 90 | "id": "DGXTd_1SseEj" 91 | }, 92 | "source": [ 93 | "Loops are part of flow control. The code inside the loop is (usually) executed several times, whereas lines such as above are only executed one time. The program also needs to know when the loop is over and we return to 'linear' flow. Like in `if` blocks, this is made clear with indentation." 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": { 99 | "id": "EClTb9Qy0i8U" 100 | }, 101 | "source": [ 102 | "## **`for`** loops\n", 103 | "\n", 104 | "In Python, **`for`** loops are written like this:\n", 105 | "\n", 106 | "```python\n", 107 | "for element in sequence:\n", 108 | " this code is executed inside the loop\n", 109 | " and this code too\n", 110 | " \n", 111 | "now we are not in the loop anymore \n", 112 | "```\n", 113 | "The idea is that we go through our sequence step by step and perform a certain action (here `print()` on each element in the sequence).\n", 114 | "\n", 115 | "- ``element`` is a variable and can be called whatever you want.\n", 116 | "\n", 117 | "- ``sequence`` is a sequence we iterate over. It is some kind of collection of items, for instance: a `str` of characters, a `range`, a list etc. It is also often called an iterable.\n", 118 | "\n", 119 | "Note that the body of the loop is **indented**. This is important for [**flow**](https://colab.research.google.com/drive/11xJCNmKS1pFDxEjnYhJruDYAOGEbb3RK#scrollTo=7PmpZ4oTyPHw). When we write a command on the same indentation level as the initial `for` statement, the loop is over. This will be executed after the loop." 
120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": { 125 | "id": "Zlj0LILxu9TA" 126 | }, 127 | "source": [ 128 | "### `for` loops using `for ... in`\n", 129 | "\n", 130 | "You go through the contents of any iterable such as a list or a dictionary using `for ... in` like shown before with the `number_list`:" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": { 137 | "id": "jHskWVZ-vVy-" 138 | }, 139 | "outputs": [], 140 | "source": [ 141 | "#try it out!\n", 142 | "countries = ['Denmark', 'Spain', 'Italy']\n", 143 | "\n", 144 | "# iterate over the countries as we did with the list of numbers above:\n", 145 | "for country in countries:\n", 146 | " print(country)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": { 152 | "id": "gSAzxl6bvdoB" 153 | }, 154 | "source": [ 155 | "The `country` part is about how we want to refer to the element we are looking at right now. You can freely choose this variable name." 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": { 161 | "id": "rP62SBxWzHLw" 162 | }, 163 | "source": [ 164 | "### `for` loops using `range()`\n", 165 | "\n", 166 | "Instead of writing out a list with all the numerical values we want to go through there is a quicker way. 
We can create it using `range()`:" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": { 173 | "id": "aI1zSyEw1Tvk" 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "for number in range(1, 11):\n", 178 | " print(number)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": { 184 | "id": "ZCj6EX711bw1" 185 | }, 186 | "source": [ 187 | "The [**`range()`**](https://docs.python.org/3/library/functions.html#func-range) function returns a sequence of numbers, starting from 0 by default, and increments by 1 by default, and stops at a specified number (which is not included in the range).\n", 188 | "\n", 189 | "Based on what we learned so far you might think that it creates a list, but it does **not**. In fact, range **does not do anything** by itself, but can be used inside a for loop to create the sequence to loop over.\n", 190 | "\n", 191 | "> The syntax is: `range(start, stop, step)`\n", 192 | "\n", 193 | "The *step* parameter tells the function how many steps to skip and which direction to count (**`+`** for **up** and **`-`** for **down**).\n", 194 | "\n", 195 | "Examples:\n", 196 | "\n", 197 | "- `range(8)` gives you integers from 0 through 7.\n", 198 | "\n", 199 | "- `range(2, 9)` will give you integers from 2 to 8.\n", 200 | "\n", 201 | "- `range(10, 20, 2)` will give you even numbers from 10 to 18. 
Remember, the upper limit of the range is excluded!\n", 202 | "\n", 203 | "- `range(9, 0, -1)` will start from 9 and give you integers down to 1.\n" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": { 210 | "id": "dI_lYduNseEn" 211 | }, 212 | "outputs": [], 213 | "source": [ 214 | "#try it out!\n" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": { 220 | "id": "5nylK238seEn" 221 | }, 222 | "source": [ 223 | "### `for` loops using `enumerate()`\n", 224 | "\n", 225 | "Another useful function to know for `for` loops is `enumerate`. Like its name hints, `enumerate` helps us to *enumerate* the contents of an iterable.\n", 226 | "\n", 227 | "The difference from `for ... in` is that `enumerate` will also tell us the position of an element in the iterable:" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": { 234 | "id": "LzVKG8MTseEo" 235 | }, 236 | "outputs": [], 237 | "source": [ 238 | "# get both items and their position\n", 239 | "for index, country in enumerate(countries):\n", 240 | " print(\"My number\" + str(index) + \" favorite country is: \" + country)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": { 246 | "id": "o42yFkWx4WyO" 247 | }, 248 | "source": [ 249 | "# Exercise 1\n", 250 | "\n", 251 | "_~ 20 minutes_" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": { 257 | "id": "xqhI0vljzHL0" 258 | }, 259 | "source": [ 260 | "**a.** Use a for loop to iterate over `range(4)`. Which numbers does it produce?" 
261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": { 267 | "id": "amW1Ja7kzHL0" 268 | }, 269 | "outputs": [], 270 | "source": [ 271 | "# your code goes here\n" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": { 277 | "id": "pHR_ctAfzHL0" 278 | }, 279 | "source": [ 280 | "**b.** Now write a for loop using `range` that prints the numbers 1 to 4." 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": { 287 | "id": "nJj4ebrLzHL1" 288 | }, 289 | "outputs": [], 290 | "source": [ 291 | "# your code goes here" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": { 297 | "id": "1453WLTVzHL1" 298 | }, 299 | "source": [ 300 | "**c.** What numbers do you get when you use the following range inside a for loop? Write out the loop to check.\n", 301 | "\n", 302 | "`range(12,0,-3)`" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": { 309 | "id": "B1LU-GHpzHL2" 310 | }, 311 | "outputs": [], 312 | "source": [ 313 | "# your code goes here" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": { 319 | "id": "YqDcWibUzHL2" 320 | }, 321 | "source": [ 322 | "**d.** Loop through numbers 1-20:\n", 323 | "- If the number is 4 or 13, print \"x is unlucky\"\n", 324 | "- Otherwise:\n", 325 | " - If the number is even, print \"x is even\"\n", 326 | " - If the number is odd, print \"x is odd\"\n", 327 | "\n", 328 | "> check [`Conditions.ipynb`](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/fall2021/Conditionals/Conditions.ipynb)" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": null, 334 | "metadata": { 335 | "id": "AC4dgNEPzHL3" 336 | }, 337 | "outputs": [], 338 | "source": [] 339 | }, 340 | { 341 | "cell_type": "markdown", 342 | "metadata": { 343 | "id": "LwSW0hrCseEs" 344 | }, 345 | "source": [ 346 | "**e.** In 
the code below we're counting from 0 as python usually does. Can you fix so that it starts writing from 1?\n", 347 | "\n", 348 | "```python\n", 349 | "# get both items and their position\n", 350 | "for index, country in enumerate(countries):\n", 351 | " print(\"My number\" + str(index) + \" favorite country is: \" + country)\n", 352 | "```\n" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": { 359 | "id": "fuNGQzXLseEs" 360 | }, 361 | "outputs": [], 362 | "source": [] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": { 367 | "id": "4NJV8SnW2dcf" 368 | }, 369 | "source": [ 370 | "## **`while`** loops\n", 371 | "\n", 372 | "We can also iterate over a sequence using a **`while`** loop, which has a different format:\n", 373 | "\n", 374 | "```python\n", 375 | "while condition:\n", 376 | " expression\n", 377 | "```\n", 378 | "`while` loops continue to execute while a certain condition is `True`, and will end when it becomes `False`.\n", 379 | "\n", 380 | "```python\n", 381 | "user_response = \"Something...\"\n", 382 | "while user_response != \"please\":\n", 383 | " user_response = input(\"Ah ah ah, you didn't say the magic word: \")\n", 384 | "```\n", 385 | "\n", 386 | "`while` loops require more careful setup than `for` loops, since you have to specify the termination conditions manually.\n", 387 | "\n", 388 | "Be careful! If the condition doesn't become `False` at some point, your loop will continue ***forever***!" 
389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "metadata": { 395 | "id": "dKlSQ64XzHL4" 396 | }, 397 | "outputs": [], 398 | "source": [ 399 | "my_float = 50.0\n", 400 | "\n", 401 | "while my_float > 1:\n", 402 | " my_float = my_float / 4\n", 403 | " print(my_float)" 404 | ] 405 | }, 406 | { 407 | "cell_type": "markdown", 408 | "metadata": { 409 | "id": "xKOz5qSs5bfV" 410 | }, 411 | "source": [ 412 | "# Exercise 2\n", 413 | "\n", 414 | "_~15 minutes_\n", 415 | "\n", 416 | "**a.** What does the following loop do?\n", 417 | "```python\n", 418 | " i = 1\n", 419 | " while i < 5:\n", 420 | " i + i\n", 421 | " print(i)\n", 422 | "```\n", 423 | " \n", 424 | "> Hint: is the value of `i` changing?" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": null, 430 | "metadata": { 431 | "id": "MEX_6xXPzHL4" 432 | }, 433 | "outputs": [], 434 | "source": [ 435 | "# your code goes here" 436 | ] 437 | }, 438 | { 439 | "cell_type": "markdown", 440 | "metadata": { 441 | "id": "NlbI1VH4zHL5" 442 | }, 443 | "source": [ 444 | "**b.** What does the following loop do?\n", 445 | "```python\n", 446 | " i = 0\n", 447 | " while i <= 5:\n", 448 | " i = i + 1\n", 449 | " print(i)\n", 450 | "```" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": null, 456 | "metadata": { 457 | "id": "iu3oul_BzHL5" 458 | }, 459 | "outputs": [], 460 | "source": [ 461 | "# your code goes here" 462 | ] 463 | }, 464 | { 465 | "cell_type": "markdown", 466 | "metadata": { 467 | "id": "CYMshsgmzHL6" 468 | }, 469 | "source": [ 470 | "**c.** Fix the infinite loop below so that it doesn't run endlessly anymore:\n", 471 | "```python\n", 472 | " # this code runs forever...\n", 473 | " x = 0\n", 474 | " while x != 11:\n", 475 | " x += 2\n", 476 | " print(x)\n", 477 | "```" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": null, 483 | "metadata": { 484 | "id": "aH1WA-K7zHL6" 485 | }, 486 | "outputs": [], 487 | 
"source": [ 488 | "# your code goes here" 489 | ] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "metadata": { 494 | "id": "XKETo3jK3aue" 495 | }, 496 | "source": [ 497 | "## Python loop control\n", 498 | "\n", 499 | "Controlled exit, skipping a block of code, or ignoring external factors that might influence your code, can be achieved with the Python statements: `break`, `continue`, and `pass`." 500 | ] 501 | }, 502 | { 503 | "cell_type": "markdown", 504 | "metadata": { 505 | "id": "xFv803XuzHL7" 506 | }, 507 | "source": [ 508 | "### ***`break`*** statement\n", 509 | "\n", 510 | "The keyword `break` gives us the ability to exit out of a loop whenever we want, and can be used in both `while` and `for` loops.\n", 511 | "\n", 512 | "Example:\n", 513 | "\n", 514 | "``` python\n", 515 | "for letter in 'Python':\n", 516 | " if letter == 'h':\n", 517 | " break\n", 518 | " print('Current Letter:', letter)\n", 519 | "```" 520 | ] 521 | }, 522 | { 523 | "cell_type": "markdown", 524 | "metadata": { 525 | "id": "Eff6JwP0zHL7" 526 | }, 527 | "source": [ 528 | "The `break` statement needs to be within the block of code under your loop statement, ususally after a conditional `if` statement." 529 | ] 530 | }, 531 | { 532 | "cell_type": "code", 533 | "execution_count": null, 534 | "metadata": { 535 | "id": "VsEuKOb731qP" 536 | }, 537 | "outputs": [], 538 | "source": [ 539 | "for letter in 'Python':\n", 540 | " if letter == 'h':\n", 541 | " break\n", 542 | " print('Current Letter :', letter)" 543 | ] 544 | }, 545 | { 546 | "cell_type": "markdown", 547 | "metadata": { 548 | "id": "Uf24LfUJzHL8" 549 | }, 550 | "source": [ 551 | "### ***`continue`*** statement\n", 552 | "\n", 553 | "The [`continue`](https://docs.python.org/3/tutorial/controlflow.html#break-and-continue-statements-and-else-clauses-on-loops) statement in Python gives you the option to skip over the part of a loop where a condition is met, but to go on to complete the rest of the loop. 
That is, it disrupts the iteration of the loop that fulfills the condition and returns the control to the beginning of the loop. It works with both `while` and `for` loops.\n", 554 | "\n", 555 | "Example:\n", 556 | "\n", 557 | "``` python\n", 558 | "for letter in 'Python':\n", 559 | " if letter == 'h':\n", 560 | " continue\n", 561 | " print('Current Letter :', letter)\n", 562 | "```" 563 | ] 564 | }, 565 | { 566 | "cell_type": "markdown", 567 | "metadata": { 568 | "id": "V3Bic6PVzHL9" 569 | }, 570 | "source": [ 571 | "The difference in using `continue` rather than `break` is that the loop will continue despite the disruption when the condition is met." 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": 1, 577 | "metadata": { 578 | "colab": { 579 | "base_uri": "https://localhost:8080/" 580 | }, 581 | "id": "8DJUYVlNzHL9", 582 | "outputId": "0e7f26f8-3053-4c76-9ccf-e996895f3560" 583 | }, 584 | "outputs": [ 585 | { 586 | "name": "stdout", 587 | "output_type": "stream", 588 | "text": [ 589 | "Current Letter : P\n", 590 | "Current Letter : y\n", 591 | "Current Letter : t\n", 592 | "Current Letter : o\n", 593 | "Current Letter : n\n" 594 | ] 595 | } 596 | ], 597 | "source": [ 598 | "for letter in 'Python':\n", 599 | " if letter == 'h':\n", 600 | " continue\n", 601 | " print('Current Letter :', letter)" 602 | ] 603 | }, 604 | { 605 | "cell_type": "markdown", 606 | "metadata": { 607 | "id": "n_HVORV8zHL9" 608 | }, 609 | "source": [ 610 | "### ***`pass`*** statement\n", 611 | "\n", 612 | "The [`pass`](https://docs.python.org/3/tutorial/controlflow.html#pass-statements) statement is used when a statement is required syntactically but you do not want any command or code to execute. It's a *null* operation.\n", 613 | "\n", 614 | "Now what does this mean? Because of flow control, statements like `if` and `for` need to be followed by an indented block of code or the program will crash. 
There can, however, be special situations where we want literally nothing to happen, or we don't know yet what should happen. Then we use `pass`.\n", 615 | "\n", 616 | "A common reason for this is that some operation should happen eventually but we haven't gotten around to implementing it yet.\n" 617 | ] 618 | }, 619 | { 620 | "cell_type": "markdown", 621 | "metadata": { 622 | "id": "nLgJjFzzj1EH" 623 | }, 624 | "source": [ 625 | "Compare the output of this to the code block above where we used `continue`:" 626 | ] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": 2, 631 | "metadata": { 632 | "colab": { 633 | "base_uri": "https://localhost:8080/" 634 | }, 635 | "id": "JJ4O026WzHL-", 636 | "outputId": "8a1bf996-2aa8-4190-bb19-9f1984f09642" 637 | }, 638 | "outputs": [ 639 | { 640 | "name": "stdout", 641 | "output_type": "stream", 642 | "text": [ 643 | "Current Letter : P\n", 644 | "Current Letter : y\n", 645 | "Current Letter : t\n", 646 | "Current Letter : h\n", 647 | "Current Letter : o\n", 648 | "Current Letter : n\n" 649 | ] 650 | } 651 | ], 652 | "source": [ 653 | "for letter in 'Python':\n", 654 | " if letter == 'h':\n", 655 | " pass\n", 656 | " #perhaps in the future something special should happen when the letter is h\n", 657 | "\n", 658 | " print('Current Letter :', letter)" 659 | ] 660 | }, 661 | { 662 | "cell_type": "markdown", 663 | "metadata": { 664 | "id": "FIEj-qc5zHL-" 665 | }, 666 | "source": [ 667 | "# Group Exercise \n", 668 | "\n", 669 | "In your group, take the next 10 mins to solve this exercise: \n", 670 | "\n", 671 | "Write a loop that:\n", 672 | "\n", 673 | "- iterates over each character in the string `\"I live in CPH, and I like it here.\"`;\n", 674 | "- for each character checks if it is a space;\n", 675 | "- if it is a space, then just continue with the loop;\n", 676 | "- if the character is not a space, do the following:\n", 677 | "- check if it is a comma `,` ;\n", 678 | "- if the character is a comma `,`, break the 
loop;\n", 679 | "- if the character is not a comma, print it." 680 | ] 681 | }, 682 | { 683 | "cell_type": "code", 684 | "execution_count": null, 685 | "metadata": { 686 | "id": "XfAKZMP9seEy" 687 | }, 688 | "outputs": [], 689 | "source": [] 690 | } 691 | ], 692 | "metadata": { 693 | "colab": { 694 | "provenance": [], 695 | "toc_visible": true 696 | }, 697 | "kernelspec": { 698 | "display_name": "Python 3 (ipykernel)", 699 | "language": "python", 700 | "name": "python3" 701 | }, 702 | "language_info": { 703 | "codemirror_mode": { 704 | "name": "ipython", 705 | "version": 3 706 | }, 707 | "file_extension": ".py", 708 | "mimetype": "text/x-python", 709 | "name": "python", 710 | "nbconvert_exporter": "python", 711 | "pygments_lexer": "ipython3", 712 | "version": "3.10.9" 713 | }, 714 | "toc": { 715 | "base_numbering": 1, 716 | "nav_menu": {}, 717 | "number_sections": true, 718 | "sideBar": true, 719 | "skip_h1_title": false, 720 | "title_cell": "Table of Contents", 721 | "title_sidebar": "Contents", 722 | "toc_cell": false, 723 | "toc_position": {}, 724 | "toc_section_display": true, 725 | "toc_window_display": false 726 | }, 727 | "toc-autonumbering": true 728 | }, 729 | "nbformat": 4, 730 | "nbformat_minor": 1 731 | } 732 | -------------------------------------------------------------------------------- /Loops/README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 |

5 | 6 | 7 | notebook | content 8 | ---- | ------ 9 | [Loops.ipynb](Loops.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Loops/Loops.ipynb) | Loops 10 | -------------------------------------------------------------------------------- /Pandas/IO_Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "slideshow": { 16 | "slide_type": "slide" 17 | } 18 | }, 19 | "source": [ 20 | "# Pandas: Working with Different File types" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "slideshow": { 27 | "slide_type": "fragment" 28 | } 29 | }, 30 | "source": [ 31 | "The Pandas library offers a wide range of possibilities for creating, writing and reading files. There are two types of files that can be handled in Python, normal text files and binary files.\n", 32 | "\n", 33 | "\n", 34 | "In this notebook we will learn more about working with these different formats: CSV, Excel, JSON, HTML, SQL, Pickle, Matlab .mat, and HDF5 files.\n" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": { 40 | "slideshow": { 41 | "slide_type": "subslide" 42 | } 43 | }, 44 | "source": [ 45 | "### CSV (Comma-Separated Values) Files\n", 46 | "\n", 47 | "As we saw before, a CSV file is a plaintext file with a .csv extension that holds tabular data. This is one of the most popular file formats for storing large amounts of data. \n", 48 | "\n", 49 | "Each line of the file represents one record, and the fields are, by default, separated by commas, but you could change the separator to a semicolon, tab, space, or some other character. 
If the fields are labelled, the first line of the file (referred to as \"header\") will contain the field names.\n", 50 | "\n", 51 | "Example of CSV file:\n", 52 | "```\n", 53 | "month,height,weight\n", 54 | "Jan,1.2,76\n", 55 | "Feb,1.21,77\n", 56 | "March,1.21,76\n", 57 | "```" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": { 63 | "slideshow": { 64 | "slide_type": "fragment" 65 | } 66 | }, 67 | "source": [ 68 | "Previously we learnt that to read CSV files, python comes with a csv reader that works quite well.\n", 69 | "\n", 70 | "```python\n", 71 | "import csv\n", 72 | "\n", 73 | "with open('file.csv', 'r') as f:\n", 74 | " reader = csv.reader(f)\n", 75 | " header = next(reader)\n", 76 | " data = list(reader)\n", 77 | "```\n", 78 | "\n", 79 | "Once you have read the data, it can go to a DataFrame, for example:\n", 80 | "\n", 81 | "```python\n", 82 | "import pandas as pd\n", 83 | "\n", 84 | "df = pd.DataFrame(data=data, columns=header)\n", 85 | "```" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": { 91 | "slideshow": { 92 | "slide_type": "fragment" 93 | } 94 | }, 95 | "source": [ 96 | "You can also use the Pandas csv read function `pandas.read_csv()`, which can get the data into a DataFrame. 
This is what we usually use.\n", 97 | "\n", 98 | "The major advantage of this function is that it has a lot of options and does good file format and data format inference.\n", 99 | "\n", 100 | "```python\n", 101 | "import pandas as pd\n", 102 | "\n", 103 | "df = pd.read_csv('file.csv')\n", 104 | "```\n", 105 | "\n", 106 | "The input `'file.csv'` can be any valid path, including URLs.\n", 107 | "\n", 108 | "You can read about all the options [here](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html).\n", 109 | "\n", 110 | "`read_csv` is accompanied by the `to_csv` function, to write data from a `DataFrame` to disk in `csv`-format:\n", 111 | "\n", 112 | "```python\n", 113 | "df.to_csv('file.csv')\n", 114 | "```" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "slideshow": { 122 | "slide_type": "fragment" 123 | } 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "# using the covid 19 data from before\n", 128 | "import pandas as pd\n", 129 | "df = pd.read_csv(\"https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/data.csv\", index_col='dateRep')\n", 130 | "sample = df.sample(10)\n", 131 | "sample" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "Let's create some sample data containing 10 entries." 
139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "sample.to_csv('testdata.txt')\n", 148 | "sample.to_csv('testdata.csv')" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "And look at the created text file: [testdata.csv](testdata.csv) (displayed nicely already, try the [testdata.txt](testdata.txt) file!)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": { 161 | "slideshow": { 162 | "slide_type": "subslide" 163 | } 164 | }, 165 | "source": [ 166 | "### JSON (JavaScript Object Notation) Files\n", 167 | "\n", 168 | "The next file type we will look at is JSON. This is a popular format for transferring data over the web via APIs, and is also a plaintext file format.\n", 169 | "\n", 170 | "JSON is very similar to the text representation of a Python dictionary and lists:" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": { 177 | "slideshow": { 178 | "slide_type": "fragment" 179 | } 180 | }, 181 | "outputs": [], 182 | "source": [ 183 | "data = \"\"\"\n", 184 | "{\n", 185 | "\"day\": \"Saturday\",\n", 186 | "\"week\": 3,\n", 187 | "\"isSunny\": true,\n", 188 | "\"goals\": [\"eat breakfast\", \"write a book\", \"eat lunch\"]\n", 189 | "}\n", 190 | "\"\"\"\n", 191 | "print(data)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": { 197 | "slideshow": { 198 | "slide_type": "fragment" 199 | } 200 | }, 201 | "source": [ 202 | "The main downside with **hand-writing** JSON is that it is very picky about getting everything right. Even though it's very readable, it should not be considered human writable." 
203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": { 208 | "slideshow": { 209 | "slide_type": "fragment" 210 | } 211 | }, 212 | "source": [ 213 | "Python and Pandas work well with JSON files, as Python's json library offers built-in support for them.\n", 214 | "Tabular data can be stored in JSON in a variety of ways, called \"orientations\".\n", 215 | "\n", 216 | "- `'split'` : dict like {'index' -> [index], 'columns' -> [columns], 'data' -> [values]}\n", 217 | "- `'records'` : list like [{column -> value}, ... , {column -> value}]\n", 218 | "- `'index'` : dict like {index -> {column -> value}}\n", 219 | "- `'columns'` : dict like {column -> {index -> value}}\n", 220 | "- `'values'` : just the values array\n", 221 | "- `'table'` : dict like {'schema': {schema}, 'data': {data}}" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": { 227 | "slideshow": { 228 | "slide_type": "fragment" 229 | } 230 | }, 231 | "source": [ 232 | "You can save the data from your DataFrame to a JSON file with the `to_json()` function:\n", 233 | "\n", 234 | "```python\n", 235 | "df.to_json('data.json', orient='index')\n", 236 | "```" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": { 242 | "slideshow": { 243 | "slide_type": "fragment" 244 | } 245 | }, 246 | "source": [ 247 | "You can also load the data from a JSON file with `read_json()`.\n", 248 | "\n", 249 | "```python\n", 250 | "df = pd.read_json('data.json', orient='index')\n", 251 | "```\n", 252 | "\n", 253 | "In this case, the *orient* parameter is very important because it specifies how Pandas understands the structure of the file." 
254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": { 259 | "slideshow": { 260 | "slide_type": "fragment" 261 | } 262 | }, 263 | "source": [ 264 | "Alternatively, you can use the **json module** to load (read) and dump (write) JSON files.\n", 265 | "This module has 4 main functions:\n", 266 | " \n", 267 | "| function | read/write | file/string |\n", 268 | "| :---: | :----: | :-----: |\n", 269 | "| load() | read | file |\n", 270 | "| dump() | write | file |\n", 271 | "| loads() | read | string |\n", 272 | "| dumps() | write | string |\n", 273 | "\n", 274 | "\n", 275 | "To read the data example we created above, which means converting from JSON to Python:\n", 276 | "\n", 277 | "```python\n", 278 | "import json\n", 279 | "\n", 280 | "json.loads(data)\n", 281 | "```\n", 282 | "\n", 283 | "And to convert a Python object to JSON:\n", 284 | "\n", 285 | "```python\n", 286 | "import json\n", 287 | "\n", 288 | "json.dumps(data)\n", 289 | "```" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "slideshow": { 297 | "slide_type": "fragment" 298 | } 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "sample_json_string = sample.to_json()\n", 303 | "sample_json_string" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [ 312 | "import json\n", 313 | "sample_json_dict = json.loads(sample_json_string)\n", 314 | "sample_json_dict" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": null, 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [ 323 | "from pprint import pprint\n", 324 | "pprint(sample_json_dict)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [ 333 | "sample.to_json('sample_data_json.txt')\n", 334 | "sample.to_json('sample_data.json')" 335 | ] 336 | }, 337 | { 338 | 
"cell_type": "markdown", 339 | "metadata": {}, 340 | "source": [ 341 | "And have a look [sample_data.json](sample_data.json) (or at the `txt` file - [sample_data_json.txt](sample_data_json.txt))" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": { 347 | "slideshow": { 348 | "slide_type": "subslide" 349 | } 350 | }, 351 | "source": [ 352 | "### HTML Files" 353 | ] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "metadata": { 358 | "slideshow": { 359 | "slide_type": "fragment" 360 | } 361 | }, 362 | "source": [ 363 | "An HTML is a plaintext file that uses hypertext markup language to help browers render web pages. These files carry the extension *.html* and *htm*, and in order to work with them, you will need to install an HTML library like **lxml** or **html5lib**.\n", 364 | "\n", 365 | "Once you have these libraries, you can \n", 366 | "\n", 367 | "\n", 368 | "\n", 369 | "You can save your DataFrame as an HTML file with `to_html()`:\n", 370 | "\n", 371 | "```python\n", 372 | "df = pd.DataFrame(data=data).T\n", 373 | "df.to_html('data.html')\n", 374 | "```" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | "metadata": { 381 | "slideshow": { 382 | "slide_type": "fragment" 383 | } 384 | }, 385 | "outputs": [], 386 | "source": [ 387 | "sample.to_html('sample_data_html.txt')\n", 388 | "sample.to_html('sample_data.html')" 389 | ] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "metadata": {}, 394 | "source": [ 395 | "And have a look [sample_data.html](sample_data.html) (or at the `txt` file - [sample_data_html.txt](sample_data_html.txt)) " 396 | ] 397 | }, 398 | { 399 | "cell_type": "markdown", 400 | "metadata": { 401 | "slideshow": { 402 | "slide_type": "slide" 403 | } 404 | }, 405 | "source": [ 406 | "## Binary Files\n", 407 | "\n", 408 | "In binary files, there is no terminator for a line and the data is stored after converting it into machine understandable binary language. 
Unlike text files, binary files are not human readable, this means, if you try to open them in any text editor, it will either not open, or show the data in an unrecognizable format.\n", 409 | "\n", 410 | "Without documentation, proper software, and version management, these files can be difficult to work with.\n", 411 | "\n", 412 | "Below, we show you a very simple example of how you could read and write to a binary file." 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": { 418 | "slideshow": { 419 | "slide_type": "subslide" 420 | } 421 | }, 422 | "source": [ 423 | "### Reading and Writing to a Binary File\n", 424 | "\n", 425 | "Opening a file in binary format is very similar to opening a text file, just add `\"b\"` to the mode parameter. For example, `\"rb\"` mode opens the file in binary format for reading only.\n", 426 | "\n", 427 | "The following example stores a list of numbers in a binary file:\n", 428 | "\n", 429 | "```python\n", 430 | "with open('binfile.bin', 'wb') as f:\n", 431 | " num = [5, 10, 15, 20, 25]\n", 432 | " arr = bytearray(num)\n", 433 | " f.write(arr)\n", 434 | "```\n", 435 | "\n", 436 | "The function `bytearray` converts the list into a byte representation." 
437 | ] 438 | }, 439 | { 440 | "cell_type": "markdown", 441 | "metadata": { 442 | "slideshow": { 443 | "slide_type": "fragment" 444 | } 445 | }, 446 | "source": [ 447 | "To read a binary file like the one shown above, the output of the `read()` function is turned back into a list:\n", 448 | "\n", 449 | "```python\n", 450 | "with open('binfile.bin', 'rb') as f:\n", 451 | " num = list(f.read())\n", 452 | "```" 453 | ] 454 | }, 455 | { 456 | "cell_type": "markdown", 457 | "metadata": { 458 | "slideshow": { 459 | "slide_type": "fragment" 460 | } 461 | }, 462 | "source": [ 463 | "There are, of course, advantages to using binary files:\n", 464 | "\n", 465 | "- smaller file sizes\n", 466 | "- supports more features (compression, multiple dataset storage, self-description, etc)\n", 467 | "- quicker read/write times\n", 468 | "- entire ecosystems of supported software\n", 469 | "\n", 470 | "Due to this, the developers of Pandas have created a whole set of IO tools that allow you to read/write not only text files, but also binary and even SQL file types.\n", 471 | "\n", 472 | "| format type | data | read | write |\n", 473 | "| :---: |:----:|:-----: | :---: |\n", 474 | "| binary | MS Excel | read_excel | to_excel |\n", 475 | "| binary | Python Pickle Format | read_pickle | to_pickle |\n", 476 | "| binary | HDF5 Format | read_hdf | to_hdf |\n", 477 | "| binary | SPSS | read_spss | |\n", 478 | "\n", 479 | "This table contains only a few examples but you can see all of the available IO tools [here](https://pandas.pydata.org/pandas-docs/dev/user_guide/io.html).\n", 480 | "\n", 481 | "In the next sections, we show you a few of these standards for storing tabular data in binary formats." 
482 | ] 483 | }, 484 | { 485 | "cell_type": "markdown", 486 | "metadata": { 487 | "slideshow": { 488 | "slide_type": "subslide" 489 | } 490 | }, 491 | "source": [ 492 | "### Excel Files\n", 493 | "\n", 494 | "Microsoft Excel is probably the most widely-used spreadsheet software, and even though it is a binary file format, you can read and write Excel files in Pandas, similar to CSV files.\n", 495 | "\n", 496 | "As an additional requirement, depending on the Excel version you work with, you will need to install other Python packages first.\n", 497 | "\n", 498 | "- **xlrd** to read Excel files *.xls* (Excel 2003)\n", 499 | "\n", 500 | "- **openpyxl** to read/write *.xlsx* files (Excel 2007+)\n", 501 | "\n", 502 | "- **pyxlsb** to read binary Excel *.xlsb*\n", 503 | "\n", 504 | "\n", 505 | "You can install them using **pip** with a single command:\n", 506 | "```python\n", 507 | "pip install xlrd openpyxl pyxlsb\n", 508 | "```\n", 509 | "\n", 510 | "Or using Conda:\n", 511 | "```python\n", 512 | "conda install xlrd openpyxl pyxlsb\n", 513 | "```\n" 514 | ] 515 | }, 516 | { 517 | "cell_type": "markdown", 518 | "metadata": { 519 | "slideshow": { 520 | "slide_type": "fragment" 521 | } 522 | }, 523 | "source": [ 524 | "Once you have installed the necessary packages, you can read an Excel file with read_excel():\n", 525 | "\n", 526 | "```python\n", 527 | "df = pd.read_excel('data.xlsx')\n", 528 | "```\n", 529 | "\n", 530 | "And save your DataFrame in an Excel file with to_excel():\n", 531 | "```python\n", 532 | "df.to_excel('data2.xlsx')\n", 533 | "``` " 534 | ] 535 | }, 536 | { 537 | "cell_type": "code", 538 | "execution_count": null, 539 | "metadata": { 540 | "slideshow": { 541 | "slide_type": "fragment" 542 | } 543 | }, 544 | "outputs": [], 545 | "source": [ 546 | "sample.to_excel('sample_data.xlsx')" 547 | ] 548 | }, 549 | { 550 | "cell_type": "markdown", 551 | "metadata": {}, 552 | "source": [ 553 | "As it is a binary file, you cannot open 
[sample_data.xlsx](sample_data.xlsx) in your browser." 554 | ] 555 | }, 556 | { 557 | "cell_type": "markdown", 558 | "metadata": { 559 | "slideshow": { 560 | "slide_type": "subslide" 561 | } 562 | }, 563 | "source": [ 564 | "### Pickle Files\n", 565 | "\n", 566 | "Pickling is the act of converting Python objects into byte streams, and unpickling is the inverse process. This format makes it easy to store any Python objects as binary files and keep the data and hierarchy of the object.\n", 567 | "\n", 568 | "However, you should remember that they will only read back correctly if the Python version and package versions of the readers are the same as the writer.\n", 569 | "\n", 570 | "The pickle module has the same interface as the json module:\n", 571 | "\n", 572 | "| function | read/write | file/string |\n", 573 | "| :---: | :----: | :-----: |\n", 574 | "| load() | read | file |\n", 575 | "| dump() | write | file |\n", 576 | "| loads() | read | string |\n", 577 | "| dumps() | write | string |\n", 578 | "\n", 579 | "\n", 580 | "The following command pickles the DataFrame *df* and saves it as *data.pickle*:\n", 581 | "\n", 582 | "```python\n", 583 | "import pickle\n", 584 | "with open('data.pickle', 'wb') as f:\n", 585 | " pickle.dump(df, f)\n", 586 | "``` \n", 587 | "\n", 588 | "While, the following unpickles *data.pickle* and loads it as a pandas DataFrame:\n", 589 | "\n", 590 | "```python \n", 591 | "with open('data.pickle', 'rb') as f:\n", 592 | " data = pickle.load(f)\n", 593 | "```" 594 | ] 595 | }, 596 | { 597 | "cell_type": "markdown", 598 | "metadata": { 599 | "slideshow": { 600 | "slide_type": "fragment" 601 | } 602 | }, 603 | "source": [ 604 | "You can also use the Pandas built-in functionality for dealing with pickle files.\n", 605 | "\n", 606 | "```python\n", 607 | "df.to_pickle('data.pickle') # Pickles df and saves it as data.pickle\n", 608 | "\n", 609 | "pd.read_pickle('data.pickle') # Unpickles and reads data.pickle\n", 610 | "```" 611 | ] 612 | }, 613 
| { 614 | "cell_type": "markdown", 615 | "metadata": { 616 | "slideshow": { 617 | "slide_type": "fragment" 618 | } 619 | }, 620 | "source": [ 621 | "**As a word of caution, you should always beware of loading pickles from untrusted sources. When you unpickle an untrustworthy file, it could execute arbitrary code on your machine, performing dangerous actions and exploiting your device.**" 622 | ] 623 | }, 624 | { 625 | "cell_type": "code", 626 | "execution_count": null, 627 | "metadata": { 628 | "slideshow": { 629 | "slide_type": "fragment" 630 | } 631 | }, 632 | "outputs": [], 633 | "source": [ 634 | "sample.to_pickle('sample_data.pkl')" 635 | ] 636 | }, 637 | { 638 | "cell_type": "markdown", 639 | "metadata": { 640 | "slideshow": { 641 | "slide_type": "subslide" 642 | } 643 | }, 644 | "source": [ 645 | "### HDF5\n", 646 | "\n", 647 | "HDF5 (Hierarchical Data Format 5) is a file format that has become quite popular. It can store a large amount of data in a single file, has compression features, and can store many datasets. The HDF5 file format has a filesystem-like organization inside it, which means you can store the datasets in their own \"folder structure\" inside the file.\n", 648 | "\n", 649 | "HDFStore is a dictionary-like object for reading and writing pandas objects using the **PyTables** library.\n", 650 | "\n", 651 | "To get data into an hdf5 file, you need to specify the filename and the key/group of the dataset. If you don't give it a path, it will put the key in the root group, which is the \"root folder\" of the hdf5 file. 
\n", 652 | "\n", 653 | "```python\n", 654 | "df.to_hdf('store.h5', key='/data', format='table', mode='a')\n", 655 | "```\n", 656 | "\n", 657 | "And in order to access and read from the HDF5 file:\n", 658 | "\n", 659 | "```python\n", 660 | "pd.read_hdf('store.h5', key='/data')\n", 661 | "```" 662 | ] 663 | }, 664 | { 665 | "cell_type": "markdown", 666 | "metadata": { 667 | "slideshow": { 668 | "slide_type": "fragment" 669 | } 670 | }, 671 | "source": [ 672 | "Similarly to the `open()` method, `to_hdf()` takes the `mode` parameter (`\"a\"`, `\"w\"` or `\"r+\"`). `to_hdf()` also requires a format parameter. You can read more about the different options here: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_hdf.html" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": null, 678 | "metadata": {}, 679 | "outputs": [], 680 | "source": [ 681 | "sample.to_hdf('sample_data.h5', key='sample')" 682 | ] 683 | }, 684 | { 685 | "cell_type": "code", 686 | "execution_count": null, 687 | "metadata": {}, 688 | "outputs": [], 689 | "source": [] 690 | } 691 | ], 692 | "metadata": { 693 | "kernelspec": { 694 | "display_name": "Python 3 (ipykernel)", 695 | "language": "python", 696 | "name": "python3" 697 | }, 698 | "language_info": { 699 | "codemirror_mode": { 700 | "name": "ipython", 701 | "version": 3 702 | }, 703 | "file_extension": ".py", 704 | "mimetype": "text/x-python", 705 | "name": "python", 706 | "nbconvert_exporter": "python", 707 | "pygments_lexer": "ipython3", 708 | "version": "3.10.9" 709 | }, 710 | "rise": { 711 | "scroll": true 712 | }, 713 | "toc": { 714 | "base_numbering": 1, 715 | "nav_menu": {}, 716 | "number_sections": true, 717 | "sideBar": true, 718 | "skip_h1_title": false, 719 | "title_cell": "Table of Contents", 720 | "title_sidebar": "Contents", 721 | "toc_cell": false, 722 | "toc_position": {}, 723 | "toc_section_display": true, 724 | "toc_window_display": false 725 | } 726 | }, 727 | "nbformat": 4, 728 | 
"nbformat_minor": 4 729 | } 730 | -------------------------------------------------------------------------------- /Pandas/README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 |

5 | 6 | 7 | notebook | content 8 | ---- | ------ 9 | [Pandas.ipynb](Pandas.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Pandas/Pandas.ipynb) | Pandas 10 | [IO_Pandas.ipynb](IO_Pandas.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Pandas/IO_Pandas.ipynb) | Reading data with Pandas 11 | 12 | 13 | ## Description 14 | 15 | - introduce the two main objects `Series` and `DataFrame` 16 | - highlight the concept of an `Index`, naming rows and columns (ToDo: add names to Series example in the beginning). 17 | 18 | ## Resources 19 | 20 | - [Pandas Tutorial for Hands on ML](https://nbviewer.jupyter.org/github/ageron/handson-ml2/blob/master/tools_pandas.ipynb) 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 |

5 | 6 | 7 | __Please help us to improve the course by filling out the [feedback form](https://forms.office.com/e/UL3w7D3Q94)__ 8 | 9 | 10 | | DAY 1 | DAY 2 | DAY 3 | 11 | |------------------------------------------------------|------------------------------------------------------|---------------------------------------| 12 | | Morning coffee (optional) | Morning coffee (optional) | Morning coffee (optional) | 13 | | [Introduction and Motivation](slides) | [Pandas](Pandas) | [Recap Quiz](Recap) | 14 | | [Variables and data types](Variables_data_types) | | | 15 | | Coffee break | Coffee break | Coffee break | 16 | | [Iterables I](Iterables) | [Pandas](Pandas) | [Visualization](Visualizations) | 17 | | Coffee break | | | 18 | | [Iterables II](Iterables) | | | 19 | | Lunch | Lunch | Lunch | 20 | | [Booleans, operators and conditions](Conditionals) | [Pandas](Pandas) | [Virtual envs and installations](slides) | 21 | | Coffee break | Coffee break | Coffee break | 22 | | [Loops](Loops) | [Pandas](Pandas) | [Dataset Exercise](Exercise) | 23 | | Q & A | Q & A | Q & A | 24 | 25 | ## Program April 2024 26 | ![image](https://github.com/Center-for-Health-Data-Science/PythonTsunami/blob/2024_april/figures/program.PNG) 27 | 28 | ## Further Resources 29 | 30 | ### Cheat Sheets 31 | - Basics: 32 | - [Getting started](cheat_sheets/cheat_sheet_day0.pdf) 33 | - [Importing Data](cheat_sheets/Importing_Data_Cheat_sheet.pdf) 34 | - [Jupyter Notebook](cheat_sheets/Jupyter_Notebook_Cheat_Sheet.pdf) 35 | - Data Science: 36 | - [Numpy](cheat_sheets/Numpy_Python_Cheat_Sheet.pdf) 37 | - [Pandas](cheat_sheets/Pandas_Cheat_Sheet.pdf) 38 | - [Scipy](cheat_sheets/Scipy-LinearAlgebra_Cheat_Sheet.pdf) 39 | - [Scikit-learn](cheat_sheets/Scikit-learn_Cheat_Sheet.pdf) 40 | - Visualization: 41 | - [Matplotlib](cheat_sheets/Python_Matplotlib_Cheat_Sheet.pdf) 42 | - [Plot.ly](cheat_sheets/Plotly_Cheat_Sheet.pdf) 43 | - [Seaborn](cheat_sheets/Seaborn_Cheat_Sheet.pdf) 44 | - 
[Bokeh](cheat_sheets/Bokeh_Cheat_Sheet.pdf) 45 | 46 | ### Basics 47 | - [codecademy](https://codecademy.com) 48 | - Interactive website with many beginners code tutorials. Requires sign-up but base content is free. 49 | 50 | - [learnpython.org](https://www.learnpython.org/) 51 | - interactive python basics tutorial 52 | 53 | - [Springboard - Data Analysis with Python, SQL, and R](https://www.springboard.com/learning-paths/data-analysis/learn/) 54 | - starts with - [Solo Learn](https://www.sololearn.com/Course/Python/) and [Design of Computer Programs](https://www.udacity.com/course/design-of-computer-programs--cs212) 55 | - [Scipy Lectures](https://scipy-lectures.org/index.html) 56 | - Python introduction with a focus on scientific computing 57 | - [official tutorial](https://docs.python.org/3/tutorial/) 58 | 59 | ### Advanced 60 | - [Fluent Python](https://www.oreilly.com/library/view/fluent-python-2nd/9781492056348/) 61 | - [Intermediate Python Programming Course (6h) on Youtube](https://www.youtube.com/watch?v=HGOBQPFzWKo) 62 | - [Hitchhiker's Guide to Python](https://docs.python-guide.org/) 63 | 64 | 65 | ### Python Installations 66 | 67 | In this course we use [Google Colab](https://colab.research.google.com/) to execute notebooks. Notebooks are text files allowing 68 | the combination of Text, Code and the output of code. Colab offers an extended set of 69 | pre-installed tools. See the [tutorial series](https://www.youtube.com/playlist?list=PLQY2H8rRoyvyK5aEDAI3wUUqC_F0oEroL). 70 | 71 | [Anaconda](https://www.anaconda.com/products/individual) offers for your private computer 72 | an extended installations, including most tools you will ever need for Python. 73 | Use the [Anaconda Navigator](https://docs.anaconda.com/anaconda/navigator/) to launch applications. 74 | -------------------------------------------------------------------------------- /Recap/README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 |

5 | 6 | 7 | notebook | content 8 | ---- | ------ 9 | [recap.ipynb](recap.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Recap/recap.ipynb)| Recap 10 | -------------------------------------------------------------------------------- /Recap/recap.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# Recap \n", 17 | "**Congratulations!** \n", 18 | "\n", 19 | "You learned a whole lot of new programming concepts and skills over the last two days. \n", 20 | "To kick off the final day of this Python course, we will start by recapping some of the most important concepts." 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## Quiz" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "Use the next 45 minutes on the quiz below. Work together with the people at your table and discuss your answers. Add code cells and try the code written in the question if you are unsure. " 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "**Question 1** \n", 42 | "Can you explain what happens to ``var`` in each step of the example below? 
What type is ``var`` at the end of the program?\n", 43 | "```python\n", 44 | "var = 1\n", 45 | "var *= 100\n", 46 | "var /= 2\n", 47 | "var = int(var)\n", 48 | "var = str(var)\n", 49 | "var = var + \" points to Gryffindor!\"\n", 50 | "print(var)\n", 51 | "```" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "**Question 2** \n", 59 | "What is the loop below printing at each iteration?\n", 60 | " \n", 61 | "```python\n", 62 | "scand = [\"Denmark\", \"Finnland\", \"Sweden\", \"Norway\"]\n", 63 | "\n", 64 | "for index, country in enumerate(scand):\n", 65 | " if country != \"Finnland\":\n", 66 | " print(\"My #\" + str(index+1) + \" favorite country in Scandinavia is \" + country + \".\")\n", 67 | " \n", 68 | " else:\n", 69 | " print(\"Sorry, but Finnland is not part of Scandinavia.\")\n", 70 | " break\n", 71 | "```" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "**Question 3** \n", 79 | "Consider the scenarios A-C below. Which Python data structure would you use to solve them?\n", 80 | "\n", 81 | "**A.** You have all names of the participants in this course. You want to store them in a variable that preserves the order in which the participants signed up for the course. \n", 82 | "\n", 83 | "**B.** You have (a) all names of the participants in this course, and (b) the names of everyone who is currently employed at SUND. Next, you want to find all employees who DID NOT participate in the course. Which data structure would you store (a) and (b) in to achieve this? And bonus question: How would you achieve it? \n", 84 | "\n", 85 | "**C.** You want to store information about all capital cities in Europe and the number of their inhabitants. Since the numbers might change, you want to make sure that you can update the information at any time. 
" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "**Question 4**\n", 93 | "\n", 94 | "Does the expression used in the `if` below evaluate to `True` or `False`? Can you explain why? Can you change it to make it `False`?\n", 95 | "\n" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 1, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "Include in study.\n" 108 | ] 109 | } 110 | ], 111 | "source": [ 112 | "smoker = False\n", 113 | "patient_age = 50\n", 114 | "\n", 115 | "if (not smoker and patient_age < 65):\n", 116 | " print('Include in study.')\n", 117 | "else:\n", 118 | " print('Skip to next candidate.')" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "**Question 5** \n", 126 | "Explain the errors below, and change the code to fix them." 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "# error 1\n", 136 | "75 * (2/0)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "# error 2\n", 146 | "weather = [\"sunny\", \"cloudy\", \"rainy\"]\n", 147 | "weather[4]" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "# error 3\n", 157 | "university = \"ucph\"\n", 158 | "typ(university)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "# error 4\n", 168 | "university = \"ucph\"\n", 169 | "if \"c\" in university:\n", 170 | "print(\"I work at\" + university.upper() + \".\")" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | 
"outputs": [], 178 | "source": [ 179 | "# error 5\n", 180 | "print(hello_world)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "**Question 6**\n", 188 | "\n", 189 | "Consider the data frame below. How do you:\n", 190 | "\n", 191 | "1. Create a new dataframe with only the treatment, age and satisfaction columns?\n", 192 | "2. Calculate the mean age?\n", 193 | "3. Extract all rows with patients older than 70?\n", 194 | "4. Omit rows with missing data?\n" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 19, 200 | "metadata": {}, 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/html": [ 205 | "

\n", 206 | "\n", 219 | "\n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | "
patient_idtreatmenthospitalconvalescence_daysagesatisfaction
0402109ARigshospitalet15.0683
1092070ARigshospitalet13.0745
2994082BHerlev27.0762
3843094AHerlev30.0655
4369360BRigshospitalet21.0685
5688213BRigshospitalet29.0773
6197347AHerlev25.0655
7374793ARigshospitaletNaN675
8759063BRigshospitalet16.0754
9121219BNone27.0684
10427898BRigshospitalet15.0745
\n", 333 | "
" 334 | ], 335 | "text/plain": [ 336 | " patient_id treatment hospital convalescence_days age satisfaction\n", 337 | "0 402109 A Rigshospitalet 15.0 68 3\n", 338 | "1 092070 A Rigshospitalet 13.0 74 5\n", 339 | "2 994082 B Herlev 27.0 76 2\n", 340 | "3 843094 A Herlev 30.0 65 5\n", 341 | "4 369360 B Rigshospitalet 21.0 68 5\n", 342 | "5 688213 B Rigshospitalet 29.0 77 3\n", 343 | "6 197347 A Herlev 25.0 65 5\n", 344 | "7 374793 A Rigshospitalet NaN 67 5\n", 345 | "8 759063 B Rigshospitalet 16.0 75 4\n", 346 | "9 121219 B None 27.0 68 4\n", 347 | "10 427898 B Rigshospitalet 15.0 74 5" 348 | ] 349 | }, 350 | "execution_count": 19, 351 | "metadata": {}, 352 | "output_type": "execute_result" 353 | } 354 | ], 355 | "source": [ 356 | "#defining the dataframe\n", 357 | "import pandas as pd\n", 358 | "\n", 359 | "data = [\n", 360 | " ['402109','A','Rigshospitalet',15,68,3],\n", 361 | " ['092070','A','Rigshospitalet',13,74,5],\n", 362 | " ['994082','B','Herlev',27,76,2],\n", 363 | " ['843094','A','Herlev',30,65,5],\n", 364 | " ['369360','B','Rigshospitalet',21,68,5],\n", 365 | " ['688213','B','Rigshospitalet',29,77,3],\n", 366 | " ['197347','A','Herlev',25,65,5],\n", 367 | " ['374793','A','Rigshospitalet', None, 67,5],\n", 368 | " ['759063','B','Rigshospitalet',16,75,4],\n", 369 | " ['121219','B',None,27,68,4],\n", 370 | " ['427898','B','Rigshospitalet',15,74,5]\n", 371 | "]\n", 372 | "\n", 373 | "df = pd.DataFrame(data, columns=['patient_id', 'treatment', 'hospital', 'convalescence_days' ,'age', 'satisfaction'])\n", 374 | "df\n" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | "metadata": {}, 381 | "outputs": [], 382 | "source": [ 383 | "#your answers here" 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": {}, 389 | "source": [ 390 | "## To sum it up...\n", 391 | "\n", 392 | "...here are the most important take-away points:\n", 393 | "\n", 394 | "\n", 395 | "#### Variables\n", 396 | "A variable is a named 
symbol that holds a value. \n", 397 | " \n", 398 | " \n", 399 | "#### Data Types and Structures\n", 400 | "\n", 401 | "* ``int``: zero, positive or negative whole number \n", 402 | "```python\n", 403 | "int_example = 1\n", 404 | "```\n", 405 | " \n", 406 | "* ``float``: zero, positive or negative number with a decimal point\n", 407 | "```python\n", 408 | "float_example = 34.6\n", 409 | "```\n", 410 | "* ``str``: containers of characters. \n", 411 | "```python\n", 412 | "str_example = \"I am a string!\"\n", 413 | "```\n", 414 | "* ``list``: container of ordered items that can be accessed by their index\n", 415 | "```python\n", 416 | "list_example = [\"element\", 1, 2, \"another_element\"]\n", 417 | "```\n", 418 | "* ``set``: collection of unique, unordered, unchangeable, and unindexed elements\n", 419 | "```python\n", 420 | "#with curly brackets\n", 421 | "set_example = {'Hello', 1, 2, 'pizza'}\n", 422 | "#set from list:\n", 423 | "set2 = set(['KU','SDU','AU'])\n", 424 | "```\n", 425 | "* ``dict``: stores (key, value) pairs\n", 426 | "```python\n", 427 | "dict_example = {\"apple\":1, \"cherry\":20, \"strawberry\":10}\n", 428 | "```\n", 429 | "* ``tuple``: immutable, ordered list of values\n", 430 | "```python\n", 431 | "tuple_example = (1,20,78,5)\n", 432 | "```\n", 433 | "* ``bool``: can only take the values `True` or `False`\n", 434 | "```python\n", 435 | "bool_example = True\n", 436 | "```\n", 437 | "\n", 438 | "\n", 439 | "#### Indexing and Slicing\n", 440 | "Indexing and slicing: Both the characters in a string and the elements in a list can be accessed by their index. In Python, indexing always starts with 0, i.e. the first element of a list lives at index position 0. 
To access values in a string or list, we can use the slicing notation ``[start:end:step]``:\n", 441 | "```python\n", 442 | "list_example[1:3] # access the elements at index 1 and 2\n", 443 | "```\n", 444 | "\n", 445 | "#### Comparison Operators and Boolean Logic\n", 446 | "Comparison operators can tell how two Python values relate, resulting in a boolean. They answer yes/no questions.\n", 447 | "\n", 448 | "#### Conditional Statements\n", 449 | "Conditional statements use the keywords ``if``, ``elif`` and ``else``. They are used to control a piece of code based on the value of a Boolean condition.\n", 450 | "\n", 451 | "```python\n", 452 | "num_example = # you define a number here\n", 453 | "\n", 454 | "if num_example % 2:\n", 455 | " print(f\"{num_example} is an odd number.\")\n", 456 | "else:\n", 457 | " print(f\"{num_example} is an even number.\")\n", 458 | "```\n", 459 | "\n", 460 | "\n", 461 | "\n", 462 | "#### Loops\n", 463 | "Loops allow you to repeatedly execute a piece of code. You can control the flow of your loop with the statements ``break``, ``continue`` and ``pass``.\n", 464 | " \n", 465 | "``for`` loops repeat the code you want to run for each element in the sequence you loop over. 
\n", 466 | "```python\n", 467 | "number_list = [1, 2, 3, 4, 5]\n", 468 | "for number in number_list:\n", 469 | " print(number)\n", 470 | "```\n", 471 | " \n", 472 | "``while`` loops continue to execute while a certain condition is ``True``, and end once this condition becomes ``False``.\n", 473 | "```python\n", 474 | "i = 0\n", 475 | "while i < 10:\n", 476 | " i += 1\n", 477 | " print(i)\n", 478 | "```\n" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": null, 484 | "metadata": {}, 485 | "outputs": [], 486 | "source": [] 487 | } 488 | ], 489 | "metadata": { 490 | "kernelspec": { 491 | "display_name": "Python 3 (ipykernel)", 492 | "language": "python", 493 | "name": "python3" 494 | }, 495 | "language_info": { 496 | "codemirror_mode": { 497 | "name": "ipython", 498 | "version": 3 499 | }, 500 | "file_extension": ".py", 501 | "mimetype": "text/x-python", 502 | "name": "python", 503 | "nbconvert_exporter": "python", 504 | "pygments_lexer": "ipython3", 505 | "version": "3.11.1" 506 | } 507 | }, 508 | "nbformat": 4, 509 | "nbformat_minor": 4 510 | } 511 | -------------------------------------------------------------------------------- /Variables_data_types/README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 |

5 | 6 | 7 | notebook | content 8 | ---- | ------ 9 | [variables.ipynb](variables.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Variables_data_types/variables.ipynb) | Variables and data types 10 | -------------------------------------------------------------------------------- /Variables_data_types/Variables_slides_Oct22.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/Variables_data_types/Variables_slides_Oct22.pdf -------------------------------------------------------------------------------- /Variables_data_types/variables.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"markdown","metadata":{"id":"i7xIAr06u3oF"},"source":["\n","\n",""]},{"cell_type":"markdown","metadata":{"id":"1MjzmO4uoHh9"},"source":["# Variables and Data Types"]},{"cell_type":"markdown","metadata":{"id":"GXxxR4dDuunK"},"source":["## Variable Assignment"]},{"cell_type":"markdown","metadata":{"id":"Nx4q1VSyuzOA"},"source":["> A variable is a named symbol that holds a value.\n","\n","> Variable **containers** --> name and value\n","\n","* Create variables by assigning a value to a name (just like using variables in math).\n","* Variable names should be meaningful, i.e. not just ``a``, ``b``, ``c``\n","* Variables are always **assigned** with the variable name on the left and the value on the right of the ***equals*** sign. 
For instance:\n"," \n"," * `a_variable = 100` \n"," * assigned to other variables: `another_variable = a_variable`\n"," * reassigned at any time: `a_variable = 435`\n"," * assigning several variables at the same time: `all, at, once = 1, 130, 43`\n","\n","* Variables **must** be assigned before they can be used."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":377,"status":"ok","timestamp":1655709554070,"user":{"displayName":"Tugce K","userId":"13907696802096210204"},"user_tz":-120},"id":"-IoOMUOnuD6f","outputId":"7621238e-aee0-4b49-8862-54d5f74acabf"},"outputs":[{"name":"stdout","output_type":"stream","text":["100\n"]}],"source":["# Create a variable x by assigning a value to x\n","x = 100\n","print(x)\n","\n","# What is the container and what are the data here?"]},{"cell_type":"markdown","metadata":{"id":"M23lgzTcwMXn"},"source":["## Naming restrictions\n","\n","1. Variable names must start with a letter or underscore.\n"," \n","2. The rest of the name must consist of letters and numbers (i.e. **alphanumeric**). If you need to use a multi_word variable name, underscores can be used.\n"," \n","3. Names are case-sensitive. \n","\n","4. Each variable's name must be unique. 
Two variables with the same name are the same variable."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":230,"status":"ok","timestamp":1655709783721,"user":{"displayName":"Tugce K","userId":"13907696802096210204"},"user_tz":-120},"id":"ReKCem_Cu3oW","outputId":"e64b489b-658d-40d1-9741-f9b1e22340b9"},"outputs":[{"name":"stdout","output_type":"stream","text":["100\n"]}],"source":["# Try it out\n","_2x = 100\n","print(_2x)\n","\n","# What can be the alternatives to make this variable naming work?"]},{"cell_type":"markdown","metadata":{"id":"pVF6YjlgGtKX"},"source":["## Data Types"]},{"cell_type":"markdown","metadata":{"id":"J3kix56wGxEB"},"source":["We'll see the common Python data types below. There are more. You can always check a variable's type with the `type()` function.\n","\n","### Numbers\n","* `int`: an integer, e.g. `1`, `2`, `3`\n","* `float`: a floating point number with a decimal point, e.g. `1.2`, `2999.197`, `-160.8`\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":310,"status":"ok","timestamp":1655709993563,"user":{"displayName":"Tugce K","userId":"13907696802096210204"},"user_tz":-120},"id":"ovMTp36Eu3oa","outputId":"f3436719-929f-42d5-a309-5b5521eedd84"},"outputs":[{"name":"stdout","output_type":"stream","text":["-1\n","\n"]}],"source":["# Try it out\n","var = -1\n","print(var)\n","print(type(var))"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":292,"status":"ok","timestamp":1655709984448,"user":{"displayName":"Tugce K","userId":"13907696802096210204"},"user_tz":-120},"id":"goSxBiyf4qkY","outputId":"fc97c546-5d69-435a-b576-cd83c43c2994"},"outputs":[{"name":"stdout","output_type":"stream","text":["-2.099874565\n","\n"]}],"source":["var2 = 
-2.099874565\n","print(var2)\n","print(type(var2))"]},{"cell_type":"markdown","metadata":{},"source":["### Strings\n","\n","`str`: (string) a sequence of Unicode characters, e.g. \"Kate\" or \"程序设计\".\n","\n","There are different _encodings_ for characters. The default in Python 3 is the Unicode encoding which includes characters from European and Asian languages."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":222,"status":"ok","timestamp":1655709986784,"user":{"displayName":"Tugce K","userId":"13907696802096210204"},"user_tz":-120},"id":"Zt37xw_g40uH","outputId":"5921b555-db35-4cea-8d94-81b789f1888b"},"outputs":[{"name":"stdout","output_type":"stream","text":["Hello world!\n","\n"]}],"source":["my_string = \"Hello world!\"\n","print(my_string)\n","print(type(my_string))"]},{"cell_type":"markdown","metadata":{},"source":["String literals in Python can be declared with either single or double quotes.\n"," \n"," my_other_str = 'a hat'\n"," my_str = \"a cat\"\n","\n","You can use a mix to include literal quotes in the string:"]},{"cell_type":"code","execution_count":1,"metadata":{},"outputs":[{"data":{"text/plain":["\"I said 'Good Morning'.\""]},"execution_count":1,"metadata":{},"output_type":"execute_result"}],"source":["my_string = \"I said 'Good Morning'.\"\n","my_string"]},{"cell_type":"markdown","metadata":{},"source":["You can integrate variables into a string by using formatted string syntax:"]},{"cell_type":"code","execution_count":1,"metadata":{},"outputs":[{"data":{"text/plain":["'The value of i is 5'"]},"execution_count":1,"metadata":{},"output_type":"execute_result"}],"source":["i = 5\n","f'The value of i is {i}'"]},{"cell_type":"markdown","metadata":{},"source":["### Booleans\n","\n","`bool`: True or 
False"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2,"status":"ok","timestamp":1655709988536,"user":{"displayName":"Tugce K","userId":"13907696802096210204"},"user_tz":-120},"id":"R9BA93CP4_12","outputId":"c8959f43-24b0-4ae1-a8c5-e915ab1b91bc"},"outputs":[{"name":"stdout","output_type":"stream","text":["True\n","\n"]}],"source":["bool_var = True\n","print(bool_var)\n","print(type(bool_var))"]},{"cell_type":"markdown","metadata":{"id":"PrL4PX77u3ol"},"source":["# Operators"]},{"cell_type":"markdown","metadata":{"id":"wCrDfqgGu3ol"},"source":["## Math Operators\n","\n","You can do math between variables of the `int` or `float` type:\n","\n","- addition: `+`\n","- subtraction: `-`\n","- multiplication: `*`\n","- division: `/`\n","- integer division: `//`\n","- exponentiation, power: `**`\n","- modulo: `%`\n","\n","The usual rules of algebra apply."]},{"cell_type":"code","execution_count":2,"metadata":{},"outputs":[{"data":{"text/plain":["8"]},"execution_count":2,"metadata":{},"output_type":"execute_result"}],"source":["a = 5\n","b = 3\n","a + b"]},{"cell_type":"code","execution_count":3,"metadata":{},"outputs":[{"data":{"text/plain":["2"]},"execution_count":3,"metadata":{},"output_type":"execute_result"}],"source":["a - b"]},{"cell_type":"code","execution_count":4,"metadata":{},"outputs":[{"data":{"text/plain":["1.6666666666666667"]},"execution_count":4,"metadata":{},"output_type":"execute_result"}],"source":["a / b"]},{"cell_type":"code","execution_count":6,"metadata":{},"outputs":[{"data":{"text/plain":["25"]},"execution_count":6,"metadata":{},"output_type":"execute_result"}],"source":["a**2"]},{"cell_type":"code","execution_count":8,"metadata":{},"outputs":[{"data":{"text/plain":["6.4"]},"execution_count":8,"metadata":{},"output_type":"execute_result"}],"source":["c = 1.4\n","\n","a + c"]},{"cell_type":"markdown","metadata":{},"source":["Strings can also be 
added:"]},{"cell_type":"code","execution_count":10,"metadata":{},"outputs":[{"data":{"text/plain":["'HelloWorld'"]},"execution_count":10,"metadata":{},"output_type":"execute_result"}],"source":["string_1 = 'Hello'\n","string_2 = 'World'\n","\n","string_1 + string_2"]},{"cell_type":"markdown","metadata":{"id":"czYsAUPBu3o4"},"source":["# Reading error messages\n","\n","Let's talk about error messages! They look red and scary, but they're actually just here to help you. Error messages tell you that you are trying to do something that is either not allowed, not possible, ambiguous, not meaningful or written using the wrong syntax. \n"," \n","You will encounter error messages **ALL THE TIME**; especially when learning to program, but also as an advanced programmer. That's why it's worth knowing how to read them.\n","\n","\n","***\n","Look at the error message below. You can find the most important piece of information **on the last line: this is the actual error** and this is where you should always look first. 
In the part above the last line, you can see in which line of your program the error occurs.\n","\n","```python\n","---------------------------------------------------------------------------\n","ZeroDivisionError Traceback (most recent call last)\n"," in \n","----> 1 1 / 0 # the arrow points towards the line where the error occurs\n","\n","ZeroDivisionError: division by zero # this is the actual type of error\n","```"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"jYMwrxSQu3o4"},"outputs":[],"source":["# let's see what types of error messages there are"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":172},"executionInfo":{"elapsed":237,"status":"error","timestamp":1655710358320,"user":{"displayName":"Tugce K","userId":"13907696802096210204"},"user_tz":-120},"id":"3M6rVbZlu3o4","outputId":"91a3ebe0-4611-4070-85ed-5e8757f7b5cd"},"outputs":[{"ename":"NameError","evalue":"ignored","output_type":"error","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mi\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;31mNameError\u001b[0m: name 'i' is not defined"]}],"source":["i"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":172},"executionInfo":{"elapsed":295,"status":"error","timestamp":1655710392917,"user":{"displayName":"Tugce 
K","userId":"13907696802096210204"},"user_tz":-120},"id":"VSAExeGVu3o5","outputId":"67a352db-dca5-491d-fba4-66e83afa6b90"},"outputs":[{"ename":"ZeroDivisionError","evalue":"ignored","output_type":"error","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mZeroDivisionError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;36m1\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;31mZeroDivisionError\u001b[0m: division by zero"]}],"source":["1 / 0"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":135},"executionInfo":{"elapsed":233,"status":"error","timestamp":1655710432918,"user":{"displayName":"Tugce K","userId":"13907696802096210204"},"user_tz":-120},"id":"9KcwsZYzu3o5","outputId":"e1aa2000-74ba-41a7-8a5e-01202f0213cf"},"outputs":[{"ename":"SyntaxError","evalue":"ignored","output_type":"error","traceback":["\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m 1 %% 2\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"]}],"source":["1 %% 2"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":190},"executionInfo":{"elapsed":234,"status":"error","timestamp":1655710472185,"user":{"displayName":"Tugce K","userId":"13907696802096210204"},"user_tz":-120},"id":"ZirqXS0ou3o5","outputId":"4fb41198-afde-462f-d32f-d45811deb14e"},"outputs":[{"ename":"TypeError","evalue":"ignored","output_type":"error","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in 
\u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mmy_string\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Hello world!\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mmy_string\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;31mTypeError\u001b[0m: can only concatenate str (not \"int\") to str"]}],"source":["my_string = \"Hello world!\"\n","my_string + 1"]},{"cell_type":"markdown","metadata":{"id":"aVGtJ28V4nVm"},"source":["# Exercise\n","\n","_~ 10 minutes_"]},{"cell_type":"markdown","metadata":{"id":"Arrv3Ql1u3oq"},"source":["Please complete the four steps below.\n","\n","1. Use a number of your choice and store it in a variable. Multiply that variable by ``2`` and print this new variable.\n","2. Use a second number of your choice and multiply it with the initial variable used in (1).\n","3. Find out if the result is even (divisible by 2) or odd. _Hint: Use the [modulo](https://www.khanacademy.org/computing/computer-science/cryptography/modarithmetic/a/what-is-modular-arithmetic) operator to find out if the number is even or odd._\n","4. 
What is the type of the final variable?"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"OQ7yFVaO4nVm"},"outputs":[],"source":["# Your code goes here"]}],"metadata":{"colab":{"provenance":[]},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.9"},"rise":{"scroll":true},"toc":{"base_numbering":1,"nav_menu":{},"number_sections":true,"sideBar":true,"skip_h1_title":false,"title_cell":"Table of Contents","title_sidebar":"Contents","toc_cell":false,"toc_position":{},"toc_section_display":true,"toc_window_display":true}},"nbformat":4,"nbformat_minor":0} 2 | -------------------------------------------------------------------------------- /Visualizations/README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 |

5 | 6 | 7 | notebook | content 8 | ---- | ------ 9 | [plotly.ipynb](plotly.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Visualizations/plotly.ipynb)| Plotly Express library [Be aware: [Plots stay interactive in nbviewer](https://nbviewer.org/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Visualizations/plotly.ipynb#Bar-Charts)] 10 | [plotly_extra_material.ipynb](plotly_extra_material.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Visualizations/plotly_extra_material.ipynb)| Advanced plotting in plotly 11 | [PlotlyExpress_ComprehensiveGuide.ipynb](PlotlyExpress_ComprehensiveGuide.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Visualizations/PlotlyExpress_ComprehensiveGuide.ipynb)| PlotlyExpress library guide (extensive external resource) 12 | 13 | ## Installing Plotly 14 | 15 | In order to [run plotly in jupyter lab](https://plotly.com/python/getting-started/#jupyterlab-support) you will need to add a labextension: 16 | 17 | ```bash 18 | # JupyterLab renderer support 19 | jupyter labextension install jupyterlab-plotly@4.14.3 20 | # OPTIONAL: Jupyter widgets extension 21 | jupyter labextension install @jupyter-widgets/jupyterlab-manager plotlywidget@4.14.3 22 | ``` 23 | 24 | ## Articles 25 | 26 | - python [plotting libraries](https://pbpython.com/python-vis-flowchart.html) 27 | -------------------------------------------------------------------------------- /cheat_sheets/Bokeh_Cheat_Sheet.pdf: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Bokeh_Cheat_Sheet.pdf -------------------------------------------------------------------------------- /cheat_sheets/Importing_Data_Cheat_sheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Importing_Data_Cheat_sheet.pdf -------------------------------------------------------------------------------- /cheat_sheets/Jupyter_Notebook_Cheat_Sheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Jupyter_Notebook_Cheat_Sheet.pdf -------------------------------------------------------------------------------- /cheat_sheets/Numpy_Python_Cheat_Sheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Numpy_Python_Cheat_Sheet.pdf -------------------------------------------------------------------------------- /cheat_sheets/Pandas_Cheat_Sheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Pandas_Cheat_Sheet.pdf -------------------------------------------------------------------------------- /cheat_sheets/Plotly_Cheat_Sheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Plotly_Cheat_Sheet.pdf 
-------------------------------------------------------------------------------- /cheat_sheets/Python_Matplotlib_Cheat_Sheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Python_Matplotlib_Cheat_Sheet.pdf -------------------------------------------------------------------------------- /cheat_sheets/Scikit-learn_Cheat_Sheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Scikit-learn_Cheat_Sheet.pdf -------------------------------------------------------------------------------- /cheat_sheets/Scipy-LinearAlgebra_Cheat_Sheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Scipy-LinearAlgebra_Cheat_Sheet.pdf -------------------------------------------------------------------------------- /cheat_sheets/Seaborn_Cheat_Sheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Seaborn_Cheat_Sheet.pdf -------------------------------------------------------------------------------- /cheat_sheets/cheat_sheet_day0.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/cheat_sheet_day0.pdf -------------------------------------------------------------------------------- /environment.yml: 
-------------------------------------------------------------------------------- 1 | name: tsunami 2 | channels: 3 | - defaults 4 | # - plotly 5 | dependencies: 6 | - python 7 | - matplotlib 8 | - pandas 9 | - ipykernel 10 | - ipywidgets>=7.5 11 | - ipython 12 | - scikit-learn 13 | - seaborn 14 | - jupyter 15 | - plotly 16 | - bs4 17 | - requests 18 | -------------------------------------------------------------------------------- /figures/HeaDS_logo_large_withTitle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/HeaDS_logo_large_withTitle.png -------------------------------------------------------------------------------- /figures/Program_October_2021.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/Program_October_2021.png -------------------------------------------------------------------------------- /figures/colab_restart_runtime_after_install.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/colab_restart_runtime_after_install.png -------------------------------------------------------------------------------- /figures/colab_save_in_drive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/colab_save_in_drive.png -------------------------------------------------------------------------------- /figures/colab_save_in_drive_2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/colab_save_in_drive_2.png -------------------------------------------------------------------------------- /figures/colab_toc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/colab_toc.png -------------------------------------------------------------------------------- /figures/df_loc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/df_loc.png -------------------------------------------------------------------------------- /figures/df_loc_condition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/df_loc_condition.png -------------------------------------------------------------------------------- /figures/github_raw_file_view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/github_raw_file_view.png -------------------------------------------------------------------------------- /figures/long_format.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/long_format.png -------------------------------------------------------------------------------- /figures/matplotlib/fig_axes_axis.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/matplotlib/fig_axes_axis.png -------------------------------------------------------------------------------- /figures/matplotlib/handout-beginner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/matplotlib/handout-beginner.png -------------------------------------------------------------------------------- /figures/pandas_dataframe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/pandas_dataframe.png -------------------------------------------------------------------------------- /figures/pandas_indexing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/pandas_indexing.png -------------------------------------------------------------------------------- /figures/program.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/program.PNG -------------------------------------------------------------------------------- /figures/program_june2022.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/program_june2022.png 
-------------------------------------------------------------------------------- /figures/program_march2023.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/program_march2023.png -------------------------------------------------------------------------------- /figures/program_oct_screen_GR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/program_oct_screen_GR.png -------------------------------------------------------------------------------- /figures/program_spring_2022.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/program_spring_2022.PNG -------------------------------------------------------------------------------- /figures/quartile-percentile.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/quartile-percentile.jpg -------------------------------------------------------------------------------- /figures/tsunami_logo.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/tsunami_logo.PNG -------------------------------------------------------------------------------- /figures/wide_format.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/wide_format.png -------------------------------------------------------------------------------- /slides/Python Tsunami Local Installations vs code.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/slides/Python Tsunami Local Installations vs code.pdf -------------------------------------------------------------------------------- /slides/Python Tsunami intro.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/slides/Python Tsunami intro.pdf -------------------------------------------------------------------------------- /solutions/conditions_solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Exercise 2\n", 8 | "\n", 9 | "You have two boolean variables:\n", 10 | "\n", 11 | "```python\n", 12 | "a = True\n", 13 | "b = False\n", 14 | "```\n", 15 | "\n", 16 | "What is the result of the following expressions?\n", 17 | "\n", 18 | "1. `a or b`\n", 19 | "\n", 20 | "2. `a and b`\n", 21 | "\n", 22 | "3. 
`a and not b`" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 1, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "a = True\n", 32 | "b = False" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/plain": [ 43 | "True" 44 | ] 45 | }, 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "output_type": "execute_result" 49 | } 50 | ], 51 | "source": [ 52 | "#1 \n", 53 | "a or b" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 3, 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "data": { 63 | "text/plain": [ 64 | "False" 65 | ] 66 | }, 67 | "execution_count": 3, 68 | "metadata": {}, 69 | "output_type": "execute_result" 70 | } 71 | ], 72 | "source": [ 73 | "#2\n", 74 | "a and b" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 4, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/plain": [ 85 | "True" 86 | ] 87 | }, 88 | "execution_count": 4, 89 | "metadata": {}, 90 | "output_type": "execute_result" 91 | } 92 | ], 93 | "source": [ 94 | "#3\n", 95 | "a and not b" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 5, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "False" 107 | ] 108 | }, 109 | "execution_count": 5, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "#how to make it False. One example. \n", 116 | "\n", 117 | "not a and b" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "## Exercise 3\n", 125 | "\n", 126 | "If you set the name variable to \"Gandalf\" and run the script below, what will the output be? How do you get the output 'Move on then'?" 
127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "For \"Gandalf\" the result is \"Run, you fools!\". To make the condition go into the else, put literally any string that is not \"Gandalf\" or \"Aragorn\"." 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 6, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "name": "stdout", 143 | "output_type": "stream", 144 | "text": [ 145 | "Move on then!\n" 146 | ] 147 | } 148 | ], 149 | "source": [ 150 | "name = \"Frodo\"\n", 151 | "if name == \"Gandalf\":\n", 152 | " print(\"Run, you fools!\")\n", 153 | "elif name == \"Aragorn\":\n", 154 | " print(\"There is always hope.\")\n", 155 | "else:\n", 156 | " print(\"Move on then!\")" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "Create a variable and assign an integer as value, then build a conditional to test it:\n", 164 | "- If the value is below 0, print \"The value is negative\"\n", 165 | "- If the value is between 0 and 20 (including 0 and 20), print the value\n", 166 | "- Otherwise, print \"Out of scope\"\n", 167 | "\n", 168 | "Test it by changing the value of the variable." 
169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 20, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "my_int = 30" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 21, 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "name": "stdout", 187 | "output_type": "stream", 188 | "text": [ 189 | "Out of scope.\n" 190 | ] 191 | } 192 | ], 193 | "source": [ 194 | "if my_int < 0:\n", 195 | " print(\"The value is negative\")\n", 196 | "elif my_int <= 20:\n", 197 | " print(my_int)\n", 198 | "else:\n", 199 | " print(\"Out of scope.\")" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "## Exercise 4\n", 207 | "\n", 208 | "Write an `if` condition that tests whether `word_list` is empty. If it is, add 'Hello world' and print the list. If it is not, print the first element. Test that both outcomes work. " 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 8, 214 | "metadata": {}, 215 | "outputs": [ 216 | { 217 | "name": "stdout", 218 | "output_type": "stream", 219 | "text": [ 220 | "pen\n" 221 | ] 222 | } 223 | ], 224 | "source": [ 225 | "word_list = ['pen', 'paper', 'room']\n", 226 | "\n", 227 | "if word_list:\n", 228 | " print(word_list[0])\n", 229 | "else:\n", 230 | " word_list.append('Hello world')\n", 231 | " print(word_list)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 9, 237 | "metadata": {}, 238 | "outputs": [ 239 | { 240 | "name": "stdout", 241 | "output_type": "stream", 242 | "text": [ 243 | "['Hello world']\n" 244 | ] 245 | } 246 | ], 247 | "source": [ 248 | "word_list = []\n", 249 | "\n", 250 | "if word_list:\n", 251 | " print(word_list[0])\n", 252 | "else:\n", 253 | " word_list.append('Hello world')\n", 254 | " print(word_list)" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [] 263 | } 
264 | ], 265 | "metadata": { 266 | "kernelspec": { 267 | "display_name": "Python 3", 268 | "language": "python", 269 | "name": "python3" 270 | }, 271 | "language_info": { 272 | "codemirror_mode": { 273 | "name": "ipython", 274 | "version": 3 275 | }, 276 | "file_extension": ".py", 277 | "mimetype": "text/x-python", 278 | "name": "python", 279 | "nbconvert_exporter": "python", 280 | "pygments_lexer": "ipython3", 281 | "version": "3.11.1" 282 | } 283 | }, 284 | "nbformat": 4, 285 | "nbformat_minor": 4 286 | } 287 | -------------------------------------------------------------------------------- /solutions/functions_solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Exercise 1\n", 8 | "\n", 9 | "#### Calculate absolute difference\n", 10 | "\n", 11 | "Write a function that returns the absolute (positive) difference between two numbers." 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "a = 25\n", 21 | "b = 65" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "def abs_difference(x, y):\n", 31 | " diff = abs(x-y)\n", 32 | " return diff" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/plain": [ 43 | "40" 44 | ] 45 | }, 46 | "execution_count": 3, 47 | "metadata": {}, 48 | "output_type": "execute_result" 49 | } 50 | ], 51 | "source": [ 52 | "abs_difference(a,b)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 4, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "data": { 62 | "text/plain": [ 63 | "60" 64 | ] 65 | }, 66 | "execution_count": 4, 67 | "metadata": {}, 68 | "output_type": "execute_result" 69 | } 70 | ], 71 | "source": [ 
72 | "abs_difference(-20,40)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 5, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/plain": [ 83 | "10" 84 | ] 85 | }, 86 | "execution_count": 5, 87 | "metadata": {}, 88 | "output_type": "execute_result" 89 | } 90 | ], 91 | "source": [ 92 | "abs_difference(0,-10)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 6, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "data": { 102 | "text/plain": [ 103 | "10" 104 | ] 105 | }, 106 | "execution_count": 6, 107 | "metadata": {}, 108 | "output_type": "execute_result" 109 | } 110 | ], 111 | "source": [ 112 | "abs_difference(-10,-20)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "### Exercise 2\n", 120 | "\n", 121 | "#### Calculate squares\n", 122 | "\n", 123 | "Write a function that returns the square root of the sum of squares of two numbers. \n", 124 | "\n", 125 | "> Hint: You can use `math.sqrt` to calculate the square root." 
126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 11, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "from math import sqrt\n", 135 | "sqrt(25)\n", 136 | "\n", 137 | "def square_root_of_squares(a,b):\n", 138 | " res = sqrt(a**2 + b**2)\n", 139 | " return res" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 12, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "data": { 149 | "text/plain": [ 150 | "4.47213595499958" 151 | ] 152 | }, 153 | "execution_count": 12, 154 | "metadata": {}, 155 | "output_type": "execute_result" 156 | } 157 | ], 158 | "source": [ 159 | "square_root_of_squares(2,4)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 13, 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "data": { 169 | "text/plain": [ 170 | "4.47213595499958" 171 | ] 172 | }, 173 | "execution_count": 13, 174 | "metadata": {}, 175 | "output_type": "execute_result" 176 | } 177 | ], 178 | "source": [ 179 | "#test:\n", 180 | "sqrt(4+16)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 14, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "data": { 190 | "text/plain": [ 191 | "1.4142135623730951" 192 | ] 193 | }, 194 | "execution_count": 14, 195 | "metadata": {}, 196 | "output_type": "execute_result" 197 | } 198 | ], 199 | "source": [ 200 | "square_root_of_squares(1,1)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 15, 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "data": { 210 | "text/plain": [ 211 | "4.47213595499958" 212 | ] 213 | }, 214 | "execution_count": 15, 215 | "metadata": {}, 216 | "output_type": "execute_result" 217 | } 218 | ], 219 | "source": [ 220 | "#also works with negative numbers since we square\n", 221 | "square_root_of_squares(-2,4)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "### Exercise 3\n", 229 | "\n", 230 | "#### 
Indicate sign of difference between two numbers\n", 231 | "\n", 232 | "Write a function that subtracts the second number from the first. Return \"Positive\" if their difference is positive, and \"Negative\" if their difference is negative." 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 16, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "#mind the edge case where difference == 0!\n", 242 | "def signed_diff(a,b):\n", 243 | " res = a - b\n", 244 | " if res > 0:\n", 245 | " return 'Positive'\n", 246 | " elif res < 0:\n", 247 | " return 'Negative'\n", 248 | " else:\n", 249 | " return 'Equal'" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 17, 255 | "metadata": {}, 256 | "outputs": [ 257 | { 258 | "data": { 259 | "text/plain": [ 260 | "'Negative'" 261 | ] 262 | }, 263 | "execution_count": 17, 264 | "metadata": {}, 265 | "output_type": "execute_result" 266 | } 267 | ], 268 | "source": [ 269 | "signed_diff(1,10)" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 18, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "data": { 279 | "text/plain": [ 280 | "'Positive'" 281 | ] 282 | }, 283 | "execution_count": 18, 284 | "metadata": {}, 285 | "output_type": "execute_result" 286 | } 287 | ], 288 | "source": [ 289 | "signed_diff(20,5)" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 19, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "data": { 299 | "text/plain": [ 300 | "'Equal'" 301 | ] 302 | }, 303 | "execution_count": 19, 304 | "metadata": {}, 305 | "output_type": "execute_result" 306 | } 307 | ], 308 | "source": [ 309 | "signed_diff(10,10)" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 20, 315 | "metadata": {}, 316 | "outputs": [ 317 | { 318 | "data": { 319 | "text/plain": [ 320 | "'Negative'" 321 | ] 322 | }, 323 | "execution_count": 20, 324 | "metadata": {}, 325 | "output_type": "execute_result" 
326 | } 327 | ], 328 | "source": [ 329 | "signed_diff(-10,-5)" 330 | ] 331 | }, 332 | { 333 | "cell_type": "markdown", 334 | "metadata": {}, 335 | "source": [ 336 | "### Exercise 4\n", 337 | "\n", 338 | "#### Calculate sum and differences\n", 339 | "\n", 340 | "Write a function that returns **both** the sum of the first two inputs and the difference between the second and third input: " 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": 26, 346 | "metadata": {}, 347 | "outputs": [], 348 | "source": [ 349 | "def f4(a,b,c):\n", 350 | " sum_1 = a + b\n", 351 | " diff_2 = b - c\n", 352 | " #here we return a tuple. we could also use a list.\n", 353 | " return (sum_1, diff_2)" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 24, 359 | "metadata": {}, 360 | "outputs": [ 361 | { 362 | "data": { 363 | "text/plain": [ 364 | "(3, -1)" 365 | ] 366 | }, 367 | "execution_count": 24, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | } 371 | ], 372 | "source": [ 373 | "f4(1,2,3)" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 25, 379 | "metadata": {}, 380 | "outputs": [ 381 | { 382 | "data": { 383 | "text/plain": [ 384 | "(-1, -6)" 385 | ] 386 | }, 387 | "execution_count": 25, 388 | "metadata": {}, 389 | "output_type": "execute_result" 390 | } 391 | ], 392 | "source": [ 393 | "f4(-5,4,10)" 394 | ] 395 | }, 396 | { 397 | "cell_type": "markdown", 398 | "metadata": {}, 399 | "source": [ 400 | "### Exercise 5\n", 401 | "\n", 402 | "#### Function overloading: Different behaviour for `int` and `str`\n", 403 | "\n", 404 | "Write a function that adds two numbers together if the inputs are both numbers, and concatenates the inputs if they are both strings.\n", 405 | "\n", 406 | "> Hint: You can use built-in functions [`type`](https://docs.python.org/3/library/functions.html#type) or [`isinstance`](https://docs.python.org/3/library/functions.html#isinstance) to find out the type of a 
variable." 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": 39, 412 | "metadata": {}, 413 | "outputs": [], 414 | "source": [ 415 | "#actually, the + operator in python is already overloaded and will do the sum on numbers and concatenation on strings\n", 416 | "def add_this(a,b):\n", 417 | " if type(a) != type(b):\n", 418 | " return 'Arguments have different types.'\n", 419 | " \n", 420 | " elif type(a) == str:\n", 421 | " return a+b\n", 422 | " \n", 423 | " #the exercise text says int, but you can also add floats so I added that\n", 424 | " elif type(a) == int or type(a) == float:\n", 425 | " return a+b\n", 426 | " \n", 427 | " #edge case for whatever weird input the user might give: both arguments have the same type, but it is not str, int or float\n", 428 | " else:\n", 429 | " return f\"Cannot add type {type(a)} and {type(b)}\"" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 40, 435 | "metadata": {}, 436 | "outputs": [ 437 | { 438 | "data": { 439 | "text/plain": [ 440 | "3" 441 | ] 442 | }, 443 | "execution_count": 40, 444 | "metadata": {}, 445 | "output_type": "execute_result" 446 | } 447 | ], 448 | "source": [ 449 | "add_this(1,2)" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": 41, 455 | "metadata": {}, 456 | "outputs": [ 457 | { 458 | "data": { 459 | "text/plain": [ 460 | "'HelloWorld'" 461 | ] 462 | }, 463 | "execution_count": 41, 464 | "metadata": {}, 465 | "output_type": "execute_result" 466 | } 467 | ], 468 | "source": [ 469 | "add_this('Hello','World')" 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": 42, 475 | "metadata": {}, 476 | "outputs": [ 477 | { 478 | "data": { 479 | "text/plain": [ 480 | "'Arguments have different types.'" 481 | ] 482 | }, 483 | "execution_count": 42, 484 | "metadata": {}, 485 | "output_type": "execute_result" 486 | } 487 | ], 488 | "source": [ 489 | "add_this('Hi',3)" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | 
"execution_count": 43, 495 | "metadata": {}, 496 | "outputs": [ 497 | { 498 | "data": { 499 | "text/plain": [ 500 | "\"Cannot add type <class 'bool'> and <class 'bool'>\"" 501 | ] 502 | }, 503 | "execution_count": 43, 504 | "metadata": {}, 505 | "output_type": "execute_result" 506 | } 507 | ], 508 | "source": [ 509 | "add_this(True,False)" 510 | ] 511 | }, 512 | { 513 | "cell_type": "markdown", 514 | "metadata": {}, 515 | "source": [ 516 | "## Group Exercises" 517 | ] 518 | }, 519 | { 520 | "cell_type": "markdown", 521 | "metadata": {}, 522 | "source": [ 523 | "### Exercise 1\n", 524 | "\n", 525 | "#### Square numbers\n", 526 | "\n", 527 | "The code below squares all of the numbers and removes all of the strings from the list. Make it into a function, **square_numbers**:\n", 528 | "\n", 529 | "```python\n", 530 | "data = [5, \"missing\", 54, \"bad\", 3, 6]\n", 531 | "good_data = []\n", 532 | "good_data_squared = []\n", 533 | "idx = 0\n", 534 | "while idx < len(data):\n", 535 | " el = data[idx]\n", 536 | " if isinstance(el, int):\n", 537 | " good_data_squared.append(el ** 2)\n", 538 | " idx += 1\n", 539 | "good_data_squared\n", 540 | "```\n" 541 | ] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "execution_count": 45, 546 | "metadata": {}, 547 | "outputs": [], 548 | "source": [ 549 | "#Note: I don't particularly care for while loops to iterate over lists so I changed the code to a for loop\n", 550 | "\n", 551 | "def square_numbers(my_list = None):\n", 552 | " ret_list = []\n", 553 | " \n", 554 | " #decide what to do if there is no data given. 
\n", 555 | " #We can also omit this part and make the my_list argument mandatory by removing the default value of None\n", 556 | " #Though the user could still supply an empty list, so it's better to check that list is not empty\n", 557 | " if not my_list:\n", 558 | " print('No data supplied')\n", 559 | " return ret_list\n", 560 | " \n", 561 | " #I could put this inside an else to the if above but I prefer to have one fewer level of indentation so I don't \n", 562 | " for item in my_list:\n", 563 | " if isinstance(item, int):\n", 564 | " ret_list.append(item**2)\n", 565 | " \n", 566 | " return ret_list " 567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": 46, 572 | "metadata": {}, 573 | "outputs": [], 574 | "source": [ 575 | "data = [5, \"missing\", 54, \"bad\", 3, 6]" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": 47, 581 | "metadata": {}, 582 | "outputs": [ 583 | { 584 | "data": { 585 | "text/plain": [ 586 | "[25, 2916, 9, 36]" 587 | ] 588 | }, 589 | "execution_count": 47, 590 | "metadata": {}, 591 | "output_type": "execute_result" 592 | } 593 | ], 594 | "source": [ 595 | "square_numbers(data)" 596 | ] 597 | }, 598 | { 599 | "cell_type": "code", 600 | "execution_count": 48, 601 | "metadata": {}, 602 | "outputs": [ 603 | { 604 | "name": "stdout", 605 | "output_type": "stream", 606 | "text": [ 607 | "No data supplied\n" 608 | ] 609 | }, 610 | { 611 | "data": { 612 | "text/plain": [ 613 | "[]" 614 | ] 615 | }, 616 | "execution_count": 48, 617 | "metadata": {}, 618 | "output_type": "execute_result" 619 | } 620 | ], 621 | "source": [ 622 | "square_numbers([])" 623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": 49, 628 | "metadata": {}, 629 | "outputs": [ 630 | { 631 | "name": "stdout", 632 | "output_type": "stream", 633 | "text": [ 634 | "No data supplied\n" 635 | ] 636 | }, 637 | { 638 | "data": { 639 | "text/plain": [ 640 | "[]" 641 | ] 642 | }, 643 | "execution_count": 49, 644 | 
"metadata": {}, 645 | "output_type": "execute_result" 646 | } 647 | ], 648 | "source": [ 649 | "square_numbers()" 650 | ] 651 | }, 652 | { 653 | "cell_type": "markdown", 654 | "metadata": {}, 655 | "source": [ 656 | "### Exercise 2\n", 657 | "\n", 658 | "#### Calculate the standard deviation\n", 659 | "\n", 660 | "The code below calculates the **standard deviation** of the data. Put it in a function called **`standard_deviation`** and use the function on the data:\n", 661 | "\n", 662 | "```python\n", 663 | "import math\n", 664 | "\n", 665 | "data = [2, 6, 8, 2, 5, 8, 9, 2]\n", 666 | "\n", 667 | "mean = 0\n", 668 | "std = 0\n", 669 | "for el in data:\n", 670 | " mean += el / len(data)\n", 671 | "dev_squareds = []\n", 672 | "for el in data:\n", 673 | " dev = (el - mean) ** 2\n", 674 | " dev_squareds.append(dev)\n", 675 | "sum_dev_squareds = sum(dev_squareds)\n", 676 | "standard_dev = sum_dev_squareds / len(data) * 1.\n", 677 | "standard_dev = math.sqrt(standard_dev)\n", 678 | "standard_dev\n", 679 | "```\n", 680 | "\n", 681 | "**Note**: We do not actually recommend to calc the standard deviation yourself. Use the pandas attribute `.std()` on a series instead. 
\n" 682 | ] 683 | }, 684 | { 685 | "cell_type": "code", 686 | "execution_count": 60, 687 | "metadata": {}, 688 | "outputs": [], 689 | "source": [ 690 | "import math\n", 691 | "def std_dev(data): \n", 692 | " mean = 0\n", 693 | " std = 0\n", 694 | " \n", 695 | " for el in data:\n", 696 | " mean += el / len(data)\n", 697 | " \n", 698 | " dev_squareds = []\n", 699 | " for el in data:\n", 700 | " dev = (el - mean) ** 2\n", 701 | " dev_squareds.append(dev)\n", 702 | " \n", 703 | " sum_dev_squareds = sum(dev_squareds)\n", 704 | " \n", 705 | " standard_dev = sum_dev_squareds / len(data) * 1.\n", 706 | " standard_dev = math.sqrt(standard_dev)\n", 707 | " return standard_dev" 708 | ] 709 | }, 710 | { 711 | "cell_type": "code", 712 | "execution_count": 61, 713 | "metadata": {}, 714 | "outputs": [], 715 | "source": [ 716 | "data = [2, 6, 8, 2, 5, 8, 9, 2]" 717 | ] 718 | }, 719 | { 720 | "cell_type": "code", 721 | "execution_count": 62, 722 | "metadata": {}, 723 | "outputs": [ 724 | { 725 | "data": { 726 | "text/plain": [ 727 | "2.7726341266023544" 728 | ] 729 | }, 730 | "execution_count": 62, 731 | "metadata": {}, 732 | "output_type": "execute_result" 733 | } 734 | ], 735 | "source": [ 736 | "std_dev(data)" 737 | ] 738 | }, 739 | { 740 | "cell_type": "code", 741 | "execution_count": 65, 742 | "metadata": {}, 743 | "outputs": [ 744 | { 745 | "data": { 746 | "text/plain": [ 747 | "2.7726341266023544" 748 | ] 749 | }, 750 | "execution_count": 65, 751 | "metadata": {}, 752 | "output_type": "execute_result" 753 | } 754 | ], 755 | "source": [ 756 | "#using the pandas library\n", 757 | "import pandas as pd\n", 758 | "#first, cast the list into a series with pd.Series(), then apply .std().\n", 759 | "#We use ddof = 0 to get the same result as in our manual calc since pandas by default corrects \n", 760 | "#the std with N – 1 instead of N\n", 761 | "pd.Series(data).std(ddof = 0)" 762 | ] 763 | }, 764 | { 765 | "cell_type": "markdown", 766 | "metadata": {}, 767 | "source": [ 768 | "### 
Exercise 3\n", 769 | "\n", 770 | "#### Bootstrap mean\n", 771 | "\n", 772 | "The code below generates a **bootstrap** sample of the data, getting a random selection of the data **n_boot** times and calculating the mean of that sample, so that many estimates of the mean can be made from a single dataset. Put it in a function called **bootstrap_means**.\n", 773 | "\n", 774 | "```python\n", 775 | "import random\n", 776 | "\n", 777 | "data = [2, 6, 8, 2, 5, 8, 9, 2, 6, 2, 10]\n", 778 | "n_boot = 5\n", 779 | "means = []\n", 780 | "for rep in range(n_boot):\n", 781 | " sample = random.choices(data, k=len(data))\n", 782 | " mean = sum(sample) / len(sample)\n", 783 | " rep = rep * 2\n", 784 | " means.append(mean)\n", 785 | "means\n", 786 | "\n", 787 | "```" 788 | ] 789 | }, 790 | { 791 | "cell_type": "code", 792 | "execution_count": 66, 793 | "metadata": {}, 794 | "outputs": [], 795 | "source": [ 796 | "data = [2, 6, 8, 2, 5, 8, 9, 2, 6, 2, 10]" 797 | ] 798 | }, 799 | { 800 | "cell_type": "code", 801 | "execution_count": 73, 802 | "metadata": {}, 803 | "outputs": [], 804 | "source": [ 805 | "#lets be good people and also document our function\n", 806 | "import random\n", 807 | "\n", 808 | "def bootstrap_means(data, n_boot = 5):\n", 809 | " \"\"\"A function that calculates means on a bootstrapped sample of the data\n", 810 | "\n", 811 | " Parameters\n", 812 | " ----------\n", 813 | " data : list\n", 814 | " The list of values.\n", 815 | " n_boot : int\n", 816 | " number of times we sub-sample the data. 
\n", 817 | " \"\"\"\n", 818 | " means = []\n", 819 | " if not data:\n", 820 | " print('No data supplied')\n", 821 | " return means\n", 822 | " \n", 823 | " for rep in range(n_boot):\n", 824 | " sample = random.choices(data, k=len(data))\n", 825 | " mean = sum(sample) / len(sample)\n", 826 | " rep = rep * 2\n", 827 | " means.append(mean)\n", 828 | " return means" 829 | ] 830 | }, 831 | { 832 | "cell_type": "code", 833 | "execution_count": 74, 834 | "metadata": {}, 835 | "outputs": [ 836 | { 837 | "data": { 838 | "text/plain": [ 839 | "[4.181818181818182,\n", 840 | " 4.818181818181818,\n", 841 | " 6.090909090909091,\n", 842 | " 5.2727272727272725,\n", 843 | " 5.181818181818182]" 844 | ] 845 | }, 846 | "execution_count": 74, 847 | "metadata": {}, 848 | "output_type": "execute_result" 849 | } 850 | ], 851 | "source": [ 852 | "bootstrap_means(data)" 853 | ] 854 | }, 855 | { 856 | "cell_type": "code", 857 | "execution_count": 75, 858 | "metadata": {}, 859 | "outputs": [ 860 | { 861 | "data": { 862 | "text/plain": [ 863 | "[4.636363636363637,\n", 864 | " 5.545454545454546,\n", 865 | " 4.545454545454546,\n", 866 | " 5.909090909090909,\n", 867 | " 6.2727272727272725,\n", 868 | " 5.0,\n", 869 | " 6.0,\n", 870 | " 5.454545454545454,\n", 871 | " 6.2727272727272725,\n", 872 | " 5.818181818181818]" 873 | ] 874 | }, 875 | "execution_count": 75, 876 | "metadata": {}, 877 | "output_type": "execute_result" 878 | } 879 | ], 880 | "source": [ 881 | "bootstrap_means(data,10)" 882 | ] 883 | }, 884 | { 885 | "cell_type": "code", 886 | "execution_count": 77, 887 | "metadata": {}, 888 | "outputs": [ 889 | { 890 | "name": "stdout", 891 | "output_type": "stream", 892 | "text": [ 893 | "No data supplied\n" 894 | ] 895 | }, 896 | { 897 | "data": { 898 | "text/plain": [ 899 | "[]" 900 | ] 901 | }, 902 | "execution_count": 77, 903 | "metadata": {}, 904 | "output_type": "execute_result" 905 | } 906 | ], 907 | "source": [ 908 | "bootstrap_means([])" 909 | ] 910 | }, 911 | { 912 | "cell_type": 
"markdown", 913 | "metadata": {}, 914 | "source": [ 915 | "### Exercise 4\n", 916 | "\n", 917 | "#### Build a function to modify a file\n", 918 | "\n", 919 | "Based on what we have seen in the previous lectures (Importing data, Conditionals and Loops): \n", 920 | "\n", 921 | "Build a function that takes as parameter the name of a country, then reads the file 'data/sample.txt', checks if the country exists in the file and if it doesn't the function should add it as a new line and otherwise print out that the country already exists (You can use a formatted string).\n", 922 | "\n", 923 | "**Note**: We haven't talked about opening files outside of pandas data import so I'm just gonna put the solution as an example how to read files line by line and using file handles. In order to test this you need to create a file 'data/sample.txt' in the same directory as this script so the script can find it." 924 | ] 925 | }, 926 | { 927 | "cell_type": "code", 928 | "execution_count": null, 929 | "metadata": {}, 930 | "outputs": [], 931 | "source": [ 932 | "def add_country(country_name):\n", 933 | " country_set = set()\n", 934 | " #open file in read mode 'r'\n", 935 | " with open('data/sample.txt', 'r') as IN:\n", 936 | " for line in IN:\n", 937 | " #we'll strip of newlines before adding to our set of countries\n", 938 | " country_set.add(line.rstrip())\n", 939 | " \n", 940 | " #we're outside the with block now, the file has been closed. We need to open the file again in append mode 'a'\n", 941 | " if not country_name in country_set:\n", 942 | " with open('data/sample.txt', 'a') as OUT:\n", 943 | " #print to the filehandle instead of the standard output with the 'file' argument\n", 944 | " print(country_name, file = OUT)\n", 945 | " return f'Added {country_name}.' 
\n", 946 | " else:\n", 947 | " return f'Country {country_name} exists already in file.'\n", 948 | " " 949 | ] 950 | }, 951 | { 952 | "cell_type": "markdown", 953 | "metadata": {}, 954 | "source": [ 955 | "Refactoring Code is left as an exercise to the reader. We already optimized some of the code in the solutions." 956 | ] 957 | }, 958 | { 959 | "cell_type": "code", 960 | "execution_count": null, 961 | "metadata": {}, 962 | "outputs": [], 963 | "source": [] 964 | } 965 | ], 966 | "metadata": { 967 | "kernelspec": { 968 | "display_name": "Python 3", 969 | "language": "python", 970 | "name": "python3" 971 | }, 972 | "language_info": { 973 | "codemirror_mode": { 974 | "name": "ipython", 975 | "version": 3 976 | }, 977 | "file_extension": ".py", 978 | "mimetype": "text/x-python", 979 | "name": "python", 980 | "nbconvert_exporter": "python", 981 | "pygments_lexer": "ipython3", 982 | "version": "3.8.5" 983 | } 984 | }, 985 | "nbformat": 4, 986 | "nbformat_minor": 4 987 | } 988 | -------------------------------------------------------------------------------- /solutions/iterables_wSolutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "taqKnUx8avlW" 7 | }, 8 | "source": [ 9 | "## Exercise 1\n", 10 | "\n", 11 | "Create a list called `random_things` that is at least 4 elements long. It must contain at least 1 `str` and 1 `float`. \n", 12 | "\n", 13 | "Use the ``len`` function to verify if your list is indeed at least 4 elements long." 
14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 72, 19 | "metadata": { 20 | "colab": { 21 | "base_uri": "https://localhost:8080/" 22 | }, 23 | "id": "4umAIKkttCzN", 24 | "outputId": "08af7925-4b8c-4ef1-f84c-4e579517e523" 25 | }, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/plain": [ 30 | "4" 31 | ] 32 | }, 33 | "execution_count": 72, 34 | "metadata": {}, 35 | "output_type": "execute_result" 36 | } 37 | ], 38 | "source": [ 39 | "# your code goes here\n", 40 | "\n", 41 | "random_things = ['one', 2, 3, 4.0]\n", 42 | "len(random_things)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "Now, access the first element of your list." 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "random_things[0]" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "And the last element." 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "random_things[-1]" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## Exercise 2\n", 82 | "\n", 83 | "Use slicing on the list to extract the desired elements:" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 1, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "# A list of centers:\n", 93 | "sund_centers = [\"BRIC\", \"CPR\", \"CBMR\", \"reNEW\", \"CTN\", \"HeaDS\", \"Globe\", \"Vet\", \"Pharma\"]" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 7, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "name": "stdout", 103 | "output_type": "stream", 104 | "text": [ 105 | "BRIC\n", 106 | "Pharma\n", 107 | "['BRIC', 'CPR', 'CBMR', 'reNEW', 'CTN', 'HeaDS', 'Globe', 'Vet']\n", 108 | "['Globe', 'Vet', 'Pharma']\n", 109 | "['BRIC', 'CBMR', 'CTN', 
'Globe', 'Pharma']\n", 110 | "['CPR', 'reNEW', 'HeaDS', 'Vet']\n" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "# Display the first element in 'sund_centers'.\n", 116 | "print(sund_centers[0])\n", 117 | "\n", 118 | "# Display the last element in 'sund_centers'.\n", 119 | "print(sund_centers[-1])\n", 120 | "\n", 121 | "# Display all but the last element in 'sund_centers'.\n", 122 | "print(sund_centers[:-1])\n", 123 | "\n", 124 | "# Display the last 3 elements in 'sund_centers'.\n", 125 | "print(sund_centers[-3:])\n", 126 | "\n", 127 | "# Display every second element in 'sund_centers'.\n", 128 | "print(sund_centers[::2])\n", 129 | "\n", 130 | "# Display every second element starting from \"CPR\" in 'sund_centers'.\n", 131 | "print(sund_centers[1::2])" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 8, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "data": { 141 | "text/plain": [ 142 | "['BRIC', 'CPR', 'BMI', 'reNEW', 'CTN', 'HeaDS', 'Globe', 'Vet', 'Pharma']" 143 | ] 144 | }, 145 | "execution_count": 8, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "#replace CBMR with BMI\n", 152 | "\n", 153 | "sund_centers[2] = \"BMI\"\n", 154 | "sund_centers" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "## Exercise 3\n", 162 | "\n", 163 | "Work on the list 'colors' as specified below.\n", 164 | "\n", 165 | "1. How many times does the color 'red' appear in the list?\n", 166 | "2. Add 'cyan' and 'magenta' to the list.\n", 167 | "3. Remove the last element. \n", 168 | "4. Remove the first element.\n", 169 | "5. Make a copy of the list.\n", 170 | "6. Delete the original list." 
171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 9, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "colors = ['red', 'green', 'orange', 'yellow', 'black', 'red', 'blue', 'purple']" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 10, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "data": { 189 | "text/plain": [ 190 | "2" 191 | ] 192 | }, 193 | "execution_count": 10, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "#1\n", 200 | "colors.count('red')" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 11, 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "data": { 210 | "text/plain": [ 211 | "['red',\n", 212 | " 'green',\n", 213 | " 'orange',\n", 214 | " 'yellow',\n", 215 | " 'black',\n", 216 | " 'red',\n", 217 | " 'blue',\n", 218 | " 'purple',\n", 219 | " 'cyan',\n", 220 | " 'magenta']" 221 | ] 222 | }, 223 | "execution_count": 11, 224 | "metadata": {}, 225 | "output_type": "execute_result" 226 | } 227 | ], 228 | "source": [ 229 | "#2\n", 230 | "colors.extend(['cyan', 'magenta'])\n", 231 | "colors" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 12, 237 | "metadata": {}, 238 | "outputs": [ 239 | { 240 | "data": { 241 | "text/plain": [ 242 | "['red', 'green', 'orange', 'yellow', 'black', 'red', 'blue', 'purple', 'cyan']" 243 | ] 244 | }, 245 | "execution_count": 12, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "#3\n", 252 | "colors.pop()\n", 253 | "colors" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 13, 259 | "metadata": {}, 260 | "outputs": [ 261 | { 262 | "data": { 263 | "text/plain": [ 264 | "['green', 'orange', 'yellow', 'black', 'red', 'blue', 'purple', 'cyan']" 265 | ] 266 | }, 267 | "execution_count": 13, 268 | "metadata": {}, 269 | "output_type": "execute_result" 270 | } 271 | ], 
272 | "source": [ 273 | "#4\n", 274 | "colors.pop(0)\n", 275 | "colors" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 14, 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "data": { 285 | "text/plain": [ 286 | "['green', 'orange', 'yellow', 'black', 'red', 'blue', 'purple', 'cyan']" 287 | ] 288 | }, 289 | "execution_count": 14, 290 | "metadata": {}, 291 | "output_type": "execute_result" 292 | } 293 | ], 294 | "source": [ 295 | "new_list = colors.copy()\n", 296 | "new_list" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 15, 302 | "metadata": {}, 303 | "outputs": [], 304 | "source": [ 305 | "del colors" 306 | ] 307 | }, 308 | { 309 | "cell_type": "markdown", 310 | "metadata": {}, 311 | "source": [ 312 | "## Exercise 4\n", 313 | "\n", 314 | "1. Create a set of at least 3 yellow fruits.\n", 315 | "\n", 316 | "2. And another set of at least 3 citrus fruits.\n", 317 | "\n", 318 | "3. Which fruits are both yellow and citrus?" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 18, 324 | "metadata": {}, 325 | "outputs": [ 326 | { 327 | "data": { 328 | "text/plain": [ 329 | "{'lemon'}" 330 | ] 331 | }, 332 | "execution_count": 18, 333 | "metadata": {}, 334 | "output_type": "execute_result" 335 | } 336 | ], 337 | "source": [ 338 | "yellow = {'banana', 'lemon', 'apple'}\n", 339 | "citrus = {'orange', 'grapefruit', 'lemon', 'lime'}\n", 340 | "\n", 341 | "yellow.intersection(citrus)" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": {}, 347 | "source": [ 348 | "## Exercise 5\n", 349 | "\n", 350 | "1. Create a dictionary called 'countries' where the key is the country and the value is a (non-exhaustive) list of cities in the country. Include at least 3 countries. One of them has to be 'Denmark'. \n", 351 | "\n", 352 | "2. Extract the value for the key 'Denmark'.\n", 353 | "\n", 354 | "3. Add another country and its cities to the dictionary.\n", 355 | "\n", 356 | "4. 
Remove one of the countries and associated cities." 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 22, 362 | "metadata": { 363 | "colab": { 364 | "base_uri": "https://localhost:8080/" 365 | }, 366 | "id": "t-xhyk9WtCzY", 367 | "outputId": "08c0d967-7442-42de-a673-80ad595b130c" 368 | }, 369 | "outputs": [ 370 | { 371 | "name": "stdout", 372 | "output_type": "stream", 373 | "text": [ 374 | "{'Denmark': ['Copenhagen', 'Aarhus'], 'France': ['Paris', 'Marseille'], 'USA': ['Washington', 'New York']}\n" 375 | ] 376 | } 377 | ], 378 | "source": [ 379 | "countries = {\n", 380 | " 'Denmark': ['Copenhagen', 'Aarhus'],\n", 381 | " 'France': ['Paris', 'Marseille'],\n", 382 | " 'USA': ['Washington','New York']\n", 383 | "}\n", 384 | "\n", 385 | "print(countries)" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": 23, 391 | "metadata": {}, 392 | "outputs": [ 393 | { 394 | "data": { 395 | "text/plain": [ 396 | "['Copenhagen', 'Aarhus']" 397 | ] 398 | }, 399 | "execution_count": 23, 400 | "metadata": {}, 401 | "output_type": "execute_result" 402 | } 403 | ], 404 | "source": [ 405 | "countries['Denmark']" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 24, 411 | "metadata": { 412 | "colab": { 413 | "base_uri": "https://localhost:8080/" 414 | }, 415 | "id": "ThFsOIc1tCzZ", 416 | "outputId": "d52b05cb-fd97-4a7e-e8a1-017ab14b0306" 417 | }, 418 | "outputs": [ 419 | { 420 | "name": "stdout", 421 | "output_type": "stream", 422 | "text": [ 423 | "{'Denmark': ['Copenhagen', 'Aarhus'], 'France': ['Paris', 'Marseille'], 'USA': ['Washington', 'New York'], 'Japan': ['Tokyo', 'Osaka']}\n" 424 | ] 425 | } 426 | ], 427 | "source": [ 428 | "countries['Japan'] = ['Tokyo', 'Osaka']\n", 429 | "\n", 430 | "print(countries)" 431 | ] 432 | }, 433 | { 434 | "cell_type": "markdown", 435 | "metadata": { 436 | "id": "KK3PF2B2tCzZ" 437 | }, 438 | "source": [ 439 | "4. Remove one of the countries and its elements." 
440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": 25, 445 | "metadata": { 446 | "colab": { 447 | "base_uri": "https://localhost:8080/" 448 | }, 449 | "id": "-Y9aaM7wtCzZ", 450 | "outputId": "8942e738-8ec0-4b6e-8750-9348be4caaf2" 451 | }, 452 | "outputs": [ 453 | { 454 | "name": "stdout", 455 | "output_type": "stream", 456 | "text": [ 457 | "{'Denmark': ['Copenhagen', 'Aarhus'], 'France': ['Paris', 'Marseille'], 'Japan': ['Tokyo', 'Osaka']}\n" 458 | ] 459 | } 460 | ], 461 | "source": [ 462 | "del countries['USA']\n", 463 | "\n", 464 | "print(countries)" 465 | ] 466 | } 467 | ], 468 | "metadata": { 469 | "colab": { 470 | "collapsed_sections": [ 471 | "nNNFT7KttCzD", 472 | "Xxg8pJi5czAg", 473 | "BzxGAkXqV0XI", 474 | "Slkq6GYKtCzJ", 475 | "K9hVHmTKtCzM", 476 | "taqKnUx8avlW", 477 | "lAqWTd6jeFlg", 478 | "zyZO6bhUMl6g", 479 | "--PTuLUitCzS", 480 | "SW1Ggx4ytCzX", 481 | "wveXX--OtCzU" 482 | ], 483 | "name": "iterables_wSolutions.ipynb", 484 | "provenance": [] 485 | }, 486 | "kernelspec": { 487 | "display_name": "Python 3", 488 | "language": "python", 489 | "name": "python3" 490 | }, 491 | "language_info": { 492 | "codemirror_mode": { 493 | "name": "ipython", 494 | "version": 3 495 | }, 496 | "file_extension": ".py", 497 | "mimetype": "text/x-python", 498 | "name": "python", 499 | "nbconvert_exporter": "python", 500 | "pygments_lexer": "ipython3", 501 | "version": "3.11.1" 502 | }, 503 | "rise": { 504 | "scroll": true 505 | }, 506 | "toc": { 507 | "base_numbering": 1, 508 | "nav_menu": {}, 509 | "number_sections": true, 510 | "sideBar": true, 511 | "skip_h1_title": false, 512 | "title_cell": "Table of Contents", 513 | "title_sidebar": "Contents", 514 | "toc_cell": false, 515 | "toc_position": { 516 | "height": "calc(100% - 180px)", 517 | "left": "10px", 518 | "top": "150px", 519 | "width": "512px" 520 | }, 521 | "toc_section_display": true, 522 | "toc_window_display": true 523 | }, 524 | "toc-autonumbering": true 525 | }, 526 | "nbformat": 4, 
527 | "nbformat_minor": 0 528 | } 529 | -------------------------------------------------------------------------------- /solutions/loops_solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 1\n", 8 | "\n", 9 | "Loop through numbers 1-20:\n", 10 | "- If the number is 4 or 13, print \"x is unlucky\"\n", 11 | "- Otherwise:\n", 12 | " - If the number is even, print \"x is even\"\n", 13 | " - If the number is odd, print \"x is odd\"\n", 14 | "\n", 15 | "> check [`Conditions.ipynb`](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/fall2021/Conditionals/Conditions.ipynb)" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 6, 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "name": "stdout", 25 | "output_type": "stream", 26 | "text": [ 27 | "1 is uneven\n", 28 | "2 is even\n", 29 | "3 is uneven\n", 30 | "4 is unlucky\n", 31 | "5 is uneven\n", 32 | "6 is even\n", 33 | "7 is uneven\n", 34 | "8 is even\n", 35 | "9 is uneven\n", 36 | "10 is even\n", 37 | "11 is uneven\n", 38 | "12 is even\n", 39 | "13 is unlucky\n", 40 | "14 is even\n", 41 | "15 is uneven\n", 42 | "16 is even\n", 43 | "17 is uneven\n", 44 | "18 is even\n", 45 | "19 is uneven\n", 46 | "20 is even\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "for i in range(1,21):\n", 52 | " if i == 4 or i == 13:\n", 53 | " print(i, 'is unlucky')\n", 54 | " else:\n", 55 | " #the modulo of even numbers with 2 is 0: even numbers are divisible by 2 without a remainder\n", 56 | " if i % 2 == 0:\n", 57 | " print(i, \"is even\")\n", 58 | " #the modulo of uneven numbers with 2 is 1\n", 59 | " else:\n", 60 | " print(i, \"is uneven\")" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 3, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "0" 72 | ] 73 | }, 74 | 
"execution_count": 3, 75 | "metadata": {}, 76 | "output_type": "execute_result" 77 | } 78 | ], 79 | "source": [ 80 | "#how does the modulo operator (%) work: Try it out\n", 81 | "i = 4\n", 82 | "i % 2" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 4, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/plain": [ 93 | "1" 94 | ] 95 | }, 96 | "execution_count": 4, 97 | "metadata": {}, 98 | "output_type": "execute_result" 99 | } 100 | ], 101 | "source": [ 102 | "i = 5\n", 103 | "i % 2" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "# Exercise 2\n", 111 | "\n", 112 | "Write a loop that:\n", 113 | "\n", 114 | "- iterates over each character in the string `\"I live in CPH, and I like it here.\"`;\n", 115 | "- for each character checks if it is an empty string;\n", 116 | "- if it is an empty string, then just continue with the loop;\n", 117 | "- if the character is an actual letter (not an empty string);\n", 118 | "- then check for each letter if it is a comma `,` or not;\n", 119 | "- if the letter is a comma `,`, break the loop;\n", 120 | "- if the letter is not a comma, print the letter." 
121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "for letter in \"I live in CPH, and I like it here.\": # Iterate over all characters in the string-\n", 130 | " if letter == None: # If the character is an empty string,\n", 131 | " continue # just continue with the loop.\n", 132 | " \n", 133 | " else: # If the character is a letter...\n", 134 | " if letter == \",\": # ...and if it is a comma, then break the loop.\n", 135 | " break\n", 136 | " else: # ...and it is not a comma, then print the letter.\n", 137 | " print(letter)" 138 | ] 139 | } 140 | ], 141 | "metadata": { 142 | "kernelspec": { 143 | "display_name": "Python 3", 144 | "language": "python", 145 | "name": "python3" 146 | }, 147 | "language_info": { 148 | "codemirror_mode": { 149 | "name": "ipython", 150 | "version": 3 151 | }, 152 | "file_extension": ".py", 153 | "mimetype": "text/x-python", 154 | "name": "python", 155 | "nbconvert_exporter": "python", 156 | "pygments_lexer": "ipython3", 157 | "version": "3.8.3" 158 | } 159 | }, 160 | "nbformat": 4, 161 | "nbformat_minor": 4 162 | } 163 | -------------------------------------------------------------------------------- /solutions/recap_solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "\n", 10 | "\n", 11 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/fall2021/Variables_and_data_types/recap.ipynb)" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Recap Python Day 1\n", 19 | "**Congratulations!** \n", 20 | "\n", 21 | "You learned a whole lot of new programming concepts and skills over the last two days. 
\n", 22 | "To kick off the final day of this Python course, we will start by recapping some of the most important concepts." 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "## Recap Quiz" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "Use the next 45 minutes on the quiz below. Work together with the people at your table and discuss your answers. Add code cells and try the code written in the question if you are unsure. " 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "**Question 1** \n", 44 | "Can you explain what happens to ``var`` in each step of the example below? What type is ``var`` at the end of the program?\n", 45 | "```python\n", 46 | "var = 1\n", 47 | "var *= 100\n", 48 | "var /= 2\n", 49 | "var = int(var)\n", 50 | "var = str(var)\n", 51 | "var = var + \" points to Gryffindor!\"\n", 52 | "print(var)\n", 53 | "```" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 1, 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "name": "stdout", 63 | "output_type": "stream", 64 | "text": [ 65 | "50 points to Gryffindor!\n", 66 | "\n" 67 | ] 68 | } 69 | ], 70 | "source": [ 71 | "var = 1\n", 72 | "var *= 100\n", 73 | "var /= 2\n", 74 | "var = int(var)\n", 75 | "var = str(var)\n", 76 | "var = var + \" points to Gryffindor!\"\n", 77 | "print(var)\n", 78 | "print(type(var))" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "**Question 2** \n", 86 | "What is the loop below printing at each iteration?\n", 87 | " \n", 88 | "```python\n", 89 | "scand = [\"Denmark\", \"Finnland\", \"Sweden\", \"Norway\"]\n", 90 | "\n", 91 | "for index, country in enumerate(scand):\n", 92 | " if country != \"Finnland\":\n", 93 | " print(\"My #\" + str(index+1) + \" favorite country in Scandinavia is \" + country + \".\")\n", 94 | " \n", 95 | " else:\n", 96 | " print(\"Sorry, but Finnland 
is not part of Scandinavia.\")\n", 97 | " break\n", 98 | "```" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 3, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "name": "stdout", 108 | "output_type": "stream", 109 | "text": [ 110 | "My #1 favorite country in Scandinavia is Denmark.\n", 111 | "Sorry, but Finnland is not part of Scandinavia.\n" 112 | ] 113 | } 114 | ], 115 | "source": [ 116 | "scand = [\"Denmark\", \"Finnland\", \"Sweden\", \"Norway\"]\n", 117 | "\n", 118 | "for index, country in enumerate(scand):\n", 119 | " if country != \"Finnland\":\n", 120 | " print(\"My #\" + str(index+1) + \" favorite country in Scandinavia is \" + country + \".\")\n", 121 | " \n", 122 | " else:\n", 123 | " print(\"Sorry, but Finnland is not part of Scandinavia.\")\n", 124 | " break" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "**Question 3** \n", 132 | "Consider the scenarios A-C below. Which Python data structure would you use to solve them?\n", 133 | "\n", 134 | "**A.** You have all names of the participants in this course. You want to store them in a variable that preserves the order in which the participants signed up for the course. \n", 135 | "\n", 136 | "**B.** You have (a) all names of the participants in this course, and (b) the names of everyone who is currently employed at SUND. Next, you want to find all employees who DID NOT participate in the course. Which data structure would you store (a) and (b) in to achieve this? And bonus question: How would you achieve it? \n", 137 | "\n", 138 | "**C.** You want to store information about all capital cities in Europe and the number of their inhabitants. Since the numbers might change, you want to make sure that you can update the information at any time. 
" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "**A.** A list is used for ordered data.\n", 146 | "\n", 147 | "**B.** The easiest way to do this is to define two sets and then subtract set (a) from set (b):\n", 148 | "```{python}\n", 149 | "\n", 150 | "not_participants = set_all_employees - set_participants \n", 151 | "\n", 152 | "```\n", 153 | "\n", 154 | "You could also use two lists.\n", 155 | "\n", 156 | "**C.** A dictionary is used to store values as key-value pairs. Dictionaries can be updated at any time.\n", 157 | "\n" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "**Question 4**\n", 165 | "\n", 166 | "Does the expression used in the `if` below evaluate to `True` or `False`? Can you explain why? Can you change it to make it `False`?\n", 167 | "\n" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 4, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "Include in study.\n" 180 | ] 181 | } 182 | ], 183 | "source": [ 184 | "smoker = False\n", 185 | "patient_age = 50\n", 186 | "\n", 187 | "if (not smoker and patient_age < 65):\n", 188 | " print('Include in study.')\n", 189 | "else:\n", 190 | " print('Skip to next candidate.')" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 5, 196 | "metadata": {}, 197 | "outputs": [ 198 | { 199 | "name": "stdout", 200 | "output_type": "stream", 201 | "text": [ 202 | "Skip to next candidate.\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "#some examples\n", 208 | "\n", 209 | "smoker = True\n", 210 | "patient_age = 50\n", 211 | "\n", 212 | "if (not smoker and patient_age < 65):\n", 213 | " print('Include in study.')\n", 214 | "else:\n", 215 | " print('Skip to next candidate.')\n" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 6, 221 | "metadata": {}, 
222 | "outputs": [ 223 | { 224 | "name": "stdout", 225 | "output_type": "stream", 226 | "text": [ 227 | "Skip to next candidate.\n" 228 | ] 229 | } 230 | ], 231 | "source": [ 232 | "smoker = False\n", 233 | "patient_age = 77\n", 234 | "\n", 235 | "if (not smoker and patient_age < 65):\n", 236 | " print('Include in study.')\n", 237 | "else:\n", 238 | " print('Skip to next candidate.')" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "**Question 5** \n", 253 | "Explain the errors below, and change the code to fix them." 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "There are many ways to fix the errors. We show some examples." 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [ 269 | "# error 1\n", 270 | "75 * (2/0)" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "#don't divide by 0\n", 280 | "75 * (2/0.5)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": {}, 287 | "outputs": [], 288 | "source": [ 289 | "# error 2\n", 290 | "weather = [\"sunny\", \"cloudy\", \"rainy\"]\n", 291 | "weather[4]" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "#The list only has 3 elements, so you can only access elements between 0 and 2\n", 301 | "weather[2]" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "metadata": {}, 308 | "outputs": [], 309 | "source": [ 310 | "# error 3\n", 311 | "university = \"ucph\"\n", 312 | "typ(university)" 313 | ] 314 | }, 315 | { 
316 | "cell_type": "code", 317 | "execution_count": null, 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [ 321 | "#there is a typo:\n", 322 | "type(university)" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": null, 328 | "metadata": {}, 329 | "outputs": [], 330 | "source": [ 331 | "# error 4\n", 332 | "university = \"ucph\"\n", 333 | "if \"c\" in university:\n", 334 | "print(\"I work at\" + university.upper() + \".\")" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [ 343 | "#The content of if blocks need to be indented\n", 344 | "university = \"ucph\"\n", 345 | "if \"c\" in university:\n", 346 | " print(\"I work at\" + university.upper() + \".\")" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": null, 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [ 355 | "# error 5\n", 356 | "print(hello_world)" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": null, 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [ 365 | "#this is either a variable name or a string\n", 366 | "\n", 367 | "#1. define the variable\n", 368 | "hello_world = 'Hello world'\n", 369 | "print(hello_world)\n", 370 | "\n", 371 | "#2. add quotation marks\n", 372 | "print('hello_world')" 373 | ] 374 | }, 375 | { 376 | "cell_type": "markdown", 377 | "metadata": {}, 378 | "source": [ 379 | "**Question 6**\n", 380 | "\n", 381 | "Consider the data frame below. How do you:\n", 382 | "\n", 383 | "1. Create a new dataframe with only the treatment, age and satisfaction columns?\n", 384 | "2. Calculate the mean age?\n", 385 | "3. Extract all rows with patients older than 70?\n", 386 | "4. Omit rows with missing data?\n" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": 19, 392 | "metadata": {}, 393 | "outputs": [ 394 | { 395 | "data": { 396 | "text/html": [ 397 | "

\n", 398 | "\n", 411 | "\n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | "
patient_idtreatmenthospitalconvalescence_daysagesatisfaction
0402109ARigshospitalet15.0683
1092070ARigshospitalet13.0745
2994082BHerlev27.0762
3843094AHerlev30.0655
4369360BRigshospitalet21.0685
5688213BRigshospitalet29.0773
6197347AHerlev25.0655
7374793ARigshospitaletNaN675
8759063BRigshospitalet16.0754
9121219BNone27.0684
10427898BRigshospitalet15.0745
\n", 525 | "
" 526 | ], 527 | "text/plain": [ 528 | " patient_id treatment hospital convalescence_days age satisfaction\n", 529 | "0 402109 A Rigshospitalet 15.0 68 3\n", 530 | "1 092070 A Rigshospitalet 13.0 74 5\n", 531 | "2 994082 B Herlev 27.0 76 2\n", 532 | "3 843094 A Herlev 30.0 65 5\n", 533 | "4 369360 B Rigshospitalet 21.0 68 5\n", 534 | "5 688213 B Rigshospitalet 29.0 77 3\n", 535 | "6 197347 A Herlev 25.0 65 5\n", 536 | "7 374793 A Rigshospitalet NaN 67 5\n", 537 | "8 759063 B Rigshospitalet 16.0 75 4\n", 538 | "9 121219 B None 27.0 68 4\n", 539 | "10 427898 B Rigshospitalet 15.0 74 5" 540 | ] 541 | }, 542 | "execution_count": 19, 543 | "metadata": {}, 544 | "output_type": "execute_result" 545 | } 546 | ], 547 | "source": [ 548 | "#defining the dataframe\n", 549 | "import pandas as pd\n", 550 | "\n", 551 | "data = [\n", 552 | " ['402109','A','Rigshospitalet',15,68,3],\n", 553 | " ['092070','A','Rigshospitalet',13,74,5],\n", 554 | " ['994082','B','Herlev',27,76,2],\n", 555 | " ['843094','A','Herlev',30,65,5],\n", 556 | " ['369360','B','Rigshospitalet',21,68,5],\n", 557 | " ['688213','B','Rigshospitalet',29,77,3],\n", 558 | " ['197347','A','Herlev',25,65,5],\n", 559 | " ['374793','A','Rigshospitalet', None, 67,5],\n", 560 | " ['759063','B','Rigshospitalet',16,75,4],\n", 561 | " ['121219','B',None,27,68,4],\n", 562 | " ['427898','B','Rigshospitalet',15,74,5]\n", 563 | "]\n", 564 | "\n", 565 | "df = pd.DataFrame(data, columns=['patient_id', 'treatment', 'hospital', 'convalescence_days' ,'age', 'satisfaction'])\n", 566 | "df\n" 567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": null, 572 | "metadata": {}, 573 | "outputs": [], 574 | "source": [ 575 | "#1\n", 576 | "new_df = df[['treatment', 'age', 'satisfaction']]\n", 577 | "new_df" 578 | ] 579 | }, 580 | { 581 | "cell_type": "code", 582 | "execution_count": null, 583 | "metadata": {}, 584 | "outputs": [], 585 | "source": [ 586 | "#2\n", 587 | "df['age'].mean()" 588 | ] 589 | }, 590 | { 591 | 
"cell_type": "code", 592 | "execution_count": null, 593 | "metadata": {}, 594 | "outputs": [], 595 | "source": [ 596 | "#3\n", 597 | "select_rows = df.loc[df['age'] > 70]\n", 598 | "select_rows" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": null, 604 | "metadata": {}, 605 | "outputs": [], 606 | "source": [ 607 | "#4\n", 608 | "df_no_na = df.dropna()" 609 | ] 610 | }, 611 | { 612 | "cell_type": "markdown", 613 | "metadata": {}, 614 | "source": [ 615 | "## To sum it up...\n", 616 | "\n", 617 | "...here are the most important take-away points:\n", 618 | "\n", 619 | "\n", 620 | "#### Variables\n", 621 | "A variable is a named symbol that holds a value. \n", 622 | " \n", 623 | " \n", 624 | "#### Data Types and Structures\n", 625 | "\n", 626 | "* ``int``: zero, positive or negative whole number \n", 627 | "```python\n", 628 | "int_example = 1\n", 629 | "```\n", 630 | " \n", 631 | "* ``float``: zero, positive or negative number with a decimal point\n", 632 | "```python\n", 633 | "float_example = 34.6\n", 634 | "```\n", 635 | "* ``str``: containers of characters. 
\n", 636 | "```python\n", 637 | "str_example = \"I am a string!\"\n", 638 | "```\n", 639 | "* ``list``: container of ordered items that can be accessed by their index\n", 640 | "```python\n", 641 | "list_example = [\"element\", 1, 2, \"another_element\"]\n", 642 | "```\n", 643 | "* ``set``: collection of unique, unordered, unchangeable, and unindexed elements\n", 644 | "```python\n", 645 | "#with curly brackets\n", 646 | "set_example = {'Hello', 1, 2, 'pizza'}\n", 647 | "#set from list:\n", 648 | "set2 = set(['KU','SDU','AU'])\n", 649 | "```\n", 650 | "* ``dict``: stores (key, value) pairs\n", 651 | "```python\n", 652 | "dict_example = {\"apple\":1, \"cherry\":20, \"strawberry\":10}\n", 653 | "```\n", 654 | "* ``tuple``: immutable, ordered list of values\n", 655 | "```python\n", 656 | "tuple_example = (1,20,78,5)\n", 657 | "```\n", 658 | "* ``bool``: can only take the values `True` or `False`\n", 659 | "```python\n", 660 | "bool_example = True\n", 661 | "```\n", 662 | "\n", 663 | "\n", 664 | "#### Indexing and Slicing\n", 665 | "Indexing and slicing: Both the characters in a string and the elements in a list can be accessed by their index. In Python, indexing always starts with 0, i.e. the first element of a list lives at index position 0. To access values in a string or list, we can use the slicing notation ``[start:end:step]``:\n", 666 | "```python\n", 667 | "list_example[1:3] # access the elements at index 1 and 2\n", 668 | "```\n", 669 | "\n", 670 | "#### Comparison Operators and Boolean Logic\n", 671 | "Comparison operators can tell how two Python values relate, resulting in a boolean. They answer yes/no questions.\n", 672 | "\n", 673 | "#### Conditional Statements\n", 674 | "Conditional statements use the keywords ``if``, ``elif`` and ``else``. 
They are used to control a piece of code based on the value of a Boolean condition.\n", 675 | "\n", 676 | "```python\n", 677 | "num_example = # you define a number here\n", 678 | "\n", 679 | "if num_example % 2:\n", 680 | " print(f\"{num_example} is an odd number.\")\n", 681 | "else:\n", 682 | " print(f\"{num_example} is an even number.\")\n", 683 | "```\n", 684 | "\n", 685 | "\n", 686 | "\n", 687 | "#### Loops\n", 688 | "Loops allow you to repeatedly execute a piece of code. You can control the flow of your loop with the statements ``break``, ``continue`` and ``pass``.\n", 689 | " \n", 690 | "``for`` loops repeat the code you want to run for each variable in the sequence you loop over. \n", 691 | "```python\n", 692 | "number_list = [1, 2, 3, 4, 5]\n", 693 | "for number in number_list:\n", 694 | " print(number)\n", 695 | "```\n", 696 | " \n", 697 | "``while`` loops continue to execute while a certain condition is ``True``, and ends once this conditions becomes ``False``.\n", 698 | "```python\n", 699 | "i = 0\n", 700 | "while i < 10:\n", 701 | " i += 1\n", 702 | " print(i)\n", 703 | "```\n" 704 | ] 705 | }, 706 | { 707 | "cell_type": "code", 708 | "execution_count": null, 709 | "metadata": {}, 710 | "outputs": [], 711 | "source": [] 712 | } 713 | ], 714 | "metadata": { 715 | "kernelspec": { 716 | "display_name": "Python 3", 717 | "language": "python", 718 | "name": "python3" 719 | }, 720 | "language_info": { 721 | "codemirror_mode": { 722 | "name": "ipython", 723 | "version": 3 724 | }, 725 | "file_extension": ".py", 726 | "mimetype": "text/x-python", 727 | "name": "python", 728 | "nbconvert_exporter": "python", 729 | "pygments_lexer": "ipython3", 730 | "version": "3.8.5" 731 | } 732 | }, 733 | "nbformat": 4, 734 | "nbformat_minor": 4 735 | } 736 | -------------------------------------------------------------------------------- /solutions/variables_solutions.ipynb: -------------------------------------------------------------------------------- 1 | 
{"cells":[{"cell_type":"markdown","metadata":{"id":"aVGtJ28V4nVm"},"source":["# Exercise "]},{"cell_type":"markdown","metadata":{"id":"Arrv3Ql1u3oq"},"source":["Please complete the four steps below.\n","1. Use a number of your choice and store it in a variable. Multiply that variable by ``2`` and print this new variable.\n","2. Use a second number of your choice and multiply it with the initial variable used in (1).\n","3. Find out if the result is even (divisible by 2) or odd. _Hint: Use the modulo operator._\n","4. What is the type of the final variable?"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"OQ7yFVaO4nVm"},"outputs":[],"source":["# Your code goes here\n","\n","#1\n","my_number = 13\n","new_var = my_number * 2\n","print(new_var)\n","\n","#2\n","result = my_number * 3\n","print(result)\n","\n","#3\n","#The modulo 2 is 0 for even numbers and 1 for odd numbers.\n","print(result % 2)\n","\n","# print(type(result)) you need to use print() if you want to see the output from type(), unless it is in the last line of your code.\n","type(result)\n","\n","\n","\n"]}],"metadata":{"colab":{"collapsed_sections":[],"provenance":[]},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.3"},"rise":{"scroll":true},"toc":{"base_numbering":1,"nav_menu":{},"number_sections":true,"sideBar":true,"skip_h1_title":false,"title_cell":"Table of Contents","title_sidebar":"Contents","toc_cell":false,"toc_position":{},"toc_section_display":true,"toc_window_display":true}},"nbformat":4,"nbformat_minor":0} 2 | --------------------------------------------------------------------------------