├── .gitignore ├── .gitmodules ├── Conditionals ├── Conditions.ipynb └── README.md ├── Exercise ├── Exercise_oct2023.ipynb ├── Exercise_oct2023_solution.ipynb ├── README.md └── datasets │ ├── cirrhosis.csv │ └── healthcare-dataset-stroke-data.csv ├── Extra ├── List_comprehension.ipynb └── README.md ├── Iterables ├── README.md └── iterables.ipynb ├── LICENSE ├── Loops ├── Loops.ipynb └── README.md ├── Pandas ├── IO_Pandas.ipynb ├── Pandas.ipynb └── README.md ├── README.md ├── Recap ├── README.md └── recap.ipynb ├── Variables_data_types ├── README.md ├── Variables_slides_Oct22.pdf └── variables.ipynb ├── Visualizations ├── PlotlyExpress_ComprehensiveGuide.ipynb ├── README.md ├── plotly.ipynb └── plotly_extra_material.ipynb ├── cheat_sheets ├── Bokeh_Cheat_Sheet.pdf ├── Importing_Data_Cheat_sheet.pdf ├── Jupyter_Notebook_Cheat_Sheet.pdf ├── Numpy_Python_Cheat_Sheet.pdf ├── Pandas_Cheat_Sheet.pdf ├── Plotly_Cheat_Sheet.pdf ├── Python_Matplotlib_Cheat_Sheet.pdf ├── Scikit-learn_Cheat_Sheet.pdf ├── Scipy-LinearAlgebra_Cheat_Sheet.pdf ├── Seaborn_Cheat_Sheet.pdf └── cheat_sheet_day0.pdf ├── environment.yml ├── figures ├── HeaDS_logo_large_withTitle.png ├── Program_October_2021.png ├── colab_restart_runtime_after_install.png ├── colab_save_in_drive.png ├── colab_save_in_drive_2.png ├── colab_toc.png ├── df_loc.png ├── df_loc_condition.png ├── github_raw_file_view.png ├── long_format.png ├── matplotlib │ ├── fig_axes_axis.png │ └── handout-beginner.png ├── pandas_dataframe.png ├── pandas_indexing.png ├── program.PNG ├── program_june2022.png ├── program_march2023.png ├── program_oct_screen_GR.png ├── program_spring_2022.PNG ├── quartile-percentile.jpg ├── tsunami_logo.PNG └── wide_format.png ├── slides ├── Python Tsunami Local Installations vs code.pdf └── Python Tsunami intro.pdf └── solutions ├── conditions_solutions.ipynb ├── functions_solutions.ipynb ├── iterables_wSolutions.ipynb ├── loops_solutions.ipynb ├── pandas_solutions.ipynb ├── plotly_solutions.ipynb ├── 
recap_solutions.ipynb └── variables_solutions.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | Untitled.ipynb 131 | .DS_Store 132 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "data/covid-19"] 2 | path = data/covid-19 3 | url = https://github.com/datasets/covid-19.git 4 | -------------------------------------------------------------------------------- /Conditionals/README.md: -------------------------------------------------------------------------------- 1 |
2 |
3 |
5 |
6 |
7 | notebook | content
8 | ---- | ------
9 | [Conditions.ipynb](Conditions.ipynb) [](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Conditionals/Conditions.ipynb) | Conditionals
10 |
--------------------------------------------------------------------------------
/Exercise/Exercise_oct2023.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {
7 | "id": "3YLOMOoHRwRR"
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import pandas as pd\n",
12 | "import plotly.express as px"
13 | ]
14 | },
15 | {
16 | "cell_type": "markdown",
17 | "metadata": {
18 | "id": "k9Nxy_ic9GgE"
19 | },
20 | "source": [
21 | "# Exercise\n",
22 | "\n",
23 | "Now that we have learned some of the basics of python, we should practice how to use this new superpower. We have here prepared a loosely guided exercise that focusses on data exploration and visualization on two example datasets, one on strokes and one on cirrhosis. You can also explore a dataset of your choosing, though the questions are prepared with the example datasets in mind.\n",
24 | "\n",
25 | "Here you can see the [metadata](https://www.kaggle.com/datasets/fedesoriano/cirrhosis-prediction-dataset) for the cirrhosis dataset which describes what each of the columns are. For the stroke data the meaning of the columns is more straightforward."
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {
31 | "id": "XVWSFVPK9TCe"
32 | },
33 | "source": [
34 | "## 1. Data Loading\n",
35 | "\n",
36 | "We will start with the **stroke** dataset. You can find it on the GitHub repository under Exercise/datasets. Load the stroke data into colab by using one of the two approaches detailed below and assign it to variable name ```data```."
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {
42 | "id": "SmuuZeHpx3S_"
43 | },
44 | "source": [
45 | "### 1.1 Loading the data\n",
46 | "\n",
47 | "**Option 1:**\n",
48 | "\n",
49 | "Use the pandas csv reader with a link to the data on GitHub. To do this, go to the github repository, find the stroke dataset and click on the 'raw' button. Copy that link and enter it as the file path in pandas csv reader."
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "metadata": {
56 | "id": "vR5EptJg0G1J"
57 | },
58 | "outputs": [],
59 | "source": []
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "metadata": {
64 | "id": "MXMXD20CxXHT"
65 | },
66 | "source": [
67 | "... or\n",
68 | "\n",
69 | "**Option 2**\n",
70 | "\n",
71 | "Manually load the dataset into colab and then read it with the pandas csv reader. See steps below:\n",
72 | "\n",
73 | "1. go to the left side bar and click on the folder icon\n",
74 | "2. click on data upload\n",
75 | "3. select dataset from your computer\n",
76 | "4. call pandas csv reader with the name of the dataset"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": null,
82 | "metadata": {
83 | "id": "LWfDOeGB9Uw2"
84 | },
85 | "outputs": [],
86 | "source": []
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {
91 | "id": "Gf5b1jRGxVOO"
92 | },
93 | "source": [
94 | "### 1.2 First look\n",
95 | "\n",
96 | "Have a first look at the data. There are some neat built-in pandas functions to get an initial understanding of the data, e.g. the info function: `df.info()`, or the pandas `df.describe()` function.\n",
97 | "\n",
98 | "Questions you might want to answer here:\n",
99 | "- What different types of columns do you have?\n",
100 | "- Is there a column that describes a variable that can be understood as an 'outcome' ? Which one?\n",
101 | "- How many values does each variable, i.e. column, have and what are some preliminary statistics of the features? (tip: use pandas `describe` function)\n",
102 | "\n"
103 | ]
104 | },
105 | {
106 | "cell_type": "code",
107 | "execution_count": null,
108 | "metadata": {
109 | "id": "-U3eWqWn9fCX"
110 | },
111 | "outputs": [],
112 | "source": []
113 | },
114 | {
115 | "cell_type": "markdown",
116 | "metadata": {
117 | "id": "4simFEW-teti"
118 | },
119 | "source": [
120 | "It helps to know which column is the outcome variable. In the stroke dataset (and many others!) the outcome variable is coded as a numerical variable. However, during analysis it should be interpreted as categorical.\n",
121 | "\n",
122 | "Identify the column of the outcome variable and change its type to \"category\" by using `astype()`. You can see an example in the [API reference on categorical data](https://pandas.pydata.org/pandas-docs/stable/user_guide/categorical.html). Remember to save your changes!\n",
123 | "\n",
124 | "Then, use `info()` on the dataframe again. Has it changed?"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": null,
130 | "metadata": {
131 | "id": "YDLT6I4gtetj"
132 | },
133 | "outputs": [],
134 | "source": []
135 | },
136 | {
137 | "cell_type": "markdown",
138 | "metadata": {
139 | "id": "ScZ9eNl99zEN"
140 | },
141 | "source": [
142 | "## 2. Exploratory analysis\n",
143 | "\n",
144 | "Get to know your data better. If you want to first visually inspect the data it can help to explore with some plots.\n"
145 | ]
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "metadata": {
150 | "id": "3O904uWZtetj"
151 | },
152 | "source": [
153 | "### 2.1 Violin plots and histograms\n",
154 | "\n",
155 | "Consider your dataframe columns that are not the outcome variable. How are the measurements distributed?\n",
156 | "\n",
157 | "To study the distributions we want to make **violin plots** of variables, i.e. data columns, that are numeric and **histograms** of the variables that are strings/categorical.\n",
158 | "\n",
159 | "To check for the data type of a column, have a look at how data types are specified in `dataframe.dtypes`. Then check the data type of each column. Remember a column is a **pandas `Series`**, so it has a `dtype` attribute instead (only dataframes have `dtypes`!).\n",
160 | "\n",
161 | "You can start by figuring out how to make a plot of the data in one column. Once you have that, make one plot for each column that is numeric or a string (except the outcome). This is a repetitive task, so it is ideally suited for a loop. Remember to use `fig.show()` to actually display your plots during the loop.\n",
162 | "\n",
163 | "**Pro version**: Some columns are not actually explanatory variables, such as the ID column. You can identify these columns e.g. by seeing that each of their values is unique (this would be very unlikely for a measured variable). Skip them when making the plots.\n"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": null,
169 | "metadata": {
170 | "id": "llINtmry-NqA"
171 | },
172 | "outputs": [],
173 | "source": []
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": null,
178 | "metadata": {
179 | "id": "aEIouSF6tetj"
180 | },
181 | "outputs": [],
182 | "source": []
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": null,
187 | "metadata": {
188 | "id": "9XT_fy_utetk"
189 | },
190 | "outputs": [],
191 | "source": []
192 | },
193 | {
194 | "cell_type": "markdown",
195 | "metadata": {
196 | "id": "-d-4HSAay-xc"
197 | },
198 | "source": [
199 | "### 2.2 Correlation coefficients\n",
200 | "\n",
201 | "Plot the correlation coefficients of all numerical features:\n",
202 | "\n",
203 | "1. Use the method `corr()` on the dataframe. What is the result?\n",
204 | "\n",
205 | "2. Now use a heatmap to show the correlation coefficients graphically.\n",
206 | "\n",
207 | "3. Try some different options to make your heatmap look nicer."
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": null,
213 | "metadata": {
214 | "id": "yc3GuLRe-U6l"
215 | },
216 | "outputs": [],
217 | "source": []
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": null,
222 | "metadata": {
223 | "id": "uQQfqB9atetk"
224 | },
225 | "outputs": [],
226 | "source": []
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": null,
231 | "metadata": {
232 | "id": "TOdMk4Qrtetk"
233 | },
234 | "outputs": [],
235 | "source": []
236 | },
237 | {
238 | "cell_type": "markdown",
239 | "metadata": {
240 | "id": "v9Ez8XDCzCGW"
241 | },
242 | "source": [
243 | "### 2.3 Scatter plot\n",
244 | "\n",
245 | "Make a scatter plot of the two variables with the highest correlation. Divide the plots by the outcome variable and add marginal plots and a trendline:\n",
246 | "\n",
247 | "1. Find the pair of variables that has the highest correlation with each other and make a scatter plot of them.\n",
248 | "\n",
249 | "2. Divide the scatter plot into two by the outcome variable. Have a look at ``facet`` and the visualization lecture if you have trouble.\n",
250 | "\n",
251 | "3. Add marginal distributions on one of the axes and a trendline. "
252 | ]
253 | },
254 | {
255 | "cell_type": "code",
256 | "execution_count": null,
257 | "metadata": {
258 | "id": "5xNHY4LwAJx8"
259 | },
260 | "outputs": [],
261 | "source": []
262 | },
263 | {
264 | "cell_type": "code",
265 | "execution_count": null,
266 | "metadata": {
267 | "id": "a1kHLkWbtetl"
268 | },
269 | "outputs": [],
270 | "source": []
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": null,
275 | "metadata": {
276 | "id": "vyf61JtAtetm"
277 | },
278 | "outputs": [],
279 | "source": []
280 | },
281 | {
282 | "cell_type": "markdown",
283 | "metadata": {
284 | "id": "XzWC3UdNAhdt"
285 | },
286 | "source": [
287 | "## 3. Data cleaning\n",
288 | "\n",
289 | "Now, we switch to the [cirrhosis dataset](https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/spring2022/Exercise/datasets/cirrhosis.csv).\n",
290 | "\n",
291 | "We will investigate what data is missing and try to impute it.\n",
292 | "\n",
293 | "A word of caution:\n",
294 | "\n",
295 | "Note that imputation is a __complex subject__ and whether it makes sense to do it and the method used highly depend on the data set. Sometimes, the mean of a value across all non-missing observations is a good approximation for the missing value. On the other hand, if you have a column that says whether or not the person was treated with the drug or the placebo we have no good way to guess which treatment the person received. Replacing missing values in this column with the most common value (which is that they did get the drug) will produce extremely __wrong data__ and lead you to __wrong conclusions__. Do not do that!\n"
296 | ]
297 | },
298 | {
299 | "cell_type": "markdown",
300 | "metadata": {
301 | "id": "uyHh_Y1Qtetm"
302 | },
303 | "source": [
304 | "### 3.0 Load the data\n",
305 | "\n",
306 | "Load in the cirrhosis dataset using one of the two methods you used earlier for the stroke data. Also change the outcome variable to type \"category\"."
307 | ]
308 | },
309 | {
310 | "cell_type": "code",
311 | "execution_count": null,
312 | "metadata": {
313 | "id": "0UKiUqBNtetm"
314 | },
315 | "outputs": [],
316 | "source": []
317 | },
318 | {
319 | "cell_type": "markdown",
320 | "metadata": {
321 | "id": "giu-oZSQtetm"
322 | },
323 | "source": [
324 | "### 3.1 Missing data\n",
325 | "\n",
326 | "1. Use the pandas method `isnull`.\n",
327 | "\n",
328 | "2. Get the number of missing values per column by calling `sum()` on the result of `isnull`. Which features, i.e. columns have missing values?\n",
329 | "\n",
330 | "3. Make a barplot that shows the number of missing values per column.\n"
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": null,
336 | "metadata": {
337 | "id": "KB_RPg-U9i8J"
338 | },
339 | "outputs": [],
340 | "source": []
341 | },
342 | {
343 | "cell_type": "code",
344 | "execution_count": null,
345 | "metadata": {
346 | "id": "XEbMbPHqAj2X"
347 | },
348 | "outputs": [],
349 | "source": []
350 | },
351 | {
352 | "cell_type": "markdown",
353 | "metadata": {
354 | "id": "3qBOePAlzIrv"
355 | },
356 | "source": [
357 | "### 3.2 Omitting observations with missing values\n",
358 | "\n",
359 | "1. Create a subset in which you omit all patients, i.e. rows, which have missing values in any column. Take care to not overwrite the original dataframe. If you did, you can re-import it.\n",
360 | "\n",
361 | "2. How many observations, i.e. patients, would you be left with if you removed all missing values?\n",
362 | "\n",
363 | "3. How many if you only omit patients where the outcome is missing?\n"
364 | ]
365 | },
366 | {
367 | "cell_type": "code",
368 | "execution_count": null,
369 | "metadata": {
370 | "id": "j3rEUU7iA1La"
371 | },
372 | "outputs": [],
373 | "source": []
374 | },
375 | {
376 | "cell_type": "code",
377 | "execution_count": null,
378 | "metadata": {
379 | "id": "0SbRFgmGChph"
380 | },
381 | "outputs": [],
382 | "source": []
383 | },
384 | {
385 | "cell_type": "markdown",
386 | "metadata": {
387 | "id": "t1_fL5hEC2LN"
388 | },
389 | "source": [
390 | "### 3.3 Effects of removing data\n",
391 | "\n",
392 | "We can now have a look at how removing NaNs affects the data.\n",
393 | "\n",
394 | "\n",
395 | "1. First, plot the correlation coefficient between all numerical columns in the original cirrhosis dataframe. (Analogous to 2.2).\n",
396 | "\n",
397 | "2. Now, remake the plot for the subset where you have removed all rows with any missing data. Have the correlations changed?"
398 | ]
399 | },
400 | {
401 | "cell_type": "code",
402 | "execution_count": null,
403 | "metadata": {
404 | "id": "r5G9AqzzDSBJ"
405 | },
406 | "outputs": [],
407 | "source": []
408 | },
409 | {
410 | "cell_type": "markdown",
411 | "metadata": {
412 | "id": "9b40PElUzT-2"
413 | },
414 | "source": [
415 | "### 3.4 Imputation\n",
416 | "\n",
417 | "Use the method `fillna()` to impute missing values in the columns **where it makes sense**. Have a look at the documentation: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.fillna.html\n",
418 | "\n",
419 | "1. A good way to impute numerical data can be i.e. the mean or median. Calculate the mean for all numerical columns.\n",
420 | "\n",
421 | "2. Perform the imputation.\n",
422 | "\n",
423 | "3. Re-make the barplot from 3.1. to check that it worked.\n",
424 | "\n",
425 | "4. Recalculate correlation coefficients between all numerical columns and show it in a heatmap.\n"
426 | ]
427 | },
428 | {
429 | "cell_type": "code",
430 | "execution_count": null,
431 | "metadata": {
432 | "id": "SF8PoOvOcp7n"
433 | },
434 | "outputs": [],
435 | "source": []
436 | },
437 | {
438 | "cell_type": "code",
439 | "execution_count": null,
440 | "metadata": {
441 | "id": "-KAI_fEIteto"
442 | },
443 | "outputs": [],
444 | "source": []
445 | },
446 | {
447 | "cell_type": "code",
448 | "execution_count": null,
449 | "metadata": {
450 | "id": "m5Wq7uL3teto"
451 | },
452 | "outputs": [],
453 | "source": []
454 | },
455 | {
456 | "cell_type": "code",
457 | "execution_count": null,
458 | "metadata": {
459 | "id": "fSFkAHeoteto"
460 | },
461 | "outputs": [],
462 | "source": []
463 | }
464 | ],
465 | "metadata": {
466 | "colab": {
467 | "collapsed_sections": [
468 | "Hto297viwyzY",
469 | "Gf5b1jRGxVOO",
470 | "TC6ewdf5qFze",
471 | "-d-4HSAay-xc",
472 | "v9Ez8XDCzCGW",
473 | "FxHy9gKJqtNE",
474 | "3qBOePAlzIrv",
475 | "nKknLSw1zOqi",
476 | "9b40PElUzT-2",
477 | "_CwrlBmMOzif",
478 | "ATnfchJIyGKj",
479 | "SS-KMKN7zeNw",
480 | "feYvDsddzhUl",
481 | "f8vhdvqVzjXi"
482 | ],
483 | "provenance": []
484 | },
485 | "kernelspec": {
486 | "display_name": "Python 3",
487 | "language": "python",
488 | "name": "python3"
489 | },
490 | "language_info": {
491 | "codemirror_mode": {
492 | "name": "ipython",
493 | "version": 3
494 | },
495 | "file_extension": ".py",
496 | "mimetype": "text/x-python",
497 | "name": "python",
498 | "nbconvert_exporter": "python",
499 | "pygments_lexer": "ipython3",
500 | "version": "3.12.6"
501 | }
502 | },
503 | "nbformat": 4,
504 | "nbformat_minor": 0
505 | }
506 |
--------------------------------------------------------------------------------
/Exercise/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 |
6 |
7 | notebook | content
8 | ---- | ------
9 | [Exercise_oct2023.ipynb](Exercise_oct2023.ipynb) [](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Exercise/Exercise_oct2023.ipynb) | Exercise
10 |
11 |
--------------------------------------------------------------------------------
/Extra/List_comprehension.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# List comprehensions"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "A list comprehension is an implicit for loop where we want to do *something* to every element of an existing list and create a new list.\n"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "## General syntax\n",
22 | "\n",
23 | "The general syntax is:\n",
24 | "\n",
25 | "```python\n",
26 | "new_list = [expression for iterator in old_list]\n",
27 | "```"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 5,
33 | "metadata": {},
34 | "outputs": [
35 | {
36 | "name": "stdout",
37 | "output_type": "stream",
38 | "text": [
39 | "[1, 4, 9, 16, 25, 36]\n"
40 | ]
41 | }
42 | ],
43 | "source": [
44 | "#an example: square a list of numbers\n",
45 | "\n",
46 | "numbers = [1,2,3,4,5,6]\n",
47 | "squares = []\n",
48 | "\n",
49 | "for x in numbers:\n",
50 | " squares.append(x**2)\n",
51 | "\n",
52 | "print(squares)"
53 | ]
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {},
58 | "source": [
59 | "We can do this in one line instead with a list comprehension:"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 6,
65 | "metadata": {},
66 | "outputs": [
67 | {
68 | "name": "stdout",
69 | "output_type": "stream",
70 | "text": [
71 | "[1, 4, 9, 16, 25, 36]\n"
72 | ]
73 | }
74 | ],
75 | "source": [
76 | "squares = [x**2 for x in numbers]\n",
77 | "print(squares)"
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {},
83 | "source": [
84 | "You could also skip defining the old list, `numbers`, and straight-up use an iterator like `range`:"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": 7,
90 | "metadata": {},
91 | "outputs": [
92 | {
93 | "name": "stdout",
94 | "output_type": "stream",
95 | "text": [
96 | "[1, 4, 9, 16, 25, 36]\n"
97 | ]
98 | }
99 | ],
100 | "source": [
101 | "#remember, we want to start from 1 and go up to 6 so we need range(1,7)\n",
102 | "squares = [x**2 for x in range(1,7)]\n",
103 | "print(squares)"
104 | ]
105 | },
106 | {
107 | "cell_type": "markdown",
108 | "metadata": {},
109 | "source": [
110 | "So in terms of syntax we mean:\n",
111 | "\n",
112 | "```python\n",
113 | "new_list = [expression for iterator in old_list]\n",
114 | "```\n",
115 | "\n",
116 | "where:\n",
117 | "* `old_list` - the name of the list we want to iterate over\n",
118 | "* `iterator` - what we want to call the entry of `old_list` we are currently looking at, i.e. `x` or `i`\n",
119 | "* `expression` - what we want to do to each entry of `old_list`\n"
120 | ]
121 | },
122 | {
123 | "cell_type": "markdown",
124 | "metadata": {},
125 | "source": [
126 | "## Conditional comprehensions\n",
127 | "\n",
128 | "Where list comprehensions really shine is when we want to do *something* to only *some* entries of a list, depending on a condition.\n",
129 | "\n",
130 | "Here we will square only even numbers:"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": 8,
136 | "metadata": {},
137 | "outputs": [
138 | {
139 | "name": "stdout",
140 | "output_type": "stream",
141 | "text": [
142 | "[4, 16, 36]\n"
143 | ]
144 | }
145 | ],
146 | "source": [
147 | "numbers = [1,2,3,4,5,6]\n",
148 | "even_squares = []\n",
149 | "\n",
150 | "for x in numbers: #go through all entries in numbers\n",
151 | " if x % 2 == 0: #if the entry is even, square it and append it to the new list\n",
152 | " even_squares.append(x**2)\n",
153 | "\n",
154 | "print(even_squares) "
155 | ]
156 | },
157 | {
158 | "cell_type": "markdown",
159 | "metadata": {},
160 | "source": [
161 | "We can do the same in a list comprehension:"
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": 9,
167 | "metadata": {},
168 | "outputs": [
169 | {
170 | "data": {
171 | "text/plain": [
172 | "[4, 16, 36]"
173 | ]
174 | },
175 | "execution_count": 9,
176 | "metadata": {},
177 | "output_type": "execute_result"
178 | }
179 | ],
180 | "source": [
181 | "even_squares = [x**2 for x in numbers if x % 2 == 0]\n",
182 | "even_squares"
183 | ]
184 | },
185 | {
186 | "cell_type": "markdown",
187 | "metadata": {},
188 | "source": [
189 | "If we instead frontload the condition we can also add an `else`.\n",
190 | "\n",
191 | "Here we square the even numbers and add the odd numbers to themselves:"
192 | ]
193 | },
194 | {
195 | "cell_type": "code",
196 | "execution_count": 12,
197 | "metadata": {},
198 | "outputs": [
199 | {
200 | "name": "stdout",
201 | "output_type": "stream",
202 | "text": [
203 | "[1, 2, 3, 4, 5, 6]\n",
204 | "[2, 4, 6, 16, 10, 36]\n"
205 | ]
206 | }
207 | ],
208 | "source": [
209 | "print(numbers)\n",
210 | "even_square_odd_one = [x**2 if x % 2 == 0 else x+x for x in numbers]\n",
211 | "print(even_square_odd_one)"
212 | ]
213 | },
214 | {
215 | "cell_type": "markdown",
216 | "metadata": {},
217 | "source": [
218 | "## Exercise\n",
219 | "\n",
220 | "1. Write a list comprehension that adds 5 to every number in `numbers`."
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "execution_count": null,
226 | "metadata": {},
227 | "outputs": [],
228 | "source": [
229 | "numbers = [0, 91, 69, -15, -54, 58, -58, 62, 4, 54, 53, -43, -87, 28, 23, -21, 69, -17, -60, 21]"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": null,
235 | "metadata": {},
236 | "outputs": [],
237 | "source": [
238 | "#your code here"
239 | ]
240 | },
241 | {
242 | "cell_type": "markdown",
243 | "metadata": {},
244 | "source": [
245 | "\n",
246 | "\n",
247 | "2. Write a list comprehension that goes through the list `numbers` and replaces every negative number with 0. The result should be saved in a new list called `positive_numbers`."
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "execution_count": null,
253 | "metadata": {},
254 | "outputs": [],
255 | "source": [
256 | "#your code here"
257 | ]
258 | },
259 | {
260 | "cell_type": "markdown",
261 | "metadata": {},
262 | "source": [
263 | "We can use `random.randint()` to generate a pseudo-random integer in Python. It works like this:"
264 | ]
265 | },
266 | {
267 | "cell_type": "code",
268 | "execution_count": 56,
269 | "metadata": {},
270 | "outputs": [],
271 | "source": [
272 | "import random"
273 | ]
274 | },
275 | {
276 | "cell_type": "code",
277 | "execution_count": 61,
278 | "metadata": {},
279 | "outputs": [
280 | {
281 | "data": {
282 | "text/plain": [
283 | "-24"
284 | ]
285 | },
286 | "execution_count": 61,
287 | "metadata": {},
288 | "output_type": "execute_result"
289 | }
290 | ],
291 | "source": [
292 | "#run this cell a couple of times to create some different random numbers\n",
293 | "random.randint(-100,100)"
294 | ]
295 | },
296 | {
297 | "cell_type": "markdown",
298 | "metadata": {},
299 | "source": [
300 | "3. Write a list comprehension that creates 20 random integers. You can use `range` to define how many times you want the explicit loop to run."
301 | ]
302 | },
303 | {
304 | "cell_type": "code",
305 | "execution_count": 58,
306 | "metadata": {},
307 | "outputs": [
308 | {
309 | "name": "stdout",
310 | "output_type": "stream",
311 | "text": [
312 | "[0, 91, 69, -15, -54, 58, -58, 62, 4, 54, 53, -43, -87, 28, 23, -21, 69, -17, -60, 21]\n"
313 | ]
314 | }
315 | ],
316 | "source": [
317 | "#your code here"
318 | ]
319 | },
320 | {
321 | "cell_type": "markdown",
322 | "metadata": {},
323 | "source": [
324 | "## Practical applications\n",
325 | "\n",
326 | "### Finding all occurrences of a certain item in a list.\n",
327 | "\n",
328 | "I want to know all the indices of 'red' in the 'colors' list."
329 | ]
330 | },
331 | {
332 | "cell_type": "code",
333 | "execution_count": 19,
334 | "metadata": {},
335 | "outputs": [],
336 | "source": [
337 | "colors = ['red', 'green', 'orange', 'yellow', 'black', 'green', 'red', 'blue', 'purple', 'yellow', 'red']"
338 | ]
339 | },
340 | {
341 | "cell_type": "code",
342 | "execution_count": 20,
343 | "metadata": {},
344 | "outputs": [
345 | {
346 | "data": {
347 | "text/plain": [
348 | "[0, 6, 10]"
349 | ]
350 | },
351 | "execution_count": 20,
352 | "metadata": {},
353 | "output_type": "execute_result"
354 | }
355 | ],
356 | "source": [
357 | "indices = [i for i, x in enumerate(colors) if x == \"red\"]\n",
358 | "indices"
359 | ]
360 | },
361 | {
362 | "cell_type": "markdown",
363 | "metadata": {},
364 | "source": [
365 | "### Tallying list items\n",
366 | "\n",
367 | "Finding out how many times each item in a list occurs is also referred to as tallying. \n",
368 | "\n",
369 | "We want to know how many times each of the colors occurs in 'colors'. Or in other words, we want to use `.count()` for each unique item in the list (how many times do we see 'red', 'yellow', 'purple', etc.)."
370 | ]
371 | },
372 | {
373 | "cell_type": "code",
374 | "execution_count": 41,
375 | "metadata": {},
376 | "outputs": [
377 | {
378 | "data": {
379 | "text/plain": [
380 | "[('green', 2),\n",
381 | " ('black', 1),\n",
382 | " ('purple', 1),\n",
383 | " ('red', 3),\n",
384 | " ('yellow', 2),\n",
385 | " ('orange', 1),\n",
386 | " ('blue', 1)]"
387 | ]
388 | },
389 | "execution_count": 41,
390 | "metadata": {},
391 | "output_type": "execute_result"
392 | }
393 | ],
394 | "source": [
395 | "color_counts = [(x,colors.count(x)) for x in set(colors)]\n",
396 | "color_counts"
397 | ]
398 | },
399 | {
400 | "cell_type": "markdown",
401 | "metadata": {},
402 | "source": [
403 | "We can then reverse the order to have the count first:"
404 | ]
405 | },
406 | {
407 | "cell_type": "code",
408 | "execution_count": 42,
409 | "metadata": {},
410 | "outputs": [
411 | {
412 | "data": {
413 | "text/plain": [
414 | "[(2, 'green'),\n",
415 | " (1, 'black'),\n",
416 | " (1, 'purple'),\n",
417 | " (3, 'red'),\n",
418 | " (2, 'yellow'),\n",
419 | " (1, 'orange'),\n",
420 | " (1, 'blue')]"
421 | ]
422 | },
423 | "execution_count": 42,
424 | "metadata": {},
425 | "output_type": "execute_result"
426 | }
427 | ],
428 | "source": [
429 | "color_counts = [(colors.count(x),x) for x in set(colors)]\n",
430 | "color_counts"
431 | ]
432 | },
433 | {
434 | "cell_type": "markdown",
435 | "metadata": {},
436 | "source": [
437 | "Which will help us when applying `sorted` to the list of tuples to get the most common entry on top:"
438 | ]
439 | },
440 | {
441 | "cell_type": "code",
442 | "execution_count": 43,
443 | "metadata": {},
444 | "outputs": [
445 | {
446 | "data": {
447 | "text/plain": [
448 | "[(3, 'red'),\n",
449 | " (2, 'yellow'),\n",
450 | " (2, 'green'),\n",
451 | " (1, 'purple'),\n",
452 | " (1, 'orange'),\n",
453 | " (1, 'blue'),\n",
454 | " (1, 'black')]"
455 | ]
456 | },
457 | "execution_count": 43,
458 | "metadata": {},
459 | "output_type": "execute_result"
460 | }
461 | ],
462 | "source": [
463 | "color_counts = sorted([(colors.count(x),x) for x in set(colors)], reverse=True)\n",
464 | "color_counts"
465 | ]
466 | },
467 | {
468 | "cell_type": "markdown",
469 | "metadata": {},
470 | "source": [
471 | "### Creating dictionaries from existing lists"
472 | ]
473 | },
474 | {
475 | "cell_type": "markdown",
476 | "metadata": {},
477 | "source": [
478 | "I have these two lists and I want to make them into a dictionary where one list is the key (the city name) and the other is the value (population). But I don't want to type out everything again."
479 | ]
480 | },
481 | {
482 | "cell_type": "code",
483 | "execution_count": 52,
484 | "metadata": {},
485 | "outputs": [],
486 | "source": [
487 | "cities = ['Tokyo', 'Berlin', 'New York', 'Copenhagen', 'Los Angeles']\n",
488 | "population = [37115035, 3576873, 8260000, 1391205, 3895836]"
489 | ]
490 | },
491 | {
492 | "cell_type": "code",
493 | "execution_count": 53,
494 | "metadata": {},
495 | "outputs": [
496 | {
497 | "data": {
498 | "text/plain": [
499 | "[{'Tokyo': 37115035},\n",
500 | " {'Berlin': 3576873},\n",
501 | " {'New York': 8260000},\n",
502 | " {'Copenhagen': 1391205},\n",
503 | " {'Los Angeles': 3895836}]"
504 | ]
505 | },
506 | "execution_count": 53,
507 | "metadata": {},
508 | "output_type": "execute_result"
509 | }
510 | ],
511 | "source": [
512 | "city_population = [{city : pop} for city, pop in zip(cities, population)]\n",
513 | "\n",
514 | "city_population"
515 | ]
516 | },
517 | {
518 | "cell_type": "code",
519 | "execution_count": 54,
520 | "metadata": {},
521 | "outputs": [
522 | {
523 | "data": {
524 | "text/plain": [
525 | "[('Tokyo', 37115035),\n",
526 | " ('Berlin', 3576873),\n",
527 | " ('New York', 8260000),\n",
528 | " ('Copenhagen', 1391205),\n",
529 | " ('Los Angeles', 3895836)]"
530 | ]
531 | },
532 | "execution_count": 54,
533 | "metadata": {},
534 | "output_type": "execute_result"
535 | }
536 | ],
537 | "source": [
538 | "bla = list(zip(cities,population))\n",
539 | "bla"
540 | ]
541 | },
542 | {
543 | "cell_type": "code",
544 | "execution_count": 55,
545 | "metadata": {},
546 | "outputs": [
547 | {
548 | "name": "stdout",
549 | "output_type": "stream",
550 | "text": [
551 | "Tokyo : 37115035\n",
552 | "Berlin : 3576873\n",
553 | "New York : 8260000\n",
554 | "Copenhagen : 1391205\n",
555 | "Los Angeles : 3895836\n"
556 | ]
557 | }
558 | ],
559 | "source": [
560 | "for city, pop in zip(cities,population):\n",
561 | " print(city, \":\", pop)"
562 | ]
563 | },
564 | {
565 | "cell_type": "code",
566 | "execution_count": null,
567 | "metadata": {},
568 | "outputs": [],
569 | "source": []
570 | }
571 | ],
572 | "metadata": {
573 | "kernelspec": {
574 | "display_name": "Python 3",
575 | "language": "python",
576 | "name": "python3"
577 | },
578 | "language_info": {
579 | "codemirror_mode": {
580 | "name": "ipython",
581 | "version": 3
582 | },
583 | "file_extension": ".py",
584 | "mimetype": "text/x-python",
585 | "name": "python",
586 | "nbconvert_exporter": "python",
587 | "pygments_lexer": "ipython3",
588 | "version": "3.11.1"
589 | }
590 | },
591 | "nbformat": 4,
592 | "nbformat_minor": 2
593 | }
594 |
--------------------------------------------------------------------------------
/Extra/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 |
6 |
7 | notebook | content
8 | ---- | ------
9 | [List_comprehension.ipynb](List_comprehension.ipynb) [](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Extra/List_comprehension.ipynb) | List Comprehension
10 |
--------------------------------------------------------------------------------
/Iterables/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 |
6 |
7 | notebook | content
8 | ---- | ------
9 | [iterables.ipynb](iterables.ipynb) [](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Iterables/iterables.ipynb) | Data structures
10 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 pythontsunami
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Loops/Loops.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "KFib138AzHLW"
7 | },
8 | "source": [
9 | "
\n",
10 | "\n",
11 | "
\n"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {
17 | "id": "jzzrpQS-zxnF"
18 | },
19 | "source": [
20 | "# Loops"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {
26 | "id": "HrDeygtfzx7P"
27 | },
28 | "source": [
29 | "Consider the code below: It prints the numbers 1 through 10 using what we've learned so far. \n",
30 | "This notebook is about how to do the same task less tediously. **Loops** are a way to repeatedly execute some code, in a simple and succinct way."
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": null,
36 | "metadata": {
37 | "id": "tjj39-AazwAb"
38 | },
39 | "outputs": [],
40 | "source": [
41 | "print(1)\n",
42 | "print(2)\n",
43 | "print(3)\n",
44 | "print(4)\n",
45 | "print(5)\n",
46 | "print(6)\n",
47 | "print(7)\n",
48 | "print(8)\n",
49 | "print(9)\n",
50 | "print(10)"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {
56 | "id": "VBLCVfQoseEh"
57 | },
58 | "source": [
59 | "Indeed, we can use a for loop to do the same in only two lines:\n",
60 | "\n",
61 | "```python\n",
62 | "#pseudo code\n",
63 | "for number in number_list:\n",
64 | " print number\n",
65 | "```\n"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "metadata": {
72 | "id": "aCmVJXXBzHLn"
73 | },
74 | "outputs": [],
75 | "source": [
76 | "number_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n",
77 | "\n",
78 | "#the variable number is initialized inside the for statement.\n",
79 | "#You do not need to declare it before. It will continue to exist after the loop.\n",
80 | "for number in number_list:\n",
81 | " print(number)\n",
82 | "\n",
83 | "print(\"Now we are done.\")\n",
84 | "print(\"What is number now?\", number)"
85 | ]
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {
90 | "id": "DGXTd_1SseEj"
91 | },
92 | "source": [
93 | "Loops are part of flow control. The code inside the loop is (usually) executed several times, whereas lines such as above are only executed one time. The program also needs to know when the loop is over and we return to 'linear' flow. Like in `if` blocks, this is made clear with indentation."
94 | ]
95 | },
96 | {
97 | "cell_type": "markdown",
98 | "metadata": {
99 | "id": "EClTb9Qy0i8U"
100 | },
101 | "source": [
102 | "## **`for`** loops\n",
103 | "\n",
104 | "In Python, **`for`** loops are written like this:\n",
105 | "\n",
106 | "```python\n",
107 | "for element in sequence:\n",
108 | " this code is executed inside the loop\n",
109 | " and this code too\n",
110 | " \n",
111 | "now we are not in the loop anymore \n",
112 | "```\n",
113 | "The idea is that we go through our sequence step by step and perform a certain action (here `print()` on each element in the sequence).\n",
114 | "\n",
115 | "- ``element`` is a variable and can be called whatever you want.\n",
116 | "\n",
117 | "- ``sequence`` is a sequence we iterate over. It is some kind of collection of items, for instance: a `str` of characters, a `range`, a list etc. It is also often called an iterable.\n",
118 | "\n",
119 | "Note that the body of the loop is **indented**. This is important for [**flow**](https://colab.research.google.com/drive/11xJCNmKS1pFDxEjnYhJruDYAOGEbb3RK#scrollTo=7PmpZ4oTyPHw). When we write a command on the same indentation level as the initial `for` statement, the loop is over. This will be executed after the loop."
120 | ]
121 | },
122 | {
123 | "cell_type": "markdown",
124 | "metadata": {
125 | "id": "Zlj0LILxu9TA"
126 | },
127 | "source": [
128 | "### `for` loops using `for ... in`\n",
129 | "\n",
130 | "You go through the contents of any iterable such as a list or a dictionary using `for ... in` like shown before with the `number_list`:"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": null,
136 | "metadata": {
137 | "id": "jHskWVZ-vVy-"
138 | },
139 | "outputs": [],
140 | "source": [
141 | "#try it out!\n",
142 | "countries = ['Denmark', 'Spain', 'Italy']\n",
143 | "\n",
144 | "# iterate over the countries as we did with the list of numbers above:\n",
145 | "for country in countries:\n",
146 | " print(country)"
147 | ]
148 | },
149 | {
150 | "cell_type": "markdown",
151 | "metadata": {
152 | "id": "gSAzxl6bvdoB"
153 | },
154 | "source": [
155 | "The `country` part is about how we want to refer to the element we are looking at right now. You can freely choose this variable name."
156 | ]
157 | },
158 | {
159 | "cell_type": "markdown",
160 | "metadata": {
161 | "id": "rP62SBxWzHLw"
162 | },
163 | "source": [
164 | "### `for` loops using `range()`\n",
165 | "\n",
166 | "Instead of writing out a list with all the numerical values we want to go through, there is a quicker way. We can create it using `range()`:"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": null,
172 | "metadata": {
173 | "id": "aI1zSyEw1Tvk"
174 | },
175 | "outputs": [],
176 | "source": [
177 | "for number in range(1, 11):\n",
178 | " print(number)"
179 | ]
180 | },
181 | {
182 | "cell_type": "markdown",
183 | "metadata": {
184 | "id": "ZCj6EX711bw1"
185 | },
186 | "source": [
187 | "The [**`range()`**](https://docs.python.org/3/library/functions.html#func-range) function returns a sequence of numbers, starting from 0 by default, and increments by 1 by default, and stops at a specified number (which is not included in the range).\n",
188 | "\n",
189 | "Based on what we learned so far you might think that it creates a list, but it does **not**. In fact, range **does not do anything** by itself, but can be used inside a for loop to create the sequence to loop over.\n",
190 | "\n",
191 | "> The syntax is: `range(start, stop, step)`\n",
192 | "\n",
193 | "The *step* parameter tells the function how many steps to skip and which direction to count (**`+`** for **up** and **`-`** for **down**).\n",
194 | "\n",
195 | "Examples:\n",
196 | "\n",
197 | "- `range(8)` gives you integers from 0 through 7.\n",
198 | "\n",
199 | "- `range(2, 9)` will give you integers from 2 to 8.\n",
200 | "\n",
201 | "- `range(10, 20, 2)` will give you even numbers from 10 to 18. Remember, the upper limit of the range is excluded!\n",
202 | "\n",
203 | "- `range(9, 0, -1)` will start from 9 and give you integers down to 1.\n"
204 | ]
205 | },
206 | {
207 | "cell_type": "code",
208 | "execution_count": null,
209 | "metadata": {
210 | "id": "dI_lYduNseEn"
211 | },
212 | "outputs": [],
213 | "source": [
214 | "#try it out!\n"
215 | ]
216 | },
217 | {
218 | "cell_type": "markdown",
219 | "metadata": {
220 | "id": "5nylK238seEn"
221 | },
222 | "source": [
223 | "### `for` loops using `enumerate()`\n",
224 | "\n",
225 | "Another useful function to know for `for` loops is `enumerate`. Like its name hints, `enumerate` helps us to *enumerate* the contents of an iterable.\n",
226 | "\n",
227 | "The difference compared to `for ... in` is that `enumerate` will also tell us the position of an element in the iterable:"
228 | ]
229 | },
230 | {
231 | "cell_type": "code",
232 | "execution_count": null,
233 | "metadata": {
234 | "id": "LzVKG8MTseEo"
235 | },
236 | "outputs": [],
237 | "source": [
238 | "# get both items and their position\n",
239 | "for index, country in enumerate(countries):\n",
240 | " print(\"My number\" + str(index) + \" favorite country is: \" + country)"
241 | ]
242 | },
243 | {
244 | "cell_type": "markdown",
245 | "metadata": {
246 | "id": "o42yFkWx4WyO"
247 | },
248 | "source": [
249 | "# Exercise 1\n",
250 | "\n",
251 | "_~ 20 minutes_"
252 | ]
253 | },
254 | {
255 | "cell_type": "markdown",
256 | "metadata": {
257 | "id": "xqhI0vljzHL0"
258 | },
259 | "source": [
260 | "**a.** Use a for loop to iterate over `range(4)`. Which numbers does it produce?"
261 | ]
262 | },
263 | {
264 | "cell_type": "code",
265 | "execution_count": null,
266 | "metadata": {
267 | "id": "amW1Ja7kzHL0"
268 | },
269 | "outputs": [],
270 | "source": [
271 | "# your code goes here\n"
272 | ]
273 | },
274 | {
275 | "cell_type": "markdown",
276 | "metadata": {
277 | "id": "pHR_ctAfzHL0"
278 | },
279 | "source": [
280 | "**b.** Now write a for loop using `range` that prints the numbers 1 to 4."
281 | ]
282 | },
283 | {
284 | "cell_type": "code",
285 | "execution_count": null,
286 | "metadata": {
287 | "id": "nJj4ebrLzHL1"
288 | },
289 | "outputs": [],
290 | "source": [
291 | "# your code goes here"
292 | ]
293 | },
294 | {
295 | "cell_type": "markdown",
296 | "metadata": {
297 | "id": "1453WLTVzHL1"
298 | },
299 | "source": [
300 | "**c.** What numbers do you get when you use the following range inside a for loop? Write out the loop to check.\n",
301 | "\n",
302 | "`range(12,0,-3)`"
303 | ]
304 | },
305 | {
306 | "cell_type": "code",
307 | "execution_count": null,
308 | "metadata": {
309 | "id": "B1LU-GHpzHL2"
310 | },
311 | "outputs": [],
312 | "source": [
313 | "# your code goes here"
314 | ]
315 | },
316 | {
317 | "cell_type": "markdown",
318 | "metadata": {
319 | "id": "YqDcWibUzHL2"
320 | },
321 | "source": [
322 | "**d.** Loop through numbers 1-20:\n",
323 | "- If the number is 4 or 13, print \"x is unlucky\"\n",
324 | "- Otherwise:\n",
325 | " - If the number is even, print \"x is even\"\n",
326 | " - If the number is odd, print \"x is odd\"\n",
327 | "\n",
328 | "> check [`Conditions.ipynb`](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/fall2021/Conditionals/Conditions.ipynb)"
329 | ]
330 | },
331 | {
332 | "cell_type": "code",
333 | "execution_count": null,
334 | "metadata": {
335 | "id": "AC4dgNEPzHL3"
336 | },
337 | "outputs": [],
338 | "source": []
339 | },
340 | {
341 | "cell_type": "markdown",
342 | "metadata": {
343 | "id": "LwSW0hrCseEs"
344 | },
345 | "source": [
346 | "**e.** In the code below we're counting from 0 as python usually does. Can you fix so that it starts writing from 1?\n",
347 | "\n",
348 | "```python\n",
349 | "# get both items and their position\n",
350 | "for index, country in enumerate(countries):\n",
351 | " print(\"My number\" + str(index) + \" favorite country is: \" + country)\n",
352 | "```\n"
353 | ]
354 | },
355 | {
356 | "cell_type": "code",
357 | "execution_count": null,
358 | "metadata": {
359 | "id": "fuNGQzXLseEs"
360 | },
361 | "outputs": [],
362 | "source": []
363 | },
364 | {
365 | "cell_type": "markdown",
366 | "metadata": {
367 | "id": "4NJV8SnW2dcf"
368 | },
369 | "source": [
370 | "## **`while`** loops\n",
371 | "\n",
372 | "We can also iterate over a sequence using a **`while`** loop, which has a different format:\n",
373 | "\n",
374 | "```python\n",
375 | "while condition:\n",
376 | " expression\n",
377 | "```\n",
378 | "`while` loops continue to execute while a certain condition is `True`, and will end when it becomes `False`.\n",
379 | "\n",
380 | "```python\n",
381 | "user_response = \"Something...\"\n",
382 | "while user_response != \"please\":\n",
383 | " user_response = input(\"Ah ah ah, you didn't say the magic word: \")\n",
384 | "```\n",
385 | "\n",
386 | "`while` loops require more careful setup than `for` loops, since you have to specify the termination conditions manually.\n",
387 | "\n",
388 | "Be careful! If the condition doesn't become `False` at some point, your loop will continue ***forever***!"
389 | ]
390 | },
391 | {
392 | "cell_type": "code",
393 | "execution_count": null,
394 | "metadata": {
395 | "id": "dKlSQ64XzHL4"
396 | },
397 | "outputs": [],
398 | "source": [
399 | "my_float = 50.0\n",
400 | "\n",
401 | "while my_float > 1:\n",
402 | " my_float = my_float / 4\n",
403 | " print(my_float)"
404 | ]
405 | },
406 | {
407 | "cell_type": "markdown",
408 | "metadata": {
409 | "id": "xKOz5qSs5bfV"
410 | },
411 | "source": [
412 | "# Exercise 2\n",
413 | "\n",
414 | "_~15 minutes_\n",
415 | "\n",
416 | "**a.** What does the following loop do?\n",
417 | "```python\n",
418 | " i = 1\n",
419 | " while i < 5:\n",
420 | " i + i\n",
421 | " print(i)\n",
422 | "```\n",
423 | " \n",
424 | "> Hint: is the value of `i` changing?"
425 | ]
426 | },
427 | {
428 | "cell_type": "code",
429 | "execution_count": null,
430 | "metadata": {
431 | "id": "MEX_6xXPzHL4"
432 | },
433 | "outputs": [],
434 | "source": [
435 | "# your code goes here"
436 | ]
437 | },
438 | {
439 | "cell_type": "markdown",
440 | "metadata": {
441 | "id": "NlbI1VH4zHL5"
442 | },
443 | "source": [
444 | "**b.** What does the following loop do?\n",
445 | "```python\n",
446 | " i = 0\n",
447 | " while i <= 5:\n",
448 | " i = i + 1\n",
449 | " print(i)\n",
450 | "```"
451 | ]
452 | },
453 | {
454 | "cell_type": "code",
455 | "execution_count": null,
456 | "metadata": {
457 | "id": "iu3oul_BzHL5"
458 | },
459 | "outputs": [],
460 | "source": [
461 | "# your code goes here"
462 | ]
463 | },
464 | {
465 | "cell_type": "markdown",
466 | "metadata": {
467 | "id": "CYMshsgmzHL6"
468 | },
469 | "source": [
470 | "**c.** Fix the infinite loop below so that it doesn't run endlessly anymore:\n",
471 | "```python\n",
472 | " # this code runs forever...\n",
473 | " x = 0\n",
474 | " while x != 11:\n",
475 | " x += 2\n",
476 | " print(x)\n",
477 | "```"
478 | ]
479 | },
480 | {
481 | "cell_type": "code",
482 | "execution_count": null,
483 | "metadata": {
484 | "id": "aH1WA-K7zHL6"
485 | },
486 | "outputs": [],
487 | "source": [
488 | "# your code goes here"
489 | ]
490 | },
491 | {
492 | "cell_type": "markdown",
493 | "metadata": {
494 | "id": "XKETo3jK3aue"
495 | },
496 | "source": [
497 | "## Python loop control\n",
498 | "\n",
499 | "Controlled exit, skipping a block of code, or ignoring external factors that might influence your code, can be achieved with the Python statements: `break`, `continue`, and `pass`."
500 | ]
501 | },
502 | {
503 | "cell_type": "markdown",
504 | "metadata": {
505 | "id": "xFv803XuzHL7"
506 | },
507 | "source": [
508 | "### ***`break`*** statement\n",
509 | "\n",
510 | "The keyword `break` gives us the ability to exit out of a loop whenever we want, and can be used in both `while` and `for` loops.\n",
511 | "\n",
512 | "Example:\n",
513 | "\n",
514 | "``` python\n",
515 | "for letter in 'Python':\n",
516 | " if letter == 'h':\n",
517 | " break\n",
518 | " print('Current Letter:', letter)\n",
519 | "```"
520 | ]
521 | },
522 | {
523 | "cell_type": "markdown",
524 | "metadata": {
525 | "id": "Eff6JwP0zHL7"
526 | },
527 | "source": [
528 | "The `break` statement needs to be within the block of code under your loop statement, usually after a conditional `if` statement."
529 | ]
530 | },
531 | {
532 | "cell_type": "code",
533 | "execution_count": null,
534 | "metadata": {
535 | "id": "VsEuKOb731qP"
536 | },
537 | "outputs": [],
538 | "source": [
539 | "for letter in 'Python':\n",
540 | " if letter == 'h':\n",
541 | " break\n",
542 | " print('Current Letter :', letter)"
543 | ]
544 | },
545 | {
546 | "cell_type": "markdown",
547 | "metadata": {
548 | "id": "Uf24LfUJzHL8"
549 | },
550 | "source": [
551 | "### ***`continue`*** statement\n",
552 | "\n",
553 | "The [`continue`](https://docs.python.org/3/tutorial/controlflow.html#break-and-continue-statements-and-else-clauses-on-loops) statement in Python gives you the option to skip over the part of a loop where a condition is met, but to go on to complete the rest of the loop. That is, it disrupts the iteration of the loop that fulfills the condition and returns the control to the beginning of the loop. It works with both `while` and `for` loops.\n",
554 | "\n",
555 | "Example:\n",
556 | "\n",
557 | "``` python\n",
558 | "for letter in 'Python':\n",
559 | " if letter == 'h':\n",
560 | " continue\n",
561 | " print('Current Letter :', letter)\n",
562 | "```"
563 | ]
564 | },
565 | {
566 | "cell_type": "markdown",
567 | "metadata": {
568 | "id": "V3Bic6PVzHL9"
569 | },
570 | "source": [
571 | "The difference in using `continue` rather than `break` is that the loop will continue despite the disruption when the condition is met."
572 | ]
573 | },
574 | {
575 | "cell_type": "code",
576 | "execution_count": 1,
577 | "metadata": {
578 | "colab": {
579 | "base_uri": "https://localhost:8080/"
580 | },
581 | "id": "8DJUYVlNzHL9",
582 | "outputId": "0e7f26f8-3053-4c76-9ccf-e996895f3560"
583 | },
584 | "outputs": [
585 | {
586 | "name": "stdout",
587 | "output_type": "stream",
588 | "text": [
589 | "Current Letter : P\n",
590 | "Current Letter : y\n",
591 | "Current Letter : t\n",
592 | "Current Letter : o\n",
593 | "Current Letter : n\n"
594 | ]
595 | }
596 | ],
597 | "source": [
598 | "for letter in 'Python':\n",
599 | " if letter == 'h':\n",
600 | " continue\n",
601 | " print('Current Letter :', letter)"
602 | ]
603 | },
604 | {
605 | "cell_type": "markdown",
606 | "metadata": {
607 | "id": "n_HVORV8zHL9"
608 | },
609 | "source": [
610 | "### ***`pass`*** statement\n",
611 | "\n",
612 | "The [`pass`](https://docs.python.org/3/tutorial/controlflow.html#pass-statements) statement is used when a statement is required syntactically but you do not want any command or code to execute. It's a *null* operation.\n",
613 | "\n",
614 | "Now what does this mean? Because of flow control, statements like `if` and `for` need to be followed by an indented block of code or the program will crash. There can however be special situations where we want literally nothing to happen, or we don't know yet what should happen. Then we use `pass`.\n",
615 | "\n",
616 | "A common reason for this is that some operation should happen eventually but we haven't gotten around to implementing it yet.\n"
617 | ]
618 | },
619 | {
620 | "cell_type": "markdown",
621 | "metadata": {
622 | "id": "nLgJjFzzj1EH"
623 | },
624 | "source": [
625 | "Compare the output of this to the code block above where we used `continue`:"
626 | ]
627 | },
628 | {
629 | "cell_type": "code",
630 | "execution_count": 2,
631 | "metadata": {
632 | "colab": {
633 | "base_uri": "https://localhost:8080/"
634 | },
635 | "id": "JJ4O026WzHL-",
636 | "outputId": "8a1bf996-2aa8-4190-bb19-9f1984f09642"
637 | },
638 | "outputs": [
639 | {
640 | "name": "stdout",
641 | "output_type": "stream",
642 | "text": [
643 | "Current Letter : P\n",
644 | "Current Letter : y\n",
645 | "Current Letter : t\n",
646 | "Current Letter : h\n",
647 | "Current Letter : o\n",
648 | "Current Letter : n\n"
649 | ]
650 | }
651 | ],
652 | "source": [
653 | "for letter in 'Python':\n",
654 | " if letter == 'h':\n",
655 | " pass\n",
656 | " #perhaps in the future something special should happen when the letter is h\n",
657 | "\n",
658 | " print('Current Letter :', letter)"
659 | ]
660 | },
661 | {
662 | "cell_type": "markdown",
663 | "metadata": {
664 | "id": "FIEj-qc5zHL-"
665 | },
666 | "source": [
667 | "# Group Exercise \n",
668 | "\n",
669 | "In your group, take the next 10 mins to solve this exercise: \n",
670 | "\n",
671 | "Write a loop that:\n",
672 | "\n",
673 | "- iterates over each character in the string `\"I live in CPH, and I like it here.\"`;\n",
674 | "- for each character checks if it is a space;\n",
675 | "- if it is a space, then just continue with the loop;\n",
676 | "- if the character is not a space, do the following:\n",
677 | "- check if it is a comma `,` ;\n",
678 | "- if the character is a comma `,`, break the loop;\n",
679 | "- if the character is not a comma, print it."
680 | ]
681 | },
682 | {
683 | "cell_type": "code",
684 | "execution_count": null,
685 | "metadata": {
686 | "id": "XfAKZMP9seEy"
687 | },
688 | "outputs": [],
689 | "source": []
690 | }
691 | ],
692 | "metadata": {
693 | "colab": {
694 | "provenance": [],
695 | "toc_visible": true
696 | },
697 | "kernelspec": {
698 | "display_name": "Python 3 (ipykernel)",
699 | "language": "python",
700 | "name": "python3"
701 | },
702 | "language_info": {
703 | "codemirror_mode": {
704 | "name": "ipython",
705 | "version": 3
706 | },
707 | "file_extension": ".py",
708 | "mimetype": "text/x-python",
709 | "name": "python",
710 | "nbconvert_exporter": "python",
711 | "pygments_lexer": "ipython3",
712 | "version": "3.10.9"
713 | },
714 | "toc": {
715 | "base_numbering": 1,
716 | "nav_menu": {},
717 | "number_sections": true,
718 | "sideBar": true,
719 | "skip_h1_title": false,
720 | "title_cell": "Table of Contents",
721 | "title_sidebar": "Contents",
722 | "toc_cell": false,
723 | "toc_position": {},
724 | "toc_section_display": true,
725 | "toc_window_display": false
726 | },
727 | "toc-autonumbering": true
728 | },
729 | "nbformat": 4,
730 | "nbformat_minor": 1
731 | }
732 |
--------------------------------------------------------------------------------
/Loops/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 |
6 |
7 | notebook | content
8 | ---- | ------
9 | [Loops.ipynb](Loops.ipynb) [](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Loops/Loops.ipynb) | Loops
10 |
--------------------------------------------------------------------------------
/Pandas/IO_Pandas.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "
\n",
8 | "\n",
9 | "
\n"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {
15 | "slideshow": {
16 | "slide_type": "slide"
17 | }
18 | },
19 | "source": [
20 | "# Pandas: Working with Different File types"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {
26 | "slideshow": {
27 | "slide_type": "fragment"
28 | }
29 | },
30 | "source": [
31 | "The Pandas library offers a wide range of possibilities for creating, writing and reading files. There are two types of files that can be handled in Python, normal text files and binary files.\n",
32 | "\n",
33 | "\n",
34 | "In this notebook we will learn more about working with these different formats: CSV, Excel, JSON, HTML, SQL, Pickle, Matlab .mat, and HDF5 files.\n"
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {
40 | "slideshow": {
41 | "slide_type": "subslide"
42 | }
43 | },
44 | "source": [
45 | "### CSV (Comma-Separated Values) Files\n",
46 | "\n",
47 | "As we saw before, a CSV file is a plaintext file with a .csv extension that holds tabular data. This is one of the most popular file formats for storing large amounts of data. \n",
48 | "\n",
49 | "Each line of the file represents one record, and the fields are, by default, separated by commas, but you could change the separator to a semicolon, tab, space, or some other character. If the fields are labelled, the first line of the file (referred to as \"header\") will contain the field names.\n",
50 | "\n",
51 | "Example of CSV file:\n",
52 | "```\n",
53 | "month,height,weight\n",
54 | "Jan,1.2,76\n",
55 | "Feb,1.21,77\n",
56 | "March,1.21,76\n",
57 | "```"
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {
63 | "slideshow": {
64 | "slide_type": "fragment"
65 | }
66 | },
67 | "source": [
68 | "Previously we learnt that to read CSV files, python comes with a csv reader that works quite well.\n",
69 | "\n",
70 | "```python\n",
71 | "import csv\n",
72 | "\n",
73 | "with open('file.csv', 'r') as f:\n",
74 | " reader = csv.reader(f)\n",
75 | " header = next(reader)\n",
76 | " data = list(reader)\n",
77 | "```\n",
78 | "\n",
79 | "Once you have read the data, it can go to a DataFrame, for example:\n",
80 | "\n",
81 | "```python\n",
82 | "import pandas as pd\n",
83 | "\n",
84 | "df = pd.DataFrame(data=data, columns=header)\n",
85 | "```"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {
91 | "slideshow": {
92 | "slide_type": "fragment"
93 | }
94 | },
95 | "source": [
96 | "You can also use the Pandas csv read function `pandas.read_csv()`, which can get the data into a DataFrame. This is what we usually use.\n",
97 | "\n",
98 | "The major advantage of this function is that it has a lot of options and does good file format and data format inference.\n",
99 | "\n",
100 | "```python\n",
101 | "import pandas as pd\n",
102 | "\n",
103 | "df = pd.read_csv('file.csv')\n",
104 | "```\n",
105 | "\n",
106 | "The input `'file.csv'` can be any valid path, including URLs.\n",
107 | "\n",
108 | "You can read about all the options [here](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html).\n",
109 | "\n",
110 | "`read_csv` is accompained by the `to_csv` function, to write data from a `DataFrame` to disk in `csv`-format:\n",
111 | "\n",
112 | "```python\n",
113 | "df.to_csv('file.csv')\n",
114 | "```"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": null,
120 | "metadata": {
121 | "slideshow": {
122 | "slide_type": "fragment"
123 | }
124 | },
125 | "outputs": [],
126 | "source": [
127 | "# using the covid 19 data from before\n",
128 | "import pandas as pd\n",
129 | "df = pd.read_csv(\"https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/data.csv\", index_col='dateRep')\n",
130 | "sample = df.sample(10)\n",
131 | "sample"
132 | ]
133 | },
134 | {
135 | "cell_type": "markdown",
136 | "metadata": {},
137 | "source": [
138 | "Let us create some sample data, containing 10 entries."
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": null,
144 | "metadata": {},
145 | "outputs": [],
146 | "source": [
147 | "sample.to_csv('testdata.txt')\n",
148 | "sample.to_csv('testdata.csv')"
149 | ]
150 | },
151 | {
152 | "cell_type": "markdown",
153 | "metadata": {},
154 | "source": [
155 | "And look at the created text file: [testdata.csv](testdata.csv) (displayed nicely already, try the [testdata.txt](testdata.txt) file!)"
156 | ]
157 | },
158 | {
159 | "cell_type": "markdown",
160 | "metadata": {
161 | "slideshow": {
162 | "slide_type": "subslide"
163 | }
164 | },
165 | "source": [
166 | "### JSON (Javascript Object Notation) Files\n",
167 | "\n",
168 | "The next file type we will look at is JSON. This is a popular format for transferring data over the web via APIs, and is also a plaintext file format.\n",
169 | "\n",
170 | "JSON is very similar to the text representation of a Python dictionary and lists:"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": null,
176 | "metadata": {
177 | "slideshow": {
178 | "slide_type": "fragment"
179 | }
180 | },
181 | "outputs": [],
182 | "source": [
183 | "data = \"\"\"\n",
184 | "{\n",
185 | "\"day\": \"Saturday\",\n",
186 | "\"week\": 3,\n",
187 | "\"isSunny\": true,\n",
188 | "\"goals\": [\"eat breakfast\", \"write a book\", \"eat lunch\"]\n",
189 | "}\n",
190 | "\"\"\"\n",
191 | "print(data)"
192 | ]
193 | },
194 | {
195 | "cell_type": "markdown",
196 | "metadata": {
197 | "slideshow": {
198 | "slide_type": "fragment"
199 | }
200 | },
201 | "source": [
202 | "The main downside with **hand-writing** JSON is that it is very picky about getting everything right. Even though it's very readable, it should not be considered human writable."
203 | ]
204 | },
205 | {
206 | "cell_type": "markdown",
207 | "metadata": {
208 | "slideshow": {
209 | "slide_type": "fragment"
210 | }
211 | },
212 | "source": [
213 | "Python and Pandas work well with JSON files, as Python's json library offers built-in support for them.\n",
214 | "Tabular data can be stored in JSON in a variety of ways, called \"orientations\".\n",
215 | "\n",
216 | "- `'split'` : dict like {'index' -> [index], 'columns' -> [columns], 'data' -> [values]}\n",
217 | "- `'records'` : list like [{column -> value}, ... , {column -> value}]\n",
218 | "- `'index'` : dict like {index -> {column -> value}}\n",
219 | "- `'columns'` : dict like {column -> {index -> value}}\n",
220 | "- `'values'` : just the values array\n",
221 | "- `'table'` : dict like {'schema': {schema}, 'data': {data}}"
222 | ]
223 | },
224 | {
225 | "cell_type": "markdown",
226 | "metadata": {
227 | "slideshow": {
228 | "slide_type": "fragment"
229 | }
230 | },
231 | "source": [
232 | "You can save the data from your DataFrame to a JSON file with `to_json()` function:\n",
233 | "\n",
234 | "```python\n",
235 | "df.to_json('data.json', orient='index')\n",
236 | "```"
237 | ]
238 | },
239 | {
240 | "cell_type": "markdown",
241 | "metadata": {
242 | "slideshow": {
243 | "slide_type": "fragment"
244 | }
245 | },
246 | "source": [
247 | "You can also load the data from a JSON file with `read_json()`.\n",
248 | "\n",
249 | "```python\n",
250 | "df = pd.read_json('data.json', orient='index')\n",
251 | "```\n",
252 | "\n",
253 | "In this case, the *orient* parameter is very important because it specifies how Pandas understands the structure of the file."
254 | ]
255 | },
256 | {
257 | "cell_type": "markdown",
258 | "metadata": {
259 | "slideshow": {
260 | "slide_type": "fragment"
261 | }
262 | },
263 | "source": [
264 | "Alternatively, you can use the **json module** to load (read) and dump (write) JSON files.\n",
265 | "This module has 4 main functions:\n",
266 | " \n",
267 | "| function | read/write | file/string |\n",
268 | "| :---: | :----: | :-----: |\n",
269 | "| load() | read | file |\n",
270 | "| dump() | write | file |\n",
271 | "| loads() | read | string |\n",
272 | "| dumps() | write | string |\n",
273 | "\n",
274 | "\n",
275 | "To read the data example we created above, which means converting from JSON to Python:\n",
276 | "\n",
277 | "```python\n",
278 | "import json\n",
279 | "\n",
280 | "json.loads(data)\n",
281 | "```\n",
282 | "\n",
283 | "And to convert a Python object to JSON:\n",
284 | "\n",
285 | "```python\n",
286 | "import json\n",
287 | "\n",
288 | "json.dumps(data)\n",
289 | "```"
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": null,
295 | "metadata": {
296 | "slideshow": {
297 | "slide_type": "fragment"
298 | }
299 | },
300 | "outputs": [],
301 | "source": [
302 | "sample_json_string = sample.to_json()\n",
303 | "sample_json_string"
304 | ]
305 | },
306 | {
307 | "cell_type": "code",
308 | "execution_count": null,
309 | "metadata": {},
310 | "outputs": [],
311 | "source": [
312 | "import json\n",
313 | "sample_json_dict = json.loads(sample_json_string)\n",
314 | "sample_json_dict"
315 | ]
316 | },
317 | {
318 | "cell_type": "code",
319 | "execution_count": null,
320 | "metadata": {},
321 | "outputs": [],
322 | "source": [
323 | "from pprint import pprint\n",
324 | "pprint(sample_json_dict)"
325 | ]
326 | },
327 | {
328 | "cell_type": "code",
329 | "execution_count": null,
330 | "metadata": {},
331 | "outputs": [],
332 | "source": [
333 | "sample.to_json('sample_data_json.txt')\n",
334 | "sample.to_json('sample_data.json')"
335 | ]
336 | },
337 | {
338 | "cell_type": "markdown",
339 | "metadata": {},
340 | "source": [
341 | "And have a look [sample_data.json](sample_data.json) (or at the `txt` file - [sample_data_json.txt](sample_data_json.txt))"
342 | ]
343 | },
344 | {
345 | "cell_type": "markdown",
346 | "metadata": {
347 | "slideshow": {
348 | "slide_type": "subslide"
349 | }
350 | },
351 | "source": [
352 | "### HTML Files"
353 | ]
354 | },
355 | {
356 | "cell_type": "markdown",
357 | "metadata": {
358 | "slideshow": {
359 | "slide_type": "fragment"
360 | }
361 | },
362 | "source": [
363 | "An HTML file is a plaintext file that uses hypertext markup language to help browsers render web pages. These files carry the extension *.html* or *.htm*, and in order to work with them, you will need to install an HTML library like **lxml** or **html5lib**.\n",
364 | "\n",
365 | "Once you have these libraries, you can read and write HTML tables directly with Pandas.\n",
366 | "\n",
367 | "\n",
368 | "\n",
369 | "You can save your DataFrame as an HTML file with `to_html()`:\n",
370 | "\n",
371 | "```python\n",
372 | "df = pd.DataFrame(data=data).T\n",
373 | "df.to_html('data.html')\n",
374 | "```"
375 | ]
376 | },
377 | {
378 | "cell_type": "code",
379 | "execution_count": null,
380 | "metadata": {
381 | "slideshow": {
382 | "slide_type": "fragment"
383 | }
384 | },
385 | "outputs": [],
386 | "source": [
387 | "sample.to_html('sample_data_html.txt')\n",
388 | "sample.to_html('sample_data.html')"
389 | ]
390 | },
391 | {
392 | "cell_type": "markdown",
393 | "metadata": {},
394 | "source": [
395 | "And have a look [sample_data.html](sample_data.html) (or at the `txt` file - [sample_data_html.txt](sample_data_html.txt)) "
396 | ]
397 | },
398 | {
399 | "cell_type": "markdown",
400 | "metadata": {
401 | "slideshow": {
402 | "slide_type": "slide"
403 | }
404 | },
405 | "source": [
406 | "## Binary Files\n",
407 | "\n",
408 | "In binary files, there is no terminator for a line and the data is stored after converting it into machine-understandable binary language. Unlike text files, binary files are not human readable. This means that if you try to open them in a text editor, they will either not open or will show the data in an unrecognizable format.\n",
409 | "\n",
410 | "Without documentation, proper software, and version management, these files can be difficult to work with.\n",
411 | "\n",
412 | "Below, we show you a very simple example of how you could read and write to a binary file."
413 | ]
414 | },
415 | {
416 | "cell_type": "markdown",
417 | "metadata": {
418 | "slideshow": {
419 | "slide_type": "subslide"
420 | }
421 | },
422 | "source": [
423 | "### Reading and Writing to a Binary File\n",
424 | "\n",
425 | "Opening a file in binary format is very similar to opening a text file, just add `\"b\"` to the mode parameter. For example, `\"rb\"` mode opens the file in binary format for reading only.\n",
426 | "\n",
427 | "The following example stores a list of numbers in a binary file:\n",
428 | "\n",
429 | "```python\n",
430 | "with open('binfile.bin', 'wb') as f:\n",
431 | " num = [5, 10, 15, 20, 25]\n",
432 | " arr = bytearray(num)\n",
433 | " f.write(arr)\n",
434 | "```\n",
435 | "\n",
436 | "The function `bytearray` converts the list into a byte representation."
437 | ]
438 | },
439 | {
440 | "cell_type": "markdown",
441 | "metadata": {
442 | "slideshow": {
443 | "slide_type": "fragment"
444 | }
445 | },
446 | "source": [
447 | "To read a binary file like the one shown above, the output of the `read()` function is turned back into a list:\n",
448 | "\n",
449 | "```python\n",
450 | "with open('binfile.bin', 'rb') as f:\n",
451 | " num = list(f.read())\n",
452 | "```"
453 | ]
454 | },
455 | {
456 | "cell_type": "markdown",
457 | "metadata": {
458 | "slideshow": {
459 | "slide_type": "fragment"
460 | }
461 | },
462 | "source": [
463 | "There are, of course, advantages to using binary files:\n",
464 | "\n",
465 | "- smaller file sizes\n",
466 | "- supports more features (compression, multiple dataset storage, self-description, etc)\n",
467 | "- quicker read/write times\n",
468 | "- entire ecosystems of supported software\n",
469 | "\n",
470 | "Due to this, the developers of Pandas have created a whole set of IO tools that allow not only to read/write text files, but also binary and even SQL file types.\n",
471 | "\n",
472 | "| format type | data | read | write |\n",
473 | "| :---: |:----:|:-----: | :---: |\n",
474 | "| binary | MS Excel | read_excel | to_excel |\n",
475 | "| binary | Python Pickle Format | read_pickle | to_pickle |\n",
476 | "| binary | HDF5 Format | read_hdf | to_hdf |\n",
477 | "| binary | SPSS | read_spss | |\n",
478 | "\n",
479 | "This table contains only a few examples but you can see all of the available IO tools [here](https://pandas.pydata.org/pandas-docs/dev/user_guide/io.html).\n",
480 | "\n",
481 | "In the next sections, we show you a few of these standards for storing tabular data in binary formats."
482 | ]
483 | },
484 | {
485 | "cell_type": "markdown",
486 | "metadata": {
487 | "slideshow": {
488 | "slide_type": "subslide"
489 | }
490 | },
491 | "source": [
492 | "### Excel Files\n",
493 | "\n",
494 | "Microsoft Excel is probably the most widely-used spreadsheet software, and even though it is a binary file format, you can read and write Excel files in Pandas, similar to CSV files.\n",
495 | "\n",
496 | "As an additional requirement, depending on the Excel version you will work with, you will need to install other Python packages first.\n",
497 | "\n",
498 | "- **xlrd** to read Excel files *.xls* (Excel 2003)\n",
499 | "\n",
500 | "- **openpyxl** to read/write *.xlsx* files (Excel 2007+)\n",
501 | "\n",
502 | "- **pyxlsb** to read binary Excel *.xlsb*\n",
503 | "\n",
504 | "\n",
505 | "You can install them using **pip** with a single command:\n",
506 | "```python\n",
507 | "pip install xlrd openpyxl pyxlsb\n",
508 | "```\n",
509 | "\n",
510 | "Or using Conda:\n",
511 | "```python\n",
512 | "conda install xlrd openpyxl pyxlsb\n",
513 | "```\n"
514 | ]
515 | },
516 | {
517 | "cell_type": "markdown",
518 | "metadata": {
519 | "slideshow": {
520 | "slide_type": "fragment"
521 | }
522 | },
523 | "source": [
524 | "Once you have installed the necessary packages, you can read an Excel file with read_excel():\n",
525 | "\n",
526 | "```python\n",
527 | "df = pd.read_excel('data.xlsx')\n",
528 | "```\n",
529 | "\n",
530 | "And save your DataFrame in an Excel file with to_excel():\n",
531 | "```python\n",
532 | "df.to_excel('data2.xlsx')\n",
533 | "``` "
534 | ]
535 | },
536 | {
537 | "cell_type": "code",
538 | "execution_count": null,
539 | "metadata": {
540 | "slideshow": {
541 | "slide_type": "fragment"
542 | }
543 | },
544 | "outputs": [],
545 | "source": [
546 | "sample.to_excel('sample_data.xlsx')"
547 | ]
548 | },
549 | {
550 | "cell_type": "markdown",
551 | "metadata": {},
552 | "source": [
553 | "As it is a binary file, you cannot open [sample_data.xlsx](sample_data.xlsx) in your browser."
554 | ]
555 | },
556 | {
557 | "cell_type": "markdown",
558 | "metadata": {
559 | "slideshow": {
560 | "slide_type": "subslide"
561 | }
562 | },
563 | "source": [
564 | "### Pickle Files\n",
565 | "\n",
566 | "Pickling is the act of converting Python objects into byte streams, and unpickling is the inverse process. This format makes it easy to store any Python objects as binary files and keep the data and hierarchy of the object.\n",
567 | "\n",
568 | "However, you should remember that they will only read back correctly if the Python version and package versions of the readers are the same as the writer.\n",
569 | "\n",
570 | "The pickle module has the same interface as the json module:\n",
571 | "\n",
572 | "| function | read/write | file/string |\n",
573 | "| :---: | :----: | :-----: |\n",
574 | "| load() | read | file |\n",
575 | "| dump() | write | file |\n",
576 | "| loads() | read | string |\n",
577 | "| dumps() | write | string |\n",
578 | "\n",
579 | "\n",
580 | "The following command pickles the DataFrame *df* and saves it as *data.pickle*:\n",
581 | "\n",
582 | "```python\n",
583 | "import pickle\n",
584 | "with open('data.pickle', 'wb') as f:\n",
585 | " pickle.dump(df, f)\n",
586 | "``` \n",
587 | "\n",
588 | "While, the following unpickles *data.pickle* and loads it as a pandas DataFrame:\n",
589 | "\n",
590 | "```python \n",
591 | "with open('data.pickle', 'rb') as f:\n",
592 | " data = pickle.load(f)\n",
593 | "```"
594 | ]
595 | },
596 | {
597 | "cell_type": "markdown",
598 | "metadata": {
599 | "slideshow": {
600 | "slide_type": "fragment"
601 | }
602 | },
603 | "source": [
604 | "You can also use the Pandas built-in functionality for dealing with pickle files.\n",
605 | "\n",
606 | "```python\n",
607 | "df.to_pickle('data.pickle') # Pickles df and saves it as data.pickle\n",
608 | "\n",
609 | "pd.read_pickle('data.pickle') # Unpickles and reads data.pickle\n",
610 | "```"
611 | ]
612 | },
613 | {
614 | "cell_type": "markdown",
615 | "metadata": {
616 | "slideshow": {
617 | "slide_type": "fragment"
618 | }
619 | },
620 | "source": [
621 | "**As a word of caution, you should always beware of loading pickles from untrusted sources. When you unpickle an untrustworthy file, it could execute arbitrary code on your machine, performing dangerous actions and exploiting your device.**"
622 | ]
623 | },
624 | {
625 | "cell_type": "code",
626 | "execution_count": null,
627 | "metadata": {
628 | "slideshow": {
629 | "slide_type": "fragment"
630 | }
631 | },
632 | "outputs": [],
633 | "source": [
634 | "sample.to_pickle('sample_data.pkl')"
635 | ]
636 | },
637 | {
638 | "cell_type": "markdown",
639 | "metadata": {
640 | "slideshow": {
641 | "slide_type": "subslide"
642 | }
643 | },
644 | "source": [
645 | "### HDF5\n",
646 | "\n",
647 | "HDF5 (Hierarchical Data Format 5) is a file format that has become quite popular. It can store a large amount of data in a single file, has compression features, and can store many datasets. HDF5 file format has a filesystem-like organization inside it, which means you can store the datasets in their own \"folder structure\" inside the file.\n",
648 | "\n",
649 | "HDFStore is a dictionary-like object for reading and writing pandas objects using the **PyTables** library.\n",
650 | "\n",
651 | "To get data into an hdf5 file, you need to specify the filename and the key/group of the dataset. If you don't give it a path, it will put the key in the root group, which is the \"root folder\" of the hdf5 file. \n",
652 | "\n",
653 | "```python\n",
654 | "df.to_hdf('store.h5', key='/data', format='table', mode='a')\n",
655 | "```\n",
656 | "\n",
657 | "And in order to access and read from the HDF5 file:\n",
658 | "\n",
659 | "```python\n",
660 | "pd.read_hdf('store.h5', key='/data')\n",
661 | "```"
662 | ]
663 | },
664 | {
665 | "cell_type": "markdown",
666 | "metadata": {
667 | "slideshow": {
668 | "slide_type": "fragment"
669 | }
670 | },
671 | "source": [
672 | "Similarly to the `open()` method, `to_hdf()` takes the `mode` parameter (`\"a\"`, `\"w\"` or `\"r+\"`). `to_hdf()` also requires a format parameter. You can read more about the different options here: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_hdf.html"
673 | ]
674 | },
675 | {
676 | "cell_type": "code",
677 | "execution_count": null,
678 | "metadata": {},
679 | "outputs": [],
680 | "source": [
681 | "sample.to_hdf('sample_data.h5', key='sample')"
682 | ]
683 | },
684 | {
685 | "cell_type": "code",
686 | "execution_count": null,
687 | "metadata": {},
688 | "outputs": [],
689 | "source": []
690 | }
691 | ],
692 | "metadata": {
693 | "kernelspec": {
694 | "display_name": "Python 3 (ipykernel)",
695 | "language": "python",
696 | "name": "python3"
697 | },
698 | "language_info": {
699 | "codemirror_mode": {
700 | "name": "ipython",
701 | "version": 3
702 | },
703 | "file_extension": ".py",
704 | "mimetype": "text/x-python",
705 | "name": "python",
706 | "nbconvert_exporter": "python",
707 | "pygments_lexer": "ipython3",
708 | "version": "3.10.9"
709 | },
710 | "rise": {
711 | "scroll": true
712 | },
713 | "toc": {
714 | "base_numbering": 1,
715 | "nav_menu": {},
716 | "number_sections": true,
717 | "sideBar": true,
718 | "skip_h1_title": false,
719 | "title_cell": "Table of Contents",
720 | "title_sidebar": "Contents",
721 | "toc_cell": false,
722 | "toc_position": {},
723 | "toc_section_display": true,
724 | "toc_window_display": false
725 | }
726 | },
727 | "nbformat": 4,
728 | "nbformat_minor": 4
729 | }
730 |
--------------------------------------------------------------------------------
/Pandas/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 |
6 |
7 | notebook | content
8 | ---- | ------
9 | [Pandas.ipynb](Pandas.ipynb) [](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Pandas/Pandas.ipynb) | Pandas
10 | [IO_Pandas.ipynb](IO_Pandas.ipynb) [](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Pandas/IO_Pandas.ipynb) | Reading data with Pandas
11 |
12 |
13 | ## Description
14 |
15 | - introduce the two main objects `Series` and `DataFrame`
16 | - highlight the concept of an `Index`, naming rows and columns (ToDo: add names to Series example in the beginning).
17 |
18 | ## Ressources
19 |
20 | - [Pandas Tutorial for Hands on ML](https://nbviewer.jupyter.org/github/ageron/handson-ml2/blob/master/tools_pandas.ipynb)
21 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 |
6 |
7 | __Please help us to improve the course by filling out the [feedback form](https://forms.office.com/e/UL3w7D3Q94)__
8 |
9 |
10 | | DAY 1 | DAY 2 | DAY 3 |
11 | |------------------------------------------------------|------------------------------------------------------|---------------------------------------|
12 | | Morning coffee (optional) | Morning coffee (optional) | Morning coffee (optional) |
13 | | [Introduction and Motivation](slides) | [Pandas](Pandas) | [Recap Quiz](Recap) |
14 | | [Variables and data types](Variables_data_types) | | |
15 | | Coffee break | Coffee break | Coffee break |
16 | | [Iterables I](Iterables) | [Pandas](Pandas) | [Visualization](Visualizations) |
17 | | Coffee break | | |
18 | | [Iterables II](Iterables) | | |
19 | | Lunch | Lunch | Lunch |
20 | | [Booleans, operators and conditions](Conditionals) | [Pandas](Pandas) | [Virtual envs and installations](slides) |
21 | | Coffee break | Coffee break | Coffee break |
22 | | [Loops](Loops) | [Pandas](Pandas) | [Dataset Exercise](Exercise) |
23 | | Q & A | Q & A | Q & A |
24 |
25 | ## Program April 2024
26 | 
27 |
28 | ## Further Resources
29 |
30 | ### Cheat Sheets
31 | - Basics:
32 | - [Getting started](cheat_sheets/cheat_sheet_day0.pdf)
33 | - [Importing Data](cheat_sheets/Importing_Data_Cheat_sheet.pdf)
34 | - [Jupyter Notebook](cheat_sheets/Jupyter_Notebook_Cheat_Sheet.pdf)
35 | - Data Science:
36 | - [Numpy](cheat_sheets/Numpy_Python_Cheat_Sheet.pdf)
37 | - [Pandas](cheat_sheets/Pandas_Cheat_Sheet.pdf)
38 | - [Scipy](cheat_sheets/Scipy-LinearAlgebra_Cheat_Sheet.pdf)
39 | - [Scikit-learn](cheat_sheets/Scikit-learn_Cheat_Sheet.pdf)
40 | - Visualization:
41 | - [Matplotlib](cheat_sheets/Python_Matplotlib_Cheat_Sheet.pdf)
42 | - [Plot.ly](cheat_sheets/Plotly_Cheat_Sheet.pdf)
43 | - [Seaborn](cheat_sheets/Seaborn_Cheat_Sheet.pdf)
44 | - [Bokeh](cheat_sheets/Bokeh_Cheat_Sheet.pdf)
45 |
46 | ### Basics
47 | - [codecademy](https://codecademy.com)
48 | - Interactive website with many beginners code tutorials. Requires sign-up but base content is free.
49 |
50 | - [learnpython.org](https://www.learnpython.org/)
51 | - interactive python basics tutorial
52 |
53 | - [Springboard - Data Analysis with Python, SQL, and R](https://www.springboard.com/learning-paths/data-analysis/learn/)
54 | - starts with - [Solo Learn](https://www.sololearn.com/Course/Python/) and [Design of Computer Programs](https://www.udacity.com/course/design-of-computer-programs--cs212)
55 | - [Scipy Lectures](https://scipy-lectures.org/index.html)
56 | - Python introduction with a focus on scientific computing
57 | - [official tutorial](https://docs.python.org/3/tutorial/)
58 |
59 | ### Advanced
60 | - [Fluent Python](https://www.oreilly.com/library/view/fluent-python-2nd/9781492056348/)
61 | - [Intermediate Python Programming Course (6h) on Youtube](https://www.youtube.com/watch?v=HGOBQPFzWKo)
62 | - [Hitchhiker's Guide to Python](https://docs.python-guide.org/)
63 |
64 |
65 | ### Python Installations
66 |
67 | In this course we use [Google Colab](https://colab.research.google.com/) to execute notebooks. Notebooks are text files allowing
68 | the combination of Text, Code and the output of code. Colab offers an extended set of
69 | pre-installed tools. See the [tutorial series](https://www.youtube.com/playlist?list=PLQY2H8rRoyvyK5aEDAI3wUUqC_F0oEroL).
70 |
71 | [Anaconda](https://www.anaconda.com/products/individual) offers an extended installation
72 | for your private computer, including most of the tools you will ever need for Python.
73 | Use the [Anaconda Navigator](https://docs.anaconda.com/anaconda/navigator/) to launch applications.
74 |
--------------------------------------------------------------------------------
/Recap/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 |
6 |
7 | notebook | content
8 | ---- | ------
9 | [recap.ipynb](recap.ipynb) [](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Recap/recap.ipynb)| Recap
10 |
--------------------------------------------------------------------------------
/Recap/recap.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "
\n",
8 | "\n",
9 | "
\n"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "# Recap \n",
17 | "**Congratulations!** \n",
18 | "\n",
19 | "You learned a whole lot of new programming concepts and skills over the last two days. \n",
20 | "To kick off the final day of this Python course, we will start by recapping some of the most important concepts."
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "## Quiz"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "Use the next 45 minutes on the quiz below. Work together with the people at your table and discuss your answers. Add code cells and try the code written in the question if you are unsure. "
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {},
40 | "source": [
41 | "**Question 1** \n",
42 | "Can you explain what happens to ``var`` in each step the example below? What type is ``var`` at the end of the program?\n",
43 | "```python\n",
44 | "var = 1\n",
45 | "var *= 100\n",
46 | "var /= 2\n",
47 | "var = int(var)\n",
48 | "var = str(var)\n",
49 | "var = var + \" points to Gryffindor!\"\n",
50 | "print(var)\n",
51 | "```"
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "metadata": {},
57 | "source": [
58 | "**Question 2** \n",
59 | "What is the loop below printing at each iteration?\n",
60 | " \n",
61 | "```python\n",
62 | "scand = [\"Denmark\", \"Finnland\", \"Sweden\", \"Norway\"]\n",
63 | "\n",
64 | "for index, country in enumerate(scand):\n",
65 | " if country != \"Finnland\":\n",
66 | " print(\"My #\" + str(index+1) + \" favorite country in Scandinavia is \" + country + \".\")\n",
67 | " \n",
68 | " else:\n",
69 | " print(\"Sorry, but Finnland is not part of Scandinavia.\")\n",
70 | " break\n",
71 | "```"
72 | ]
73 | },
74 | {
75 | "cell_type": "markdown",
76 | "metadata": {},
77 | "source": [
78 | "**Question 3** \n",
79 | "Consider the scenarios A-C below. Which Python data structure would you use to solve them?\n",
80 | "\n",
81 | "**A.** You have all names of the participants in this course. You want to store them in a variable that preserves the order in which the participants signed up for the course. \n",
82 | "\n",
83 | "**B.** You have (a) all names of the participants in this course, and (b) the names of everyone who is currently employed at SUND. Next, you want to find all employees who DID NOT participate in the course. Which data structure would you store (a) and (b) in to achieve this? And bonus question: How would you achieve it? \n",
84 | "\n",
85 | "**C.** You want to store information about all capital cities in Europe and the number of their inhabitants. Since the numbers might change, you want to make sure that you can update the information at any time. "
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "**Question 4**\n",
93 | "\n",
94 | "Does the expression used in the `if` below evaluate to `True` or `False`? Can you explain why? Can you change it to make it `False`?\n",
95 | "\n"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 1,
101 | "metadata": {},
102 | "outputs": [
103 | {
104 | "name": "stdout",
105 | "output_type": "stream",
106 | "text": [
107 | "Include in study.\n"
108 | ]
109 | }
110 | ],
111 | "source": [
112 | "smoker = False\n",
113 | "patient_age = 50\n",
114 | "\n",
115 | "if (not smoker and patient_age < 65):\n",
116 | " print('Include in study.')\n",
117 | "else:\n",
118 | " print('Skip to next candidate.')"
119 | ]
120 | },
121 | {
122 | "cell_type": "markdown",
123 | "metadata": {},
124 | "source": [
125 | "**Question 5** \n",
126 | "Explain the errors below, and change the code to fix them."
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": null,
132 | "metadata": {},
133 | "outputs": [],
134 | "source": [
135 | "# error 1\n",
136 | "75 * (2/0)"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": null,
142 | "metadata": {},
143 | "outputs": [],
144 | "source": [
145 | "# error 2\n",
146 | "weather = [\"sunny\", \"cloudy\", \"rainy\"]\n",
147 | "weather[4]"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": null,
153 | "metadata": {},
154 | "outputs": [],
155 | "source": [
156 | "# error 3\n",
157 | "university = \"ucph\"\n",
158 | "typ(university)"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": null,
164 | "metadata": {},
165 | "outputs": [],
166 | "source": [
167 | "# error 4\n",
168 | "university = \"ucph\"\n",
169 | "if \"c\" in university:\n",
170 | "print(\"I work at\" + university.upper() + \".\")"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": null,
176 | "metadata": {},
177 | "outputs": [],
178 | "source": [
179 | "# error 5\n",
180 | "print(hello_world)"
181 | ]
182 | },
183 | {
184 | "cell_type": "markdown",
185 | "metadata": {},
186 | "source": [
187 | "**Question 6**\n",
188 | "\n",
189 | "Consider the data frame below. How do you:\n",
190 | "\n",
191 | "1. Create a new dataframe with only the treatment, age and satisfaction columns?\n",
192 | "2. Calculate the mean age?\n",
193 | "3. Extract all rows with patients older than 70?\n",
194 | "4. Omit rows with missing data?\n"
195 | ]
196 | },
197 | {
198 | "cell_type": "code",
199 | "execution_count": 19,
200 | "metadata": {},
201 | "outputs": [
202 | {
203 | "data": {
204 | "text/html": [
205 | "
\n", 223 | " | patient_id | \n", 224 | "treatment | \n", 225 | "hospital | \n", 226 | "convalescence_days | \n", 227 | "age | \n", 228 | "satisfaction | \n", 229 | "
---|---|---|---|---|---|---|
0 | \n", 234 | "402109 | \n", 235 | "A | \n", 236 | "Rigshospitalet | \n", 237 | "15.0 | \n", 238 | "68 | \n", 239 | "3 | \n", 240 | "
1 | \n", 243 | "092070 | \n", 244 | "A | \n", 245 | "Rigshospitalet | \n", 246 | "13.0 | \n", 247 | "74 | \n", 248 | "5 | \n", 249 | "
2 | \n", 252 | "994082 | \n", 253 | "B | \n", 254 | "Herlev | \n", 255 | "27.0 | \n", 256 | "76 | \n", 257 | "2 | \n", 258 | "
3 | \n", 261 | "843094 | \n", 262 | "A | \n", 263 | "Herlev | \n", 264 | "30.0 | \n", 265 | "65 | \n", 266 | "5 | \n", 267 | "
4 | \n", 270 | "369360 | \n", 271 | "B | \n", 272 | "Rigshospitalet | \n", 273 | "21.0 | \n", 274 | "68 | \n", 275 | "5 | \n", 276 | "
5 | \n", 279 | "688213 | \n", 280 | "B | \n", 281 | "Rigshospitalet | \n", 282 | "29.0 | \n", 283 | "77 | \n", 284 | "3 | \n", 285 | "
6 | \n", 288 | "197347 | \n", 289 | "A | \n", 290 | "Herlev | \n", 291 | "25.0 | \n", 292 | "65 | \n", 293 | "5 | \n", 294 | "
7 | \n", 297 | "374793 | \n", 298 | "A | \n", 299 | "Rigshospitalet | \n", 300 | "NaN | \n", 301 | "67 | \n", 302 | "5 | \n", 303 | "
8 | \n", 306 | "759063 | \n", 307 | "B | \n", 308 | "Rigshospitalet | \n", 309 | "16.0 | \n", 310 | "75 | \n", 311 | "4 | \n", 312 | "
9 | \n", 315 | "121219 | \n", 316 | "B | \n", 317 | "None | \n", 318 | "27.0 | \n", 319 | "68 | \n", 320 | "4 | \n", 321 | "
10 | \n", 324 | "427898 | \n", 325 | "B | \n", 326 | "Rigshospitalet | \n", 327 | "15.0 | \n", 328 | "74 | \n", 329 | "5 | \n", 330 | "
2 |
3 |
5 |
2 |
5 |
6 |
7 | notebook | content
8 | ---- | ------
9 | [variables.ipynb](variables.ipynb) [](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Variables_data_types/variables.ipynb) | Variables and data types
10 |
--------------------------------------------------------------------------------
/Variables_data_types/Variables_slides_Oct22.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/Variables_data_types/Variables_slides_Oct22.pdf
--------------------------------------------------------------------------------
/Variables_data_types/variables.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"cell_type":"markdown","metadata":{"id":"i7xIAr06u3oF"},"source":["
\n","\n","
"]},{"cell_type":"markdown","metadata":{"id":"1MjzmO4uoHh9"},"source":["# Variables and Data Types"]},{"cell_type":"markdown","metadata":{"id":"GXxxR4dDuunK"},"source":["## Variable Assignment"]},{"cell_type":"markdown","metadata":{"id":"Nx4q1VSyuzOA"},"source":["> A variable is a named symbol that holds a value.\n","\n","> Variable **containers** --> name and value\n","\n","* Create variables by assigning a value to a name (just like using variables in math).\n","* Variable names should be meaningful, i.e. not just ``a``, ``b``, ``c``\n","* Variables are always **assigned** with the variable name on the left and the value on the right of the ***equals*** sign. For instance:\n"," \n"," * `a_variable = 100` \n"," * assigned to other variables: `another_variable = a_variable`\n"," * reassigned at any time: `a_variable = 435`\n"," * assigning several variables at the same time: `all, at, once = 1, 130, 43`\n","\n","* Variables **must** be assigned before they can be used."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":377,"status":"ok","timestamp":1655709554070,"user":{"displayName":"Tugce K","userId":"13907696802096210204"},"user_tz":-120},"id":"-IoOMUOnuD6f","outputId":"7621238e-aee0-4b49-8862-54d5f74acabf"},"outputs":[{"name":"stdout","output_type":"stream","text":["100\n"]}],"source":["# Create a variable x by assigning a value to x\n","x = 100\n","print(x)\n","\n","# What is the container and what are the data here?"]},{"cell_type":"markdown","metadata":{"id":"M23lgzTcwMXn"},"source":["## Naming restrictions\n","\n","1. Variable names must start with a letter or underscore.\n"," \n","2. The rest of the name must consist of letters and numbers (i.e. **alphanumeric**). If you need to use a multi_word variable name, underscores can be used.\n"," \n","3. Names are case-sensitive. \n","\n","4. Each variable's name must be unique. 
Two variables with the same name are the same variable."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":230,"status":"ok","timestamp":1655709783721,"user":{"displayName":"Tugce K","userId":"13907696802096210204"},"user_tz":-120},"id":"ReKCem_Cu3oW","outputId":"e64b489b-658d-40d1-9741-f9b1e22340b9"},"outputs":[{"name":"stdout","output_type":"stream","text":["100\n"]}],"source":["# Try it out\n","_2x = 100\n","print(_2x)\n","\n","# What can be the alternatives to make this variable naming work?"]},{"cell_type":"markdown","metadata":{"id":"pVF6YjlgGtKX"},"source":["## Data Types"]},{"cell_type":"markdown","metadata":{"id":"J3kix56wGxEB"},"source":["We'll see the common Python data types below. There are more. You can always check a variable's type with the `type()` function.\n","\n","### Numbers\n","* `int`: an integer, e.g. `1`, `2`, `3`\n","* `float`: a floating point number with a decimal point, e.g. `1.2`, `2999.197`, `-160.8`\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":310,"status":"ok","timestamp":1655709993563,"user":{"displayName":"Tugce K","userId":"13907696802096210204"},"user_tz":-120},"id":"ovMTp36Eu3oa","outputId":"f3436719-929f-42d5-a309-5b5521eedd84"},"outputs":[{"name":"stdout","output_type":"stream","text":["-1\n","
3 |
6 |
7 | notebook | content
8 | ---- | ------
9 | [plotly.ipynb](plotly.ipynb) [](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Visualizations/plotly.ipynb)| Plotly Express library [Be aware: [Plots stays interactive in nbviewer](https://nbviewer.org/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Visualizations/plotly.ipynb#Bar-Charts)]
10 | [plotly_extra_material.ipynb](plotly_extra_material.ipynb) [](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Visualizations/plotly_extra_material.ipynb)| Advanced plotting in plotly
11 | [PlotlyExpress_ComprehensiveGuide.ipynb](PlotlyExpress_ComprehensiveGuide.ipynb) [](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/2024_Oct/Visualizations/PlotlyExpress_ComprehensiveGuide.ipynb)| PlotlyExpress library guide (extensive external resource)
12 |
13 | ## Installing Plotly
14 |
15 | In order to [run plotly in jupyter lab](https://plotly.com/python/getting-started/#jupyterlab-support) you will need to add a labextension:
16 |
17 | ```bash
18 | # JupyterLab renderer support
19 | jupyter labextension install jupyterlab-plotly@4.14.3
20 | # OPTIONAL: Jupyter widgets extension
21 | jupyter labextension install @jupyter-widgets/jupyterlab-manager plotlywidget@4.14.3
22 | ```
23 |
24 | ## Articles
25 |
26 | - python [plotting libraries](https://pbpython.com/python-vis-flowchart.html)
27 |
--------------------------------------------------------------------------------
/cheat_sheets/Bokeh_Cheat_Sheet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Bokeh_Cheat_Sheet.pdf
--------------------------------------------------------------------------------
/cheat_sheets/Importing_Data_Cheat_sheet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Importing_Data_Cheat_sheet.pdf
--------------------------------------------------------------------------------
/cheat_sheets/Jupyter_Notebook_Cheat_Sheet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Jupyter_Notebook_Cheat_Sheet.pdf
--------------------------------------------------------------------------------
/cheat_sheets/Numpy_Python_Cheat_Sheet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Numpy_Python_Cheat_Sheet.pdf
--------------------------------------------------------------------------------
/cheat_sheets/Pandas_Cheat_Sheet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Pandas_Cheat_Sheet.pdf
--------------------------------------------------------------------------------
/cheat_sheets/Plotly_Cheat_Sheet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Plotly_Cheat_Sheet.pdf
--------------------------------------------------------------------------------
/cheat_sheets/Python_Matplotlib_Cheat_Sheet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Python_Matplotlib_Cheat_Sheet.pdf
--------------------------------------------------------------------------------
/cheat_sheets/Scikit-learn_Cheat_Sheet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Scikit-learn_Cheat_Sheet.pdf
--------------------------------------------------------------------------------
/cheat_sheets/Scipy-LinearAlgebra_Cheat_Sheet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Scipy-LinearAlgebra_Cheat_Sheet.pdf
--------------------------------------------------------------------------------
/cheat_sheets/Seaborn_Cheat_Sheet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/Seaborn_Cheat_Sheet.pdf
--------------------------------------------------------------------------------
/cheat_sheets/cheat_sheet_day0.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/cheat_sheets/cheat_sheet_day0.pdf
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: tsunami
2 | channels:
3 | - defaults
4 | # - plotly
5 | dependencies:
6 | - python
7 | - matplotlib
8 | - pandas
9 | - ipykernel
10 | - ipywidgets>=7.5
11 | - ipython
12 | - scikit-learn
13 | - seaborn
14 | - jupyter
15 | - plotly
16 | - bs4
17 | - requests
18 |
--------------------------------------------------------------------------------
/figures/HeaDS_logo_large_withTitle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/HeaDS_logo_large_withTitle.png
--------------------------------------------------------------------------------
/figures/Program_October_2021.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/Program_October_2021.png
--------------------------------------------------------------------------------
/figures/colab_restart_runtime_after_install.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/colab_restart_runtime_after_install.png
--------------------------------------------------------------------------------
/figures/colab_save_in_drive.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/colab_save_in_drive.png
--------------------------------------------------------------------------------
/figures/colab_save_in_drive_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/colab_save_in_drive_2.png
--------------------------------------------------------------------------------
/figures/colab_toc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/colab_toc.png
--------------------------------------------------------------------------------
/figures/df_loc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/df_loc.png
--------------------------------------------------------------------------------
/figures/df_loc_condition.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/df_loc_condition.png
--------------------------------------------------------------------------------
/figures/github_raw_file_view.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/github_raw_file_view.png
--------------------------------------------------------------------------------
/figures/long_format.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/long_format.png
--------------------------------------------------------------------------------
/figures/matplotlib/fig_axes_axis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/matplotlib/fig_axes_axis.png
--------------------------------------------------------------------------------
/figures/matplotlib/handout-beginner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/matplotlib/handout-beginner.png
--------------------------------------------------------------------------------
/figures/pandas_dataframe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/pandas_dataframe.png
--------------------------------------------------------------------------------
/figures/pandas_indexing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/pandas_indexing.png
--------------------------------------------------------------------------------
/figures/program.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/program.PNG
--------------------------------------------------------------------------------
/figures/program_june2022.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/program_june2022.png
--------------------------------------------------------------------------------
/figures/program_march2023.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/program_march2023.png
--------------------------------------------------------------------------------
/figures/program_oct_screen_GR.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/program_oct_screen_GR.png
--------------------------------------------------------------------------------
/figures/program_spring_2022.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/program_spring_2022.PNG
--------------------------------------------------------------------------------
/figures/quartile-percentile.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/quartile-percentile.jpg
--------------------------------------------------------------------------------
/figures/tsunami_logo.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/tsunami_logo.PNG
--------------------------------------------------------------------------------
/figures/wide_format.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/figures/wide_format.png
--------------------------------------------------------------------------------
/slides/Python Tsunami Local Installations vs code.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/slides/Python Tsunami Local Installations vs code.pdf
--------------------------------------------------------------------------------
/slides/Python Tsunami intro.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Center-for-Health-Data-Science/PythonTsunami/97523816d9eb3da33c30feba103f2d0c37c00950/slides/Python Tsunami intro.pdf
--------------------------------------------------------------------------------
/solutions/conditions_solutions.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Exercise 2\n",
8 | "\n",
9 | "You have two boolean variables:\n",
10 | "\n",
11 | "```python\n",
12 | "a = True\n",
13 | "b = False\n",
14 | "```\n",
15 | "\n",
16 | "What is the result of the following expressions?\n",
17 | "\n",
18 | "1. `a or b`\n",
19 | "\n",
20 | "2. `a and b`\n",
21 | "\n",
22 | "3. `a and not b`"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 1,
28 | "metadata": {},
29 | "outputs": [],
30 | "source": [
31 | "a = True\n",
32 | "b = False"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 2,
38 | "metadata": {},
39 | "outputs": [
40 | {
41 | "data": {
42 | "text/plain": [
43 | "True"
44 | ]
45 | },
46 | "execution_count": 2,
47 | "metadata": {},
48 | "output_type": "execute_result"
49 | }
50 | ],
51 | "source": [
52 | "#1 \n",
53 | "a or b"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": 3,
59 | "metadata": {},
60 | "outputs": [
61 | {
62 | "data": {
63 | "text/plain": [
64 | "False"
65 | ]
66 | },
67 | "execution_count": 3,
68 | "metadata": {},
69 | "output_type": "execute_result"
70 | }
71 | ],
72 | "source": [
73 | "#2\n",
74 | "a and b"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 4,
80 | "metadata": {},
81 | "outputs": [
82 | {
83 | "data": {
84 | "text/plain": [
85 | "True"
86 | ]
87 | },
88 | "execution_count": 4,
89 | "metadata": {},
90 | "output_type": "execute_result"
91 | }
92 | ],
93 | "source": [
94 | "#3\n",
95 | "a and not b"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 5,
101 | "metadata": {},
102 | "outputs": [
103 | {
104 | "data": {
105 | "text/plain": [
106 | "False"
107 | ]
108 | },
109 | "execution_count": 5,
110 | "metadata": {},
111 | "output_type": "execute_result"
112 | }
113 | ],
114 | "source": [
115 | "#how to make it False. One example. \n",
116 | "\n",
117 | "not a and b"
118 | ]
119 | },
120 | {
121 | "cell_type": "markdown",
122 | "metadata": {},
123 | "source": [
124 | "## Exercise 3\n",
125 | "\n",
126 | "If you set the name variable to \"Gandalf\" and run the script below, what will the output be? How do you get the output 'Move on then'?"
127 | ]
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "metadata": {},
132 | "source": [
133 | "For \"Gandalf\" the result is \"Run, you fools!\". To make the condition go into the else, put literally any string that is not \"Gandalf\" or \"Aragorn\"."
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": 6,
139 | "metadata": {},
140 | "outputs": [
141 | {
142 | "name": "stdout",
143 | "output_type": "stream",
144 | "text": [
145 | "Move on then!\n"
146 | ]
147 | }
148 | ],
149 | "source": [
150 | "name = \"Frodo\"\n",
151 | "if name == \"Gandalf\":\n",
152 | " print(\"Run, you fools!\")\n",
153 | "elif name == \"Aragorn\":\n",
154 | " print(\"There is always hope.\")\n",
155 | "else:\n",
156 | " print(\"Move on then!\")"
157 | ]
158 | },
159 | {
160 | "cell_type": "markdown",
161 | "metadata": {},
162 | "source": [
163 | "Create a variable and assign an integer as value, then build a conditional to test it:\n",
164 | "- If the value is below 0, print \"The value is negative\"\n",
165 | "- If the value is between 0 and 20 (including 0 and 20), print the value\n",
166 | "- Otherwise, print \"Out of scope\"\n",
167 | "\n",
168 | "Test it by changing the value of the variable."
169 | ]
170 | },
171 | {
172 | "cell_type": "code",
173 | "execution_count": 20,
174 | "metadata": {},
175 | "outputs": [],
176 | "source": [
177 | "my_int = 30"
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": 21,
183 | "metadata": {},
184 | "outputs": [
185 | {
186 | "name": "stdout",
187 | "output_type": "stream",
188 | "text": [
189 | "Out of scope.\n"
190 | ]
191 | }
192 | ],
193 | "source": [
194 | "if my_int < 0:\n",
195 | " print(\"The value is negative\")\n",
196 | "elif my_int <= 20:\n",
197 | " print(my_int)\n",
198 | "else:\n",
199 | " print(\"Out of scope.\")"
200 | ]
201 | },
202 | {
203 | "cell_type": "markdown",
204 | "metadata": {},
205 | "source": [
206 | "## Exercise 4\n",
207 | "\n",
208 | "Write an `if` condition that tests whether `word_list` is empty. If it is, add 'Hello world' and print the list. If it is not, print the first element. Test that both outcomes work. "
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": 8,
214 | "metadata": {},
215 | "outputs": [
216 | {
217 | "name": "stdout",
218 | "output_type": "stream",
219 | "text": [
220 | "pen\n"
221 | ]
222 | }
223 | ],
224 | "source": [
225 | "word_list = ['pen', 'paper', 'room']\n",
226 | "\n",
227 | "if word_list:\n",
228 | " print(word_list[0])\n",
229 | "else:\n",
230 | " word_list.append('Hello world')\n",
231 | " print(word_list)"
232 | ]
233 | },
234 | {
235 | "cell_type": "code",
236 | "execution_count": 9,
237 | "metadata": {},
238 | "outputs": [
239 | {
240 | "name": "stdout",
241 | "output_type": "stream",
242 | "text": [
243 | "['Hello world']\n"
244 | ]
245 | }
246 | ],
247 | "source": [
248 | "word_list = []\n",
249 | "\n",
250 | "if word_list:\n",
251 | " print(word_list[0])\n",
252 | "else:\n",
253 | " word_list.append('Hello world')\n",
254 | " print(word_list)"
255 | ]
256 | },
257 | {
258 | "cell_type": "code",
259 | "execution_count": null,
260 | "metadata": {},
261 | "outputs": [],
262 | "source": []
263 | }
264 | ],
265 | "metadata": {
266 | "kernelspec": {
267 | "display_name": "Python 3",
268 | "language": "python",
269 | "name": "python3"
270 | },
271 | "language_info": {
272 | "codemirror_mode": {
273 | "name": "ipython",
274 | "version": 3
275 | },
276 | "file_extension": ".py",
277 | "mimetype": "text/x-python",
278 | "name": "python",
279 | "nbconvert_exporter": "python",
280 | "pygments_lexer": "ipython3",
281 | "version": "3.11.1"
282 | }
283 | },
284 | "nbformat": 4,
285 | "nbformat_minor": 4
286 | }
287 |
--------------------------------------------------------------------------------
/solutions/functions_solutions.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "### Exercise 1\n",
8 | "\n",
9 | "#### Calculate absolute difference\n",
10 | "\n",
11 | "Write a function that returns the absolute (positive) difference between two numbers."
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 1,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "a = 25\n",
21 | "b = 65"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 2,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "def abs_difference(x, y):\n",
31 | " diff = abs(x-y)\n",
32 | " return diff"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 3,
38 | "metadata": {},
39 | "outputs": [
40 | {
41 | "data": {
42 | "text/plain": [
43 | "40"
44 | ]
45 | },
46 | "execution_count": 3,
47 | "metadata": {},
48 | "output_type": "execute_result"
49 | }
50 | ],
51 | "source": [
52 | "abs_difference(a,b)"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 4,
58 | "metadata": {},
59 | "outputs": [
60 | {
61 | "data": {
62 | "text/plain": [
63 | "60"
64 | ]
65 | },
66 | "execution_count": 4,
67 | "metadata": {},
68 | "output_type": "execute_result"
69 | }
70 | ],
71 | "source": [
72 | "abs_difference(-20,40)"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 5,
78 | "metadata": {},
79 | "outputs": [
80 | {
81 | "data": {
82 | "text/plain": [
83 | "10"
84 | ]
85 | },
86 | "execution_count": 5,
87 | "metadata": {},
88 | "output_type": "execute_result"
89 | }
90 | ],
91 | "source": [
92 | "abs_difference(0,-10)"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": 6,
98 | "metadata": {},
99 | "outputs": [
100 | {
101 | "data": {
102 | "text/plain": [
103 | "10"
104 | ]
105 | },
106 | "execution_count": 6,
107 | "metadata": {},
108 | "output_type": "execute_result"
109 | }
110 | ],
111 | "source": [
112 | "abs_difference(-10,-20)"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "### Exercise 2\n",
120 | "\n",
121 | "#### Calculate squares\n",
122 | "\n",
123 | "Write a function that returns the square root of the sum of squares of two numbers. \n",
124 | "\n",
125 | "> Hint: You can use `math.sqrt` to calculate the square root."
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": 11,
131 | "metadata": {},
132 | "outputs": [],
133 | "source": [
134 | "from math import sqrt\n",
135 | "sqrt(25)\n",
136 | "\n",
137 | "def square_root_of_squares(a,b):\n",
138 | " res = sqrt(a**2 + b**2)\n",
139 | " return res"
140 | ]
141 | },
142 | {
143 | "cell_type": "code",
144 | "execution_count": 12,
145 | "metadata": {},
146 | "outputs": [
147 | {
148 | "data": {
149 | "text/plain": [
150 | "4.47213595499958"
151 | ]
152 | },
153 | "execution_count": 12,
154 | "metadata": {},
155 | "output_type": "execute_result"
156 | }
157 | ],
158 | "source": [
159 | "square_root_of_squares(2,4)"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": 13,
165 | "metadata": {},
166 | "outputs": [
167 | {
168 | "data": {
169 | "text/plain": [
170 | "4.47213595499958"
171 | ]
172 | },
173 | "execution_count": 13,
174 | "metadata": {},
175 | "output_type": "execute_result"
176 | }
177 | ],
178 | "source": [
179 | "#test:\n",
180 | "sqrt(4+16)"
181 | ]
182 | },
183 | {
184 | "cell_type": "code",
185 | "execution_count": 14,
186 | "metadata": {},
187 | "outputs": [
188 | {
189 | "data": {
190 | "text/plain": [
191 | "1.4142135623730951"
192 | ]
193 | },
194 | "execution_count": 14,
195 | "metadata": {},
196 | "output_type": "execute_result"
197 | }
198 | ],
199 | "source": [
200 | "square_root_of_squares(1,1)"
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": 15,
206 | "metadata": {},
207 | "outputs": [
208 | {
209 | "data": {
210 | "text/plain": [
211 | "4.47213595499958"
212 | ]
213 | },
214 | "execution_count": 15,
215 | "metadata": {},
216 | "output_type": "execute_result"
217 | }
218 | ],
219 | "source": [
220 | "#also works with negative numbers since we square\n",
221 | "square_root_of_squares(-2,4)"
222 | ]
223 | },
224 | {
225 | "cell_type": "markdown",
226 | "metadata": {},
227 | "source": [
228 | "### Exercise 3\n",
229 | "\n",
230 | "#### Indicate sign of difference between two numbers\n",
231 | "\n",
232 | "Write a function that subtracts the second number from the first. Return \"Positive\" if their difference is positive, and \"Negative\" if their difference is negative."
233 | ]
234 | },
235 | {
236 | "cell_type": "code",
237 | "execution_count": 16,
238 | "metadata": {},
239 | "outputs": [],
240 | "source": [
241 | "#mind the edge case where difference == 0!\n",
242 | "def signed_diff(a,b):\n",
243 | " res = a - b\n",
244 | " if res > 0:\n",
245 | " return 'Positive'\n",
246 | " elif res < 0:\n",
247 | " return 'Negative'\n",
248 | " else:\n",
249 | " return 'Equal'"
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": 17,
255 | "metadata": {},
256 | "outputs": [
257 | {
258 | "data": {
259 | "text/plain": [
260 | "'Negative'"
261 | ]
262 | },
263 | "execution_count": 17,
264 | "metadata": {},
265 | "output_type": "execute_result"
266 | }
267 | ],
268 | "source": [
269 | "signed_diff(1,10)"
270 | ]
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": 18,
275 | "metadata": {},
276 | "outputs": [
277 | {
278 | "data": {
279 | "text/plain": [
280 | "'Positive'"
281 | ]
282 | },
283 | "execution_count": 18,
284 | "metadata": {},
285 | "output_type": "execute_result"
286 | }
287 | ],
288 | "source": [
289 | "signed_diff(20,5)"
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": 19,
295 | "metadata": {},
296 | "outputs": [
297 | {
298 | "data": {
299 | "text/plain": [
300 | "'Equal'"
301 | ]
302 | },
303 | "execution_count": 19,
304 | "metadata": {},
305 | "output_type": "execute_result"
306 | }
307 | ],
308 | "source": [
309 | "signed_diff(10,10)"
310 | ]
311 | },
312 | {
313 | "cell_type": "code",
314 | "execution_count": 20,
315 | "metadata": {},
316 | "outputs": [
317 | {
318 | "data": {
319 | "text/plain": [
320 | "'Negative'"
321 | ]
322 | },
323 | "execution_count": 20,
324 | "metadata": {},
325 | "output_type": "execute_result"
326 | }
327 | ],
328 | "source": [
329 | "signed_diff(-10,-5)"
330 | ]
331 | },
332 | {
333 | "cell_type": "markdown",
334 | "metadata": {},
335 | "source": [
336 | "### Exercise 4\n",
337 | "\n",
338 | "#### Calculate sum and differences\n",
339 | "\n",
340 | "Write a function that returns **both** the sum of the first two inputs and the difference between the second and third input: "
341 | ]
342 | },
343 | {
344 | "cell_type": "code",
345 | "execution_count": 26,
346 | "metadata": {},
347 | "outputs": [],
348 | "source": [
349 | "def f4(a,b,c):\n",
350 | " sum_1 = a + b\n",
351 | " diff_2 = b - c\n",
352 | " #here we return a tuple. we could also use a list.\n",
353 | " return (sum_1, diff_2)"
354 | ]
355 | },
356 | {
357 | "cell_type": "code",
358 | "execution_count": 24,
359 | "metadata": {},
360 | "outputs": [
361 | {
362 | "data": {
363 | "text/plain": [
364 | "(3, -1)"
365 | ]
366 | },
367 | "execution_count": 24,
368 | "metadata": {},
369 | "output_type": "execute_result"
370 | }
371 | ],
372 | "source": [
373 | "f4(1,2,3)"
374 | ]
375 | },
376 | {
377 | "cell_type": "code",
378 | "execution_count": 25,
379 | "metadata": {},
380 | "outputs": [
381 | {
382 | "data": {
383 | "text/plain": [
384 | "(-1, -6)"
385 | ]
386 | },
387 | "execution_count": 25,
388 | "metadata": {},
389 | "output_type": "execute_result"
390 | }
391 | ],
392 | "source": [
393 | "f4(-5,4,10)"
394 | ]
395 | },
396 | {
397 | "cell_type": "markdown",
398 | "metadata": {},
399 | "source": [
400 | "### Exercise 5\n",
401 | "\n",
402 | "#### Function overloading: Different behaviour for `int` and `str`\n",
403 | "\n",
404 | "Write a function that adds two numbers together if the inputs are both numbers, and concatenates the inputs if they are both strings.\n",
405 | "\n",
406 | "> Hint: You can use built-in functions [`type`](https://docs.python.org/3/library/functions.html#type) or [`isinstance`](https://docs.python.org/3/library/functions.html#isinstance) to find out the type of a variable."
407 | ]
408 | },
409 | {
410 | "cell_type": "code",
411 | "execution_count": 39,
412 | "metadata": {},
413 | "outputs": [],
414 | "source": [
415 | "#actually, the + function in python is already overloaded and will do the sum on numbers and concatenation on strings\n",
416 | "def add_this(a,b):\n",
417 | " if type(a) != type(b):\n",
418 | " return 'Arguments have different types.'\n",
419 | " \n",
420 | " elif type(a) == str:\n",
421 | " return a+b\n",
422 | " \n",
423 | " #the exercise text says int, but you can also add floats so I added that\n",
424 | " elif type(a) == int or type(a) == float:\n",
425 | " return a+b\n",
426 | " \n",
427 | " #edge case: whatever weird input the user might give that is the same type for both arguments but not int or str\n",
428 | " else:\n",
429 | " return f\"Cannot add type {type(a)} and {type(b)}\""
430 | ]
431 | },
432 | {
433 | "cell_type": "code",
434 | "execution_count": 40,
435 | "metadata": {},
436 | "outputs": [
437 | {
438 | "data": {
439 | "text/plain": [
440 | "3"
441 | ]
442 | },
443 | "execution_count": 40,
444 | "metadata": {},
445 | "output_type": "execute_result"
446 | }
447 | ],
448 | "source": [
449 | "add_this(1,2)"
450 | ]
451 | },
452 | {
453 | "cell_type": "code",
454 | "execution_count": 41,
455 | "metadata": {},
456 | "outputs": [
457 | {
458 | "data": {
459 | "text/plain": [
460 | "'HelloWorld'"
461 | ]
462 | },
463 | "execution_count": 41,
464 | "metadata": {},
465 | "output_type": "execute_result"
466 | }
467 | ],
468 | "source": [
469 | "add_this('Hello','World')"
470 | ]
471 | },
472 | {
473 | "cell_type": "code",
474 | "execution_count": 42,
475 | "metadata": {},
476 | "outputs": [
477 | {
478 | "data": {
479 | "text/plain": [
480 | "'Arguments have different types.'"
481 | ]
482 | },
483 | "execution_count": 42,
484 | "metadata": {},
485 | "output_type": "execute_result"
486 | }
487 | ],
488 | "source": [
489 | "add_this('Hi',3)"
490 | ]
491 | },
492 | {
493 | "cell_type": "code",
494 | "execution_count": 43,
495 | "metadata": {},
496 | "outputs": [
497 | {
498 | "data": {
499 | "text/plain": [
500 | "\"Cannot add type
\n",
8 | "\n",
9 | "
\n",
10 | "\n",
11 | "[](https://colab.research.google.com/github/Center-for-Health-Data-Science/PythonTsunami/blob/fall2021/Variables_and_data_types/recap.ipynb)"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "# Recap Python Day 1\n",
19 | "**Congratulations!** \n",
20 | "\n",
21 | "You learned a whole lot of new programming concepts and skills over the last two days. \n",
22 | "To kick off the final day of this Python course, we will start by recapping some of the most important concepts."
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "## Recap Quiz"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {},
35 | "source": [
36 | "Spend the next 45 minutes on the quiz below. Work together with the people at your table and discuss your answers. Add code cells and try out the code from the question if you are unsure. "
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {},
42 | "source": [
43 | "**Question 1** \n",
44 | "Can you explain what happens to ``var`` in each step of the example below? What type is ``var`` at the end of the program?\n",
45 | "```python\n",
46 | "var = 1\n",
47 | "var *= 100\n",
48 | "var /= 2\n",
49 | "var = int(var)\n",
50 | "var = str(var)\n",
51 | "var = var + \" points to Gryffindor!\"\n",
52 | "print(var)\n",
53 | "```"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": 1,
59 | "metadata": {},
60 | "outputs": [
61 | {
62 | "name": "stdout",
63 | "output_type": "stream",
64 | "text": [
65 | "50 points to Gryffindor!\n",
66 | "
\n",
412 | " \n",
413 | "
\n",
525 | "\n",
414 | " \n",
422 | " \n",
423 | " \n",
424 | " \n",
415 | " patient_id \n",
416 | " treatment \n",
417 | " hospital \n",
418 | " convalescence_days \n",
419 | " age \n",
420 | " satisfaction \n",
421 | " \n",
425 | " \n",
433 | " 0 \n",
426 | " 402109 \n",
427 | " A \n",
428 | " Rigshospitalet \n",
429 | " 15.0 \n",
430 | " 68 \n",
431 | " 3 \n",
432 | " \n",
434 | " \n",
442 | " 1 \n",
435 | " 092070 \n",
436 | " A \n",
437 | " Rigshospitalet \n",
438 | " 13.0 \n",
439 | " 74 \n",
440 | " 5 \n",
441 | " \n",
443 | " \n",
451 | " 2 \n",
444 | " 994082 \n",
445 | " B \n",
446 | " Herlev \n",
447 | " 27.0 \n",
448 | " 76 \n",
449 | " 2 \n",
450 | " \n",
452 | " \n",
460 | " 3 \n",
453 | " 843094 \n",
454 | " A \n",
455 | " Herlev \n",
456 | " 30.0 \n",
457 | " 65 \n",
458 | " 5 \n",
459 | " \n",
461 | " \n",
469 | " 4 \n",
462 | " 369360 \n",
463 | " B \n",
464 | " Rigshospitalet \n",
465 | " 21.0 \n",
466 | " 68 \n",
467 | " 5 \n",
468 | " \n",
470 | " \n",
478 | " 5 \n",
471 | " 688213 \n",
472 | " B \n",
473 | " Rigshospitalet \n",
474 | " 29.0 \n",
475 | " 77 \n",
476 | " 3 \n",
477 | " \n",
479 | " \n",
487 | " 6 \n",
480 | " 197347 \n",
481 | " A \n",
482 | " Herlev \n",
483 | " 25.0 \n",
484 | " 65 \n",
485 | " 5 \n",
486 | " \n",
488 | " \n",
496 | " 7 \n",
489 | " 374793 \n",
490 | " A \n",
491 | " Rigshospitalet \n",
492 | " NaN \n",
493 | " 67 \n",
494 | " 5 \n",
495 | " \n",
497 | " \n",
505 | " 8 \n",
498 | " 759063 \n",
499 | " B \n",
500 | " Rigshospitalet \n",
501 | " 16.0 \n",
502 | " 75 \n",
503 | " 4 \n",
504 | " \n",
506 | " \n",
514 | " 9 \n",
507 | " 121219 \n",
508 | " B \n",
509 | " None \n",
510 | " 27.0 \n",
511 | " 68 \n",
512 | " 4 \n",
513 | " \n",
515 | " \n",
523 | " \n",
524 | "10 \n",
516 | " 427898 \n",
517 | " B \n",
518 | " Rigshospitalet \n",
519 | " 15.0 \n",
520 | " 74 \n",
521 | " 5 \n",
522 | "