├── .gitattributes ├── Social_Network_Ads.pkl ├── myfunctions.py ├── README.md ├── .gitignore ├── data └── student.csv ├── 99Python_Colab_data.ipynb ├── 04Python_Modules.ipynb ├── 01Python_Intro.ipynb ├── 02Python_Functions.ipynb ├── 03Python_Data.ipynb ├── LICENSE └── 05Python_Dataframes.ipynb /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /Social_Network_Ads.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterhgruber/python-intro-colab/HEAD/Social_Network_Ads.pkl -------------------------------------------------------------------------------- /myfunctions.py: -------------------------------------------------------------------------------- 1 | # myfunctions - a simple Python module 2 | # peter.gruber@usi.ch, 2024-04 3 | # See here: https://github.com/peterhgruber/python-intro-colab 4 | # Functions from: https://github.com/peterhgruber/python-intro-colab/blob/main/02Python_Functions.ipynb 5 | 6 | def f(x): 7 | return x**2 - 2*x + 1 8 | 9 | def unif_dens(x,a,b): 10 | # Case 1: x is outside the interval 11 | if x < a or x > b: 12 | return 0 13 | # Case 2: x is inside the interval 14 | else: 15 | return 1/(b-a) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python Intro for Colab 2 | peter.gruber@usi.ch, 2024-04-01 3 | 4 | Python Intro for Colab is a set of Jupyter Notebooks that can directly be executed in Google Colaboratory (https://colab.research.google.com), rendering the first steps in Python as easy as possible. 
5 | 6 | 7 | ## Getting started 8 | * Have your Google account ready 9 | * If you do not have one, [get it here for free](https://accounts.google.com/signup/v2/createaccount) 10 | * Work through the notebooks in numerical order 11 | 12 | ## Chapters 13 | Simply click on *Open in Colab* 14 | 15 | * **01 – Getting started**   Open In Colab 16 | * **02 – Functions**   Open In Colab 17 | * **03 – Data types and structures**   Open In Colab 18 | * **04 – Modules and Packages**  Open In Colab 19 | * **05 – Dataframes**  Open In Colab 20 | * **Appendix: working with files in Colab**   Open In Colab 21 | 22 | ## Philosophy 23 | * Quick intro to Python 24 | * With minimum obstacles 25 | 26 | 27 | ## License 28 | 29 | [![CC BY-SA 4.0][cc-by-sa-shield]][cc-by-sa] 30 | 31 | (c) Peter H. Gruber. This work is licensed under a 32 | [Creative Commons Attribution-ShareAlike 4.0 International License][cc-by-sa]. 33 | 34 | [![CC BY-SA 4.0][cc-by-sa-image]][cc-by-sa] 35 | 36 | [cc-by-sa]: http://creativecommons.org/licenses/by-sa/4.0/ 37 | [cc-by-sa-image]: https://licensebuttons.net/l/by-sa/4.0/88x31.png 38 | [cc-by-sa-shield]: https://img.shields.io/badge/License-CC%20BY--SA%204.0-lightgrey.svg 39 | 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore Mac system files 2 | .DS_Store 3 | 4 | ## Config and credentials 5 | credentials 6 | credentials* 7 | 8 | ## Solutions 9 | *_solution* 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | **/.ipynb_checkpoints 16 | *-checkpoint.ipynb 17 | *-checkpoint.html 18 | 19 | ## Internal versions 20 | *_internal* 21 | *_internal/** 22 | .webloc 23 | 24 | # C extensions 25 | *.so 26 | 27 | # Distribution / packaging 28 | .Python 29 | build/ 30 | develop-eggs/ 31 | dist/ 32 | downloads/ 33 | eggs/ 34 | .eggs/ 35 | lib/ 36 | lib64/ 37 | parts/ 38 | sdist/ 39 |
var/ 40 | wheels/ 41 | share/python-wheels/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | MANIFEST 46 | 47 | # PyInstaller 48 | # Usually these files are written by a python script from a template 49 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 50 | *.manifest 51 | *.spec 52 | 53 | # Installer logs 54 | pip-log.txt 55 | pip-delete-this-directory.txt 56 | 57 | # Unit test / coverage reports 58 | htmlcov/ 59 | .tox/ 60 | .nox/ 61 | .coverage 62 | .coverage.* 63 | .cache 64 | nosetests.xml 65 | coverage.xml 66 | *.cover 67 | *.py,cover 68 | .hypothesis/ 69 | .pytest_cache/ 70 | cover/ 71 | 72 | # Translations 73 | *.mo 74 | *.pot 75 | 76 | # Django stuff: 77 | *.log 78 | local_settings.py 79 | db.sqlite3 80 | db.sqlite3-journal 81 | 82 | # Flask stuff: 83 | instance/ 84 | .webassets-cache 85 | 86 | # Scrapy stuff: 87 | .scrapy 88 | 89 | # Sphinx documentation 90 | docs/_build/ 91 | 92 | # PyBuilder 93 | .pybuilder/ 94 | target/ 95 | 96 | # Jupyter Notebook 97 | .ipynb_checkpoints 98 | 99 | # IPython 100 | profile_default/ 101 | ipython_config.py 102 | 103 | # pyenv 104 | # For a library or package, you might want to ignore these files since the code is 105 | # intended to run in multiple environments; otherwise, check them in: 106 | # .python-version 107 | 108 | # pipenv 109 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 110 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 111 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 112 | # install all needed dependencies. 113 | #Pipfile.lock 114 | 115 | # poetry 116 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 117 | # This is especially recommended for binary packages to ensure reproducibility, and is more 118 | # commonly ignored for libraries. 
119 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 120 | #poetry.lock 121 | 122 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 123 | __pypackages__/ 124 | 125 | # Celery stuff 126 | celerybeat-schedule 127 | celerybeat.pid 128 | 129 | # SageMath parsed files 130 | *.sage.py 131 | 132 | # Environments 133 | .env 134 | .venv 135 | env/ 136 | venv/ 137 | ENV/ 138 | env.bak/ 139 | venv.bak/ 140 | 141 | # Spyder project settings 142 | .spyderproject 143 | .spyproject 144 | 145 | # Rope project settings 146 | .ropeproject 147 | 148 | # mkdocs documentation 149 | /site 150 | 151 | # mypy 152 | .mypy_cache/ 153 | .dmypy.json 154 | dmypy.json 155 | 156 | # Pyre type checker 157 | .pyre/ 158 | 159 | # pytype static type analyzer 160 | .pytype/ 161 | 162 | # Cython debug symbols 163 | cython_debug/ 164 | 165 | # PyCharm 166 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 167 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 168 | # and can be added to the global gitignore or merged into this file. For a more nuclear 169 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
170 | #.idea/ 171 | -------------------------------------------------------------------------------- /data/student.csv: -------------------------------------------------------------------------------- 1 | "id","isMale","isCity","math","stat","econ","art","engl" 2 | "1",0,1,95.3,82,73.6,86.5,68.6 3 | "2",0,0,53.7,63,81,38.7,38.9 4 | "3",0,0,62,74,74.1,52.5,55.1 5 | "4",1,0,22.2,59,35.2,74.9,84.5 6 | "5",1,0,76,86,72.7,66.2,64.4 7 | "6",0,0,68.1,61,76.2,62.1,87.6 8 | "7",0,1,87.1,77,90.6,97.8,64.5 9 | "8",1,0,70.5,96,78.1,40,50.1 10 | "9",0,0,88.8,79,81.7,82.7,63.7 11 | "10",1,1,76.9,81,87.2,54.8,44.3 12 | "11",1,1,79.5,84,100,82.6,58.2 13 | "12",0,0,46.2,47,64,52.5,65.4 14 | "13",1,0,41.9,78,57.3,65.1,74.1 15 | "14",0,0,15.1,33,10,54.5,95 16 | "15",1,0,73.3,75,95,47.4,31.7 17 | "16",1,1,76,96,92.7,77.2,43.9 18 | "17",1,1,57.9,80,81,43.8,43.2 19 | "18",1,1,69,78,81.4,31.5,43.2 20 | "19",0,0,57.4,66,78,29.8,61.9 21 | "20",0,0,63.4,64,73.6,60.1,80.8 22 | "21",0,1,95.3,89,64.9,90,64.1 23 | "22",0,1,73.2,71,86.7,56.4,47.3 24 | "23",1,1,77.4,69,78.1,69,42.5 25 | "24",0,0,66.5,61,77.5,53.2,50.8 26 | "25",0,0,88.3,77,82.6,97.3,55.5 27 | "26",0,0,71.3,70,78.6,79,90.2 28 | "27",0,0,89.4,77,90.5,76.1,63.6 29 | "28",1,0,49.4,43,63.9,53.6,61.8 30 | "29",1,0,63,77,78.8,62.8,45.4 31 | "30",0,0,54.7,71,59.3,47.5,72.2 32 | "31",1,1,78.5,79,84.8,63,47.4 33 | "32",0,0,71.1,45,88.3,37.1,65.3 34 | "33",0,1,89.4,78,78.8,97.8,99.7 35 | "34",0,0,84.2,75,73.3,94.3,75.2 36 | "35",1,1,48.6,65,66.4,55.2,56 37 | "36",1,0,56.7,76,80,48.1,50.6 38 | "37",1,1,64.9,94,69.9,83.6,81.8 39 | "38",0,0,84.4,83,88.7,67,43.5 40 | "39",1,1,42.8,58,61.6,43.4,56.4 41 | "40",0,0,67,61,73.4,74.1,81.2 42 | "41",1,0,35.3,65,53.3,52,69.9 43 | "42",0,1,54.9,60,70.5,48.6,64.3 44 | "43",0,1,88,87,86.2,97.6,77.1 45 | "44",0,1,76.3,73,69.8,86.7,79.4 46 | "45",0,0,84.3,65,96.7,91.3,68 47 | "46",0,0,37.8,45,49.3,57.9,85.4 48 | "47",0,1,89.1,73,76.3,88,76.2 49 | "48",1,0,36.6,67,46.6,58.7,59.2 50 | 
"49",0,0,72.2,79,72.7,56.2,59.2 51 | "50",1,0,61.5,81,69.7,49.4,43.3 52 | "51",1,0,42.3,54,71.4,56.4,70.6 53 | "52",0,0,88.4,89,90.7,77.5,69.5 54 | "53",1,1,72.2,86,87.4,35.1,41.3 55 | "54",0,1,76.8,84,78.5,51.4,43 56 | "55",0,1,88.2,77,65.1,71.7,56.2 57 | "56",0,0,90.2,82,89.8,96.6,89.6 58 | "57",1,1,60.2,80,78.5,39.2,59.6 59 | "58",0,0,11.8,44,22.6,74.4,100 60 | "59",0,1,81.3,60,68.2,76.7,48.5 61 | "60",0,0,72.3,23,88.9,48,45.4 62 | "61",1,1,75.9,77,65.9,62.5,56.5 63 | "62",0,1,37,41,62.3,34.8,60 64 | "63",0,0,85.6,69,76.1,68.5,70.1 65 | "64",0,1,94.7,73,75.5,80.4,60.1 66 | "65",1,0,67.3,81,81.8,28.5,44.5 67 | "66",0,0,73.9,67,92.8,41.1,54.1 68 | "67",0,1,69,75,93.2,48.5,57.5 69 | "68",0,0,76.5,32,67.8,91.2,88.8 70 | "69",0,0,69.4,64,69.7,72.4,86.5 71 | "70",0,0,61.3,37,68.9,34.6,62.4 72 | "71",1,1,71.2,79,80.4,70.5,68.3 73 | "72",0,1,82.3,52,86.5,84.8,77.9 74 | "73",0,0,82.6,82,84.2,87.8,67.9 75 | "74",0,0,75.4,79,84,62.2,65.2 76 | "75",1,0,63.1,72,80.9,55.7,58.8 77 | "76",0,0,61.8,52,91.5,59.6,75.6 78 | "77",0,0,86.3,90,74.9,80.8,60.1 79 | "78",0,0,54.8,52,72.1,77.8,58.1 80 | "79",1,0,52.1,69,85.7,40.3,58.4 81 | "80",0,0,75.8,53,82.4,52.3,44.8 82 | "81",0,1,52.8,60,71,47.5,72.1 83 | "82",0,0,72.5,75,75.8,95.4,66.2 84 | "83",1,1,74.3,81,95.1,69.8,76.3 85 | "84",0,0,72.4,82,98,58.9,49.3 86 | "85",0,1,90.7,75,73.8,58.1,45.2 87 | "86",0,1,76.9,70,80.9,68.6,52.2 88 | "87",0,1,94.5,79,70.3,69.6,68.1 89 | "88",0,0,10,15,31.8,59.2,93.1 90 | "89",0,0,80.9,64,83.7,86.2,67.5 91 | "90",0,1,98,96,78.2,82.3,49 92 | "91",1,1,47.6,49,69.5,44.7,68 93 | "92",0,1,89.1,56,97.8,69.9,82.4 94 | "93",1,0,64.8,54,81.9,48,66.4 95 | "94",1,0,40.7,62,57.7,72.8,69.3 96 | "95",0,1,85.3,88,71.5,88.1,66.8 97 | "96",1,1,67.4,73,85.3,43.2,56.2 98 | "97",1,1,58.9,81,64.9,47.3,47 99 | "98",1,0,40.4,53,50.7,62.9,67.2 100 | "99",1,1,80.3,91,90.9,51.1,54.2 101 | "100",0,0,43,42,81.3,49.3,75.8 102 | "101",1,0,70.7,77,98.8,49.1,56.8 103 | "102",0,1,91.5,73,80.8,69.9,72.9 104 | 
"103",1,0,73.4,97,81.6,58.8,52.3 105 | "104",0,0,76.1,71,90.7,64.8,71.8 106 | "105",1,0,26.6,34,43.3,46.3,71.7 107 | "106",1,0,72.3,89,87.5,64.2,67.6 108 | "107",0,0,67.2,78,70.7,61.8,72.7 109 | "108",0,0,33.6,29,34,65.3,83.7 110 | "109",0,0,61,76,58.9,67.3,55.4 111 | "110",1,0,59.5,79,84.6,46.5,55.6 112 | "111",0,0,77.2,63,75.3,68.3,54.7 113 | "112",1,1,76.5,67,91.5,66.4,57 114 | "113",0,1,97.2,95,72,100,59.4 115 | "114",0,1,52,66,62.9,19,28 116 | "115",0,0,36.8,40,58,41.2,69.3 117 | "116",0,0,85.8,80,71.4,69,65.7 118 | "117",1,1,36.2,25,55.7,28.8,69.4 119 | "118",0,1,91.1,86,67.2,81.3,62.9 120 | "119",1,1,80.4,93,78,67.4,66.3 121 | "120",0,0,73.1,53,89.7,65,84.2 122 | "121",0,1,73.9,76,78.8,69.4,65.3 123 | "122",1,0,68.6,94,70.1,66.5,68.9 124 | "123",1,0,63.5,78,99.5,60.2,48.6 125 | "124",1,1,80.5,98,90.4,78.9,58 126 | "125",1,0,66.3,88,89.3,68.1,59.7 127 | "126",1,1,73.1,89,96.6,79.8,47.3 128 | "127",0,1,62.8,74,86.9,65.9,63 129 | "128",1,0,53.4,63,59.5,52.2,73.7 130 | "129",0,0,76.6,81,92.7,46.9,49.1 131 | "130",0,0,89.1,84,74.9,63.1,39 132 | "131",1,0,59.2,74,69.9,35.7,46.1 133 | "132",0,1,39,43,57.7,59.5,66.6 134 | "133",1,0,60.1,66,95.8,45.6,30.7 135 | "134",0,0,75.7,74,76,72.7,82.2 136 | "135",1,0,67,84,80.5,35.2,44.6 137 | "136",1,0,66.9,74,81.4,61.1,76.4 138 | "137",1,0,50.3,82,76,39.6,49.7 139 | "138",0,1,82.3,82,75,75.6,58.7 140 | "139",0,0,80.8,75,78.5,79,75.8 141 | "140",0,0,76.9,76,80.8,52.2,42.7 142 | "141",1,1,54.3,82,72.7,24.2,34.7 143 | "142",0,0,71.6,68,66.8,31.5,52 144 | "143",0,1,75.3,61,72.5,41.2,40.7 145 | "144",0,0,55.1,65,76.2,34.1,52.1 146 | "145",0,0,84.6,74,65.4,85.5,57.6 147 | "146",0,0,89.4,87,94,77.6,52.3 148 | "147",0,0,79.9,64,68.8,51.2,58 149 | "148",0,0,55.4,57,78,39.9,59 150 | "149",1,0,66.2,84,84.3,88,85.1 151 | "150",1,1,63.4,79,77.4,39.6,33.9 152 | "151",1,0,70,92,84.2,46.6,31.3 153 | "152",0,0,74.1,78,84.6,40.2,30.5 154 | "153",0,0,75.7,70,83.3,59.3,64 155 | "154",0,1,85.6,86,85,74.6,46.1 156 | "155",0,0,85.9,70,66.7,93,81.2 
157 | "156",0,0,85.1,79,84.2,74.2,64.8 158 | "157",1,1,58.7,66,66.5,28.1,38.5 159 | "158",1,0,37.6,59,69,50.5,77 160 | "159",1,0,44.8,62,86.1,52.1,77 161 | "160",1,1,55.5,67,71.6,26.3,58.2 162 | "161",1,0,55.3,87,66.6,31.4,31.3 163 | "162",0,0,83.3,71,79.6,67.5,77.1 164 | "163",1,1,61.1,65,94.1,29.6,64 165 | "164",1,0,27.6,55,53.8,53,88.5 166 | "165",1,1,62.9,79,77,49.1,75.4 167 | "166",1,1,50.5,75,74.3,37.9,76.5 168 | "167",0,0,78.5,70,96.4,49.2,46.9 169 | "168",0,1,86.7,55,91.5,84.4,71.3 170 | "169",1,1,81.8,98,97.9,66.9,57 171 | "170",1,1,72.8,95,76.2,55.3,53.5 172 | "171",1,0,54.8,78,64.9,34.9,48.4 173 | "172",0,1,75.3,76,89.6,83.8,72.9 174 | "173",0,0,87.7,75,72.7,87,71.9 175 | "174",1,1,69.3,71,77.6,45.4,49 176 | "175",0,0,75,48,85.9,60.8,56.2 177 | "176",1,0,69.1,84,74.5,32.7,40.9 178 | "177",0,1,57.8,72,81.5,41.3,79.1 179 | "178",1,0,40.1,49,54.7,43.7,78.4 180 | "179",0,1,92.6,91,90.9,100,74.1 181 | "180",1,0,50.5,56,66.5,56.8,59.1 182 | "181",0,0,68.2,55,73.8,52.8,53.9 183 | "182",0,0,79.1,58,93.9,73.1,55.6 184 | "183",1,0,39.3,52,54.5,37.4,70.4 185 | "184",1,0,47.4,73,51.3,53.8,72.6 186 | "185",1,1,22.4,31,60.6,63.3,91 187 | "186",1,0,63.3,78,96,73.2,74.4 188 | "187",1,1,70.3,77,73.3,59.8,54.4 189 | "188",1,1,64,93,84.8,48.5,42.6 190 | "189",1,1,44.4,72,85.9,19,45.3 191 | "190",1,1,76,94,100.8,72.5,42.6 192 | "191",1,0,71.8,75,86.9,48.4,44 193 | "192",1,1,82,96,82.8,62.2,33.5 194 | "193",0,1,78.6,64,89.3,73.9,70.9 195 | "194",1,0,19.6,56,35,38.2,77.3 196 | "195",0,1,84.7,61,64.1,73.6,60.8 197 | "196",1,0,38.8,64,56.1,39.8,55.9 198 | "197",0,0,40.3,47,62,62.7,78.6 199 | "198",1,1,46.8,64,63.4,44.3,71.1 200 | "199",0,1,91.6,88,74.1,77.2,54.7 201 | "200",1,1,53.4,73,67.6,44.4,53.9 202 | "201",1,0,71.5,86,81,31.2,46.7 203 | "202",0,0,67.9,67,94.6,66.5,70.2 204 | "203",1,0,54.3,62,61,40,46.5 205 | "204",0,0,85,74,59.7,87.5,82.1 206 | "205",0,1,31.3,43,48.4,50.7,82.5 207 | "206",1,0,58.3,82,83.9,51.8,60.7 208 | "207",1,1,66.3,93,92.2,68.6,56.1 209 | 
"208",1,0,50.9,71,64,32.7,49.9 210 | "209",0,0,57.5,56,72.2,59.1,61 211 | "210",0,0,76.6,56,90.5,62.8,51.4 212 | -------------------------------------------------------------------------------- /99Python_Colab_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "22f282b7", 6 | "metadata": { 7 | "id": "22f282b7" 8 | }, 9 | "source": [ 10 | " \"Open " 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "1f0d16e0", 16 | "metadata": { 17 | "id": "1f0d16e0" 18 | }, 19 | "source": [ 20 | "# Introduction to Python\n", 21 | "#### Loading and saving files with Google Colab – Part 99\n", 22 | "Peter Gruber (peter.gruber@usi.ch), 2024-04-01" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "19637220", 28 | "metadata": { 29 | "id": "19637220" 30 | }, 31 | "source": [ 32 | "### Step 1: Connect to Google Drive\n", 33 | "* Run the cell below\n", 34 | "* A dialog will warn you that this notebook has not been created by Google\n", 35 | "* A second dialog will pop up asking if you want to connect this notebook to your Google Drive\n", 36 | " 1. Select `Connect to Google Drive`\n", 37 | " 2. Click on your Google account\n", 38 | " 3. 
Click twice on `Continue`" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "id": "09169106", 45 | "metadata": { 46 | "id": "09169106" 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "from google.colab import drive\n", 51 | "drive.mount('/content/drive')" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "id": "03c0637c", 57 | "metadata": { 58 | "id": "03c0637c" 59 | }, 60 | "source": [ 61 | "### Step 2: Create a directory\n", 62 | "* Using the `os` module" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "id": "bdb6fefb", 69 | "metadata": { 70 | "id": "bdb6fefb" 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "import os\n", 75 | "os.mkdir(\"/content/drive/My Drive/ColabData\")" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "id": "5ca1cc0d", 81 | "metadata": { 82 | "id": "5ca1cc0d" 83 | }, 84 | "source": [ 85 | "### Step 3: Write to file\n", 86 | "* For this example, we create a Pandas dataframe and write it to a CSV file" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "id": "d67d04d6", 93 | "metadata": { 94 | "id": "d67d04d6" 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "import pandas as pd\n", 99 | "\n", 100 | "# Sample data for the DataFrame\n", 101 | "data = {\n", 102 | " 'Country': ['India', 'China', 'USA', 'Indonesia'],\n", 103 | " 'Population': [1428, 1425, 340, 277]\n", 104 | "}\n", 105 | "\n", 106 | "# Create DataFrame\n", 107 | "df = pd.DataFrame(data)\n", 108 | "\n", 109 | "# Write DataFrame to CSV\n", 110 | "df.to_csv('/content/drive/MyDrive/ColabData/sample_data.csv', index=False)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "source": [ 116 | "+ Now is a good moment to check your new folder and the file on the web interface of your Google drive: https://drive.google.com/drive/my-drive\n", 117 | "\n" 118 | ], 119 | "metadata": { 120 | "id": "cbetI5GfFexX" 121 | }, 122 | "id": "cbetI5GfFexX" 123 | }, 124 | { 
125 | "cell_type": "markdown", 126 | "id": "be2bd8c0", 127 | "metadata": { 128 | "id": "be2bd8c0" 129 | }, 130 | "source": [ 131 | "### Step 4: Verify that directory and file are present" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "id": "9b233d6d", 138 | "metadata": { 139 | "id": "9b233d6d" 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "os.path.isdir('/content/drive/My Drive/ColabData')" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "id": "ab09c42b", 150 | "metadata": { 151 | "id": "ab09c42b" 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "os.path.isfile('/content/drive/My Drive/ColabData/sample_data.csv')" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "id": "c15dc41b", 161 | "metadata": { 162 | "id": "c15dc41b" 163 | }, 164 | "source": [ 165 | "### Step 5: Load file" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "id": "6f6296a4", 172 | "metadata": { 173 | "id": "6f6296a4" 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "# Load data from CSV file into DataFrame\n", 178 | "df2 = pd.read_csv('/content/drive/My Drive/ColabData/sample_data.csv')\n", 179 | "\n", 180 | "# Display the loaded DataFrame\n", 181 | "print(df2)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "id": "893c36f5", 187 | "metadata": { 188 | "id": "893c36f5" 189 | }, 190 | "source": [ 191 | "### *Step 6: Obtain more information about the file" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "id": "1a7bf1f3", 198 | "metadata": { 199 | "id": "1a7bf1f3" 200 | }, 201 | "outputs": [], 202 | "source": [ 203 | "import time\n", 204 | "path = '/content/drive/MyDrive/ColabData/sample_data.csv'" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "id": "4d669a0d", 211 | "metadata": { 212 | "id": "4d669a0d" 213 | }, 214 | "outputs": [], 215 | "source": [ 
216 | "# File size in bytes\n", 217 | "os.path.getsize(path)" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "id": "e508a95d", 224 | "metadata": { 225 | "id": "e508a95d" 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "# Last access time\n", 230 | "time.ctime(os.path.getatime(path))" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "id": "1e9d1de4", 237 | "metadata": { 238 | "id": "1e9d1de4" 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "# Last modification time\n", 243 | "time.ctime(os.path.getmtime(path))" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "id": "3048e766", 250 | "metadata": { 251 | "id": "3048e766" 252 | }, 253 | "outputs": [], 254 | "source": [ 255 | "# Last change time\n", 256 | "time.ctime(os.path.getctime(path))" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "id": "4b795ad7", 262 | "metadata": { 263 | "id": "4b795ad7" 264 | }, 265 | "source": [ 266 | "#### Acknowledgement\n", 267 | "* Inspired by https://saturncloud.io/blog/how-to-save-files-from-google-colab-to-google-drive-a-stepbystep-guide/" 268 | ] 269 | } 270 | ], 271 | "metadata": { 272 | "kernelspec": { 273 | "display_name": "Python 3 (ipykernel)", 274 | "language": "python", 275 | "name": "python3" 276 | }, 277 | "language_info": { 278 | "codemirror_mode": { 279 | "name": "ipython", 280 | "version": 3 281 | }, 282 | "file_extension": ".py", 283 | "mimetype": "text/x-python", 284 | "name": "python", 285 | "nbconvert_exporter": "python", 286 | "pygments_lexer": "ipython3", 287 | "version": "3.11.5" 288 | }, 289 | "colab": { 290 | "provenance": [] 291 | } 292 | }, 293 | "nbformat": 4, 294 | "nbformat_minor": 5 295 | } -------------------------------------------------------------------------------- /04Python_Modules.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | 
"cell_type": "markdown", 5 | "id": "22f282b7", 6 | "metadata": { 7 | "id": "22f282b7" 8 | }, 9 | "source": [ 10 | "\n", 11 | " \"Open\n", 12 | "" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "id": "1f0d16e0", 18 | "metadata": { 19 | "id": "1f0d16e0" 20 | }, 21 | "source": [ 22 | "# Introduction to Python\n", 23 | "### Main concepts of Python – Part 04\n", 24 | "Peter Gruber (peter.gruber@usi.ch), 2024-04-01" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "id": "d06204e0", 30 | "metadata": { 31 | "id": "d06204e0" 32 | }, 33 | "source": [ 34 | "## Python modules and packages\n", 35 | "* **Module** = file containing (many) Python functions\n", 36 | "* **Package** = larger collection of modules + extra functionality\n", 37 | " * Sometimes used synonymously\n", 38 | "\n", 39 | "#### Famous modules\n", 40 | "* `math`\n", 41 | "* `numpy`\n", 42 | "* `pandas`\n", 43 | "* `matplotlib`\n", 44 | "* `plotly`\n", 45 | "* `scipy`\n", 46 | "* `tensorflow`\n" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "id": "17d1fd5b", 52 | "metadata": { 53 | "id": "17d1fd5b" 54 | }, 55 | "source": [ 56 | "## Step 1: Install a package\n", 57 | "* Download to your computer\n", 58 | "* Usually done only once\n", 59 | " * No harm if done multiple times\n", 60 | "* Use *Python Integrated Package manager* = `pip`\n", 61 | "\n", 62 | "\n", 63 | "#### Note\n", 64 | "* Strictly speaking **not** a Python command.\n", 65 | "* Must run at level of operating system (i.e.
outside Python)\n", 66 | "* However possible to run inside Python using `!` prefix\n", 67 | " * May work without `!` even inside Python (confusing)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "id": "7a13e135", 74 | "metadata": { 75 | "id": "7a13e135" 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "# Install the Pandas package\n", 80 | "!pip install pandas" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "id": "d0b2ea84", 86 | "metadata": { 87 | "id": "d0b2ea84" 88 | }, 89 | "source": [ 90 | "## Step 2: Importing a package\n", 91 | "* **Remember:** We must \"run\" *(import)* a Python function before we can use it\n", 92 | "* This is done by import\n", 93 | " * Usually at the top of the program ⬅︎ people know what to install\n", 94 | " * Run every time\n", 95 | "\n" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "id": "e9b8a9eb", 102 | "metadata": { 103 | "id": "e9b8a9eb" 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "import pandas" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "id": "2eaf7f83", 113 | "metadata": { 114 | "id": "2eaf7f83" 115 | }, 116 | "source": [ 117 | "#### 2.2 Import widely used functions\n", 118 | "* Tired of `module.function()`? Import specific functions ..."
119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "id": "a249d0c4", 125 | "metadata": { 126 | "id": "a249d0c4" 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "from math import sqrt, pi\n", 131 | "print (sqrt(9) )\n", 132 | "print (pi)\n" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "id": "d3ba8def", 139 | "metadata": { 140 | "id": "d3ba8def" 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "# This will not work ...\n", 145 | "print (math.log(1)) # we did not import log" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "id": "118937a1", 151 | "metadata": { 152 | "id": "118937a1" 153 | }, 154 | "source": [ 155 | "#### 2.3 Import all (not a good idea)\n", 156 | "* Even possible: import all functions using `from module import *`\n", 157 | "* May lead to errors $\\leftrightarrow$ considered *bad Python*\n", 158 | "\n", 159 | "`from math import *`" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "id": "0af170ce", 165 | "metadata": { 166 | "id": "0af170ce" 167 | }, 168 | "source": [ 169 | "### 2.4 Understanding Python imports\n", 170 | "\n", 171 | "* It depends on the `import` statement\n", 172 | " * This is usually found at the top\n", 173 | " * Programmer chooses what to import\n", 174 | "* `import math` $\\rightarrow$ `math.log(1)`\n", 175 | "* `from math import sqrt` $\\rightarrow$ `sqrt(9)`\n", 176 | "\n", 177 | "**Note:** the long expression `math.sqrt(9)` will always work\n" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "id": "71fec59f", 184 | "metadata": { 185 | "id": "71fec59f" 186 | }, 187 | "outputs": [], 188 | "source": [ 189 | "import math\n", 190 | "from math import sqrt, pi\n", 191 | "print (sqrt(9) )\n", 192 | "print (pi)\n", 193 | "print (math.log(1))" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "id": "7dce5222", 200 | "metadata": { 201 | "id": 
"7dce5222" 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "# The long expression always works\n", 206 | "print (sqrt(9) )\n", 207 | "print (math.sqrt(9) )" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "id": "b73c24a1", 213 | "metadata": { 214 | "id": "b73c24a1" 215 | }, 216 | "source": [ 217 | "## 3 Importing your own module\n", 218 | "* Collect your functions in Python file\n", 219 | "* Same import command\n", 220 | "\n", 221 | "### ❗️ Manual steps*\n", 222 | "* Create a file `myfunctions.py`\n", 223 | "* Copy the functions `f()` and `unif_dens()` from Part 2 \n", 224 | "\"Open\n", 225 | "* Upload the file to Colab\n", 226 | " * **Note:** You can download this file here: https://github.com/peterhgruber/python-intro-colab/blob/main/myfunctions.py" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "id": "1baefd74", 233 | "metadata": { 234 | "id": "1baefd74" 235 | }, 236 | "outputs": [], 237 | "source": [ 238 | "# Now we can import our own module (here the * is OK because we know what is in the module)\n", 239 | "from myfunctions import *" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "id": "c3367dfc", 246 | "metadata": { 247 | "id": "c3367dfc" 248 | }, 249 | "outputs": [], 250 | "source": [ 251 | "# ... 
and use our functions\n", 252 | "f(2)" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "id": "68cc0115", 259 | "metadata": { 260 | "id": "68cc0115" 261 | }, 262 | "outputs": [], 263 | "source": [ 264 | "unif_dens(0,-10,10)" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "id": "1299a535", 270 | "metadata": { 271 | "id": "1299a535" 272 | }, 273 | "source": [ 274 | "## Appendix: How did we know the `pip` command?\n", 275 | "* Identify any `import xxx` expressions\n", 276 | "* Package name is `xxx`\n", 277 | " * Usually lower case\n", 278 | "\n", 279 | "#### Example\n", 280 | "* You read `import pandas` in a program\n", 281 | "* You need to ..." 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "id": "7fb4f670", 287 | "metadata": { 288 | "id": "7fb4f670" 289 | }, 290 | "source": [ 291 | "!pip install pandas" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "id": "b7357875", 297 | "metadata": { 298 | "id": "b7357875" 299 | }, 300 | "source": [ 301 | "#### Exception: crazy package names\n", 302 | "* Sometimes `import` and `pip` need different names (confusing)\n", 303 | "\n", 304 | "\n", 305 | "**Example:** the pillow package\n", 306 | "* You read `import PIL`\n", 307 | "* The name of the package to be installed is `pillow`\n", 308 | " * No way to know this without googling\n" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "id": "346bc199", 315 | "metadata": { 316 | "id": "346bc199" 317 | }, 318 | "outputs": [], 319 | "source": [ 320 | "# This will not work\n", 321 | "!pip install PIL" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": null, 327 | "id": "808f9e55", 328 | "metadata": { 329 | "id": "808f9e55" 330 | }, 331 | "outputs": [], 332 | "source": [ 333 | "# Example: a confusing package\n", 334 | "!pip install pillow" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "source": [ 340 | "# But the import uses
PIL\n", 341 | "from PIL import Image" 342 | ], 343 | "metadata": { 344 | "id": "nIzBFU3P4QJ2" 345 | }, 346 | "id": "nIzBFU3P4QJ2", 347 | "execution_count": null, 348 | "outputs": [] 349 | }, 350 | { 351 | "cell_type": "markdown", 352 | "id": "88cc0027", 353 | "metadata": { 354 | "id": "88cc0027" 355 | }, 356 | "source": [ 357 | "## Appendix: How to upgrade a package?" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "id": "32e89cb1", 364 | "metadata": { 365 | "id": "32e89cb1" 366 | }, 367 | "outputs": [], 368 | "source": [ 369 | "# First step: upgrade pip\n", 370 | "!pip install --upgrade pip" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "id": "5cdc1a50", 377 | "metadata": { 378 | "id": "5cdc1a50" 379 | }, 380 | "outputs": [], 381 | "source": [ 382 | "# Get version of pandas package\n", 383 | "!pip list | grep pandas" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "id": "9cc5a293", 390 | "metadata": { 391 | "id": "9cc5a293" 392 | }, 393 | "outputs": [], 394 | "source": [ 395 | "# ... or with this command\n", 396 | "import pandas\n", 397 | "print(pandas. 
__version__)" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": null, 403 | "id": "1226676a", 404 | "metadata": { 405 | "id": "1226676a" 406 | }, 407 | "outputs": [], 408 | "source": [ 409 | "# Upgrade to latest version\n", 410 | "!pip install pandas --upgrade" 411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "id": "37e7ce8d", 416 | "metadata": { 417 | "id": "37e7ce8d" 418 | }, 419 | "source": [ 420 | "#### Installing a specific version of a package" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": null, 426 | "id": "561d468a", 427 | "metadata": { 428 | "id": "561d468a" 429 | }, 430 | "outputs": [], 431 | "source": [ 432 | "# List all available versions\n", 433 | "!pip index versions pandas" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": null, 439 | "id": "dac6e6c2", 440 | "metadata": { 441 | "id": "dac6e6c2" 442 | }, 443 | "outputs": [], 444 | "source": [ 445 | "# Install a specific version of pandas, e.g. 
2.2.1\n", 446 | "!pip install pandas==2.2.1" 447 | ] 448 | }, 449 | { 450 | "cell_type": "markdown", 451 | "id": "d5ca14d5", 452 | "metadata": { 453 | "id": "d5ca14d5" 454 | }, 455 | "source": [ 456 | "#### *Trouble-shooting an installation" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "id": "6eef39b0", 463 | "metadata": { 464 | "id": "6eef39b0" 465 | }, 466 | "outputs": [], 467 | "source": [ 468 | "# In case of problems\n", 469 | "!pip install --force-reinstall pandas" 470 | ] 471 | }, 472 | { 473 | "cell_type": "markdown", 474 | "id": "28890464", 475 | "metadata": { 476 | "id": "28890464" 477 | }, 478 | "source": [ 479 | "#### *More information about available packages" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": null, 485 | "id": "3b03a3f0", 486 | "metadata": { 487 | "id": "3b03a3f0" 488 | }, 489 | "outputs": [], 490 | "source": [ 491 | "# Get version of ALL packages (long)\n", 492 | "!pip list" 493 | ] 494 | } 495 | ], 496 | "metadata": { 497 | "kernelspec": { 498 | "display_name": "Python 3 (ipykernel)", 499 | "language": "python", 500 | "name": "python3" 501 | }, 502 | "language_info": { 503 | "codemirror_mode": { 504 | "name": "ipython", 505 | "version": 3 506 | }, 507 | "file_extension": ".py", 508 | "mimetype": "text/x-python", 509 | "name": "python", 510 | "nbconvert_exporter": "python", 511 | "pygments_lexer": "ipython3", 512 | "version": "3.11.5" 513 | }, 514 | "colab": { 515 | "provenance": [] 516 | } 517 | }, 518 | "nbformat": 4, 519 | "nbformat_minor": 5 520 | } -------------------------------------------------------------------------------- /01Python_Intro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github", 7 | "colab_type": "text" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 
| "id": "Bw3TfqTUa7go" 17 | }, 18 | "source": [ 19 | "# Introduction to Python\n", 20 | "### Main concepts of Python – Part 01\n", 21 | "Peter Gruber (peter.gruber@usi.ch), 2024-04-01\n", 22 | "\n", 23 | "* Jupyter Notebooks\n", 24 | "* The Google Colab environment\n", 25 | "* Variables\n", 26 | "* Operators" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": { 32 | "id": "-BuWgq08a7gp" 33 | }, 34 | "source": [ 35 | "## 0.1 Welcome to Jupyter\n", 36 | "+ *Notebooks* combine text, code and results\n", 37 | "- **Text** / **markdown** cells with text (*this one*)\n", 38 | " + Double-click to edit, type `esc` to leave\n", 39 | "- **Code** cells with code+results (*below*)\n", 40 | " + Always run from top to bottom\n", 41 | " + Use `shift`-`Return` to execute a cell and move to next one\n" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "source": [ 47 | "# Your first Python code\n", 48 | "# Shift-Return to run --> you will get a warning\n", 49 | "1+1" 50 | ], 51 | "metadata": { 52 | "colab": { 53 | "base_uri": "https://localhost:8080/" 54 | }, 55 | "id": "IjIkJj_PbHUp", 56 | "outputId": "de748833-adf7-42b6-8dff-e1c8dbbf3bac" 57 | }, 58 | "execution_count": null, 59 | "outputs": [ 60 | { 61 | "output_type": "execute_result", 62 | "data": { 63 | "text/plain": [ 64 | "2" 65 | ] 66 | }, 67 | "metadata": {}, 68 | "execution_count": 2 69 | } 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "source": [ 75 | "#### Exercise\n", 76 | "* Add an empty text cell below and write a short comment\n", 77 | " * Use the `+ Text` button at the bottom of the cell\n", 78 | "* Then add a code cell and calculate $3^2$" 79 | ], 80 | "metadata": { 81 | "id": "de782BLYa_9C" 82 | } 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": { 87 | "id": "B4OjqaLJa7gq" 88 | }, 89 | "source": [ 90 | "## 0.2 Welcome to Google Colaboratory\n", 91 | "* Google's data science infrastructure\n", 92 | " * Basic service free, pay for more power (CPU, GPU, RAM)\n", 93 | " * 
Store notebooks and data in Google drive\n", 94 | "* Few useful menu commands\n", 95 | " * File > Download > Download .ipynb\n", 96 | " * File > Save a copy in Drive\n", 97 | " * Runtime > Restart session\n", 98 | "* More info here: https://colab.research.google.com/notebooks/basic_features_overview.ipynb\n", 99 | "\n", 100 | "#### Exercise\n", 101 | "* Select the menu *File > Save a copy in Drive*\n", 102 | "* Store this notebook in your Google Drive\n", 103 | " + A new window with your copy of this notebook will open\n", 104 | "* Rename (click top left)\n", 105 | "* Continue working in your copy on the Google Drive\n", 106 | " * *Note:* You can identify the original Github copy by the cat icon in the top left and the Google Drive copy by the triangle in green-yellow-blue" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": { 112 | "id": "YWkPj9kHa7gq" 113 | }, 114 | "source": [ 115 | "## 1 Variables\n", 116 | "\n", 117 | "* Use `=` to assign a value\n", 118 | "* Created implicitly (=by assigning a value)\n", 119 | "* Names are **case-sensitive**\n", 120 | "* Show the value\n", 121 | " * Type its name\n", 122 | " * Use `print()`\n" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": { 129 | "id": "ol9NqOdAa7gq" 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "a = 1\n", 134 | "a" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "id": "C2GhijFJa7gq" 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "A" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": { 151 | "id": "H0rQLOtja7gr" 152 | }, 153 | "source": [ 154 | "### 1.1 List of variables\n", 155 | "* Only works in Jupyter: `%who` or `%whos`\n", 156 | "* So-called *line magic*, see here: https://ipython.readthedocs.io/en/stable/interactive/magics.html" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": { 
163 | "id": "0yGFbKMoa7gr" 164 | }, 165 | "outputs": [], 166 | "source": [ 167 | "%who" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": { 174 | "id": "Nhu1OD1ca7gr" 175 | }, 176 | "outputs": [], 177 | "source": [ 178 | "%whos" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": { 184 | "id": "NuKwnfrSa7gr" 185 | }, 186 | "source": [ 187 | "## 2 Operators" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": { 193 | "id": "_3ZBjhaPa7gr" 194 | }, 195 | "source": [ 196 | "| Type | Operators |\n", 197 | "|-------|-----------|\n", 198 | "| Arithmetic | `+`, `-`, `*`, `/`, `**`, `//`, `%` |\n", 199 | "| Comparison | `==`, `!=`, `<`, `>`, `<=`, `>=` |\n", 200 | "| Logical | `and`, `or`, `not` |\n", 201 | "| Assignment | `=`, `+=`, `-=`, `*=`, `/=`, `//=`, `%=`, `**=`,|\n", 202 | "| Membership | `in`, `not in` |\n", 203 | "| Identity | `is`, `is not` |" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": { 209 | "id": "n6HHqEx3a7gr" 210 | }, 211 | "source": [ 212 | "#### 2.1 Arithmetic\n", 213 | "* Unusual: power operator is `**`" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": { 220 | "id": "yHdqyzSsa7gr" 221 | }, 222 | "outputs": [], 223 | "source": [ 224 | "print(\"5 + 3 =\", 5 + 3)\n", 225 | "print(\"5 - 3 =\", 5 - 3)\n", 226 | "print(\"5 * 3 =\", 5 * 3)\n", 227 | "print(\"5 / 3 =\", 5 / 3)\n", 228 | "print(\"5 ** 3 =\", 5 ** 3) # Power\n", 229 | "print(\"5**(1/2)=\", 5 ** (1/2)) # Square root = power 1/2" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": { 235 | "id": "6O1s7Sdsa7gr" 236 | }, 237 | "source": [ 238 | "#### 2.1.1. 
Basic calculations with variables\n", 239 | "* Usually we don't print the results of calculations, but store them in variables" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "metadata": { 246 | "id": "CX3i9nhza7gr" 247 | }, 248 | "outputs": [], 249 | "source": [ 250 | "# Example: how far can you count with 1 byte?\n", 251 | "n_values = 2\n", 252 | "n_bits = 8\n", 253 | "max_value = n_values ** n_bits\n", 254 | "print(max_value)" 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": { 260 | "id": "p5cWVDWla7gr" 261 | }, 262 | "source": [ 263 | "#### 2.2 Comparison\n", 264 | "* Use `==` to check for equality\n", 265 | "* Ordering of `<=` and `>=` like in spoken language (\"smaller or equal\")" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": { 272 | "id": "tTwpDbjRa7gr" 273 | }, 274 | "outputs": [], 275 | "source": [ 276 | "print(\"5 == 3 :\", 5 == 3)\n", 277 | "print(\"5 != 3 :\", 5 != 3)\n", 278 | "print(\"5 < 3 :\", 5 < 3)\n", 279 | "print(\"5 > 3 :\", 5 > 3)\n", 280 | "print(\"5 <= 3 :\", 5 <= 3)\n", 281 | "print(\"5 >= 3 :\", 5 >= 3)" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": { 287 | "id": "5zPeBERoa7gr" 288 | }, 289 | "source": [ 290 | "#### 2.3 Logical\n", 291 | "* Combine two conditions" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": { 298 | "id": "x4GhgRVta7gr" 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "print(\"1==1 and 1==2 :\", 1==1 and 1==2)\n", 303 | "print(\"1==1 or 1==2 :\", 1==1 or 1==2)\n", 304 | "print(\"not 1==2 :\", not 1==2)" 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": { 310 | "id": "RcJC9Q7ta7gr" 311 | }, 312 | "source": [ 313 | "#### Exercises\n", 314 | "* Create a variable `nobs` and assign it the value 100\n", 315 | "* Create a variable `slices` and assign it the value 5\n", 316 | "* Calculate the number 
of observations per slice and store the result in an appropriately named variable\n", 317 | "* Answer the question whether the number of observations per slice is smaller than 25" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": { 324 | "id": "m2RwnWANa7gr" 325 | }, 326 | "outputs": [], 327 | "source": [ 328 | "# Python code goes here" 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": { 334 | "id": "EP_QJUMXa7gs" 335 | }, 336 | "source": [ 337 | "## *Appendix: more operator examples" 338 | ] 339 | }, 340 | { 341 | "cell_type": "markdown", 342 | "metadata": { 343 | "id": "IH0f0Jc_a7gs" 344 | }, 345 | "source": [ 346 | "#### *Assignment\n", 347 | "* Instead of `a = a+1` write `a += 1`" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "metadata": { 354 | "id": "L8iLvtxca7gs" 355 | }, 356 | "outputs": [], 357 | "source": [ 358 | "a = 1\n", 359 | "a += 1\n", 360 | "print(a)" 361 | ] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "metadata": { 366 | "id": "Jvfadmkra7gs" 367 | }, 368 | "source": [ 369 | "#### *Membership" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": { 376 | "id": "Ax0xovz7a7gs" 377 | }, 378 | "outputs": [], 379 | "source": [ 380 | "print(\"Is 3 in range(1,5)? --\", 3 in range(1,5))\n", 381 | "print(\"Is 5 in range(1,5)? 
--\", 5 in range(1,5))" 382 | ] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": { 387 | "id": "uPhX14mIa7gs" 388 | }, 389 | "source": [ 390 | "#### *Identity\n", 391 | "* `==` checks whether the **content** is equal\n", 392 | "* `is` checks whether two variables are **identical** (= point to the same object in memory)" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": null, 398 | "metadata": { 399 | "id": "cBZMNzida7gs" 400 | }, 401 | "outputs": [], 402 | "source": [ 403 | "a = [1, 2, 3]\n", 404 | "b = a # <--- Do NOT use this!\n", 405 | "c = [1, 2, 3]\n", 406 | "print(\"a is b:\", a is b)\n", 407 | "print(\"a is c:\", a is c)\n", 408 | "print(\"a == c:\", a == c)\n" 409 | ] 410 | }, 411 | { 412 | "cell_type": "markdown", 413 | "metadata": { 414 | "id": "f3jnuSkEa7gs" 415 | }, 416 | "source": [ 417 | "#### *Why is identity relevant?\n", 418 | "* Assignment with `=` works differently in Python\n", 419 | " * May lead to surprises\n", 420 | "* Use `.copy()` as alternative" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": null, 426 | "metadata": { 427 | "id": "3sxwLzIYa7gs" 428 | }, 429 | "outputs": [], 430 | "source": [ 431 | "# Create three lists\n", 432 | "a = [1,2]\n", 433 | "b = a\n", 434 | "c = a.copy() # <--- MUCH better!\n", 435 | "\n", 436 | "# Change a ... what happens?\n", 437 | "a.append(3)\n", 438 | "print(\"a:\", a)\n", 439 | "print(\"b:\", b) # <--- changed\n", 440 | "print(\"c:\", c) # <--- unchanged" 441 | ] 442 | }, 443 | { 444 | "cell_type": "markdown", 445 | "metadata": { 446 | "id": "TIuFPEfCa7gs" 447 | }, 448 | "source": [ 449 | "## *Appendix 2: Naming rules\n", 450 | "**Guidelines**:\n", 451 | "- Do not use Python reserved keywords.\n", 452 | " - If not sure, use `my_` prefix. 
*Example:* `my_date`\n", 453 | "- Keep names descriptive but concise.\n", 454 | "- Start variables and functions with lowercase letters" 455 | ] 456 | }, 457 | { 458 | "cell_type": "markdown", 459 | "metadata": { 460 | "id": "oEDjNWIGa7gs" 461 | }, 462 | "source": [ 463 | "| Type | Convention | Example |\n", 464 | "|------|------------|---------|\n", 465 | "| Variables | `snake_case lowercase first` | `my_variable = 10` |\n", 466 | "| Functions | `snake_case lowercase first` | `def my_function():` |\n", 467 | "| *Classes | `CamelCase uppercase first` | `class MyClass:` |\n", 468 | "| *Modules & Packages | `snake_case lowercase first` | `import my_module` |" 469 | ] 470 | } 471 | ], 472 | "metadata": { 473 | "colab": { 474 | "provenance": [], 475 | "include_colab_link": true 476 | }, 477 | "kernelspec": { 478 | "display_name": "Python 3 (ipykernel)", 479 | "language": "python", 480 | "name": "python3" 481 | }, 482 | "language_info": { 483 | "codemirror_mode": { 484 | "name": "ipython", 485 | "version": 3 486 | }, 487 | "file_extension": ".py", 488 | "mimetype": "text/x-python", 489 | "name": "python", 490 | "nbconvert_exporter": "python", 491 | "pygments_lexer": "ipython3", 492 | "version": "3.11.5" 493 | } 494 | }, 495 | "nbformat": 4, 496 | "nbformat_minor": 0 497 | } -------------------------------------------------------------------------------- /02Python_Functions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "565868a5", 6 | "metadata": { 7 | "id": "565868a5" 8 | }, 9 | "source": [ 10 | "\n", 11 | "\"Open" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "id": "906395dc-5bd1-477f-af29-72c03d8e239e", 17 | "metadata": { 18 | "id": "906395dc-5bd1-477f-af29-72c03d8e239e" 19 | }, 20 | "source": [ 21 | "# Introduction to Python\n", 22 | "### Main concepts of Python – Part 02\n", 23 | "Peter Gruber (peter.gruber@usi.ch), 2024-04-01\n", 24 | "\n", 25 | "* 
Built-in functions\n", 26 | "* Packages\n", 27 | "* Create your own function\n", 28 | " * Simple lambda function\n", 29 | " * Full function with `def`\n", 30 | " * Default values and arguments\n", 31 | " * Standard Google documentation structure" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "id": "9ff92329", 37 | "metadata": { 38 | "id": "9ff92329" 39 | }, 40 | "source": [ 41 | "## 1 Built-in functions\n", 42 | "- Functions in core Python that do not require a package" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "id": "26644cd9-6523-4387-8e8c-a4eb37349054", 49 | "metadata": { 50 | "id": "26644cd9-6523-4387-8e8c-a4eb37349054" 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "print(1+1)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "id": "78d44366-4170-4f15-9326-db1b6d3bcdc1", 61 | "metadata": { 62 | "id": "78d44366-4170-4f15-9326-db1b6d3bcdc1" 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "abs(-3)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "id": "3d2575b2-23f2-4542-a25b-3fb2f1a2618d", 72 | "metadata": { 73 | "id": "3d2575b2-23f2-4542-a25b-3fb2f1a2618d" 74 | }, 75 | "source": [ 76 | "#### 👍 1.1 Suggestions with `ctrl`-`Space`\n", 77 | "* To find any command, type the first letter and then `ctrl`-`Space`\n", 78 | "* A list of suggestions will be displayed\n", 79 | "\n", 80 | "**Example**\n", 81 | "* Round `a` using the `round` function\n", 82 | "* Type `r` and then `ctrl`-`Space`\n", 83 | " * Once you have found the `round` function, type `ctrl`-`Space` again to get more information" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "id": "a51c9622-4958-48d4-a3f2-12d8779db52c", 90 | "metadata": { 91 | "id": "a51c9622-4958-48d4-a3f2-12d8779db52c" 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "a = 123.4567\n", 96 | "# type `r` and then `ctrl`-`Space` here\n", 97 | "r" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | 
"id": "d508fb5e-7d2c-4649-be7d-63be4d3dded3", 103 | "metadata": { 104 | "id": "d508fb5e-7d2c-4649-be7d-63be4d3dded3" 105 | }, 106 | "source": [ 107 | "#### 1.2 Using `help`\n", 108 | "* Use `help( )`\n", 109 | "\n", 110 | "**Exercise**\n", 111 | "* Find the help text for `round()`" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "id": "bb6ff12e-4967-48a9-9fb5-5bbbaed16d9e", 118 | "metadata": { 119 | "id": "bb6ff12e-4967-48a9-9fb5-5bbbaed16d9e" 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "# help request goes here\n" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "id": "667f3be7", 129 | "metadata": { 130 | "id": "667f3be7" 131 | }, 132 | "source": [ 133 | "## 2 More functions using modules\n", 134 | "* Core Python has **very few** built-in mathematical functions, not even $\\sqrt{x}, \\pi, e^x$ or $\\log()$\n", 135 | "* Import appropriate modules to extend Python's functionality\n", 136 | " * Larger modules are called *packages*\n", 137 | " * The terms are often used as synonyms\n", 138 | "* Syntax `module.function()`\n", 139 | "\n", 140 | "More about modules in Part 4 \n", 141 | " \"Open\n", 142 | "" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "id": "299e59ee", 149 | "metadata": { 150 | "id": "299e59ee" 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "# Example: using the math module\n", 155 | "import math # <------- Import once per jupyter notebook or program\n", 156 | "print( math.sqrt(4) )\n", 157 | "print( math.pi )\n", 158 | "print( math.exp(1) )\n", 159 | "print( math.log(1) )" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "id": "c6a571f0", 165 | "metadata": { 166 | "id": "c6a571f0" 167 | }, 168 | "source": [ 169 | "#### 👍 2.1 Suggestions with modules\n", 170 | "\n", 171 | "**Exercise**\n", 172 | "* You want to calculate $10!$\n", 173 | "* Type `math.` and then `ctrl`-`Space`\n", 174 | " * Once you found the right function, type 
`ctrl`-`Space` for more information" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "id": "57449575", 181 | "metadata": { 182 | "id": "57449575" 183 | }, 184 | "outputs": [], 185 | "source": [ 186 | "math." 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "id": "cf9f17ad", 192 | "metadata": { 193 | "id": "cf9f17ad" 194 | }, 195 | "source": [ 196 | "## 3 Your first function using `lambda`\n", 197 | "\n", 198 | "- `lambda` functions: quick 1-line functions\n", 199 | " + So-called *anonymous* function\n", 200 | " + Also used in optimization to express constraints\n", 201 | "\n", 202 | "**Example**\n", 203 | "* Function $f(x) = x^2 - 2x + 1$\n", 204 | "* Do not forget the `*` in $2x$" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "id": "0899cf36", 211 | "metadata": { 212 | "id": "0899cf36" 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "# Example\n", 217 | "f = lambda x: x**2 - 2*x +1\n", 218 | "f(2)" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "id": "b776d326", 224 | "metadata": { 225 | "id": "b776d326" 226 | }, 227 | "source": [ 228 | "**Exercise**\n", 229 | "* Create a function called `norm_dens` that returns the density of the standard normal distribution $f(x)={\\frac {1}{ {\\sqrt {2\\pi }}}}e^{-{\\frac {{x}^{2}}{2}}}$\n", 230 | "* Do not forget to ..." 
231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "id": "16f5c20d-333a-4136-bec9-9614a7ccd0ba", 237 | "metadata": { 238 | "id": "16f5c20d-333a-4136-bec9-9614a7ccd0ba" 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "# Python code goes here" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "id": "372b9656", 249 | "metadata": { 250 | "id": "372b9656" 251 | }, 252 | "outputs": [], 253 | "source": [ 254 | "norm_dens(0)" 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "id": "41f86100-b4b2-43a0-92c8-a0cdb5d628cd", 260 | "metadata": { 261 | "id": "41f86100-b4b2-43a0-92c8-a0cdb5d628cd" 262 | }, 263 | "source": [ 264 | "## 4 More elaborate functions using `def`\n", 265 | "\n", 266 | "- `def` functions allow for ...\n", 267 | " * Multiple lines\n", 268 | " * Default arguments\n", 269 | " * Documentation\n", 270 | "- Use `return` to specify the result" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "id": "0bd1952e", 277 | "metadata": { 278 | "id": "0bd1952e" 279 | }, 280 | "outputs": [], 281 | "source": [ 282 | "# Previous example with def/return\n", 283 | "def f(x):\n", 284 | " return x**2 - 2*x + 1\n" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "id": "b16677c0", 291 | "metadata": { 292 | "id": "b16677c0" 293 | }, 294 | "outputs": [], 295 | "source": [ 296 | "f(2)" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "id": "720fef75", 302 | "metadata": { 303 | "id": "720fef75" 304 | }, 305 | "source": [ 306 | "**Example**\n", 307 | "* Density of the uniform distribution between $a$ and $b$" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "id": "aae932be", 314 | "metadata": { 315 | "id": "aae932be" 316 | }, 317 | "outputs": [], 318 | "source": [ 319 | "def unif_dens(x,a,b):\n", 320 | " # Case 1: x is outside the interval\n", 321 | " if x < a 
or x > b:\n", 322 | " return 0\n", 323 | " # Case 2: x is inside the interval\n", 324 | " else:\n", 325 | " return 1/(b-a)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "id": "6fb6cf75", 332 | "metadata": { 333 | "id": "6fb6cf75" 334 | }, 335 | "outputs": [], 336 | "source": [ 337 | "# Example: uniform distribution with a=-1 and b=1\n", 338 | "a = -1\n", 339 | "b = 1\n", 340 | "print( unif_dens(0, a, b) )\n", 341 | "print( unif_dens(-2, a, b) )" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "id": "a2acc1f5", 347 | "metadata": { 348 | "id": "a2acc1f5" 349 | }, 350 | "source": [ 351 | "#### 4.1 Documentation\n", 352 | "* Use triple quotation marks" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "id": "43a78c20", 359 | "metadata": { 360 | "id": "43a78c20" 361 | }, 362 | "outputs": [], 363 | "source": [ 364 | "def unif_dens(x,a,b):\n", 365 | " \"\"\"\n", 366 | " Compute the density of the uniform distribution.\n", 367 | "\n", 368 | " Args:\n", 369 | " x (float): Value of interest\n", 370 | " a (float): Lower bound of the distribution\n", 371 | " b (float): Upper bound of the distribution\n", 372 | "\n", 373 | " Returns:\n", 374 | " float: Density of the uniform distribution at x\n", 375 | "\n", 376 | " Examples:\n", 377 | " >>> unif_dens(0.5,0,1)\n", 378 | " 1.0\n", 379 | "\n", 380 | " >>> unif_dens(5,0,1)\n", 381 | " 0\n", 382 | " \"\"\"\n", 383 | " # Case 1: x is outside the interval\n", 384 | " if x < a or x > b:\n", 385 | " return 0\n", 386 | " # Case 2: x is inside the interval\n", 387 | " else:\n", 388 | " return 1/(b-a)" 389 | ] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "source": [ 394 | "#### 4.2 Help text\n", 395 | "\n", 396 | "* By including a comment between `\"\"\"`, we have implicitly created a help text" 397 | ], 398 | "metadata": { 399 | "id": "xUIUyFdi9fia" 400 | }, 401 | "id": "xUIUyFdi9fia" 402 | }, 403 | { 404 | "cell_type": "code", 405 | 
"source": [ 406 | "help(unif_dens)" 407 | ], 408 | "metadata": { 409 | "id": "XG_xE43n9r0m" 410 | }, 411 | "id": "XG_xE43n9r0m", 412 | "execution_count": null, 413 | "outputs": [] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "id": "926ea685", 418 | "metadata": { 419 | "id": "926ea685" 420 | }, 421 | "source": [ 422 | "#### 4.3 Call by order or name\n", 423 | "\n", 424 | "- Calling by **order**: position determines which argument is what\n", 425 | "- Calling by **name**: naming determines which argument is what (better, more explicit)\n", 426 | "\n", 427 | "➡️ Works for both `lambda` and `def`" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": null, 433 | "id": "3e1eef27", 434 | "metadata": { 435 | "id": "3e1eef27" 436 | }, 437 | "outputs": [], 438 | "source": [ 439 | "# call by order\n", 440 | "unif_dens(0.5,0,3)" 441 | ] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "execution_count": null, 446 | "id": "52f2a268", 447 | "metadata": { 448 | "id": "52f2a268" 449 | }, 450 | "outputs": [], 451 | "source": [ 452 | "# call by name\n", 453 | "unif_dens(x=0.5, a=0, b=3)" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "id": "bfc3cf91", 460 | "metadata": { 461 | "id": "bfc3cf91" 462 | }, 463 | "outputs": [], 464 | "source": [ 465 | "# possibility to change the order (good idea?)\n", 466 | "unif_dens(a=0, b=3, x=0.5)" 467 | ] 468 | }, 469 | { 470 | "cell_type": "markdown", 471 | "id": "ebfd3d38", 472 | "metadata": { 473 | "id": "ebfd3d38" 474 | }, 475 | "source": [ 476 | "## *5 Python code or function?\n", 477 | "\n", 478 | "**Advantages of functions:**\n", 479 | "* Reusable\n", 480 | "* Make code more readable\n", 481 | "* Make code more testable\n", 482 | "\n", 483 | "**Can you overdo functions?**\n", 484 | "* Sometimes ChatGPT wraps a simple expression in a function\n", 485 | "* Sometimes it creates a function `main()` instead of a program" 486 | ] 487 | }, 488 | { 489 | "cell_type": "code", 
490 | "execution_count": null, 491 | "id": "29789f62", 492 | "metadata": { 493 | "id": "29789f62" 494 | }, 495 | "outputs": [], 496 | "source": [ 497 | "# Example of a simple program\n", 498 | "golden_ratio = (1 + math.sqrt(5))/2\n", 499 | "print(golden_ratio)" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": null, 505 | "id": "a01e0915", 506 | "metadata": { 507 | "id": "a01e0915" 508 | }, 509 | "outputs": [], 510 | "source": [ 511 | "# Example of packaging as function\n", 512 | "def golden_ratio():\n", 513 | " return (1 + math.sqrt(5))/2\n", 514 | "print(golden_ratio())" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": null, 520 | "id": "302ffca1", 521 | "metadata": { 522 | "id": "302ffca1" 523 | }, 524 | "outputs": [], 525 | "source": [ 526 | "# example of packaging with main():\n", 527 | "def golden_ratio():\n", 528 | " return (1 + math.sqrt(5))/2\n", 529 | "\n", 530 | "def main():\n", 531 | " print(golden_ratio())\n", 532 | "\n", 533 | "main()" 534 | ] 535 | } 536 | ], 537 | "metadata": { 538 | "kernelspec": { 539 | "display_name": "Python 3 (ipykernel)", 540 | "language": "python", 541 | "name": "python3" 542 | }, 543 | "language_info": { 544 | "codemirror_mode": { 545 | "name": "ipython", 546 | "version": 3 547 | }, 548 | "file_extension": ".py", 549 | "mimetype": "text/x-python", 550 | "name": "python", 551 | "nbconvert_exporter": "python", 552 | "pygments_lexer": "ipython3", 553 | "version": "3.11.5" 554 | }, 555 | "colab": { 556 | "provenance": [] 557 | } 558 | }, 559 | "nbformat": 4, 560 | "nbformat_minor": 5 561 | } -------------------------------------------------------------------------------- /03Python_Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "22f282b7", 6 | "metadata": { 7 | "id": "22f282b7" 8 | }, 9 | "source": [ 10 | "\n", 11 | " \"Open\n", 12 | "" 13 | ] 14 | }, 15 | { 16 | 
"cell_type": "markdown", 17 | "id": "1f0d16e0", 18 | "metadata": { 19 | "id": "1f0d16e0" 20 | }, 21 | "source": [ 22 | "# Introduction to Python\n", 23 | "### Main concepts of Python – Part 03\n", 24 | "Peter Gruber (peter.gruber@usi.ch), 2024-04-01" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "id": "d06204e0", 30 | "metadata": { 31 | "id": "d06204e0" 32 | }, 33 | "source": [ 34 | "## 0 Important disctinction\n", 35 | "* Data **type** = how one *individual* observation is stored\n", 36 | " * bool\n", 37 | " * int, float\n", 38 | "* Data **strcture** = how *multiple* observations are stored\n", 39 | " * List\n", 40 | " * Array\n", 41 | " * Dataframe" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "id": "4726b96a", 47 | "metadata": { 48 | "id": "4726b96a" 49 | }, 50 | "source": [ 51 | "## 1 Data types" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "id": "cd75b863", 57 | "metadata": { 58 | "id": "cd75b863" 59 | }, 60 | "source": [ 61 | "### 1.1 Numeric types" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "id": "828137d3", 67 | "metadata": { 68 | "id": "828137d3" 69 | }, 70 | "source": [ 71 | "**Create the following variables**\n", 72 | "* `a` integer 5\n", 73 | "* `b` real 5" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "id": "eae2d061", 80 | "metadata": { 81 | "id": "eae2d061" 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "a = 5\n", 86 | "b = 5.7" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "id": "75f7b155", 93 | "metadata": { 94 | "id": "75f7b155" 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "print(type(a))\n", 99 | "print(type(b))" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "id": "70547ab6", 105 | "metadata": { 106 | "id": "70547ab6" 107 | }, 108 | "source": [ 109 | "#### 1.1.2 Convert numeric data types" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "id": "5dbfa842", 116 | 
"metadata": { 117 | "id": "5dbfa842" 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "# \"a\" is integer -> To float\n", 122 | "# Note that \"a1\" prints as 5.0\n", 123 | "a1 = float(a)\n", 124 | "print(a1, type(a1))" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "id": "f1346450-a85c-47b2-9258-54591f4d8f0b", 131 | "metadata": { 132 | "id": "f1346450-a85c-47b2-9258-54591f4d8f0b" 133 | }, 134 | "outputs": [], 135 | "source": [ 136 | "# \"b\" is a float -> To integer\n", 137 | "# Note that \"b1\" is truncated\n", 138 | "b1 = int(b)\n", 139 | "print(b1, type(b1))" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "id": "6f237a51", 145 | "metadata": { 146 | "id": "6f237a51" 147 | }, 148 | "source": [ 149 | "### 1.2 Boolean type" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "id": "cf76319d", 156 | "metadata": { 157 | "id": "cf76319d", 158 | "outputId": "da0b663c-fd63-4ed0-d376-4ef4458b4774" 159 | }, 160 | "outputs": [ 161 | { 162 | "name": "stdout", 163 | "output_type": "stream", 164 | "text": [ 165 | "\n" 166 | ] 167 | } 168 | ], 169 | "source": [ 170 | "c = True\n", 171 | "print(type(c))" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "id": "45bc1ab8", 178 | "metadata": { 179 | "id": "45bc1ab8", 180 | "outputId": "8835bff1-6505-4c9d-a7e3-a0f0c1465ae0" 181 | }, 182 | "outputs": [ 183 | { 184 | "name": "stdout", 185 | "output_type": "stream", 186 | "text": [ 187 | "1 \n" 188 | ] 189 | } 190 | ], 191 | "source": [ 192 | "# \"c\" --> to integer\n", 193 | "# True = 1 and Flase = 0\n", 194 | "c1 = int(c)\n", 195 | "print(c1, type(c1))" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "id": "98ec37c3", 201 | "metadata": { 202 | "id": "98ec37c3" 203 | }, 204 | "source": [ 205 | "### 1.3 Really no more numeric data types?\n", 206 | "* See appendix for `complex`\n", 207 | "* Many other data types like date are part of 
modules" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "id": "c5f605a5", 213 | "metadata": { 214 | "id": "c5f605a5" 215 | }, 216 | "source": [ 217 | "## 2 Python data structures\n", 218 | "* Organize multiple observations in a single variable\n", 219 | " * Structure vs. flexibility" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "id": "a4b69867", 225 | "metadata": { 226 | "id": "a4b69867" 227 | }, 228 | "source": [ 229 | "### 2.0 ❗️Python indexing\n", 230 | "* Python starts indexing at 0\n", 231 | "* Index ranges exclude the end point: to include element $n$, the range must run to $n+1$\n", 232 | "\n", 233 | "**Example**\n", 234 | "* The first element is `0` in Python\n", 235 | "* The first three elements are `0:3` in Python" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "id": "e54681b8", 241 | "metadata": { 242 | "id": "e54681b8" 243 | }, 244 | "source": [ 245 | "### 2.1 Strings\n", 246 | "* Text contains multiple letters\n", 247 | "* A very simple data structure" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "id": "78ea0153", 254 | "metadata": { 255 | "id": "78ea0153" 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "myString = 'Hello world!'" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "id": "32bf7fc0", 266 | "metadata": { 267 | "id": "32bf7fc0" 268 | }, 269 | "outputs": [], 270 | "source": [ 271 | "print(type(myString))" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "id": "88518032", 278 | "metadata": { 279 | "id": "88518032" 280 | }, 281 | "outputs": [], 282 | "source": [ 283 | "# Access individual characters\n", 284 | "# Python starts indexing at 0\n", 285 | "print(myString[0])" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "id": "3fe905e4", 291 | "metadata": { 292 | "id": "3fe905e4" 293 | }, 294 | "source": [ 295 | "#### 2.1.1 Subsetting with the `:` operator\n", 296 | "* Access a range of elements\n", 297 | "* Counting
starts at zero\n", 298 | "* The first index is inclusive and the second index is exclusive\n", 299 | "\n", 300 | "**Exercise**\n", 301 | "* Print the word \"world\" in `myString` (characters 7 to 11 when counting from 1)" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "id": "293abf17", 308 | "metadata": { 309 | "id": "293abf17" 310 | }, 311 | "outputs": [], 312 | "source": [ 313 | "# Python code goes here" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "id": "6a7f8ef6", 319 | "metadata": { 320 | "id": "6a7f8ef6" 321 | }, 322 | "source": [ 323 | "### 2.2 Lists\n", 324 | "* Very common data structure\n", 325 | "* Useful with for-loops\n", 326 | "\n", 327 | "**Example**\n", 328 | "* Create a list called `first_list` with the elements 0,1,2,...,5" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": null, 334 | "id": "ecbd53e1", 335 | "metadata": { 336 | "id": "ecbd53e1" 337 | }, 338 | "outputs": [], 339 | "source": [ 340 | "first_list = list(range(0,6)) # <----- 6, not 5!\n", 341 | "first_list = [0,1,2,3,4,5]\n", 342 | "print(first_list)" 343 | ] 344 | }, 345 | { 346 | "cell_type": "markdown", 347 | "id": "6ecd5fe9", 348 | "metadata": { 349 | "id": "6ecd5fe9" 350 | }, 351 | "source": [ 352 | "#### 2.2.1 Subsetting lists\n", 353 | "* Print the first element of the list\n", 354 | "* Print the first three elements of the list\n", 355 | "* Print the last element of the list" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "id": "b0c68d09", 362 | "metadata": { 363 | "id": "b0c68d09" 364 | }, 365 | "outputs": [], 366 | "source": [ 367 | "print(first_list[0])\n", 368 | "print(first_list[0:3]) # elements 0,1,2.
(not 3)\n", 369 | "print(first_list[-1]) # from the back" 370 | ] 371 | }, 372 | { 373 | "cell_type": "markdown", 374 | "id": "5997a3a1", 375 | "metadata": { 376 | "id": "5997a3a1" 377 | }, 378 | "source": [ 379 | "#### 2.2.2 Lists can contain lists" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": null, 385 | "id": "4609b371", 386 | "metadata": { 387 | "id": "4609b371" 388 | }, 389 | "outputs": [], 390 | "source": [ 391 | "# Example\n", 392 | "my_list = [0,\"one\",[0,1,2]]" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": null, 398 | "id": "289c7bda", 399 | "metadata": { 400 | "id": "289c7bda" 401 | }, 402 | "outputs": [], 403 | "source": [ 404 | "# Exercise: access the \"1\" in my_list\n", 405 | "# Python code goes here" 406 | ] 407 | }, 408 | { 409 | "cell_type": "markdown", 410 | "id": "223e2fbc", 411 | "metadata": { 412 | "id": "223e2fbc" 413 | }, 414 | "source": [ 415 | "#### 👍 2.2.3 Suggestions with lists\n", 416 | "\n", 417 | "**Exercise**\n", 418 | "* You want to append 7 to `my_list`\n", 419 | "* Type `my_list.` and then `ctrl`-`Space`" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": null, 425 | "id": "a3a70ce2", 426 | "metadata": { 427 | "id": "a3a70ce2" 428 | }, 429 | "outputs": [], 430 | "source": [ 431 | "my_list."
432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "id": "67ae34e6", 437 | "metadata": { 438 | "id": "67ae34e6" 439 | }, 440 | "source": [ 441 | "## 3 Advanced Data Structures\n", 442 | "* The most popular data structures are not part of core Python\n", 443 | " * `array` is part of `numpy`\n", 444 | " * `dataframe` is part of `pandas`\n", 445 | " * See Chapter 5 \n", 446 | " \"Open\n", 447 | "" 448 | ] 449 | }, 450 | { 451 | "cell_type": "markdown", 452 | "id": "80c85570", 453 | "metadata": { 454 | "id": "80c85570" 455 | }, 456 | "source": [ 457 | "### 3.1 Arrays\n", 458 | "* $n$-dimensional, rectangular data structure\n", 459 | "* All elements of same type\n", 460 | "* Use for matrix/tensor algebra" 461 | ] 462 | }, 463 | { 464 | "cell_type": "markdown", 465 | "id": "ac60ccf2", 466 | "metadata": { 467 | "id": "ac60ccf2" 468 | }, 469 | "source": [ 470 | "### 3.2 Dataframes\n", 471 | "* 2-dimensional, rectangular data structure\n", 472 | "* Elements of one column are same data type\n", 473 | " * Each column can have different data type\n", 474 | "* Use for regression analysis" 475 | ] 476 | }, 477 | { 478 | "cell_type": "markdown", 479 | "id": "87363a86", 480 | "metadata": { 481 | "id": "87363a86" 482 | }, 483 | "source": [ 484 | "## *Appendix: Summary of data types\n", 485 | "\n", 486 | "| Data Type | Description |\n", 487 | "|-----------------|-----------------------------------------------------------------------------------|\n", 488 | "| `int` | Integer (whole numbers, positive or negative) |\n", 489 | "| `float` | Floating-point |\n", 490 | "| `complex` | Complex number (real and imaginary part) |\n", 491 | "| `bool` | Boolean type, can only have the values `True` or `False` |\n", 492 | "\n" 493 | ] 494 | }, 495 | { 496 | "cell_type": "markdown", 497 | "id": "068bfcac", 498 | "metadata": { 499 | "id": "068bfcac" 500 | }, 501 | "source": [ 502 | "## *Appendix: Summary of data structures\n", 503 | "\n", 504 | "| Data Structure | Description |\n", 505 |
"|-----------------|-----------------------------------------------------------------------------------|\n", 506 | "| `str` | Text string type, Unicode characters. |\n", 507 | "| `list` | Ordered collection of objects. Allows duplicate entries and is mutable (changeable). |\n", 508 | "| `tuple` | Ordered collection like `list`, but immutable (unchangeable). |\n", 509 | "| `dict` | Dictionary. Collection of key-value pairs that preserves insertion order (Python 3.7+). Keys must be unique. |\n", 510 | "| `range` | Sequence of numbers, used in for-loops to control the number of iterations. |\n", 511 | "| `set` | Unordered collection of unique objects. |\n", 512 | "| `frozenset` | Immutable version of `set`. |\n", 513 | "| `bytearray` | Mutable sequence of bytes. |\n", 514 | "| `bytes` | Immutable version of `bytearray` |\n", 515 | "| `memoryview` | Memory view object exposes the buffer protocol. |\n", 516 | "| `NoneType` | Represents the absence of a value or a null value. Only one instance: `None`. |\n", 517 | "\n", 518 | "Sequences store multiple values in an organized and efficient way.
There are seven sequence types: strings, bytes, lists, tuples, bytearrays, buffers, and range objects.\n", 519 | "\n", 520 | "- Most commonly used ones: strings, lists, and tuples\n", 521 | "- Ranges used in for-loops\n", 522 | "- [Sequence types Wikibooks.org](https://en.wikibooks.org/wiki/Python_Programming/Sequences#:~:text=Sequences%20allow%20you%20to%20store,are%20containers%20for%20sequential%20data.)\n" 523 | ] 524 | } 525 | ], 526 | "metadata": { 527 | "kernelspec": { 528 | "display_name": "Python 3 (ipykernel)", 529 | "language": "python", 530 | "name": "python3" 531 | }, 532 | "language_info": { 533 | "codemirror_mode": { 534 | "name": "ipython", 535 | "version": 3 536 | }, 537 | "file_extension": ".py", 538 | "mimetype": "text/x-python", 539 | "name": "python", 540 | "nbconvert_exporter": "python", 541 | "pygments_lexer": "ipython3", 542 | "version": "3.11.5" 543 | }, 544 | "colab": { 545 | "provenance": [] 546 | } 547 | }, 548 | "nbformat": 4, 549 | "nbformat_minor": 5 550 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Attribution-ShareAlike 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 
14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. 
A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More_considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution-ShareAlike 4.0 International Public 58 | License 59 | 60 | By exercising the Licensed Rights (defined below), You accept and agree 61 | to be bound by the terms and conditions of this Creative Commons 62 | Attribution-ShareAlike 4.0 International Public License ("Public 63 | License"). To the extent this Public License may be interpreted as a 64 | contract, You are granted the Licensed Rights in consideration of Your 65 | acceptance of these terms and conditions, and the Licensor grants You 66 | such rights in consideration of benefits the Licensor receives from 67 | making the Licensed Material available under these terms and 68 | conditions. 69 | 70 | 71 | Section 1 -- Definitions. 72 | 73 | a. Adapted Material means material subject to Copyright and Similar 74 | Rights that is derived from or based upon the Licensed Material 75 | and in which the Licensed Material is translated, altered, 76 | arranged, transformed, or otherwise modified in a manner requiring 77 | permission under the Copyright and Similar Rights held by the 78 | Licensor. For purposes of this Public License, where the Licensed 79 | Material is a musical work, performance, or sound recording, 80 | Adapted Material is always produced where the Licensed Material is 81 | synched in timed relation with a moving image. 82 | 83 | b. Adapter's License means the license You apply to Your Copyright 84 | and Similar Rights in Your contributions to Adapted Material in 85 | accordance with the terms and conditions of this Public License. 86 | 87 | c. 
BY-SA Compatible License means a license listed at 88 | creativecommons.org/compatiblelicenses, approved by Creative 89 | Commons as essentially the equivalent of this Public License. 90 | 91 | d. Copyright and Similar Rights means copyright and/or similar rights 92 | closely related to copyright including, without limitation, 93 | performance, broadcast, sound recording, and Sui Generis Database 94 | Rights, without regard to how the rights are labeled or 95 | categorized. For purposes of this Public License, the rights 96 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 97 | Rights. 98 | 99 | e. Effective Technological Measures means those measures that, in the 100 | absence of proper authority, may not be circumvented under laws 101 | fulfilling obligations under Article 11 of the WIPO Copyright 102 | Treaty adopted on December 20, 1996, and/or similar international 103 | agreements. 104 | 105 | f. Exceptions and Limitations means fair use, fair dealing, and/or 106 | any other exception or limitation to Copyright and Similar Rights 107 | that applies to Your use of the Licensed Material. 108 | 109 | g. License Elements means the license attributes listed in the name 110 | of a Creative Commons Public License. The License Elements of this 111 | Public License are Attribution and ShareAlike. 112 | 113 | h. Licensed Material means the artistic or literary work, database, 114 | or other material to which the Licensor applied this Public 115 | License. 116 | 117 | i. Licensed Rights means the rights granted to You subject to the 118 | terms and conditions of this Public License, which are limited to 119 | all Copyright and Similar Rights that apply to Your use of the 120 | Licensed Material and that the Licensor has authority to license. 121 | 122 | j. Licensor means the individual(s) or entity(ies) granting rights 123 | under this Public License. 124 | 125 | k. 
Share means to provide material to the public by any means or 126 | process that requires permission under the Licensed Rights, such 127 | as reproduction, public display, public performance, distribution, 128 | dissemination, communication, or importation, and to make material 129 | available to the public including in ways that members of the 130 | public may access the material from a place and at a time 131 | individually chosen by them. 132 | 133 | l. Sui Generis Database Rights means rights other than copyright 134 | resulting from Directive 96/9/EC of the European Parliament and of 135 | the Council of 11 March 1996 on the legal protection of databases, 136 | as amended and/or succeeded, as well as other essentially 137 | equivalent rights anywhere in the world. 138 | 139 | m. You means the individual or entity exercising the Licensed Rights 140 | under this Public License. Your has a corresponding meaning. 141 | 142 | 143 | Section 2 -- Scope. 144 | 145 | a. License grant. 146 | 147 | 1. Subject to the terms and conditions of this Public License, 148 | the Licensor hereby grants You a worldwide, royalty-free, 149 | non-sublicensable, non-exclusive, irrevocable license to 150 | exercise the Licensed Rights in the Licensed Material to: 151 | 152 | a. reproduce and Share the Licensed Material, in whole or 153 | in part; and 154 | 155 | b. produce, reproduce, and Share Adapted Material. 156 | 157 | 2. Exceptions and Limitations. For the avoidance of doubt, where 158 | Exceptions and Limitations apply to Your use, this Public 159 | License does not apply, and You do not need to comply with 160 | its terms and conditions. 161 | 162 | 3. Term. The term of this Public License is specified in Section 163 | 6(a). 164 | 165 | 4. Media and formats; technical modifications allowed. 
The 166 | Licensor authorizes You to exercise the Licensed Rights in 167 | all media and formats whether now known or hereafter created, 168 | and to make technical modifications necessary to do so. The 169 | Licensor waives and/or agrees not to assert any right or 170 | authority to forbid You from making technical modifications 171 | necessary to exercise the Licensed Rights, including 172 | technical modifications necessary to circumvent Effective 173 | Technological Measures. For purposes of this Public License, 174 | simply making modifications authorized by this Section 2(a) 175 | (4) never produces Adapted Material. 176 | 177 | 5. Downstream recipients. 178 | 179 | a. Offer from the Licensor -- Licensed Material. Every 180 | recipient of the Licensed Material automatically 181 | receives an offer from the Licensor to exercise the 182 | Licensed Rights under the terms and conditions of this 183 | Public License. 184 | 185 | b. Additional offer from the Licensor -- Adapted Material. 186 | Every recipient of Adapted Material from You 187 | automatically receives an offer from the Licensor to 188 | exercise the Licensed Rights in the Adapted Material 189 | under the conditions of the Adapter's License You apply. 190 | 191 | c. No downstream restrictions. You may not offer or impose 192 | any additional or different terms or conditions on, or 193 | apply any Effective Technological Measures to, the 194 | Licensed Material if doing so restricts exercise of the 195 | Licensed Rights by any recipient of the Licensed 196 | Material. 197 | 198 | 6. No endorsement. Nothing in this Public License constitutes or 199 | may be construed as permission to assert or imply that You 200 | are, or that Your use of the Licensed Material is, connected 201 | with, or sponsored, endorsed, or granted official status by, 202 | the Licensor or others designated to receive attribution as 203 | provided in Section 3(a)(1)(A)(i). 204 | 205 | b. Other rights. 206 | 207 | 1. 
Moral rights, such as the right of integrity, are not 208 | licensed under this Public License, nor are publicity, 209 | privacy, and/or other similar personality rights; however, to 210 | the extent possible, the Licensor waives and/or agrees not to 211 | assert any such rights held by the Licensor to the limited 212 | extent necessary to allow You to exercise the Licensed 213 | Rights, but not otherwise. 214 | 215 | 2. Patent and trademark rights are not licensed under this 216 | Public License. 217 | 218 | 3. To the extent possible, the Licensor waives any right to 219 | collect royalties from You for the exercise of the Licensed 220 | Rights, whether directly or through a collecting society 221 | under any voluntary or waivable statutory or compulsory 222 | licensing scheme. In all other cases the Licensor expressly 223 | reserves any right to collect such royalties. 224 | 225 | 226 | Section 3 -- License Conditions. 227 | 228 | Your exercise of the Licensed Rights is expressly made subject to the 229 | following conditions. 230 | 231 | a. Attribution. 232 | 233 | 1. If You Share the Licensed Material (including in modified 234 | form), You must: 235 | 236 | a. retain the following if it is supplied by the Licensor 237 | with the Licensed Material: 238 | 239 | i. identification of the creator(s) of the Licensed 240 | Material and any others designated to receive 241 | attribution, in any reasonable manner requested by 242 | the Licensor (including by pseudonym if 243 | designated); 244 | 245 | ii. a copyright notice; 246 | 247 | iii. a notice that refers to this Public License; 248 | 249 | iv. a notice that refers to the disclaimer of 250 | warranties; 251 | 252 | v. a URI or hyperlink to the Licensed Material to the 253 | extent reasonably practicable; 254 | 255 | b. indicate if You modified the Licensed Material and 256 | retain an indication of any previous modifications; and 257 | 258 | c. 
indicate the Licensed Material is licensed under this 259 | Public License, and include the text of, or the URI or 260 | hyperlink to, this Public License. 261 | 262 | 2. You may satisfy the conditions in Section 3(a)(1) in any 263 | reasonable manner based on the medium, means, and context in 264 | which You Share the Licensed Material. For example, it may be 265 | reasonable to satisfy the conditions by providing a URI or 266 | hyperlink to a resource that includes the required 267 | information. 268 | 269 | 3. If requested by the Licensor, You must remove any of the 270 | information required by Section 3(a)(1)(A) to the extent 271 | reasonably practicable. 272 | 273 | b. ShareAlike. 274 | 275 | In addition to the conditions in Section 3(a), if You Share 276 | Adapted Material You produce, the following conditions also apply. 277 | 278 | 1. The Adapter's License You apply must be a Creative Commons 279 | license with the same License Elements, this version or 280 | later, or a BY-SA Compatible License. 281 | 282 | 2. You must include the text of, or the URI or hyperlink to, the 283 | Adapter's License You apply. You may satisfy this condition 284 | in any reasonable manner based on the medium, means, and 285 | context in which You Share Adapted Material. 286 | 287 | 3. You may not offer or impose any additional or different terms 288 | or conditions on, or apply any Effective Technological 289 | Measures to, Adapted Material that restrict exercise of the 290 | rights granted under the Adapter's License You apply. 291 | 292 | 293 | Section 4 -- Sui Generis Database Rights. 294 | 295 | Where the Licensed Rights include Sui Generis Database Rights that 296 | apply to Your use of the Licensed Material: 297 | 298 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 299 | to extract, reuse, reproduce, and Share all or a substantial 300 | portion of the contents of the database; 301 | 302 | b. 
if You include all or a substantial portion of the database 303 | contents in a database in which You have Sui Generis Database 304 | Rights, then the database in which You have Sui Generis Database 305 | Rights (but not its individual contents) is Adapted Material, 306 | 307 | including for purposes of Section 3(b); and 308 | c. You must comply with the conditions in Section 3(a) if You Share 309 | all or a substantial portion of the contents of the database. 310 | 311 | For the avoidance of doubt, this Section 4 supplements and does not 312 | replace Your obligations under this Public License where the Licensed 313 | Rights include other Copyright and Similar Rights. 314 | 315 | 316 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 317 | 318 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 319 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 320 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 321 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 322 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 323 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 324 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 325 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 326 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 327 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 328 | 329 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 330 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 331 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 332 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 333 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 334 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 335 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 336 | DAMAGES. 
WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 337 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 338 | 339 | c. The disclaimer of warranties and limitation of liability provided 340 | above shall be interpreted in a manner that, to the extent 341 | possible, most closely approximates an absolute disclaimer and 342 | waiver of all liability. 343 | 344 | 345 | Section 6 -- Term and Termination. 346 | 347 | a. This Public License applies for the term of the Copyright and 348 | Similar Rights licensed here. However, if You fail to comply with 349 | this Public License, then Your rights under this Public License 350 | terminate automatically. 351 | 352 | b. Where Your right to use the Licensed Material has terminated under 353 | Section 6(a), it reinstates: 354 | 355 | 1. automatically as of the date the violation is cured, provided 356 | it is cured within 30 days of Your discovery of the 357 | violation; or 358 | 359 | 2. upon express reinstatement by the Licensor. 360 | 361 | For the avoidance of doubt, this Section 6(b) does not affect any 362 | right the Licensor may have to seek remedies for Your violations 363 | of this Public License. 364 | 365 | c. For the avoidance of doubt, the Licensor may also offer the 366 | Licensed Material under separate terms or conditions or stop 367 | distributing the Licensed Material at any time; however, doing so 368 | will not terminate this Public License. 369 | 370 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 371 | License. 372 | 373 | 374 | Section 7 -- Other Terms and Conditions. 375 | 376 | a. The Licensor shall not be bound by any additional or different 377 | terms or conditions communicated by You unless expressly agreed. 378 | 379 | b. Any arrangements, understandings, or agreements regarding the 380 | Licensed Material not stated herein are separate from and 381 | independent of the terms and conditions of this Public License. 382 | 383 | 384 | Section 8 -- Interpretation. 
385 | 386 | a. For the avoidance of doubt, this Public License does not, and 387 | shall not be interpreted to, reduce, limit, restrict, or impose 388 | conditions on any use of the Licensed Material that could lawfully 389 | be made without permission under this Public License. 390 | 391 | b. To the extent possible, if any provision of this Public License is 392 | deemed unenforceable, it shall be automatically reformed to the 393 | minimum extent necessary to make it enforceable. If the provision 394 | cannot be reformed, it shall be severed from this Public License 395 | without affecting the enforceability of the remaining terms and 396 | conditions. 397 | 398 | c. No term or condition of this Public License will be waived and no 399 | failure to comply consented to unless expressly agreed to by the 400 | Licensor. 401 | 402 | d. Nothing in this Public License constitutes or may be interpreted 403 | as a limitation upon, or waiver of, any privileges and immunities 404 | that apply to the Licensor or You, including from the legal 405 | processes of any jurisdiction or authority. 406 | 407 | 408 | ======================================================================= 409 | 410 | Creative Commons is not a party to its public 411 | licenses. Notwithstanding, Creative Commons may elect to apply one of 412 | its public licenses to material it publishes and in those instances 413 | will be considered the “Licensor.” The text of the Creative Commons 414 | public licenses is dedicated to the public domain under the CC0 Public 415 | Domain Dedication. 
Except for the limited purpose of indicating that 416 | material is shared under a Creative Commons public license or as 417 | otherwise permitted by the Creative Commons policies published at 418 | creativecommons.org/policies, Creative Commons does not authorize the 419 | use of the trademark "Creative Commons" or any other trademark or logo 420 | of Creative Commons without its prior written consent including, 421 | without limitation, in connection with any unauthorized modifications 422 | to any of its public licenses or any other arrangements, 423 | understandings, or agreements concerning use of licensed material. For 424 | the avoidance of doubt, this paragraph does not form part of the 425 | public licenses. 426 | 427 | Creative Commons may be contacted at creativecommons.org. 428 | -------------------------------------------------------------------------------- /05Python_Dataframes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "b8922c01", 6 | "metadata": {}, 7 | "source": [ 8 | "\n", 9 | " \"Open\n", 10 | "" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "906395dc-5bd1-477f-af29-72c03d8e239e", 16 | "metadata": {}, 17 | "source": [ 18 | "# Introduction to Python\n", 19 | "### Main concepts of Python – Part 05\n", 20 | "Peter Gruber (peter.gruber@usi.ch), 2024-04-01\n", 21 | "\n", 22 | "* Pandas DataFrames" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "80873f5c", 28 | "metadata": {}, 29 | "source": [ 30 | "## Pandas\n", 31 | "- Powerful library for ...\n", 32 | " - Managing data in dataframes\n", 33 | " - Modifying data structures (merge, subset)\n", 34 | " - Performing basic data analysis\n", 35 | " - Simple plotting\n", 36 | "- Gold standard for data analysis in Python" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "id": "9ebeaab5", 42 | "metadata": {}, 43 | "source": [ 44 | "## 1 Setup Pandas" 45 | ] 46 
| }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "id": "1bd53ace", 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# Install pandas: Uncomment the line below and run it *once*\n", 55 | "# !pip install pandas " 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 3, 61 | "id": "1c31b6e1", 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "# Importing the library \n", 66 | "# \"... as pd\" creates a shortcut for later use ... everybody does it\n", 67 | "import pandas as pd" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "id": "c8548849", 73 | "metadata": {}, 74 | "source": [ 75 | "## 2 Load data\n", 76 | "\n", 77 | "- Advertising click prediction, https://www.kaggle.com/jahnveenarang/cvdcvd-vd" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 10, 83 | "id": "3d0546a6", 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "# Load dataframe\n", 88 | "# On Colab: upload file first\n", 89 | "df = pd.read_pickle('Social_Network_Ads.pkl')" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "id": "c6349dd3", 95 | "metadata": {}, 96 | "source": [ 97 | "## 3 Inspect data" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 5, 103 | "id": "c244e346-d9cd-4e9f-89ac-0e4db16b1876", 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/html": [ 109 | "
\n", 110 | "\n", 123 | "\n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | "
User IDGenderAgeEstimatedSalaryPurchased
015624510Male19190000
115810944Male35200000
215668575Female26430000
315603246Female27570000
415804002Male19760000
..................
39515691863Female46410001
39615706071Male51230001
39715654296Female50200001
39815755018Male36330000
39915594041Female49360001
\n", 225 | "

400 rows × 5 columns

\n", 226 | "
" 227 | ], 228 | "text/plain": [ 229 | " User ID Gender Age EstimatedSalary Purchased\n", 230 | "0 15624510 Male 19 19000 0\n", 231 | "1 15810944 Male 35 20000 0\n", 232 | "2 15668575 Female 26 43000 0\n", 233 | "3 15603246 Female 27 57000 0\n", 234 | "4 15804002 Male 19 76000 0\n", 235 | ".. ... ... ... ... ...\n", 236 | "395 15691863 Female 46 41000 1\n", 237 | "396 15706071 Male 51 23000 1\n", 238 | "397 15654296 Female 50 20000 1\n", 239 | "398 15755018 Male 36 33000 0\n", 240 | "399 15594041 Female 49 36000 1\n", 241 | "\n", 242 | "[400 rows x 5 columns]" 243 | ] 244 | }, 245 | "execution_count": 5, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "df" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 6, 257 | "id": "63b057dc", 258 | "metadata": {}, 259 | "outputs": [ 260 | { 261 | "data": { 262 | "text/html": [ 263 | "
\n", 264 | "\n", 277 | "\n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | "
User IDGenderAgeEstimatedSalaryPurchased
015624510Male19190000
115810944Male35200000
215668575Female26430000
315603246Female27570000
\n", 323 | "
" 324 | ], 325 | "text/plain": [ 326 | " User ID Gender Age EstimatedSalary Purchased\n", 327 | "0 15624510 Male 19 19000 0\n", 328 | "1 15810944 Male 35 20000 0\n", 329 | "2 15668575 Female 26 43000 0\n", 330 | "3 15603246 Female 27 57000 0" 331 | ] 332 | }, 333 | "execution_count": 6, 334 | "metadata": {}, 335 | "output_type": "execute_result" 336 | } 337 | ], 338 | "source": [ 339 | "# First `n` entries (rows) of the dataframe\n", 340 | "df.head(4)" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": 7, 346 | "id": "a8d3a84e", 347 | "metadata": {}, 348 | "outputs": [ 349 | { 350 | "data": { 351 | "text/html": [ 352 | "
\n", 353 | "\n", 366 | "\n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | "
User IDGenderAgeEstimatedSalaryPurchased
39715654296Female50200001
39815755018Male36330000
39915594041Female49360001
\n", 404 | "
" 405 | ], 406 | "text/plain": [ 407 | " User ID Gender Age EstimatedSalary Purchased\n", 408 | "397 15654296 Female 50 20000 1\n", 409 | "398 15755018 Male 36 33000 0\n", 410 | "399 15594041 Female 49 36000 1" 411 | ] 412 | }, 413 | "execution_count": 7, 414 | "metadata": {}, 415 | "output_type": "execute_result" 416 | } 417 | ], 418 | "source": [ 419 | "# Last `n` entries (rows) of the dataframe\n", 420 | "df.tail(3)" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": 8, 426 | "id": "015f879f-ce72-402f-8dc5-56e634f57a20", 427 | "metadata": {}, 428 | "outputs": [ 429 | { 430 | "data": { 431 | "text/plain": [ 432 | "pandas.core.frame.DataFrame" 433 | ] 434 | }, 435 | "execution_count": 8, 436 | "metadata": {}, 437 | "output_type": "execute_result" 438 | } 439 | ], 440 | "source": [ 441 | "type(df)" 442 | ] 443 | }, 444 | { 445 | "cell_type": "markdown", 446 | "id": "b22a626c", 447 | "metadata": {}, 448 | "source": [ 449 | "### 4 Subsetting" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": 9, 455 | "id": "2b837f2f", 456 | "metadata": {}, 457 | "outputs": [ 458 | { 459 | "data": { 460 | "text/plain": [ 461 | "0 19\n", 462 | "1 35\n", 463 | "2 26\n", 464 | "3 27\n", 465 | "4 19\n", 466 | " ..\n", 467 | "395 46\n", 468 | "396 51\n", 469 | "397 50\n", 470 | "398 36\n", 471 | "399 49\n", 472 | "Name: Age, Length: 400, dtype: int64" 473 | ] 474 | }, 475 | "execution_count": 9, 476 | "metadata": {}, 477 | "output_type": "execute_result" 478 | } 479 | ], 480 | "source": [ 481 | "# Select a column by name\n", 482 | "df['Age']" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": null, 488 | "id": "e43317ba-5caa-4daf-ba6c-233d110a3a55", 489 | "metadata": {}, 490 | "outputs": [], 491 | "source": [ 492 | "# *Select a column by index (rarely used)\n", 493 | "df.iloc[:,2]" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": null, 499 | "id": "82acc066", 500 | "metadata": {}, 501 
| "outputs": [], 502 | "source": [ 503 | "# Select multiple columns\n", 504 | "df[ ['Gender', 'EstimatedSalary'] ]" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": null, 510 | "id": "7cfdba49", 511 | "metadata": {}, 512 | "outputs": [], 513 | "source": [ 514 | "# *Select a row by index\n", 515 | "df.iloc[1,:] # (can leave out \",:\" here)" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "id": "2a66e380", 522 | "metadata": {}, 523 | "outputs": [], 524 | "source": [ 525 | "# Select multiple rows with colon\n", 526 | "df.iloc[10:15,:] # <-- upper index is \"one more\" (can leave out \",:\" here)" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": null, 532 | "id": "8c50920f-9023-4924-8416-458e4382f533", 533 | "metadata": {}, 534 | "outputs": [], 535 | "source": [ 536 | "# Select multiple rows\n", 537 | "df.iloc[[1, 67, 96],:]" 538 | ] 539 | }, 540 | { 541 | "cell_type": "code", 542 | "execution_count": null, 543 | "id": "6f92b159-60ba-4cb2-86e8-807d9ece7d2c", 544 | "metadata": {}, 545 | "outputs": [], 546 | "source": [ 547 | "# Select individual element (rarely used)\n", 548 | "# Salary of row 5\n", 549 | "df.iloc[5,3]" 550 | ] 551 | }, 552 | { 553 | "cell_type": "markdown", 554 | "id": "1de9139b-e048-4964-a71e-cd908d94496e", 555 | "metadata": {}, 556 | "source": [ 557 | "### 5 Functions and summaries\n", 558 | "* No need for complicated list comprehensions here\n", 559 | "* Use `numpy` to apply function to entire vector\n", 560 | "* Use `df.xxx()` methods to summarize data" 561 | ] 562 | }, 563 | { 564 | "cell_type": "code", 565 | "execution_count": 12, 566 | "id": "c13175f8-26e3-49fb-9e02-fcef658eb922", 567 | "metadata": {}, 568 | "outputs": [ 569 | { 570 | "data": { 571 | "text/plain": [ 572 | "0 9.852194\n", 573 | "1 9.903488\n", 574 | "2 10.668955\n", 575 | "3 10.950807\n", 576 | "4 11.238489\n", 577 | " ... 
\n", 578 | "395 10.621327\n", 579 | "396 10.043249\n", 580 | "397 9.903488\n", 581 | "398 10.404263\n", 582 | "399 10.491274\n", 583 | "Name: EstimatedSalary, Length: 400, dtype: float64" 584 | ] 585 | }, 586 | "execution_count": 12, 587 | "metadata": {}, 588 | "output_type": "execute_result" 589 | } 590 | ], 591 | "source": [ 592 | "import numpy\n", 593 | "numpy.log(df['EstimatedSalary'])" 594 | ] 595 | }, 596 | { 597 | "cell_type": "code", 598 | "execution_count": 13, 599 | "id": "9fe88c87", 600 | "metadata": {}, 601 | "outputs": [ 602 | { 603 | "data": { 604 | "text/plain": [ 605 | "69742.5" 606 | ] 607 | }, 608 | "execution_count": 13, 609 | "metadata": {}, 610 | "output_type": "execute_result" 611 | } 612 | ], 613 | "source": [ 614 | "# Average salary\n", 615 | "df['EstimatedSalary'].mean()" 616 | ] 617 | }, 618 | { 619 | "cell_type": "code", 620 | "execution_count": 14, 621 | "id": "87b87ca5-9575-4d0e-8382-f7152d39aa85", 622 | "metadata": {}, 623 | "outputs": [ 624 | { 625 | "data": { 626 | "text/plain": [ 627 | "array(['Male', 'Female'], dtype=object)" 628 | ] 629 | }, 630 | "execution_count": 14, 631 | "metadata": {}, 632 | "output_type": "execute_result" 633 | } 634 | ], 635 | "source": [ 636 | "# Unique values in a column. \n", 637 | "# Useful when analysing categories such as products, stocks, countries, etc\n", 638 | "df['Gender'].unique()" 639 | ] 640 | }, 641 | { 642 | "cell_type": "code", 643 | "execution_count": 15, 644 | "id": "b8f13b05", 645 | "metadata": {}, 646 | "outputs": [ 647 | { 648 | "data": { 649 | "text/plain": [ 650 | "0 True\n", 651 | "1 True\n", 652 | "2 True\n", 653 | "3 True\n", 654 | "4 True\n", 655 | " ... 
\n", 656 | "395 False\n", 657 | "396 False\n", 658 | "397 False\n", 659 | "398 True\n", 660 | "399 False\n", 661 | "Name: Age, Length: 400, dtype: bool" 662 | ] 663 | }, 664 | "execution_count": 15, 665 | "metadata": {}, 666 | "output_type": "execute_result" 667 | } 668 | ], 669 | "source": [ 670 | "df['Age'] <= 40" 671 | ] 672 | }, 673 | { 674 | "cell_type": "code", 675 | "execution_count": 16, 676 | "id": "009a71dc", 677 | "metadata": {}, 678 | "outputs": [ 679 | { 680 | "data": { 681 | "text/plain": [ 682 | "253" 683 | ] 684 | }, 685 | "execution_count": 16, 686 | "metadata": {}, 687 | "output_type": "execute_result" 688 | } 689 | ], 690 | "source": [ 691 | "# Count number of young customers\n", 692 | "sum(df['Age'] <= 40)" 693 | ] 694 | }, 695 | { 696 | "cell_type": "code", 697 | "execution_count": 17, 698 | "id": "6bf09334-b0ca-4d11-83cb-69f26c0f9fda", 699 | "metadata": {}, 700 | "outputs": [ 701 | { 702 | "data": { 703 | "text/plain": [ 704 | "0.6325" 705 | ] 706 | }, 707 | "execution_count": 17, 708 | "metadata": {}, 709 | "output_type": "execute_result" 710 | } 711 | ], 712 | "source": [ 713 | "# Fraction of young customers\n", 714 | "(df['Age'] <= 40).mean()" 715 | ] 716 | }, 717 | { 718 | "cell_type": "markdown", 719 | "id": "d5bc6e34", 720 | "metadata": {}, 721 | "source": [ 722 | "### *5.1 Multiple and complementary conditions .. not that easy\n", 723 | "\n", 724 | "- Pandas is an abstraction that simplifies data operations, but...\n", 725 | " - Has different Object Types: Series and DataFrame\n", 726 | " - Requires specific syntax (`~`, `&`, `|`) instead of `not`, `and`, `or`." 
727 | ] 728 | }, 729 | { 730 | "cell_type": "code", 731 | "execution_count": 18, 732 | "id": "f49efe59", 733 | "metadata": {}, 734 | "outputs": [ 735 | { 736 | "name": "stdout", 737 | "output_type": "stream", 738 | "text": [ 739 | "<class 'pandas.core.frame.DataFrame'>\n", 740 | "<class 'pandas.core.series.Series'>\n", 741 | "<class 'pandas.core.frame.DataFrame'>\n" 742 | ] 743 | } 744 | ], 745 | "source": [ 746 | "# A \"DataFrame\" object\n", 747 | "print(type(df))\n", 748 | "\n", 749 | "# Single column is seen as \"Series\" object\n", 750 | "print(type(df['Age']))\n", 751 | "\n", 752 | "# Multiple columns are seen as \"DataFrame\" object\n", 753 | "print(type(df[ ['Age', 'Purchased'] ]))" 754 | ] 755 | }, 756 | { 757 | "cell_type": "code", 758 | "execution_count": 19, 759 | "id": "2d06bf64-b30e-4d47-acb3-921b0528ddba", 760 | "metadata": {}, 761 | "outputs": [ 762 | { 763 | "data": { 764 | "text/plain": [ 765 | "0 False\n", 766 | "1 False\n", 767 | "2 False\n", 768 | "3 False\n", 769 | "4 False\n", 770 | " ... \n", 771 | "395 True\n", 772 | "396 True\n", 773 | "397 True\n", 774 | "398 False\n", 775 | "399 True\n", 776 | "Name: Age, Length: 400, dtype: bool" 777 | ] 778 | }, 779 | "execution_count": 19, 780 | "metadata": {}, 781 | "output_type": "execute_result" 782 | } 783 | ], 784 | "source": [ 785 | "# Complementary condition, \"~\" for \"not\"\n", 786 | "~(df['Age'] <=40)" 787 | ] 788 | }, 789 | { 790 | "cell_type": "code", 791 | "execution_count": 20, 792 | "id": "415a7c0a", 793 | "metadata": {}, 794 | "outputs": [ 795 | { 796 | "data": { 797 | "text/plain": [ 798 | "0 False\n", 799 | "1 False\n", 800 | "2 False\n", 801 | "3 True\n", 802 | "4 True\n", 803 | " ... 
\n", 804 | "395 False\n", 805 | "396 False\n", 806 | "397 False\n", 807 | "398 False\n", 808 | "399 False\n", 809 | "Length: 400, dtype: bool" 810 | ] 811 | }, 812 | "execution_count": 20, 813 | "metadata": {}, 814 | "output_type": "execute_result" 815 | } 816 | ], 817 | "source": [ 818 | "# Dual conditions, \"&\" for \"and\"\n", 819 | "# Users aged 40 or younger with a salary of at least 50'000\n", 820 | "(df['Age']<=40) & (df['EstimatedSalary']>=50000)" 821 | ] 822 | }, 823 | { 824 | "cell_type": "code", 825 | "execution_count": 21, 826 | "id": "784a63c0", 827 | "metadata": {}, 828 | "outputs": [ 829 | { 830 | "data": { 831 | "text/plain": [ 832 | "0 True\n", 833 | "1 False\n", 834 | "2 True\n", 835 | "3 True\n", 836 | "4 True\n", 837 | " ... \n", 838 | "395 True\n", 839 | "396 True\n", 840 | "397 True\n", 841 | "398 False\n", 842 | "399 True\n", 843 | "Name: Age, Length: 400, dtype: bool" 844 | ] 845 | }, 846 | "execution_count": 21, 847 | "metadata": {}, 848 | "output_type": "execute_result" 849 | } 850 | ], 851 | "source": [ 852 | "# \"|\" for \"or\"\n", 853 | "# Users aged 30 or younger, or 45 or older\n", 854 | "(df['Age']<=30) | (df['Age']>=45)" 855 | ] 856 | }, 857 | { 858 | "cell_type": "code", 859 | "execution_count": 22, 860 | "id": "3d7fbfa4-b55b-4e1c-ba86-15ca21c28f18", 861 | "metadata": {}, 862 | "outputs": [ 863 | { 864 | "data": { 865 | "text/plain": [ 866 | "0.3675" 867 | ] 868 | }, 869 | "execution_count": 22, 870 | "metadata": {}, 871 | "output_type": "execute_result" 872 | } 873 | ], 874 | "source": [ 875 | "# Fraction of people above 40\n", 876 | "( ~(df['Age']<=40) ).mean()" 877 | ] 878 | }, 879 | { 880 | "cell_type": "code", 881 | "execution_count": 23, 882 | "id": "86bcec51", 883 | "metadata": {}, 884 | "outputs": [ 885 | { 886 | "data": { 887 | "text/plain": [ 888 | "1.0" 889 | ] 890 | }, 891 | "execution_count": 23, 892 | "metadata": {}, 893 | "output_type": "execute_result" 894 | } 895 | ], 896 | "source": [ 897 | "# Tertium non datur\n", 898 | 
"( ~(df['Age']<=40) ).mean() + (df['Age']<=40).mean()" 899 | ] 900 | }, 901 | { 902 | "cell_type": "markdown", 903 | "id": "2cf5e998", 904 | "metadata": {}, 905 | "source": [ 906 | "### 6 Subsetting by condition\n", 907 | "\n", 908 | "* Daily bread of a data scientist. Analyze ...\n", 909 | " * \"all older participants\"\n", 910 | " * \"all older male participants\"\n", 911 | " * \"all older male participants with high income\"\n", 912 | "* **Syntax:** `df[ <condition> ]`\n", 913 | "* **Syntax:** `df['<column>'][ <condition> ]`" 914 | ] 915 | }, 916 | { 917 | "cell_type": "code", 918 | "execution_count": null, 919 | "id": "4c73dc0b-95d5-41cf-90ca-43edfd8d35b7", 920 | "metadata": {}, 921 | "outputs": [], 922 | "source": [ 923 | "# Recap\n", 924 | "df['Age']>=30" 925 | ] 926 | }, 927 | { 928 | "cell_type": "code", 929 | "execution_count": null, 930 | "id": "d66edc16-958d-4b60-b424-bff241286b3a", 931 | "metadata": {}, 932 | "outputs": [], 933 | "source": [ 934 | "# Select rows for which <condition> is True\n", 935 | "df[ df['Age']>=30 ]" 936 | ] 937 | }, 938 | { 939 | "cell_type": "code", 940 | "execution_count": null, 941 | "id": "8547f18c", 942 | "metadata": {}, 943 | "outputs": [], 944 | "source": [ 945 | "# Subset just one column\n", 946 | "df['EstimatedSalary'][ df['Age']>=30 ]" 947 | ] 948 | }, 949 | { 950 | "cell_type": "markdown", 951 | "id": "2f2b5861-115e-4a73-b32e-581b4293ba54", 952 | "metadata": {}, 953 | "source": [ 954 | "**Notice:** Length is now shorter" 955 | ] 956 | }, 957 | { 958 | "cell_type": "code", 959 | "execution_count": null, 960 | "id": "6f0f999f", 961 | "metadata": {}, 962 | "outputs": [], 963 | "source": [ 964 | "# Statistics of a subset\n", 965 | "# Average salary of people aged 30 or more.\n", 966 | "df['EstimatedSalary'][ df['Age']>=30 ].mean()" 967 | ] 968 | }, 969 | { 970 | "cell_type": "markdown", 971 | "id": "f62b0c32-a2c6-4343-9080-111b126c6ca1", 972 | "metadata": {}, 973 | "source": [ 974 | "#### 6.1 Multiple conditions\n", 975 | "* Require extra `()` around 
condition\n", 976 
| "* Still `&` and `|` operators\n", 977 | "* **Syntax** `df['<column>'][ (<condition1>) & (<condition2>) ]`" 978 | ] 979 | }, 980 | { 981 | "cell_type": "code", 982 | "execution_count": null, 983 | "id": "f195c76d-7e3f-4b19-ad9e-de731aa666ba", 984 | "metadata": {}, 985 | "outputs": [], 986 | "source": [ 987 | "df['EstimatedSalary'][ (df['Age']>=30) & (df['Gender']==\"Male\") ].mean()" 988 | ] 989 | }, 990 | { 991 | "cell_type": "code", 992 | "execution_count": null, 993 | "id": "a3a8f727-e0b5-483b-a81f-4eaec9f2d4ba", 994 | "metadata": {}, 995 | "outputs": [], 996 | "source": [ 997 | "df['EstimatedSalary'][ (df['Age']>=30) & (df['Gender']==\"Male\") & \n", 998 | " (df['EstimatedSalary']>40000) ].mean()" 999 | ] 1000 | }, 1001 | { 1002 | "cell_type": "markdown", 1003 | "id": "f7050c7b-0c38-494d-9c4d-345f6f9dc98d", 1004 | "metadata": {}, 1005 | "source": [ 1006 | "**Exercise:** Answer these questions ...\n", 1007 | "* Is the average salary for male or female participants higher?\n", 1008 | "* What is the average salary of all participants?" 1009 | ] 1010 | }, 1011 | { 1012 | "cell_type": "code", 1013 | "execution_count": null, 1014 | "id": "b33b0c32-84cf-453c-a95c-c1f3c89f448c", 1015 | "metadata": {}, 1016 | "outputs": [], 1017 | "source": [ 1018 | "df['EstimatedSalary'][ df['Gender']==\"Male\" ].mean()" 1019 | ] 1020 | }, 1021 | { 1022 | "cell_type": "code", 1023 | "execution_count": null, 1024 | "id": "53b237f3-7bb9-4b26-9a5b-970becb5977c", 1025 | "metadata": {}, 1026 | "outputs": [], 1027 | "source": [ 1028 | "df['EstimatedSalary'][ ~(df['Gender']==\"Male\") ].mean()" 1029 | ] 1030 | }, 1031 | { 1032 | "cell_type": "code", 1033 | "execution_count": null, 1034 | "id": "f412f591-579f-4c7f-8123-96d6ad81b6eb", 1035 | "metadata": {}, 1036 | "outputs": [], 1037 | "source": [ 1038 | "df['EstimatedSalary'].mean()" 1039 | ] 1040 | }, 1041 | { 1042 | "cell_type": "markdown", 1043 | "id": "3e159077", 1044 | "metadata": {}, 1045 | "source": [ 1046 | "## 7 Modify dataframe structure\n", 1047 | "- 
Add new column: simply assign 
values\n", 1048 | "- Delete a column: `df.drop(columns='<column>')`" 1049 | ] 1050 | }, 1051 | { 1052 | "cell_type": "code", 1053 | "execution_count": null, 1054 | "id": "8b8b0f9f", 1055 | "metadata": {}, 1056 | "outputs": [], 1057 | "source": [ 1058 | "retirement_age = 60\n", 1059 | "df['YearsToRetirement'] = retirement_age - df['Age']\n", 1060 | "df" 1061 | ] 1062 | }, 1063 | { 1064 | "cell_type": "code", 1065 | "execution_count": null, 1066 | "id": "d28cfc3f-8bb3-45d9-9809-d9917653579c", 1067 | "metadata": {}, 1068 | "outputs": [], 1069 | "source": [ 1070 | "# Remove column\n", 1071 | "df.drop(columns='YearsToRetirement', inplace=True)" 1072 | ] 1073 | }, 1074 | { 1075 | "cell_type": "code", 1076 | "execution_count": null, 1077 | "id": "0c8c2339-a815-43d5-a776-84c23a71e1c0", 1078 | "metadata": {}, 1079 | "outputs": [], 1080 | "source": [ 1081 | "df" 1082 | ] 1083 | }, 1084 | { 1085 | "cell_type": "markdown", 1086 | "id": "b805a12a-0448-4322-b2b9-0663951e661e", 1087 | "metadata": {}, 1088 | "source": [ 1089 | "**Exercise:** Add a column named `SalaryUSD` that converts the salary from CHF to USD. The exchange rate is 1.15 USD/CHF." 
1090 | ] 1091 | }, 1092 | { 1093 | "cell_type": "code", 1094 | "execution_count": null, 1095 | "id": "b16173d6-5dd4-42a1-a500-7df00c8a7ee4", 1096 | "metadata": {}, 1097 | "outputs": [], 1098 | "source": [ 1099 | "# code goes here\n", 1100 | "df['SalaryUSD'] = df['EstimatedSalary']*1.15" 1101 | ] 1102 | }, 1103 | { 1104 | "cell_type": "code", 1105 | "execution_count": null, 1106 | "id": "7c3374e6-9832-41bc-9317-54a56bd3b5fd", 1107 | "metadata": {}, 1108 | "outputs": [], 1109 | "source": [ 1110 | "df" 1111 | ] 1112 | } 1113 | ], 1114 | "metadata": { 1115 | "kernelspec": { 1116 | "display_name": "Python 3 (ipykernel)", 1117 | "language": "python", 1118 | "name": "python3" 1119 | }, 1120 | "language_info": { 1121 | "codemirror_mode": { 1122 | "name": "ipython", 1123 | "version": 3 1124 | }, 1125 | "file_extension": ".py", 1126 | "mimetype": "text/x-python", 1127 | "name": "python", 1128 | "nbconvert_exporter": "python", 1129 | "pygments_lexer": "ipython3", 1130 | "version": "3.11.5" 1131 | } 1132 | }, 1133 | "nbformat": 4, 1134 | "nbformat_minor": 5 1135 | } 1136 | --------------------------------------------------------------------------------