├── .gitignore
├── data
│   ├── em.npy
│   ├── logistic_regression_data.npz
│   ├── vi_regression.csv
│   ├── ADSAI_football.csv
│   └── em.csv
├── imgs
│   ├── EM1.png
│   ├── PGM.png
│   ├── hmm.png
│   ├── mood.png
│   ├── factor2.png
│   ├── forloop.png
│   ├── plate_vec.png
│   ├── plateloop.png
│   ├── factor_graph.png
│   ├── factor_example.png
│   └── nested_plates.svg
├── README.md
├── methods_07.py
├── 02_numpy_pandas_sklearn
│   ├── 023_scikit_learn_exercise.ipynb
│   ├── 022_pandas_exercises.ipynb
│   ├── 021_numpy_exercises.ipynb
│   └── 021_numpy.ipynb
├── 01_probability_exercises.ipynb
├── Homeworks
│   ├── Homework_1.ipynb
│   └── Homework_2.ipynb
├── 01_probability_basics.ipynb
├── 09_exercise.ipynb
└── 04_exact_inference.ipynb
/.gitignore: -------------------------------------------------------------------------------- 1 | **/scripts/ -------------------------------------------------------------------------------- /data/em.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-doz/PML2025/HEAD/data/em.npy -------------------------------------------------------------------------------- /imgs/EM1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-doz/PML2025/HEAD/imgs/EM1.png -------------------------------------------------------------------------------- /imgs/PGM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-doz/PML2025/HEAD/imgs/PGM.png -------------------------------------------------------------------------------- /imgs/hmm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-doz/PML2025/HEAD/imgs/hmm.png -------------------------------------------------------------------------------- /imgs/mood.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-doz/PML2025/HEAD/imgs/mood.png -------------------------------------------------------------------------------- /imgs/factor2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-doz/PML2025/HEAD/imgs/factor2.png -------------------------------------------------------------------------------- /imgs/forloop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-doz/PML2025/HEAD/imgs/forloop.png -------------------------------------------------------------------------------- /imgs/plate_vec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-doz/PML2025/HEAD/imgs/plate_vec.png -------------------------------------------------------------------------------- /imgs/plateloop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-doz/PML2025/HEAD/imgs/plateloop.png -------------------------------------------------------------------------------- /imgs/factor_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-doz/PML2025/HEAD/imgs/factor_graph.png -------------------------------------------------------------------------------- /imgs/factor_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-doz/PML2025/HEAD/imgs/factor_example.png
-------------------------------------------------------------------------------- /data/logistic_regression_data.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-doz/PML2025/HEAD/data/logistic_regression_data.npz -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Welcome to the official repository for the Probabilistic Machine Learning tutoring sessions at Università degli Studi di Trieste! 2 | 3 | This repository contains materials and exercises to support students throughout the course. 4 | For any questions regarding the tutoring sessions or course content, feel free to reach out to the tutors Romina Doz (romina.doz@phd.units.it) and Sara Candussio (sara.candussio@phd.units.it). 5 | 6 | Happy learning! 7 | -------------------------------------------------------------------------------- /data/vi_regression.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 0.8510247697231936,6.302669189693166 3 | 0.5017989569282866,12.503789695155673 4 | 0.1908432501293003,22.51349627538952 5 | 0.010764768950958277,21.75785978328044 6 | 0.7140322105237382,-0.9057970013956265 7 | 2.342547275480881,-4.793329714077552 8 | 1.5246788917755578,-2.594178694823033 9 | 0.030191479837389148,13.123643493050961 10 | -1.846745549002346,18.24361200112205 11 | 0.003495190641747407,29.093050943749926 12 | 0.24623216340659973,13.595997696120593 13 | 0.48476796331745636,1.141059921070818 14 | 0.5106031327433457,-5.253868225037637 15 | -1.4558338530309574,28.01116743022613 16 | 0.41735160266776783,8.453574410117781 17 | 1.3671657209026347,-0.06854687633100198 18 | 0.6953433394341216,5.564694235730063 19 | 0.39778807818396034,7.600790625265244 20 | -1.013924613278784,17.343363139818806 21 | -0.701900111076895,18.734978730641263 -------------------------------------------------------------------------------- /methods_07.py: -------------------------------------------------------------------------------- 1 | import pyro 2 | import pyro.distributions as dist 3 | 4 | def conditioned_scale_file(obs, guess=8.5): 5 | weight = pyro.sample("weight", dist.Normal(guess, 1.)) 6 | measurement = pyro.sample("measurement", dist.Normal(weight, 1.), obs=obs) 7 | return measurement 8 | 9 | def eight_school_file(J, sigma, y=None): 10 | mu = pyro.sample("mu", dist.Normal(0., 5.)) 11 | tau = pyro.sample("tau", dist.HalfCauchy(5.)) 12 | with pyro.plate("schools", J): 13 | theta = pyro.sample("theta", dist.Normal(mu, tau)) 14 | obs = pyro.sample("obs", dist.Normal(theta, sigma), obs=y) 15 | 16 | 17 | 18 | def eight_schools_noncentered_file(J, sigma, y=None): 19 | mu = pyro.sample("mu", dist.Normal(0., 5.)) 20 | tau = pyro.sample("tau", dist.HalfCauchy(5.)) 21 | with pyro.plate("schools", J): 22 | nu = pyro.sample("nu", dist.Normal(0., 1.)) 23 | theta = mu + tau * nu 24 | obs = pyro.sample("obs", dist.Normal(theta, sigma), obs=y) 25 | -------------------------------------------------------------------------------- /imgs/nested_plates.svg: -------------------------------------------------------------------------------- [SVG markup stripped during extraction; the recoverable content is a Graphviz figure of nested plates: "plate1" containing two copies of "plate2", with nodes x, y, z and edges x->y and y->z.]
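A minimal usage sketch for the Pyro models defined in methods_07.py above (this sketch is not part of the repository): it runs NUTS on the non-centered eight-schools model. The data values are the classic Rubin (1981) eight-schools estimates, assumed here purely for illustration.

```python
# Hypothetical usage of methods_07.py, assuming pyro-ppl and torch are
# installed and this is run from the repository root.
import torch
from pyro.infer import MCMC, NUTS

from methods_07 import eight_schools_noncentered_file

J = 8  # number of schools
y = torch.tensor([28., 8., -3., 7., -1., 1., 18., 12.])        # observed effects
sigma = torch.tensor([15., 10., 16., 11., 9., 11., 10., 18.])  # known std errors

# NUTS/MCMC draw posterior samples for mu, tau and the per-school nu's.
mcmc = MCMC(NUTS(eight_schools_noncentered_file), num_samples=500, warmup_steps=500)
mcmc.run(J, sigma, y=y)

samples = mcmc.get_samples()
print("posterior mean of mu: ", samples["mu"].mean().item())
print("posterior mean of tau:", samples["tau"].mean().item())
```

The non-centered parameterization (theta = mu + tau * nu) is the standard remedy for the funnel geometry of this posterior, which is presumably why the file provides both variants.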
-------------------------------------------------------------------------------- /02_numpy_pandas_sklearn/023_scikit_learn_exercise.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "ae5bcaec", 6 | "metadata": {}, 7 | "source": [ 8 | "\"Open" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "%matplotlib inline\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "import numpy as np\n", 20 | "import pandas as pd\n", 21 | "import seaborn as sns" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "1) Import the diabetes dataset and show the input and output in a Pandas DataFrame" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "from sklearn import datasets\n", 38 | "\n", 39 | "data = datasets.load_diabetes()" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "2) Visualize the multiple pairwise bivariate distributions in the dataset (use the .pairplot() function of seaborn) " 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "3) Split it into train and test sets (20% of the data must be in the test set)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "from sklearn.model_selection import train_test_split\n", 70 | "\n", 71 | "#..." 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "4) Perform classic standardisation" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "from sklearn import preprocessing\n", 88 | "\n", 89 | "#..." 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "5) Fit a linear regression model and print the coefficient of determination (R2) of the prediction" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "from sklearn import linear_model\n", 106 | "\n", 107 | "#..." 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "6) 
Plot the real (on x-axis) vs predicted (on y-axis) outcomes together with a reference bisector line" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [] 123 | } 124 | ], 125 | "metadata": { 126 | "kernelspec": { 127 | "display_name": "Python 3", 128 | "language": "python", 129 | "name": "python3" 130 | }, 131 | "language_info": { 132 | "codemirror_mode": { 133 | "name": "ipython", 134 | "version": 3 135 | }, 136 | "file_extension": ".py", 137 | "mimetype": "text/x-python", 138 | "name": "python", 139 | "nbconvert_exporter": "python", 140 | "pygments_lexer": "ipython3", 141 | "version": "3.11.6" 142 | } 143 | }, 144 | "nbformat": 4, 145 | "nbformat_minor": 2 146 | } 147 | -------------------------------------------------------------------------------- /data/ADSAI_football.csv: -------------------------------------------------------------------------------- 1 | ,Date,Where,Team A,Team B,Goal A,Goal B 2 | 0,2023-01-17,Salesiani,"[17, 29, 53, 20, 22]","[10, 6, 11, 27, 45]",6,11 3 | 1,2023-02-02,Salesiani,"[10, 53, 6, 43, 52]","[13, 11, 17, 29, 27]",6,8 4 | 2,2023-02-16,Salesiani,"[11, 6, 40, 53, 1]","[10, 22, 29, 17, 36]",3,4 5 | 3,2023-02-16,Salesiani,"[11, 10, 53, 40, 1]","[6, 17, 29, 22, 36]",5,1 6 | 4,2023-02-28,Dopolavoro ferroviario,"[10, 53, 1, 22, 29]","[43, 27, 6, 37, 24]",4,3 7 | 5,2023-03-07,Salesiani,"[10, 20, 22, 29, 36]","[6, 43, 27, 24, 21]",10,2 8 | 6,2023-03-23,Salesiani,"[10, 6, 20, 1, 22]","[53, 29, 36, 40, 26]",6,0 9 | 7,2023-03-23,Salesiani,"[1, 40, 22, 20, 6]","[10, 53, 36, 26, 29]",6,7 10 | 8,2023-04-03,Salesiani,"[27, 6, 24, 4, 41]","[10, 17, 29, 22, 25]",3,5 11 | 9,2023-04-19,Via dei Mille,"[10, 6, 20, 41, 25]","[27, 29, 22, 36, 26]",6,11 12 | 10,2023-04-28,Salesiani,"[11, 6, 22, 10, 57]","[55, 56, 25, 29, 41]",8,4 13 | 11,2023-05-17,Salesiani,"[10, 6, 43, 22, 29]","[17, 36, 26, 41, 24]",5,7 14 | 12,2023-05-30,Salesiani,"[10, 53, 6, 43, 22]","[17, 27, 24, 21, 29]",3,3 15 | 13,2023-06-08,Salesiani,"[27, 29, 10, 6, 22]","[51, 44, 7, 24, 46]",4,7 16 | 14,2023-06-13,Salesiani,"[17, 29, 10, 53, 36]","[6, 43, 27, 22, 21]",6,5 17 | 15,2023-06-28,Salesiani,"[10, 29, 53, 6, 22]","[27, 24, 41, 17, 54]",6,7 18 | 16,2023-07-05,Salesiani,"[17, 29, 22, 2, 53]","[10, 6, 24, 14, 27]",2,10 19 | 17,2023-09-08,Salesiani,"[29, 10, 53, 26, 24]","[6, 43, 14, 36, 21]",6,5 20 | 18,2023-09-14,Salesiani,"[10, 24, 19, 6, 21]","[36, 29, 22, 40, 26]",4,5 21 | 19,2023-09-19,Salesiani,"[17, 27, 29, 43, 21]","[11, 6, 22, 41, 53]",9,4 22 | 20,2023-10-05,Salesiani,"[17, 29, 27, 53, 24]","[10, 6, 43, 14, 22]",4,5 23 | 21,2023-10-12,Salesiani,"[10, 53, 6, 27, 24]","[36, 20, 43, 18, 14]",0,4 24 | 22,2023-10-12,Salesiani,"[27, 10, 43, 24, 53]","[6, 20, 36, 18, 14]",1,4 25 | 23,2023-10-18,Salesiani,"[10, 53, 43, 22, 17]","[6, 36, 24, 41, 18]",5,8 26 | 24,2023-10-23,Salesiani,"[10, 6, 36, 24, 41]","[17, 18, 22, 53, 26]",10,6 27 | 25,2023-10-26,Salesiani,"[29, 22, 27, 24, 18]","[6, 10, 36, 26, 40]",7,7 28 | 26,2023-11-07,Salesiani,"[43, 17, 41, 53, 36]","[6, 18, 22, 29, 42]",7,12 29 | 27,2023-11-13,Salesiani,"[17, 27, 10, 53, 40]","[36, 41, 22, 6, 29]",5,12 30 | 28,2023-11-17,Salesiani,"[10, 45, 18, 6, 29]","[41, 36, 17, 21, 43]",16,10 31 | 29,2023-11-23,Salesiani,"[6, 29, 22, 18, 17]","[10, 53, 26, 36, 41]",9,16 32 | 30,2023-11-29,Salesiani,"[17, 36, 6, 45, 43]","[10, 27, 29, 22, 41]",5,9 33 | 31,2023-12-06,Salesiani,"[10, 43, 52, 6, 18]","[53, 36, 45, 26, 48]",5,3 34 | 32,2023-12-14,Salesiani,"[17, 36, 30, 6, 
43]","[22, 21, 45, 27, 41]",11,6 35 | 33,2023-12-20,Chiarbola,"[10, 27, 29, 6, 36, 22]","[28, 33, 12, 5, 45, 9]",5,9 36 | 34,2024-01-12,Salesiani,"[10, 17, 18, 22, 58]","[36, 6, 26, 52, 61]",17,5 37 | 35,2024-01-16,Salesiani,"[10, 22, 18, 6, 17]","[36, 53, 27, 43, 30]",14,12 38 | 36,2024-01-18,Salesiani,"[10, 27, 36, 24, 48]","[52, 49, 61, 59, 60]",10,4 39 | 37,2024-01-24,Salesiani,"[10, 53, 43, 17, 24]","[6, 52, 27, 36, 40]",3,3 40 | 38,2024-01-31,Via Rossetti,"[10, 43, 41, 17, 50]","[6, 22, 21, 53, 52]",3,6 41 | 39,2024-04-11,Salesiani,"[10, 50, 17, 22, 6]","[41, 53, 39, 43, 24]",3,4 42 | 40,2024-06-05,Salesiani,"[10, 17, 15, 8, 6]","[24, 53, 22, 29, 54]",4,2 43 | 41,2024-06-27,Salesiani,"[10, 43, 6, 21, 34]","[1, 29, 17, 22, 53]",5,4 44 | 42,2024-07-10,Via Rossetti,"[34, 22, 6, 24, 21]","[43, 53, 15, 1, 17]",4,10 45 | 43,2024-10-28,Salesiani,"[10, 17, 18, 22, 23]","[43, 53, 21, 24, 6]",9,2 46 | 44,2024-11-05,Salesiani,"[29, 22, 18, 6, 35]","[30, 23, 16, 40, 47]",13,12 47 | 45,2024-11-07,Salesiani,"[41, 30, 17, 23, 22]","[6, 29, 36, 15, 24]",6,4 48 | 46,2024-11-13,Salesiani,"[10, 24, 43, 36, 17]","[18, 22, 53, 29, 6]",6,5 49 | 47,2024-11-19,Salesiani,"[10, 43, 21, 17, 36]","[18, 23, 6, 53, 22]",14,4 50 | 48,2024-11-28,Via Rossetti,"[10, 23, 6, 53, 22]","[36, 43, 29, 21, 15]",5,4 51 | 49,2024-12-13,Via Rossetti,"[10, 22, 6, 29, 18]","[17, 36, 23, 26, 62]",10,6 52 | 50,2025-03-20,Via Rossetti,"[17, 10, 43, 18, 29]","[6, 23, 32, 40, 35]",9,10 53 | 51,2025-03-26,Via Rossetti,"[10, 43, 6, 52, 15]","[38, 17, 18, 23, 53]",7,9 54 | 52,2025-04-09,Via Rossetti,"[10, 35, 53, 17, 31]","[18, 23, 6, 30, 3]",9,7 55 | 53,2025-04-15,Salesiani,"[10, 23, 18, 29, 6]","[16, 30, 35, 15, 24]",6,6 -------------------------------------------------------------------------------- /02_numpy_pandas_sklearn/022_pandas_exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "37266d31", 6 | "metadata": {}, 7 | "source": [ 8 | "\"Open" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import pandas as pd\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "path_csv = 'https://raw.githubusercontent.com/DavideScassola/data_analysis_laboratory/main/notebooks/dati/reviews.csv'\n", 20 | "reviews = pd.read_csv(path_csv)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "1. Find the video game with the lowest score" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "2. Find the video game of genre 'Shooter' with the highest score" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "3. Find the oldest video game" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "4. How many PC video games were rated higher than 9 in 2005? 
Tip: apply the `len()` function to get the number of rows in a DataFrame" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "5. Find the newest video game. Tip: You can sort the rows of a DataFrame by the value of a column (or more) with the `.sort_values` method." 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "6. As you noticed, the dataset has both a score column and a score_phrase column (the grade). Can you find what grade (score_phrase) each score corresponds to? In other words, what are the score thresholds for moving from one grade to the next? Tip: You can use the `.unique()` method to find the unique values of a column" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "7. Load the dataset from the .csv located at the following path (on colab): `sample_data/california_housing_train.csv`" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "8. Show a histogram of the `median_house_value` variable." 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "9. Show a plot (scatterplot) of the variable 'population' (x-axis) versus the variable 'total_rooms' (y-axis).\n", 140 | "Tip: You can use matplotlib's 'scatter' method: it works similarly to 'plot' but does not join the points with a line." 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "10. How many inhabitants per house are there?" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "11. This dataset contains the geographic coordinates of each neighborhood; can you show the location of each neighborhood in a graph (as if it were a map)?" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "12. 
Show as before the location of each neighborhood, but this time neighborhoods with fewer than 5000 inhabitants should be represented in a different color than neighborhoods with more than 5000 inhabitants.\n", 183 | "Tip: If you call a matplotlib graphing function (such as plt.plot or plt.scatter) more than once, the two plots will be overlaid" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "13. Color the points according to the value of the `median_house_value` variable.\n", 198 | "Tip: use the `c` parameter of the scatter function" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [] 207 | } 208 | ], 209 | "metadata": { 210 | "colab": { 211 | "include_colab_link": true, 212 | "provenance": [] 213 | }, 214 | "kernelspec": { 215 | "display_name": "Python 3", 216 | "language": "python", 217 | "name": "python3" 218 | }, 219 | "language_info": { 220 | "codemirror_mode": { 221 | "name": "ipython", 222 | "version": 3 223 | }, 224 | "file_extension": ".py", 225 | "mimetype": "text/x-python", 226 | "name": "python", 227 | "nbconvert_exporter": "python", 228 | "pygments_lexer": "ipython3", 229 | "version": "3.10.12" 230 | } 231 | }, 232 | "nbformat": 4, 233 | "nbformat_minor": 0 234 | } 235 | -------------------------------------------------------------------------------- /01_probability_exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "bb6389e9", 6 | "metadata": {}, 7 | "source": [ 8 | "\"Open" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Notebook 1: **Probability** basics exercises" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "id": "0f40131d", 21 | "metadata": {}, 22 | "source": [ 23 | "\"Open" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### **Exercise 1**" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "We would like to estimate the effect of a drug D (D=1 the patient took the drug, D=0 didn't) on heart attack H (H=1 had a heart attack, H=0 didn't) by looking at an observational dataset which also contains information about the sex of the patients (S = M/F).\n", 38 | "\n", 39 | "- Control group (D=0):\n", 40 | "\n", 41 | "\n", 42 | "| | H = 1 | H = 0 |\n", 43 | "|--------|-------|-------|\n", 44 | "| Female | 1 | 19 |\n", 45 | "| Male | 12 | 28 |\n", 46 | "| Total | 13 | 47 |\n", 47 | "\n", 48 | "- Treatment group (D=1):\n", 49 | "\n", 50 | "| | H = 1 | H = 0 |\n", 51 | "|--------|-------|-------|\n", 52 | "| Female | 3 | 37 |\n", 53 | "| Male | 8 | 12 |\n", 54 | "| Total | 11 | 49 |\n", 55 | "\n", 56 | "- Among males, what is the difference between the probability of having a heart attack given that the patient took the drug and given that he didn't? Is the treatment working?\n", 57 | "- What about females?\n", 58 | "- What happens if you consider the whole population? \n", 59 | "- How can you explain that?"
60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "### **Exercise 2**" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "Donated blood is screened for AIDS. Suppose the test has 99% accuracy,\n", 74 | "and that one in ten thousand people in your age group are HIV positive. The\n", 75 | "test has a 5% false positive rating, as well. Suppose the test screens you as\n", 76 | "positive. What is the probability you have AIDS? Is it 99%? (Hint: 99% refers\n", 77 | "to P(test positive|you have AIDS). You want to find P(you have AIDS|test\n", 78 | "is positive).)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "### **Exercise 3**" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "You are given a fair coin with probability $0.5$ or an unfair coin (having $P(\\text{head})=0.8$) with probability $0.5$. Then you toss it two times, with results $H_1$ and $H_2$.\n", 93 | "\n", 94 | "Let's call $C$ the random variable describing if the coin is fair or not.\n", 95 | "1. Is $H_2$ independent from $H_1$?\n", 96 | "2. Factorize $p(h_1, h_2 | c)$ (write it as a product of simpler terms)\n", 97 | "3. Factorize $p(h_1, h_2, c)$\n", 98 | "4. Compute $p(h_1)$\n", 99 | "5. Compute $p(c | h_1)$\n", 100 | "6. Compute $p(h_2 | h_1)$\n" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "### **Exercise 4**" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "Given $p(x,y)=$\n", 115 | "\n", 116 | "| | Y=0 | Y=1 |\n", 117 | "|---|-----|-----|\n", 118 | "| X=0 | 0.2 | 0.1 |\n", 119 | "| X=1 | 0.15 | 0.0 |\n", 120 | "| X=2 | 0.25 | 0.3 |\n", 121 | "\n", 122 | "\n", 123 | "\n", 124 | "Calculate\n", 125 | "- $p(y)$\n", 126 | "- $p(x)$\n", 127 | "- $p(x|y)$\n", 128 | "- $p(y|x)$\n", 129 | "- $\\mathbb{E}[x]$\n", 130 | "- $\\mathbb{E}[y]$\n", 131 | "- $\\mathbb{E}[x|y]$\n", 132 | "- $\\text{cov}[x,y]$\n", 133 | "\n", 134 | "You can do it by hand on a piece of paper, but I also suggest doing it with the `numpy` library. You can do any of these computations with a single line of code." 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "import numpy as np\n", 144 | "\n", 145 | "p = np.array([[0.2, 0.1],\n", 146 | " [0.15, 0.0],\n", 147 | " [0.25, 0.3],\n", 148 | " ])\n", 149 | "\n", 150 | "x = np.array([0, 1, 2])\n", 151 | "y = np.array([0, 1])" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "### **Exercise 5**" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "\n", 166 | "\n", 167 | "\n", 168 | "Suppose we are interested in the relation between an exposure A (has been affected by coronavirus: 0 no, 1 yes) and an outcome Y (has myocarditis: 0 no, 1 yes). We conduct an observational study on a representative population and obtain the following proportions (N.B. this data is made up and does not come from a study):\n", 169 | "\n", 170 | "|A \\ Y| 0 | 1 |\n", 171 | "|-----|---|---|\n", 172 | "| 0 |0.75|0.07|\n", 173 | "| 1 |0.15|0.03|\n", 174 | "\n", 175 | "such that, 
for example, among all subjects, 3% have been affected by coronavirus and have myocarditis.\n", 176 | "\n", 177 | "- Among the exposed subjects, what is the proportion of individuals that have the outcome?\n", 178 | "- Are A and Y independent?" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "### **Exercise 6**" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "Given the distribution $p(x,y) = x + y \\text{ if } x \\in [0,1], y \\in [0,1], 0 \\text{ otherwise}$\n", 193 | "\n", 194 | "Calculate\n", 195 | "- $\\mathbb{E}[x|y]$\n", 196 | "- $\\rho[X,Y]$\n", 197 | "\n", 198 | "This time, I suggest trying the `sympy` library, which can help you with symbolic computations." 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "name": "stdout", 208 | "output_type": "stream", 209 | "text": [ 210 | "\n", 211 | "Z: 1\n" 212 | ] 213 | } 214 | ], 215 | "source": [ 216 | "from sympy import symbols, integrate, log, sqrt\n", 217 | "\n", 218 | "# Define the symbols\n", 219 | "x, y = symbols('x y')\n", 220 | "\n", 221 | "# Define the joint distribution\n", 222 | "p_xy = (x + y)\n", 223 | "\n", 224 | "# Example of integral\n", 225 | "Z = integrate(p_xy, (x, 0, 1), (y, 0, 1))\n", 226 | "\n", 227 | "print(\"\\nZ:\", Z)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "### **Exercise 7**" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "Two alternative definitions of conditional independence were given; prove that they are equivalent:\n", 242 | "$$p(a | b, c)=p(a | c) \\Longleftrightarrow p(a, b | c)=p(a|c)p(b|c)$$" 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "### **Exercise 8**" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "Compute the variance of the following unnormalized distribution (numerically, use `scipy.integrate`)\n", 257 | "$$p(x) \\propto \\sin^2(x) e^{-|x|}$$" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": {}, 263 | "source": [ 264 | "### **Exercise 9**" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "You are invited to play the following game: you toss a coin a first time; if tail appears, you win 1€ and the game ends; if head appears, you win 2€ and you can continue playing. From the second toss on, if tail appears you stop playing; if head appears, the amount you have already won doubles and you can keep playing.\n", 272 | "\n", 273 | "Example:\n", 274 | "1) You toss the coin and **head** appears (you are winning 2€)\n", 275 | "2) You toss the coin again and **head** appears (you are winning 4€)\n", 276 | "3) You toss the coin again and **head** appears (you are winning 8€)\n", 277 | "4) You toss the coin again and **tail** appears (the game ends and you won a total amount of 8€)\n", 278 | "\n", 279 | "Let's call $X$ the amount of money you win playing this game.\n", 280 | "1. What is the expected amount you win?\n", 281 | "2. What is the expected value of $\\log_2(X)$?\n", 282 | "3. 
How much would you pay to play this game?\n", 283 | "\n", 284 | "Hint: $ \\sum_{i=1}^{\\infty} i q^i = \\frac{q}{(1-q)^2}$" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "### **Exercise 10**" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": {}, 297 | "source": [ 298 | "It's night and you are looking into the sky waiting to see a falling star. A friend of yours tells you that the waiting time $T$ (hours) is distributed exponentially: $p(t) = 2e^{-2t}$.\n", 299 | "1. What is the probability of seeing the first falling star within 1 hour? How much time do you expect to wait?\n", 300 | "2. You have not seen anything in one hour; what is the probability of seeing the first falling star in the next hour? (Justify your answer.)\n", 301 | "3. Is the waiting time dependent on how much you have already waited? Is the answer the same for any distribution?\n", 302 | "4. What is the probability of seeing at least two falling stars in the first 1 hour?\n", 303 | "\n", 304 | "5. Let's say the distribution is instead $p(t) = \\text{Uniform}(0, 4)$ (for example, there is a known comet expected to show up at a certain point); how do the answers to questions 1 and 2 change?" 305 | ] 306 | } 307 | ], 308 | "metadata": { 309 | "kernelspec": { 310 | "display_name": "Python 3 (ipykernel)", 311 | "language": "python", 312 | "name": "python3" 313 | }, 314 | "language_info": { 315 | "codemirror_mode": { 316 | "name": "ipython", 317 | "version": 3 318 | }, 319 | "file_extension": ".py", 320 | "mimetype": "text/x-python", 321 | "name": "python", 322 | "nbconvert_exporter": "python", 323 | "pygments_lexer": "ipython3", 324 | "version": "3.10.12" 325 | } 326 | }, 327 | "nbformat": 4, 328 | "nbformat_minor": 4 329 | } 330 | -------------------------------------------------------------------------------- /Homeworks/Homework_1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "ccbc2313", 6 | "metadata": {}, 7 | "source": [ 8 | "\"Open" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "## Homework 1\n", 16 | "\n", 17 | "Probabilistic Machine Learning -- Spring 2025, UniTS" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "### Exercise 1: KL Divergence Between Two Gaussian Distributions\n", 25 | "\n", 26 | "Given two multivariate Gaussian distributions:\n", 27 | "\n", 28 | "$$\n", 29 | "p(x) = \\mathcal{N}(x \\mid \\mu, \\Sigma)\n", 30 | "$$\n", 31 | "\n", 32 | "$$\n", 33 | "q(x) = \\mathcal{N}(x \\mid m, L)\n", 34 | "$$\n", 35 | "\n", 36 | "where:\n", 37 | "- $\\mu$ and $ \\Sigma $ are the mean vector and covariance matrix of $ p(x) $,\n", 38 | "- $ m $ and $ L $ are the mean vector and covariance matrix of $ q(x) $,\n", 39 | "\n", 40 | "\n", 41 | "1. **Derive the closed-form expression** for $D_{\\text{KL}}(p \\parallel q)$ starting from the definition.\n", 42 | "\n", 43 | "2. **Implement a Python function** that computes the closed-form expression of the KL divergence for two-dimensional Gaussian distributions using only **numpy** functions.\n", 44 | "\n", 45 | "3. **Test the function** on the following concrete example where both $ p(x) $ and $q(x)$ are two-dimensional Gaussians. \n", 46 | "\n", 47 | "4. Implement another Python function which calculates an **approximation of $D_{\\text{KL}}(p \\parallel q)$ from samples** of p and q. Compare the results. 
\n" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 15, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "import numpy as np\n", 57 | "import matplotlib.pyplot as plt\n", 58 | "\n", 59 | "np.random.seed(42)\n", 60 | "\n", 61 | "mu_p = np.array([10, 12]) \n", 62 | "sigma_p = np.array([[3, 0.5], [0.5, 2]]) \n", 63 | "mu_q = np.array([14, 10]) \n", 64 | "sigma_q = np.array([[2, 0.3], [0.3, 1]]) \n", 65 | "\n", 66 | "data_points = 1000\n", 67 | "\n", 68 | "p_samples = np.random.multivariate_normal(mu_p, sigma_p, data_points)\n", 69 | "q_samples = np.random.multivariate_normal(mu_q, sigma_q, data_points)\n", 70 | "\n", 71 | "# p, q samples visualization\n", 72 | "plt.figure(figsize=(8, 6))\n", 73 | "plt.scatter(p_samples[:, 0], p_samples[:, 1], alpha=0.5, label='p(x)', color='blue')\n", 74 | "plt.scatter(q_samples[:, 0], q_samples[:, 1], alpha=0.5, label='q(x)', color='red')\n", 75 | "plt.title('Samples drawn from two multivariate Gaussians')\n", 76 | "plt.xlabel('x')\n", 77 | "plt.ylabel('y')\n", 78 | "plt.legend()\n", 79 | "plt.grid(True)\n", 80 | "plt.show()" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "### Exercise 2: Entropy of nonsingular linear transformations \n", 88 | "\n", 89 | "Consider a vector $x$ of continuous variables with distribution $p(\\mathbf{x})$ and corresponding entropy $H[\\mathbf{x}]$. Suppose that we make a nonsingular linear transformation of $x$ to obtain a new variable $y = Ax$. Show that the corresponding entropy is given by\n", 90 | "\n", 91 | "$$\n", 92 | "H[\\mathbf{y}] = H[\\mathbf{x}] + \\ln |\\mathbf{A}|\n", 93 | "$$\n", 94 | "\n", 95 | "where $|\\mathbf{A}|$ denotes the determinant of $A$.\n" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "### Exercise 3: A good reason to go to university\n", 103 | "\n", 104 | "You enrolled to a small tennis tournament organized by your university, that has only other three participants: let's call them $A$, $B$ and $C$.\n", 105 | "Your first match will be against $A$, and it's scheduled after the match between $A$ and $B$ and the match between $B$ and $C$.\n", 106 | "\n", 107 | "Assuming the result of a match $M \\in \\{0,1\\}$ between two players $X$ and $Y$ ($M=1$ means $X$ won, $M=0$ means $Y$ won) is described by the following model:\n", 108 | "\n", 109 | "$$M \\sim Bern(p)$$\n", 110 | "\n", 111 | "where $p = f(2(S_x - S_y))$ with $f(k) = \\frac{1}{1 + e^{-k}}$ and\n", 112 | "\n", 113 | "$$S_i \\sim \\mathcal{N}(0,1)$$\n", 114 | "\n", 115 | "is the \"latent\" skill of player $i$ (always the same for every match that player $i$ plays)\n", 116 | "\n", 117 | "1. Show a bayesian network describing the relationship between all the involved random variables.\n", 118 | "\n", 119 | "2. Make a model in pyro describing the stochastic process.\n", 120 | "\n", 121 | "3. Estimate by simulation the probability of (you) winninng against $A$, given that $A$ won against $B$ and $B$ won against $C$. 
Use exactly 30000 samples and call `set_seed()` before sampling.\n" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "import random\n", 131 | "import numpy as np\n", 132 | "import torch\n", 133 | " \n", 134 | "def set_seed():\n", 135 | " seed = 0\n", 136 | " random.seed(seed)\n", 137 | " np.random.seed(seed)\n", 138 | " torch.manual_seed(seed)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "### Exercise 4: Bayesian Inference on Carrier Status for Hemophilia\n", 146 | "\n", 147 | "Hemophilia is caused by a **recessive gene** located on the **X-chromosome**. If $X^*$ denotes an X-chromosome carrying the hemophilia gene, then:\n", 148 | " - $X^*X^*$ represents a **female with the disease**.\n", 149 | " - $X^*X$ represents a **female without the disease but carrying the gene**.\n", 150 | " - $X^*Y$ represents a **male with the disease**. \n", 151 | "\n", 152 | "Mary has: \n", 153 | "- **An affected brother** ($X^*Y$), \n", 154 | "- **An unaffected mother** ($XX$ or $X^*X$), \n", 155 | "- **An unaffected father** ($XY$). \n", 156 | "\n", 157 | "Since Mary’s **brother is affected**, he must have inherited an $X^*$ from the **mother**, implying that the **mother is a carrier** ($X^*X$). \n", 158 | "\n", 159 | "Let $\\theta$ be an indicator variable where: \n", 160 | "- $\\theta = 1$ if Mary is a **gene carrier** ($X^*X$), \n", 161 | "- $\\theta = 0$ if Mary is **not a carrier** ($XX$). \n", 162 | "\n", 163 | "Given the above information, before any additional observations, we assign the **prior probability**:\n", 164 | "\n", 165 | "$$\n", 166 | "P(\\theta = 1) = \\frac{1}{2}\n", 167 | "$$\n", 168 | "\n", 169 | "\n", 170 | "Mary has **two sons** (not identical twins, with unaffected father), both of whom are **not affected** by hemophilia. \n", 171 | "\n", 172 | "Let $y_i$ be an indicator variable where: \n", 173 | "- $y_i = 1$ if the $i$-th son is affected, \n", 174 | "- $y_i = 0$ if the $i$-th son is unaffected. \n", 175 | "\n", 176 | "Since males inherit their **X-chromosome from their mother**, if Mary is a carrier ($\\theta = 1$), each son has a **50% chance** of being affected. \n", 177 | "\n", 178 | "The probability of both sons being unaffected (we denote this event, i.e. $y_1 = 0$ AND $y_2 = 0$, with $y=1$), given $\\theta$, is:\n", 179 | "\n", 180 | "$$\n", 181 | "P(y = 1 \\mid \\theta) =\n", 182 | "\\begin{cases} \n", 183 | "0.25, & \\text{if } \\theta = 1 \\\\ \n", 184 | "1, & \\text{if } \\theta = 0 \n", 185 | "\\end{cases}\n", 186 | "$$\n", 187 | "\n", 188 | "1) Calculate $P(y)$\n", 189 | "\n", 190 | "2) Considering that both the sons are unaffected, calculate the posterior $P(\\theta = 1 | y)$\n", 191 | "\n", 192 | "3) What is the probability that a third son is affected? Calculate the predictive distribution\n", 193 | "\n", 194 | "4) Suppose a third son is born and he is not affected, update the posterior by computing $P(\\theta = 1 | y, y_3 = 0)$\n" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "### Exercise 5: Hierarchical model in Pyro\n", 202 | "\n", 203 | "In this problem, we consider a hierarchical model that represents the distribution of test scores across multiple schools. 
Our goal is to define a generative model that captures both **global** and **school-specific** variations in scores.\n", 204 | "\n", 205 | "- There are **N** schools, each having **M** students and a different average performance.\n", 206 | "- The **global mean score** across all schools follows a normal prior.\n", 207 | " $$\n", 208 | " \\mu_{\\text{global}} \\sim \\mathcal{N}(0, 5)\n", 209 | " $$\n", 210 | "\n", 211 | "- Each **school-specific mean** is derived from the global mean with a random offset: each school $i$ has a deviation from the global mean:\n", 212 | "\n", 213 | " $$\n", 214 | " \\theta_i \\sim \\mathcal{N}(0, 1), \\quad i = 1, \\dots, N\n", 215 | " $$\n", 216 | "\n", 217 | " s.t.\n", 218 | "\n", 219 | " $$\n", 220 | " \\mu_i = \\mu_{\\text{global}} + \\theta_i\n", 221 | " $$\n", 222 | "- **Individual student scores** are drawn from a normal distribution with their school's mean: each student $j$ in school $i$ receives a test score:\n", 223 | "\n", 224 | " $$\n", 225 | " y_{ij} \\sim \\mathcal{N}(\\mu_i, 1), \\quad j = 1, \\dots, M\n", 226 | " $$\n", 227 | "\n", 228 | "Generative model:\n", 229 | "\n", 230 | "1) Sample the global mean\n", 231 | "2) For each school, sample its offset and compute its mean.\n", 232 | "3) For each student in each school, sample their test score\n", 233 | "4) Plot one histogram for each school, showing the distribution of the student scores.\n", 234 | "\n", 235 | "**NOTE: use the plate notation!**" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "### Exercise 6: Extending Belief Propagation to the Sum-Product Algorithm\n", 243 | "\n", 244 | "In the notebook *\"Exact Inference with Belief Propagation\"*, we previously computed the marginal distribution of a given variable using the message-passing method. Now, we aim to extend this implementation to the sum-product algorithm.\n", 245 | "\n", 246 | "1. **Extend the `Messages` class** by adding the following methods: \n", 247 | " - **`forward`**: Computes the forward pass. \n", 248 | " - **`backward`**: Computes the backward pass. \n", 249 | " - **`belief_propagation`**: Executes the forward and backward passes, then uses the computed messages to determine all marginal distributions. This method should return a dictionary mapping each variable name to its corresponding marginal distribution. \n", 250 | "\n", 251 | "2. **Apply the `belief_propagation` method** to compute the marginal distributions of the variables in the factor graph described on page 43 of the course notes. 
\n", 252 | "\n", 253 | "For this exercise, please submit the updated notebook **`04_exact_inference.ipynb`**, including your additional code.\n", 254 | "\n", 255 | "**NOTE: Make sure to add comments to all the code you write!**" 256 | ] 257 | } 258 | ], 259 | "metadata": { 260 | "kernelspec": { 261 | "display_name": "general", 262 | "language": "python", 263 | "name": "python3" 264 | }, 265 | "language_info": { 266 | "codemirror_mode": { 267 | "name": "ipython", 268 | "version": 3 269 | }, 270 | "file_extension": ".py", 271 | "mimetype": "text/x-python", 272 | "name": "python", 273 | "nbconvert_exporter": "python", 274 | "pygments_lexer": "ipython3", 275 | "version": "3.12.8" 276 | } 277 | }, 278 | "nbformat": 4, 279 | "nbformat_minor": 2 280 | } 281 | -------------------------------------------------------------------------------- /02_numpy_pandas_sklearn/021_numpy_exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6d888bd2", 6 | "metadata": {}, 7 | "source": [ 8 | "\"Open" 9 | ] 10 | }, 11 | { 12 | "attachments": {}, 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# Numpy exercises\n", 17 | "\n", 18 | "Probabilistic Machine Learning -- Spring 2025, UniTS\n", 19 | "\n", 20 | "### No loops allowed!\n", 21 | "(also list comprehension is not allowed)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "## Exercise 1" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "Compute the empirical correlation coefficient between the given obsevations `x` and `y`:\n", 36 | "\n", 37 | "$$\\rho[x,y] = \\frac{cov[x,y]}{\\sigma_x \\sigma_y}$$ " 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 1, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "import numpy as np\n", 47 | "np.random.seed(0)\n", 48 | "x = np.random.normal(0, 1, 1000)\n", 49 | "y = 2*x + np.random.normal(0, 1, 1000)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "### Solution" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "## Exercise 2" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "Define a function that computes the mean squared error between an array of observations `y` and an array of predictions `pred`:\n", 78 | "\n", 79 | "$$MSE(y,\\hat{y}) := \\frac{1}{n} \\sum_{i=1}^{n} (y - \\hat{y})^2$$" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 3, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "np.random.seed(0)\n", 89 | "y = np.random.normal(0, 1, 1000)\n", 90 | "pred = y + np.random.normal(0, 0.4, 1000)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "### Solution" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "## Exercise 3" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "Compute a matrix $M$ where $M_{i,j} := i \\times j$ for $i \\in \\{1,\\ldots,10\\}$ and $j \\in 
\\{1,\\ldots,10\\}$\n", 119 | "\n", 120 | "Hint: use broadcasting" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "### Solution" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "## Exercise 4" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "Given the following `(8,3)` matrix `x`, build a matrix `y` such that row $y_{i} = x_{2i} + x_{2i -1}$" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 6, 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "array([[ 0, 1, 2],\n", 160 | " [ 3, 4, 5],\n", 161 | " [ 6, 7, 8],\n", 162 | " [ 9, 10, 11],\n", 163 | " [12, 13, 14],\n", 164 | " [15, 16, 17],\n", 165 | " [18, 19, 20],\n", 166 | " [21, 22, 23]])" 167 | ] 168 | }, 169 | "execution_count": 6, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [ 175 | "x = np.arange(24).reshape(8,3)\n", 176 | "x" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "### Solution" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "## Exercise 5" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "Write a function that given a scalar valued function `f`, an interval `[a,b]` and a number of steps `n` approximates $\\int_a^b f(x) \\ dx$ using the [trapezoidal rule](https://www.wikiwand.com/en/Trapezoidal_rule). 
Then test it on the given function `g` over the interval `[-10,10]`.\n", 205 | "\n", 206 | "Hint: use the function `np.linspace`" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 8, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "def g(x):\n", 216 | "    return (x**3 - 2*x - x**5 * np.sin(x))*np.exp(-0.2*x**2)\n", 217 | "\n", 218 | "def trapezoidal_rule(f, a, b, n):\n", 219 | "    pass" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "### Solution" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "## Exercise 6" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "Given the following `(1000, 5)` matrix `m` representing a set of n=`1000` observations having `5` features, compute the empirical correlation matrix:\n", 248 | "$$ R = \\frac{1}{n} X^T X $$\n", 249 | "where $X$ is the standardized data (centered to mean 0 and rescaled to standard deviation 1).\n", 250 | "\n", 251 | "Hint: use `.dot()` or `@` for matrix multiplication and `.T` for transposing" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 9, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "np.random.seed(0)\n", 261 | "m = np.random.normal(0, 1, (1000, 5)).cumsum(axis=1)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "### Solution" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "## Exercise 7" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "Compute a series of 1000 coordinates $(x_i,y_i)$ such that:\n", 290 | "$(x_0,y_0) = (0,0)$ and $(x_{i+1},y_{i+1}) = (x_i + \\epsilon^x_i ,y_i + \\epsilon^y_i)$ where \n", 291 | "\n", 292 | "$\\epsilon^x_i \\sim \\mathcal{N}(0,1)$ and $\\epsilon^y_i \\sim \\mathcal{N}(0, 1)$\n", 293 | "\n", 294 | "Then visualize it using the following function (`plot_random_walk`, which takes as input an `(n,2)`-shaped array).\n", 295 | "\n", 296 | "Hint: use the function `np.cumsum` " 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 10, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "data": { 306 | "text/plain": [ 307 | "
" 308 | ] 309 | }, 310 | "metadata": {}, 311 | "output_type": "display_data" 312 | } 313 | ], 314 | "source": [ 315 | "import matplotlib.pyplot as plt\n", 316 | "plt.figure(dpi=400)\n", 317 | "\n", 318 | "def plot_random_walk(x):\n", 319 | " plt.plot(x[:,0], x[:,1], alpha=0.8)" 320 | ] 321 | }, 322 | { 323 | "cell_type": "markdown", 324 | "metadata": {}, 325 | "source": [ 326 | "### Solution" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": null, 332 | "metadata": {}, 333 | "outputs": [], 334 | "source": [] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "## Exercise 8" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "Implement the [ReLU (rectified linear unit) activation function](https://www.wikiwand.com/en/Rectifier_(neural_networks)) in all the 3 ways showed in this formula:\n", 348 | "\n", 349 | "\n", 350 | "\n", 351 | "
\n", 352 | " \n", 353 | "
\n", 354 | "\n", 355 | "then appliy it to the given array `x` and verify that results are equal.\n", 356 | "\n", 357 | "Hint: use `np.maximum`, `np.abs`, `np.where` for computing ReLu, use `==` and `np.all` for checking equality." 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": 11, 363 | "metadata": {}, 364 | "outputs": [], 365 | "source": [ 366 | "x = np.arange(10)-5" 367 | ] 368 | }, 369 | { 370 | "cell_type": "markdown", 371 | "metadata": {}, 372 | "source": [ 373 | "### Solution" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "metadata": {}, 386 | "source": [ 387 | "## Exercise 9" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "The entropy of a discrete distribution $p$ is defined as:\n", 395 | "\n", 396 | "$$\\text{H}[p] := -\\mathbb{E}[\\log{p}(x)]$$\n", 397 | "\n", 398 | "compute it for the given array $p$ representing a discrete distribution." 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 12, 404 | "metadata": {}, 405 | "outputs": [], 406 | "source": [ 407 | "p = np.array([0.1, 0.2, 0.0, 0.2, 0.5])" 408 | ] 409 | }, 410 | { 411 | "cell_type": "markdown", 412 | "metadata": {}, 413 | "source": [ 414 | "### Solution" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": null, 420 | "metadata": {}, 421 | "outputs": [], 422 | "source": [] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "## Exercise 10" 429 | ] 430 | }, 431 | { 432 | "cell_type": "markdown", 433 | "metadata": {}, 434 | "source": [ 435 | "The following array `price` with shape `(365, 7)` contains the prices of 7 different assets recorded at the end of 365 different days. The array `portfolio` of shape `(7,)` instead contains the amounts for each asset that you posses.\n", 436 | "\n", 437 | "Compute:\n", 438 | "- the total value of the portfolio $v_i$ at the end of each day\n", 439 | "- the value difference between consecutive days $v_i - v_{i-1}$\n", 440 | "- the value ratio between consecutive days $\\frac{v_i}{v_{i-1}}$\n", 441 | "- plot the price series of the third asset (using `plt.plot`)\n", 442 | "- plot $v_i$ (using `plt.plot`)" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 13, 448 | "metadata": {}, 449 | "outputs": [ 450 | { 451 | "data": { 452 | "text/plain": [ 453 | "
" 454 | ] 455 | }, 456 | "metadata": {}, 457 | "output_type": "display_data" 458 | } 459 | ], 460 | "source": [ 461 | "import matplotlib.pyplot as plt\n", 462 | "plt.figure(dpi=400)\n", 463 | "\n", 464 | "def generate_prices(n):\n", 465 | " rng = np.random.default_rng(13)\n", 466 | " x = rng.normal(0,0.01,size=(n,7))\n", 467 | " x[:,0] = 0.3*x[:,0] + 0.8*x[:,1] + 0.3*x[:,2]\n", 468 | " x[:,4] = x[:,4] * 2\n", 469 | " x[:,5:] = x[:,5:] * 0.5\n", 470 | " x[:, 3] += 0.001\n", 471 | " x = np.exp(np.cumsum(x, axis=0))\n", 472 | " return x\n", 473 | "\n", 474 | "x = generate_prices(365)\n", 475 | "\n", 476 | "portfolio = np.array([12, 200, 100, 125, 50, 5, 100])" 477 | ] 478 | }, 479 | { 480 | "cell_type": "markdown", 481 | "metadata": {}, 482 | "source": [ 483 | "### Solution" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": null, 489 | "metadata": {}, 490 | "outputs": [], 491 | "source": [] 492 | } 493 | ], 494 | "metadata": { 495 | "colab": { 496 | "include_colab_link": true, 497 | "provenance": [] 498 | }, 499 | "kernelspec": { 500 | "display_name": "general", 501 | "language": "python", 502 | "name": "python3" 503 | }, 504 | "language_info": { 505 | "codemirror_mode": { 506 | "name": "ipython", 507 | "version": 3 508 | }, 509 | "file_extension": ".py", 510 | "mimetype": "text/x-python", 511 | "name": "python", 512 | "nbconvert_exporter": "python", 513 | "pygments_lexer": "ipython3", 514 | "version": "3.12.8" 515 | } 516 | }, 517 | "nbformat": 4, 518 | "nbformat_minor": 0 519 | } 520 | -------------------------------------------------------------------------------- /data/em.csv: -------------------------------------------------------------------------------- 1 | -1.7727746025119444,0.7869448993373641 2 | -0.23410035554764475,-0.3480711264910097 3 | -0.8998850826005726,-0.12626155081781043 4 | 0.08755725148499094,2.142619910788488 5 | -2.5256688442096027,-11.834255137275209 6 | 0.1561160035342053,-0.25785915619013045 7 | -1.9332227389676218,-7.269151992591847 8 | 2.354516624132628,0.6770536495214052 9 | -2.2111281796011366,2.3860782901393964 10 | -1.8214866109180663,3.518887817085782 11 | -1.0857777046740404,0.2972159488937588 12 | 1.352267111956151,2.20663018196168 13 | -1.8051443662990545,7.241147629163413 14 | 1.056475926108212,0.9901028910997285 15 | -2.158647779721559,0.7953623786879809 16 | -2.4016502021927826,3.0759385810946567 17 | 1.61541018266507,1.7565762098681168 18 | -2.3611027208640567,0.16230136049778343 19 | -0.01184675703508542,2.1141852547033366 20 | -0.4219353896578686,1.9378229570967742 21 | -3.126837532706381,2.1191988065526597 22 | -1.0812081317587106,0.9405032955593291 23 | 0.0878300566070851,2.079154286266845 24 | -1.1675176295952296,-2.1525125811380574 25 | -1.730395620187975,-2.148310409981591 26 | -0.5719846276808673,-1.9449854746511122 27 | -0.9002557984505366,1.1736894566587508 28 | -2.358238710575357,1.5310769480467123 29 | 0.40947070398938235,2.3527308502552255 30 | -0.07591653768638618,1.1756858931989713 31 | 0.8766445555822378,2.393861993219337 32 | 0.5910863453885781,2.5466684275850238 33 | -0.9026183991025896,3.7091109110833624 34 | -0.5487131731760533,1.174493029267234 35 | -2.173303666883803,-2.142653087497118 36 | -2.0839264432032696,-1.7596217545021589 37 | -1.1596059065367605,3.4620209483910633 38 | -0.4080655195601741,0.5359299266572908 39 | -0.7742374490637052,0.4055095310333967 40 | -1.2784539478707553,-4.545861119240634 41 | -1.9236960207415534,-0.19898070286779013 42 | 0.18835508894860992,2.465394743167092 43 | 
-0.2283037749393937,0.0506408526334059 44 | -2.891312068632164,2.143810247769389 45 | -1.3040539529679898,3.3738670738332495 46 | 0.18067186694573906,2.31358595227511 47 | -1.502564262010188,-14.776639893545507 48 | -2.5100010112089217,2.4028531018736503 49 | -1.9978652180251313,-1.1892246394069215 50 | 1.1438767407059536,2.040533535294736 51 | -0.43036730195325923,0.16206152630111192 52 | 0.2426358572072409,2.846499044828597 53 | -1.4201856904950234,0.16936483775471434 54 | -2.336396992282095,0.399235450935818 55 | -1.7225316305983926,-1.0701267789485502 56 | -0.9103924592210759,2.3438626219071628 57 | 0.8330826101996738,0.8378067632892385 58 | 0.31403038993559873,1.660936320384073 59 | 0.03415019523821117,1.8302516022823283 60 | -2.3960486325212256,2.1054067974406294 61 | 2.2276844278250563,1.5048108834109077 62 | -0.6706122862190449,3.933855168520571 63 | 0.2142558521246618,2.574136203402172 64 | 0.220238088601529,2.427508191522124 65 | -3.3268026123003835,1.0281703740161083 66 | -0.86985826422885,0.5430659395128203 67 | -2.058018067313901,4.2814314341457 68 | 0.24202349870101325,2.3664169616787043 69 | 1.8202900791256225,2.005038633856421 70 | -1.7310972662262971,-1.0929258454961572 71 | -3.48283606405654,1.2093242826422537 72 | 0.25873919820206714,1.8723999273374428 73 | -1.2254867596930485,-2.9312502860335767 74 | -2.0601960452601107,3.7723731893288095 75 | 0.1317354024288988,1.695142760217328 76 | -1.7263040656935749,0.7098864589928002 77 | -2.2098504980501334,-4.477071794618643 78 | -1.235836191217881,0.6729448372477445 79 | -2.396589385968681,-1.92946488669928 80 | -1.4855748067093912,2.462326452819151 81 | 0.31518294603070296,2.1621215979194317 82 | -2.0385438812976227,-3.7357669790246524 83 | -0.6947846852281541,2.5152268822003636 84 | 0.8281463316356485,4.140552886874229 85 | 1.017771670395161,1.0634108901394248 86 | -0.9136733262554744,1.7986038820798216 87 | -0.14606285181733236,0.4497669120309873 88 | -2.1885937302396394,0.9537994212515634 89 | 0.972905119665757,1.6342256516522817 90 | -2.2160840963913446,-6.972419529486582 91 | -1.6263593918950479,-7.3314161670363385 92 | 1.3027412818619586,1.3235247884907888 93 | -1.7552969934649383,-7.204683612272409 94 | 0.9466171229429312,3.7086225281512473 95 | 1.2551191866210865,1.7835270497293119 96 | -1.9151116608112309,6.658608298494364 97 | -2.0352137925667506,1.696320811246629 98 | 1.0088986612097997,2.3773579749788407 99 | -1.351619655312319,4.284518494324349 100 | -1.0306931879497765,2.1368595580770027 101 | 0.3676288015484024,1.1593553770982814 102 | -1.1276421228102202,-0.2705451388386908 103 | 1.7126276789993966,0.5463961930727126 104 | 0.19768576365378548,2.3366114545325596 105 | -0.21992699431132146,1.6065123153876018 106 | -2.1124561450947508,1.6445047690516312 107 | -0.5348553502589756,1.7239911285031488 108 | -0.6884438124703922,3.8866357538805927 109 | -2.087895367250253,3.404493699359156 110 | 0.41104682559519895,1.7974048494497608 111 | -2.5353778428509726,1.4701423446312774 112 | 0.9094762068128281,1.2551910328624634 113 | 0.6230432187873726,3.0670201293329074 114 | -2.2461141432178966,-2.782045053781368 115 | -0.32873599916096974,1.5987630475028736 116 | 1.3118255883384227,1.6095760619778745 117 | -1.8124549853454117,3.8480250608603512 118 | 1.8664453403748693,0.11386349791133488 119 | -1.7055588299288558,-4.225652717515665 120 | -0.976485684808749,1.5420361599030592 121 | -1.1152116769578504,1.7331192326263387 122 | 0.05988856286278632,3.456579174935696 123 | -2.200086119278,-1.5659249591720619 124 | 
-1.2263760180237928,0.6035890067152713 125 | -2.2242841330709346,3.001646265574602 126 | -0.1938895777151689,3.3612217493692564 127 | -1.3769032886964347,-3.2317694082858024 128 | -1.339353010678467,0.7193441010246779 129 | 0.04278370676013271,2.0626034989759567 130 | -1.8994920478791442,-9.152479122469462 131 | -2.0997514447286947,1.9797692766177928 132 | -1.4953217000568648,-0.456292774764105 133 | -1.8488908572625085,-0.37606576499799726 134 | 1.2274292331504213,2.834522426725071 135 | -1.4920402601954885,-0.27717210484413823 136 | 0.015418358046877634,0.6065032268234848 137 | 2.4789923921548724,0.60771544678986 138 | -1.2363043585783489,-2.7027191053752713 139 | -0.48242334186507785,2.2138545768698803 140 | -0.87939875833165,2.240937167454681 141 | 0.6437645515186708,2.0160109105134554 142 | -0.8492540262019677,0.5692134863563042 143 | -0.8408847058447826,3.0431643581596726 144 | -2.992889647328231,0.7951127115087742 145 | -0.047055373128683575,0.5836257882933447 146 | -2.2148618793058414,-3.016278751992406 147 | -2.240811386755418,-3.5236596789032104 148 | -2.128191811612621,3.0530110059708404 149 | 0.9599031446681536,4.638337871159939 150 | -2.2677011914895733,-3.135405379913732 151 | -1.4592319900890618,-4.174612659556796 152 | 0.9753635242156373,1.600824076903623 153 | -2.0633522631604597,0.8128675031254747 154 | 1.5971503044785629,1.8355335264941903 155 | 2.3312990824642377,1.2507262517389726 156 | -0.8676395335856278,-0.4138914395734703 157 | 0.08504295738592882,-0.07215726169197811 158 | 0.8580035200353653,0.7879155050829694 159 | 0.7252785445368597,2.6897773000485934 160 | -0.2186008764605396,0.895390142000183 161 | -0.4912051227133387,3.5342023162816707 162 | -0.5780255946180244,-0.32145439676931287 163 | 0.8554710556131426,2.0984140082960283 164 | -1.9548489289331454,0.33440571677400044 165 | 0.44754562502868644,2.1465018817026125 166 | -0.3846510850951088,0.30224393905537605 167 | -0.8523324852130965,-0.2601675447908329 168 | -1.7024388203663343,4.092105040049735 169 | -2.1948032153094985,-0.19301290063519438 170 | -2.2270454303241696,-8.717033534804784 171 | -0.5389317564124709,-0.587356838826969 172 | -2.4748217933053525,-9.390822917090325 173 | -1.85897018777863,2.911816759244571 174 | -2.6614283625841404,-1.2024192161583154 175 | -0.7610506940378463,4.184718082186533 176 | -0.5668868081503499,2.0701963917640693 177 | -2.0263002566170543,2.396406581881646 178 | 0.26732902893704114,3.844540759908806 179 | -1.7297947399286886,-4.491929435397781 180 | -2.727258924438142,5.150145783373621 181 | -1.8733245539119256,3.130101584126068 182 | -1.6665012362529328,-7.986942327786729 183 | 2.0271750975551504,0.9390138820066052 184 | -2.1805489845837434,-3.9560136029570376 185 | -0.42247326434620724,2.7865245183640095 186 | 0.556695049822148,2.662461532460219 187 | 1.0303020737351982,2.3179025018425032 188 | 0.6159789447610158,2.0844521590741736 189 | -2.4278737509566666,-5.5306677278570096 190 | -0.9031525428964499,3.236775973093129 191 | -1.6691468466751296,-0.46967703351287593 192 | -1.96884407670193,-3.7024232394596237 193 | 1.5602437895776862,0.23489968136364625 194 | 2.0524477865284085,0.4580552366529088 195 | 1.4141049126109997,-0.4306741579237787 196 | -1.467804984129009,-0.6065724314741172 197 | -0.649036494675655,3.1633178418985937 198 | 0.1392337076983221,1.303783937016193 199 | 1.0543808854943317,1.5175490677478787 200 | -1.1347598167417488,-0.16856435835187034 201 | -1.4716555719270037,1.5345895040713062 202 | -1.3091914128126927,2.8553588241023324 203 | 
-0.7618173904320471,-1.1340145165127522 204 | -1.6931323202614266,-5.0072286052673824 205 | -1.788503502686105,1.2354302457351203 206 | -3.3998396051926956,-0.39718804898252125 207 | -0.8272565418736881,2.808630882454969 208 | 0.8204709362545832,2.0069220911518086 209 | 1.1040798893379447,1.4378618180106488 210 | -1.3004125928053036,1.4200838929572621 211 | -2.260813462048462,1.7882714571579812 212 | -2.2505076430383375,1.7560744310024203 213 | 0.36671606924481526,1.999425897910278 214 | 0.2396380200810897,1.6346962976978507 215 | 0.8272429992328483,-1.7411464262649865 216 | 1.3009667769441275,0.43469883464466874 217 | -3.277964469673576,2.0599977271004 218 | -1.930430170632307,-1.5978302269396316 219 | -0.5368621466644921,1.8265873698699924 220 | -3.0923181647551905,-1.500000399792627 221 | 0.8294548093525613,1.8743840018246916 222 | -1.7611110852648468,-6.6885443888172595 223 | 0.7142154890649803,-0.18486807109522063 224 | 1.9052098184816293,1.6963242518279615 225 | -3.013296063881636,2.7774305032995183 226 | 0.8703469884656675,0.8277933157144945 227 | 0.0842226335668399,2.4667440854748706 228 | -0.5143424505520051,-0.12872379012517454 229 | -2.564932271789982,-6.542298067348528 230 | 0.651498196286753,3.0336942809375684 231 | 0.22110130430055408,2.915816431876346 232 | -0.26754528635896835,1.0822957384291076 233 | -0.8498573350227168,-0.19745687038839366 234 | -2.2922757685914252,-1.7018489472948848 235 | -1.9172908014830943,0.17072408559684016 236 | -2.1204799128168617,-6.974369539663644 237 | -1.8313938505092404,5.040133545645765 238 | -1.3604038388168067,-7.822485772545408 239 | 0.8285221614111173,2.8101558303579504 240 | -2.0422793091421365,0.09091825042187529 241 | 1.4050214495380187,0.14916761008375845 242 | -0.7711146485565102,1.3517718422087135 243 | -3.5245681696792315,3.492813723674515 244 | -2.0516856305434366,0.8073526610566149 245 | 1.1092177926712425,2.1110904428080817 246 | 0.5483772047310159,2.0682626919124276 247 | -1.6104180249095057,-3.3552965871189144 248 | -0.008412069960689772,2.652598386420399 249 | -2.734485700634464,-7.0541471644149425 250 | 0.6801438505494999,1.7339176654952484 251 | -1.4600030418530627,1.3056904018699385 252 | -2.254878550923161,-5.609230786362688 253 | -1.864208845693172,-3.6898056232104812 254 | 0.9453802100382809,2.1955765452070812 255 | -2.7936176863341338,2.1262513119396482 256 | -2.5081810963918705,3.1662327286442373 257 | -1.036140487638632,-7.837458905981772 258 | 0.1817067260743655,3.1128638105315813 259 | -2.482139577004953,3.2257264971304513 260 | -1.4420128377181844,-6.782186309554455 261 | -1.3516157273084244,-6.274565174853126 262 | -2.1750281049478883,-5.120109841726284 263 | -3.4819274873902213,0.6000199950697682 264 | -2.495285049140726,-4.2645282433601945 265 | -1.8932757107177125,-4.972142548693592 266 | 1.2255175082529013,2.0503402788935228 267 | -1.9953189544870558,0.8957898298507971 268 | -1.4922601520903473,-0.7036513402151607 269 | -0.2541732964658491,-1.6566705905534698 270 | -2.238807999941416,-1.6137819583571715 271 | -1.0214021723007856,4.420351886576062 272 | -1.6468256708890543,6.74749321700544 273 | 1.7258198147200141,0.5226540611997903 274 | 0.552126742818771,1.3038232887484738 275 | 1.3998268620325207,1.5542884680004583 276 | 0.5579772007634316,1.9306086172331587 277 | -2.2830041117027675,3.180841930708377 278 | -0.803477341904893,1.0068210038351946 279 | -0.7785738872620758,3.1054198762955916 280 | -1.7028838178450028,-7.425705064548873 281 | -0.9325964960036446,0.18350487287879036 282 | 
-0.6679507612354654,-0.1474166798913401 283 | -1.8946629011444553,1.1465564782917186 284 | -2.224373178337129,1.5336115527069505 285 | -1.0390577158085628,4.012005633075246 286 | 0.6336458974544148,3.563983277495329 287 | -0.8967790837096028,1.0029248042247192 288 | 0.24301853519870598,1.737947560723891 289 | 0.772482056840229,1.4698212689794738 290 | -1.3463767743562747,0.09413711756953425 291 | -1.414100945764045,2.3981775670380445 292 | -1.239750890168144,-4.509188789069948 293 | -0.5647160732233216,-0.27685552357239374 294 | -1.9753723293729395,0.9168919479818132 295 | -1.1281347535470567,-1.052172498943551 296 | -1.9712178914666176,-3.1596808158623255 297 | 0.3966857073437202,1.1967477126889177 298 | 1.4971571038292972,1.4574104627298758 299 | 1.5664004220349277,1.082226487630308 300 | 0.08310725114297135,0.016703205283830735 301 | 2.3482994088322937,3.653278897138937 302 | -1.6101668738601815,-3.7755284219111074 303 | 0.3923691724418754,2.4535673750693694 304 | 0.5337460058436962,2.2077037185693418 305 | -1.832553798760363,1.4659535158772976 306 | -0.3746897237341027,3.004608881830084 307 | -1.3832390065930356,-1.4071793793512983 308 | -1.9815922947274565,-1.6140664326811147 309 | -0.8965421237094908,0.8475282717178116 310 | 0.3717950373355615,1.1541440700275316 311 | 1.9576713474996255,-1.0239408465923128 312 | -2.257242708371523,-1.1429188477035048 313 | -1.7813493389141262,1.204084230469819 314 | 0.7552106701542395,2.0848290222392603 315 | -0.39860112924676294,2.683206483197163 316 | -2.2066423866739164,1.5215659723902912 317 | -1.6687176392140417,2.797786707307828 318 | -0.18066099486679588,3.0655708271557036 319 | -1.589795547206158,-0.5650600283298663 320 | -1.4424100740020114,-1.948621416000469 321 | -0.6315577900365199,1.8535144612762227 322 | -2.159181159817782,-13.307321640005142 323 | -1.9748912989676626,-1.155080892137946 324 | -1.9017390313275035,2.790975815365776 325 | 0.9882503593036556,1.8902179253867353 326 | -1.9496582424834858,1.688567995452606 327 | -0.9613186144903734,0.8008800562209483 328 | -1.8088805506609091,0.2562690262532388 329 | -2.111766931725327,2.767764861351611 330 | 0.28411962100684074,1.3898416951882893 331 | -2.240280459482683,-0.30439265820348704 332 | -2.863632013038921,0.6690630149524417 333 | -1.4188707005964107,-1.474084843325358 334 | 1.0639271198467362,1.7981408719081364 335 | -0.11154523995419896,2.9705037875871128 336 | 1.0723293813476151,0.6511250566191011 337 | -1.980815390639329,3.2495434155569907 338 | 0.4645809770188004,2.2641825465694305 339 | -0.8796082254282326,0.48928511381202655 340 | -1.483165373652175,3.267130094007462 341 | -1.8008977219787548,4.691126173765165 342 | -1.0529260983081348,2.8308654430163425 343 | -0.6786510039855922,1.621679646651063 344 | -2.3775347476278794,-0.06716623403643762 345 | 1.2638515484471888,1.168027720724398 346 | -2.4113978462269543,4.21335554068989 347 | -1.0003431747484444,-0.6037332728788751 348 | -2.510932927755952,-5.605879940516248 349 | -0.16260703426875156,3.359159770571053 350 | 0.03396583690308619,0.18953456960231918 351 | 1.9812106676180408,0.14075800294328933 352 | -0.7222905190872229,2.0781832551973762 353 | -1.5104605043432755,1.762401737770816 354 | -2.281105230033588,-2.835668991807428 355 | -0.7156712032822146,2.415641796421416 356 | -2.5113387375165637,7.230395187755869 357 | 2.657178509496738,0.2845677882929547 358 | -2.0110668071847835,3.634138795333479 359 | -0.40011136753092835,1.9733508658263452 360 | 0.3183703318098303,2.7392353747333953 361 | 
-1.6615569297016795,-2.151690775899006 362 | -1.8690731118374142,-0.11990735814628595 363 | 1.2978472944844035,0.9537105453403814 364 | 1.0913715285767034,1.9717156192015697 365 | 0.12865586780926952,2.038753588370761 366 | -1.869408557105599,-1.3831325888578232 367 | -2.0432817558582936,4.622224147718892 368 | -2.361000625212272,-2.405060020174735 369 | -0.3654626461134085,1.2466561152984588 370 | -1.8550918844183844,0.6076472031803901 371 | -0.04937515638764389,2.790361555239376 372 | 0.5577727063073785,-0.5889061141831649 373 | -1.154343208610935,-0.12357145063544661 374 | -1.573406152450888,-5.565127656347407 375 | -2.7821066052973498,-0.5115741588496374 376 | -0.3457454907419786,2.1121616613765095 377 | 0.8764781762240349,2.081416150260902 378 | 0.22103567722168727,1.93867970616814 379 | -2.4704176449755053,6.967619389824577 380 | 1.1886718357817405,0.7191817174041721 381 | -1.728199500043841,2.805800066308718 382 | -1.8386534911306698,-4.985246164060912 383 | -1.8530523145237465,0.28734564436365984 384 | 0.11416494289059509,2.9698300147923793 385 | -0.09138933326803189,2.6901897507604806 386 | 1.6815551249662017,1.2887678091058463 387 | -0.4425273000337637,2.3186759836812647 388 | 0.4326033504099336,0.7522841546230824 389 | -1.99446360143949,0.5320045400623092 390 | 1.2495779968518943,1.579537895091231 391 | -2.4197249904249922,-0.4198164871185819 392 | -1.4537811831822185,0.41227978515822117 393 | 0.5024799777521389,2.4971937017677295 394 | -2.065887255852351,2.2945961637679777 395 | -1.88360190795127,-5.834570168650543 396 | -0.5184569266111156,1.5881009875275702 397 | -1.6159627872221431,-1.2175216553976536 398 | -0.22735336954630925,2.41562102717895 399 | -2.1395855122771503,-0.687088628463667 400 | -2.0032556060604567,-6.226499355114521 401 | -------------------------------------------------------------------------------- /01_probability_basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "82017187", 6 | "metadata": {}, 7 | "source": [ 8 | "\"Open" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Notebook 1: **Probability** basics\n", 16 | "\n", 17 | "Probabilistic Machine Learning -- Spring 2025, UniTS" 18 | ] 19 | }, 20 | { 21 | "attachments": {}, 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "tags": [] 25 | }, 26 | "source": [ 27 | "### Random variables\n", 28 | "\n", 29 | "**Operational definitions**\n", 30 | "\n", 31 | "*Random processes* involve uncertainty due to chance, contrasting with *deterministic processes* that yield predictable outcomes.\n", 32 | "\n", 33 | "*Random variables* serve as abstract representations of outcomes in randomized experiments: they are functions mapping experimental results to real numbers.\n", 34 | "\n", 35 | "Random variables are not directly observable: what we can observe are the *realizations*, i.e. 
the result of applying the r.v. (which is a function) to an observed outcome of a random experiment.\n", 36 | "\n", 37 | "The domain of a random variable $X$ is the *sample space* $\mathcal{X}$.\n", 38 | "\n", 39 | "**Probability mass function** $p$\n", 40 | "\n", 41 | "- Finite or countable sample space $\mathcal{X}$;\n", 42 | "- $p_X(x):=P(X=x)$ for $x\in\mathcal{X}$.\n", 43 | "- Notation: instead of $p_X(x)$ we mostly use $p(x)$\n", 44 | "\n", 45 | "\n", 46 | "**Probability density function** $f$:\n", 47 | "\n", 48 | "- Uncountable sample space $\mathcal{X}$ (e.g. $\mathbb{R}$);\n", 49 | "- Measurable function $f_X:\mathcal{X}\rightarrow[0,+\infty)$;\n", 50 | "- $P(a \leq X \leq b) = \int_a^b f_X(x)dx$.\n", 51 | "- Notation: instead of $f_X(x)$ we mostly use $p(x)$\n", 52 | "\n", 53 | "It follows that $\int_\mathbb{R} f_X(x)dx=1$.\n", 54 | "\n", 55 | "\n", 56 | "### Notable probability distributions\n", 57 | "\n", 58 | "\n", 59 | "| discrete distribution | *pmf* | mean | variance |\n", 60 | "| :--------------------:|:-----:|:----:|:--------:|\n", 61 | "| Binomial $$\text{Bin}(n,p)$$ | $$ {n \choose x} p^x (1-p)^{n-x}$$ | $$np$$ | $$np(1-p)$$ |\n", 62 | "| Bernoulli $$\text{Bern}(p)$$| $$\begin{cases}p &k=1\\ 1-p&k=0\end{cases}$$ | $$p$$ |$$p(1-p)$$ |\n", 63 | "| Discrete Uniform $$\mathcal{U}(a,b)$$ | $$\frac{1}{b-a+1}$$ | $$\frac{b+a}{2}$$ |$$\frac{(b-a+1)^2-1}{12}$$ |\n", 64 | "| Geometric $$\text{Geom}(p)$$ | $$(1-p)^{k-1}p$$ |$$\frac{1}{p}$$|$$\frac{1-p}{p^2}$$ |\n", 65 | "| Poisson $$\text{Pois}(\lambda)$$ |$$\frac{\lambda^k e^{-\lambda}}{k!}$$|$$\lambda$$ | $$\lambda$$ |\n", 66 | "\n", 67 | "where:\n", 68 | "- $n\in\{0,1,2,...\}$\n", 69 | "- $p \in [0,1]$ or $p \in (0,1)$\n", 70 | "- $b\geq a$\n", 71 | "- $k\in\{1,2,3,...\}$ for the Geometric distribution, $k\in\{0,1,2,...\}$ for the Poisson\n", 72 | "- $\lambda \in \mathbb{R}^+$\n", 73 | "\n", 74 | "| continuous distribution | *pdf* | mean | variance |\n", 75 | "| :----------------------:|:-----:|:----:|:--------:|\n", 76 | "| Continuous Uniform $$\mathcal{U}(a,b)$$|$$\begin{cases}\frac{1}{b-a} & x \in [a,b]\\0 & \text{otherwise}\end{cases}$$|$$\frac{a+b}{2}$$|$$\frac{(b-a)^2}{12}$$ |\n", 77 | "| Exponential $$\text{Exp}(\lambda)$$|$$\lambda e^{-\lambda x}$$|$$1/\lambda$$|$$1/\lambda^2$$ |\n", 78 | "| Gaussian $$\mathcal{N}(\mu,\sigma^2)$$|$$\frac{1}{\sigma\sqrt{2\pi}}e^{-\frac{1}{2}\big(\frac{x-\mu}{\sigma}\big)^2}$$|$$\mu$$|$$\sigma^2$$|\n", 79 | "|Beta $$\text{Beta}(\alpha,\beta)$$|$$\frac{x^{\alpha-1}(1-x)^{\beta-1}}{B(\alpha,\beta)}$$|$$\frac{\alpha}{\alpha+\beta}$$|$$\frac{\alpha\beta}{(\alpha+\beta)^2(\alpha+\beta+1)}$$| \n", 80 | "|Gamma $$\text{Gamma}(\alpha, \beta)$$|$$\frac{\beta^\alpha}{\Gamma(\alpha)}x^{\alpha-1}e^{-\beta x}$$|$$\frac{\alpha}{\beta}$$|$$\frac{\alpha}{\beta^2}$$|\n", 81 | "|Dirichlet $$\text{Dir}(\alpha)$$|$$\frac{1}{B(\alpha)}\prod_{i=1}^{K}x_i^{\alpha_i-1}$$|$$\tilde{\alpha}_i$$|$$\frac{\tilde{\alpha}_i(1-\tilde{\alpha}_i)}{\alpha_0+1}$$ |\n", 82 | "|Student's t $$\text{St}(\nu)$$| $$\frac{\Gamma(\frac{\nu+1}{2})}{\sqrt{\nu\pi}\Gamma(\frac{\nu}{2})}{\Big(1+\frac{x^2}{\nu}\Big)^{-\frac{\nu+1}{2}}}$$ |$$0$$|$$\begin{cases}\frac{\nu}{\nu-2}&\nu>2\\\infty&1<\nu\leq2\end{cases}$$ |\n", 83 | "\n", 84 | "where:\n", 85 | "- $b \geq a$\n", 86 | "- $\lambda \in \mathbb{R}^+$\n", 87 | "- $\mu\in\mathbb{R}$, $\sigma>0$\n", 88 | "- $\alpha,\beta>0$ for the Beta and Gamma distributions\n", 89 | "- $\alpha_i>0$ for the Dirichlet distribution\n", 90 | "- $K\in\mathbb{Z}_{\geq2}$\n", 91 | "- 
$\\tilde{\\alpha}_i=\\frac{\\alpha_i}{\\sum_{h=1}^K\\alpha_h}$, $\\alpha_0=\\sum_{i=1}^K \\alpha_i$\n", 92 | "- $\\nu>1$\n" 93 | ] 94 | }, 95 | { 96 | "attachments": {}, 97 | "cell_type": "markdown", 98 | "metadata": { 99 | "tags": [] 100 | }, 101 | "source": [ 102 | "### Expected value\n", 103 | "\n", 104 | "**Definition**\n", 105 | "\n", 106 | "Let $X$ be a random variable.\n", 107 | "\n", 108 | "|values|expectation $E[X]$|\n", 109 | "|:----:|:----------------:|\n", 110 | "|finite| $$\\sum_{i=1}^k x_i p(x_i)$$|\n", 111 | "|countable|$$\\sum_{i=1}^\\infty x_i p(x_i)$$|\n", 112 | "|continuous|$$\\int_{\\mathbb{R}}x p(x)dx$$|\n", 113 | "\n", 114 | "where $p$ is the probability mass function of $X$ in the discrete case and the probability density function of $X$ in the continuous case. \n", 115 | "\n", 116 | "**Example: discrete case**\n", 117 | "\n", 118 | "Let $Y$ be a discrete random variable with values in $\\{0,1\\}$ and let $P(Y=1)=p$. Suppose we want to compute the expectation $\\mathbb{E}[|Y-p|]$.\n", 119 | "\n", 120 | "From the definition of expectation we know that, in the discrete case, we just need to multiply each possible value that the random variable can assume by its probability of occurring:\n", 121 | "\n", 122 | "$$\n", 123 | "\\mathbb{E}[|Y-p|] = p(1-p) + (1-p) p = 2p(1-p)\n", 124 | "$$\n", 125 | "\n", 126 | "**Example: continuous case**\n", 127 | "\n", 128 | "Let the pdf of $X$ be \n", 129 | "\n", 130 | "$$f(x)=\\begin{cases}cx^2(1-x) & 0\\leq x \\leq 1\\\\ 0 & \\text{otherwise}\\end{cases}$$\n", 131 | "\n", 132 | "We want to determine $c\\in\\mathbb{R}$ such that $f(x)$ is a valid *pdf*:\n", 133 | "\n", 134 | "$$1 =\\int_0^1 cx^2(1-x)dx = c \\int_0^1 (x^2-x^3)dx = c\\Big[\\frac{x^3}{3}-\\frac{x^4}{4}\\Big]_0^1 = \\frac{c}{12} $$\n", 135 | "\n", 136 | "$$\\Longrightarrow c=12$$\n", 137 | "\n", 138 | "Now we compute the expected value of $X$:\n", 139 | "\n", 140 | "$$E[X]=12\\int_0^1x^3(1-x)dx=12\\Big[\\frac{x^4}{4}-\\frac{x^5}{5}\\Big]_0^1=\\frac{3}{5}$$" 141 | ] 142 | }, 143 | { 144 | "attachments": {}, 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "### Marginal and conditional distributions\n", 149 | "\n", 150 | "**Definitions**\n", 151 | "\n", 152 | "Multiple random variables $X_1,\\ldots, X_N$ on the same probability space define a **multivariate random variable**, whose **joint probability mass function** is -- in the discrete case:\n", 153 | "\n", 154 | "$$p_{X_1,\\ldots, X_N}(x_1,\\ldots, x_N)=P(X_1=x_1,\\ldots,X_N=x_N)$$\n", 155 | "\n", 156 | "\n", 157 | "While the **joint probability density function** is -- in the continuous case:\n", 158 | "\n", 159 | "$$P(X_1\\in[a_1,b_1],\\ldots, X_N\\in[a_N,b_N])=\\int_{a_1}^{b_1}\\ldots\\int_{a_N}^{b_N}f_{X_1,\\ldots, X_N}(x_1,\\ldots,x_N)dx_1\\ldots dx_N$$\n", 160 | "\n", 161 | "\n", 162 | "![](./img/multivariate_normal_sample.png)\n", 163 | "
From Wikipedia: Joint probability distribution\n", 164 | "\n", 165 | "\n", 166 | "In the bivariate case, for example, we can derive marginal and conditional distributions from the joint distribution as follows: \n", 167 | "\n", 168 | "|$X$ values|marginal distribution| conditional distribution|\n", 169 | "|:--------:|:-------------------------------------------------:|:-----------------------:|\n", 170 | "| discrete | $$p_X(x)=\sum_{y\in{\mathcal{X}_Y}}p_{X,Y}(x,y)$$ | $$ p_{Y\|X}(y\|x) = \frac{p_{X,Y}(x,y)}{p_X(x)} $$ |\n", 171 | "| continuous | $$f_X(x)=\int_{\mathcal{X_Y}}f_{X,Y}(x,y)dy$$ | $$ f_{Y\|X}(y\|x) = \frac{f_{X,Y}(x,y)}{f_X(x)} $$ |\n", 172 | "\n", 173 | "These definitions easily extend to the multivariate case.\n", 174 | "\n", 175 | "\n", 176 | "Two *r.v.*s $X,Y$ are **independent** if and only if their joint probability equals the product of the marginal probabilities\n", 177 | "\n", 178 | "$$f_{X,Y}(x,y)=f_X(x)f_Y(y).$$" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "**Example: marginal and conditional from the joint**\n", 186 | "\n", 187 | "Let $X$ and $Y$ be two discrete random variables with joint probability distribution\n", 188 | "$$\n", 189 | "p(x,y) = \frac{1}{21}(x+y)\n", 190 | "$$\n", 191 | "for $x=1,2,3$ and $y=1,2$.\n", 192 | "\n", 193 | "The marginal distribution of $X$ is:\n", 194 | "$$\n", 195 | "p(x) = \sum_{y=1}^2 p(x,y) = \sum_{y=1}^2 \frac{1}{21}(x+y) = \frac{1}{21}(2x+3)\n", 196 | "$$\n", 197 | "for $x=1,2,3$.\n", 198 | "\n", 199 | "The conditional distribution of $Y$ given $X=1$ is:\n", 200 | "\n", 201 | "$$\n", 202 | "p(y|1)=\frac{p(1,y)}{p(1)}= \frac{\frac{1}{21}(1+y)}{\frac{5}{21}} = \frac{1}{5}(1+y)\n", 203 | "$$\n", 204 | "\n", 205 | "for $y=1,2$.\n" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "### Notation\n", 213 | "Given two random variables $X$ and $Y$, it is common practice to indicate their density/probability as $p(x)$ and $p(y)$ instead of $p_X(x)$ and $p_Y(y)$. So the density/probability function is somewhat implicit.\n", 214 | "\n", 215 | "In other words, the meaning of $p$ depends on the context, in particular the variable name used as argument. \n", 216 | "For example we can have $p(x) = \frac{1}{\sigma\sqrt{2\pi}}e^{-\frac{1}{2}\big(\frac{x-\mu}{\sigma}\big)^2}$ and $p(y) = \lambda e^{-\lambda y}$ without contradiction.\n", 217 | "\n", 218 | "The letter $p$ indicates a different mass/density function depending on the variable name used as its argument.\n", 219 | "\n", 220 | "The same holds for conditional probabilities $p_{X|Y}(x|y) \rightarrow p(x|y)$, joint distributions $p_{X,Y}(x,y) \rightarrow p(x,y)$ and in many other cases." 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "### Covariance and correlation\n", 228 | "\n", 229 | "**Covariance** measures the common variation of $X$ and $Y$. \n", 230 | "It is defined as $\text{cov}(X,Y)=\mathbb{E}[(X-\mathbb{E}[X])(Y-\mathbb{E}[Y])] = \mathbb{E}[XY]-\mathbb{E}[X]\mathbb{E}[Y].$\n", 231 | "\n", 232 | "The covariance of a random variable with itself is called **variance**: $\text{var}(X)=\text{cov}(X,X)=\mathbb{E}[(X-\mathbb{E}[X])^2]$.\n", 233 | "\n", 234 | "The **correlation coefficient** between $X$ and $Y$ is the normalized covariance: $\displaystyle{\rho=\frac{\text{cov}(X,Y)}{\sqrt{\text{var}(X)\text{var}(Y)}}}$. 
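As a quick numerical check of the example above, the joint $p(x,y)=\frac{1}{21}(x+y)$ can be tabulated and the marginal and conditional recovered by summation and renormalization (a minimal sketch; the variable names are ours):

```python
import numpy as np

x_vals = np.arange(1, 4)  # x = 1, 2, 3
y_vals = np.arange(1, 3)  # y = 1, 2

# Joint pmf as a 3x2 table: joint[i, j] = (x_i + y_j) / 21
joint = (x_vals[:, None] + y_vals[None, :]) / 21.0
print(np.isclose(joint.sum(), 1.0))  # True: a valid joint pmf

# Marginal of X: sum over y, matching (2x + 3) / 21
p_x = joint.sum(axis=1)
print(np.allclose(p_x, (2 * x_vals + 3) / 21.0))  # True

# Conditional of Y given X = 1: first row renormalized, matching (1 + y) / 5
p_y_given_1 = joint[0] / p_x[0]
print(np.allclose(p_y_given_1, (1 + y_vals) / 5.0))  # True
```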
\n", 235 | "\n", 236 | "The two variables are said to be *perfectly correlated* when $\\rho=1$ and *anti-correlated* when $\\rho=-1$.\n" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": {}, 242 | "source": [ 243 | "### Independence\n", 244 | "\n", 245 | "Two random variables $X$ and $Y$ are **independent** if and only if\n", 246 | "\n", 247 | "$$\n", 248 | "p(x,y) = p(x)p(y)\n", 249 | "$$\n", 250 | "that is equivalent to\n", 251 | "$$\n", 252 | "p(y|x) = p(y)\n", 253 | "$$\n", 254 | "$$\n", 255 | "p(x|y) = p(x)\n", 256 | "$$\n", 257 | "\n", 258 | "Two random variables $X$ and $Y$ are **conditionally independent** given $Z$ if and only if \n", 259 | "\n", 260 | "$$p(x|y,z) = p(x|z) \\ \\ \\forall \\ x,y,z$$\n", 261 | "\n", 262 | "In other words, knowing $Y$ doesn't add any more information on $X$ if we already know $Z$.\n", 263 | "\n", 264 | "An alternative definition is $p(x,y|z) = p(y|z)p(x|z)$. In other words, $X$ and $Y$ are independent events given $Z$.\n", 265 | "\n", 266 | "\n", 267 | "### Bayes' theorem\n", 268 | "Let $X$ and $Y$ be two random variables with joint probability distribution $p(x,y)$\n", 269 | "\n", 270 | "$$p(x | y) = \\frac{p(y | x)p(x)}{p(y)}$$\n", 271 | "\n", 272 | "Notice that this is just a trivial consequence of the definition of conditional probability.\n" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "## Information Theory\n", 280 | "\n", 281 | "### Entropy\n", 282 | "The entropy of a distribution is a measure of uncertainty of that distribution, it's related to the amount of information that you know on the random variable $X$ before observing it:\n", 283 | "$$\\text{H}(p) = -\\mathbb{E}_{x \\in \\mathcal{X}} [\\log{p(x)}]$$\n", 284 | "\n", 285 | "### Kullback–Leibler divergence\n", 286 | "Kullback–Leibler (KL) divergence is a measure of difference between two distributions:\n", 287 | "\n", 288 | "$$ D_{KL}(\\ p \\ ||\\ q\\ ) := \\mathbb{E}_{x \\sim p(x)}[\\log{p(x)} - \\log{q(x)}]$$\n", 289 | "\n", 290 | "A simple interpretation of the KL divergence of $p$ from $q$ is the expected excess surprise from using $q$ as a model when the actual distribution is $p$.\n", 291 | "\n", 292 | "An important property is that $D_{KL}(\\ p \\ ||\\ q\\ ) \\geq 0$ and $D_{KL}(p || q) = 0 \\iff p=q$.\n", 293 | "\n", 294 | "However it is not a distance, since in general $D_{KL}(p || q) \\neq D_{KL}(q || p)$\n", 295 | "\n", 296 | "### Mutual Information\n", 297 | "The mutual information between two random variables $X$ and $Y$ is a measure of dependence:\n", 298 | "$$\\text{I}(X,Y) = D_{KL}(\\ p(x,y)\\ ||\\ p(x)p(y)\\ )$$\n", 299 | "\n", 300 | "It measures how much information one variable carries about the other, by comparing the joint distribution with the product of the marginals.\n", 301 | "\n", 302 | "Notice that $ \\text{I}(X,Y) = \\text{H}(X) + \\text{H}(Y) - \\text{H}(X,Y)$" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "## Random variables: more formal definition\n", 310 | "\n", 311 | "**Measurable space** $(\\Omega,\\mathcal{F})$:\n", 312 | "\n", 313 | "- $\\Omega$ is a set;\n", 314 | "- $\\mathcal{F}$ is a $\\sigma$-algebra on $\\Omega$, i.e. 
$\\mathcal{F}$:\n", 315 | " - contains $\\emptyset, \\Omega$;\n", 316 | " - is closed under complementary sets;\n", 317 | " - is closed under countable unions.\n", 318 | "\n", 319 | "\n", 320 | "**Measurable function** $f$:\n", 321 | "\n", 322 | "- $f:(\\Omega_1,\\mathcal{F}_1)\\rightarrow (\\Omega_2,\\mathcal{F}_2)$;\n", 323 | "- The pre-image $f^{-1}(E)$, $\\forall$ measurable set $E\\in\\mathcal{F}_2$, is measurable (i.e. $f^{-1}(E)\\in\\mathcal{F}_1$).\n", 324 | "\n", 325 | "\n", 326 | "**Probability measure** $P$ on $(\\Omega,\\mathcal{F})$:\n", 327 | "\n", 328 | "- $P:\\mathcal{F}\\rightarrow [0,1]$;\n", 329 | "- $P$ is countably additive on pairwise disjoint sets;\n", 330 | "- $P(\\emptyset)=0$ and $P(\\Omega)=1$.\n", 331 | "\n", 332 | "\n", 333 | "**Random variable** $X$:\n", 334 | "\n", 335 | "- $(\\Omega,\\mathcal{F},P)$ probability space;\n", 336 | "- $(\\mathcal{X},\\mathcal{A})$ measurable space;\n", 337 | "- Measurable $X:(\\Omega,\\mathcal{F},P)\\rightarrow (\\mathcal{X},\\mathcal{A})$.\n", 338 | "\n", 339 | "$X$ induces the push-forward probability measure $\\mu$ on $\\mathcal{X}$: $\\mu(A):=X_*P(A)=P(X\\in A) := P(X^{-1}(A))$ for any $A\\in\\mathcal{A}$." 340 | ] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": {}, 345 | "source": [ 346 | "## References\n", 347 | "- [J. Jacod, P. Protter, \"Probability Essentials\"](https://zero.sci-hub.ru/6098/787f72eac157546be3d98fcc129b8ba6/jacod2004.pdf)" 348 | ] 349 | } 350 | ], 351 | "metadata": { 352 | "kernelspec": { 353 | "display_name": "Python 3 (ipykernel)", 354 | "language": "python", 355 | "name": "python3" 356 | }, 357 | "language_info": { 358 | "codemirror_mode": { 359 | "name": "ipython", 360 | "version": 3 361 | }, 362 | "file_extension": ".py", 363 | "mimetype": "text/x-python", 364 | "name": "python", 365 | "nbconvert_exporter": "python", 366 | "pygments_lexer": "ipython3", 367 | "version": "3.10.12" 368 | } 369 | }, 370 | "nbformat": 4, 371 | "nbformat_minor": 4 372 | } 373 | -------------------------------------------------------------------------------- /09_exercise.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "c65471a2", 6 | "metadata": {}, 7 | "source": [ 8 | "\"Open" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Notebook 9: Variational Inference Exercise" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "Using the following dataset about weather in Sidney to predict if tomorrow it will rain:\n", 23 | "1. Fit a bayesian logistic regression model with VI (using as predictors only the numerical columns)\n", 24 | "2. Compare the Bayesian prediction with the frequentist prediction (for the frequentist you can use the mean of the guide)\n", 25 | "\n", 26 | "Hint: you may need to call `.to_event()` when defining a distribution in the model, see [this page](https://pyro.ai/examples/tensor_shapes.html) for more information." 
27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "ename": "HTTPError", 36 | "evalue": "HTTP Error 404: Not Found", 37 | "output_type": "error", 38 | "traceback": [ 55 | "HTTPError: HTTP Error 404: Not Found" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "import pandas as pd\n", 61 | "import numpy as np\n", 62 | "import matplotlib.pyplot as plt\n", 63 | "import torch\n", 64 | "\n", 65 | "# Loading the data\n", 66 | "sydney_data = pd.read_csv('https://raw.githubusercontent.com/r-doz/PML2025/main/data/weatherSydney.csv')\n", 67 | "sydney_data" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 2, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "# extract predictors and target variable\n", 77 | "numerical_columns = sydney_data.select_dtypes(include=[np.number])\n", 78 | "y = sydney_data['RainTomorrow']\n", 79 | "\n", 80 | "X = torch.from_numpy(numerical_columns.to_numpy()).float()\n", 81 | "y = torch.from_numpy((y== \"Yes\").to_numpy()).float()\n", 82 | "\n", 83 | "# Split train and test\n", 84 | "def split_and_standardize(*, X, y, ratio=0.05):\n", 85 | "    \n", 86 | "    # Splitting train and test sets\n", 87 | "    cut = int(ratio * len(y))\n", 88 | "    X_train = X[:cut]\n", 89 | "    y_train = y[:cut]\n", 90 | "    \n", 91 | "    X_test = X[cut:]\n", 92 | "    y_test = y[cut:]\n", 93 | "    \n", 94 | "    # Standardizing data\n", 95 | "    x_mean = X_train.mean(axis=0)\n", 96 | "    x_std = X_train.std(axis=0)\n", 97 | "    \n", 98 | "    X_train = (X_train - x_mean) / x_std\n", 99 | "    X_test = (X_test - x_mean) / x_std\n", 100 | "    \n", 101 | "    return X_train, y_train, X_test, y_test\n", 102 | "\n", 103 | "X_train, y_train, X_test, y_test = split_and_standardize(X=X, y=y)\n", 104 | "\n", 105 | "# Add bias term\n", 106 | "X_train = torch.hstack([torch.ones((X_train.shape[0], 1)), X_train])\n", 107 | "X_test = torch.hstack([torch.ones((X_test.shape[0], 1)), X_test])" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 3, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "data": { 117 | "text/plain": [ 118 | "tensor([[ 1.0000,  0.0237, -0.2067,  ..., -1.2894,  0.2598,  0.0224],\n", 119 | "        [ 1.0000,  0.1077,  0.1015,  ...,  0.2579,  0.3972,  0.4307],\n", 120 | "        [ 1.0000,  0.5273,  0.4867,  ..., -0.1289,  0.6719,  0.6758],\n", 121 | "        ...,\n", 122 | "        [ 1.0000, -0.6477, -0.2580,  ..., -0.9026, -0.4271, -0.7671],\n", 123 | "        [ 1.0000, -1.1093, -0.8487,  ..., -1.2894, -1.0747, -1.0122],\n", 124 | "        [ 1.0000, -0.8575,  0.2813,  ..., -1.2894, -0.8981, -0.1954]])" 125 | ] 126 | }, 127 | "execution_count": 3, 128 | "metadata": {}, 129 | "output_type": "execute_result" 130 | } 131 | ], 132 | "source": [ 133 | "X_train" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 4, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "data": { 143 | "text/plain": [ 144 | "tensor([0., 0., 0., 0., 1., 0., 1., 0., 1., 1., 1., 0., 1., 0., 0., 0., 0., 0.,\n", 145 | "        0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,\n", 146 | "        0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,\n", 147 | "        0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,\n", 148 | "        0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.])" 149 | ] 150 | }, 151 | "execution_count": 4, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "y_train" 158 | ] 159 | }, 160 | { 161 | 
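With `X_train` and `y_train` prepared as above, a minimal sketch of the kind of model/guide pair this exercise asks for (the function names, prior scale, learning rate, and the choice of `AutoNormal` are our own illustrative assumptions, not prescribed by the exercise):

```python
import torch
import pyro
import pyro.distributions as dist
from pyro.infer import SVI, Trace_ELBO
from pyro.infer.autoguide import AutoNormal

def logistic_model(X, y=None):
    # One Gaussian prior per column of X (the bias is already a column of ones);
    # .to_event(1) treats the weight vector as a single multivariate sample site
    w = pyro.sample("w", dist.Normal(torch.zeros(X.shape[1]), 1.0).to_event(1))
    logits = X @ w
    with pyro.plate("data", X.shape[0]):
        pyro.sample("obs", dist.Bernoulli(logits=logits), obs=y)

guide = AutoNormal(logistic_model)
svi = SVI(logistic_model, guide, pyro.optim.Adam({"lr": 0.01}), loss=Trace_ELBO())

pyro.clear_param_store()
for step in range(2000):
    svi.step(X_train, y_train)

# "Frequentist-style" point prediction from the mean of the guide
w_point = guide.median()["w"]
p_rain = torch.sigmoid(X_test @ w_point)
```

For the Bayesian prediction one would instead average `torch.sigmoid(X_test @ w)` over many samples of `w` drawn from the guide.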
"cell_type": "code", 162 | "execution_count": 5, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [] 166 | } 167 | ], 168 | "metadata": { 169 | "kernelspec": { 170 | "display_name": "stdenv", 171 | "language": "python", 172 | "name": "python3" 173 | }, 174 | "language_info": { 175 | "codemirror_mode": { 176 | "name": "ipython", 177 | "version": 3 178 | }, 179 | "file_extension": ".py", 180 | "mimetype": "text/x-python", 181 | "name": "python", 182 | "nbconvert_exporter": "python", 183 | "pygments_lexer": "ipython3", 184 | "version": "3.11.8" 185 | } 186 | }, 187 | "nbformat": 4, 188 | "nbformat_minor": 4 189 | } 190 | -------------------------------------------------------------------------------- /Homeworks/Homework_2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "d877b276", 6 | "metadata": {}, 7 | "source": [ 8 | "\"Open" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "## Homework 2" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "Probabilistic Machine Learning -- Spring 2025, UniTS" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "### Problem 1: Marginal Likelihood Optimisation\n", 30 | "\n", 31 | "Given the Bayesian Linear Regression model implemented in the Notebook 6, considering the same dataset and variables (Apparent Temperature vs Humidity), optimize alpha and beta by maximizing the Marginal Likelihood.\n", 32 | "\n", 33 | "Note: You can show here only the piece of code that you used and write the optimal alpha and beta that you obtained (so you can run it in directly on Notebook 6)\n", 34 | "\n", 35 | "Hint: import scipy.optimize as optimize" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "### Problem 2: Rejection Sampling\n", 50 | "\n", 51 | "Consider the unnormalized probability density function:\n", 52 | "\n", 53 | "$$\n", 54 | "\\tilde{p}(x) = \\exp\\left(-\\frac{x^4}{4} - \\frac{x^2}{2}\\right)\n", 55 | "$$\n", 56 | "\n", 57 | "This density is defined for $x \\in \\mathbb{R}$, but it is not normalized. Let $Z$ be its normalization constant:\n", 58 | "\n", 59 | "$$\n", 60 | "Z = \\int_{-\\infty}^{\\infty} \\tilde{p}(x)\\, dx\n", 61 | "$$\n", 62 | "\n", 63 | "You are given access to a proposal distribution $q(x) = \\mathcal{N}(0, 1)$ from which you can sample and evaluate its density.\n", 64 | "\n", 65 | "- Implement and then use rejection sampling with $q(x)$ to generate samples from the normalized target distribution $p(x) = \\frac{1}{Z} \\tilde{p}(x)$.\n", 66 | " \n", 67 | "- Estimate the normalization constant $Z$ (hint: see your course notes!)\n", 68 | "\n", 69 | "- Compare your result with a numerical approximation of $Z$ using integration methods (e.g., scipy.integrate.quad).\n" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "### Problem 3: MCMC Convergence Diagnostics\n", 84 | "\n", 85 | "You have implemented a Metropolis-Hastings algorithm and used it to draw samples from a univariate target distribution. 
In this exercise, you will compute convergence diagnostics from scratch to assess whether your chains have mixed well.\n", 86 | "\n", 87 | "Consider 4 parallel chains, each of length $N$ (after burn-in), starting from different initial values (use\n", 88 | "initial_values = [-10, -2, 2, 10]). The unnormalized probability density function $\tilde{p}(x)$ is defined as a mixture of two Gaussian distributions:\n", 89 | "\n", 90 | "$$\n", 91 | "\tilde{p}(x) = \mathcal{N}(x; -5, 1) + \mathcal{N}(x; 5, 1)\n", 92 | "$$\n", 93 | "\n", 94 | "Perform the following steps twice, once with a proposal standard deviation of 0.1 and once with 2.0:\n", 95 | "\n", 96 | "- Compute the within-chain variance $W$ and the between-chain variance $B$\n", 97 | "- Compute the statistic $\hat{R}$\n", 98 | "- For a single chain of samples $x_1, x_2, \dots, x_N$, compute the lag-$k$ autocorrelation $\rho_k$ and plot $\rho_k$ for $k = 1, 2, \dots, 20$\n", 99 | "- Estimate the effective number of samples $n_{\text{eff}}$\n", 100 | "\n", 101 | "Repeat the analysis with the distribution:\n", 102 | "\n", 103 | "$$\n", 104 | "\tilde{p}(x) = \mathcal{N}(x; -2, 1) + \mathcal{N}(x; 2, 1)\n", 105 | "$$\n", 106 | "\n", 107 | "Discuss the results.\n", 108 | "\n" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "### Problem 4: ADSAI football matches\n", 123 | "Over the years, the PhD students of ADSAI have kept track of the results of their evening five-a-side football matches. Since it’s difficult to always organize teams with the same players, the teams were formed on a rotating basis, each time with different players. \n", 124 | "The names of our champions have been anonymized using numbers from 1 to 62. \n", 125 | "In the dataset located at `data/ADSAI_football.csv` in the GitHub repository, you will find the following columns:\n", 126 | "- **Team A**: IDs of the players who played in Team A in that match; \n", 127 | "- **Team B**: same as above, for Team B; \n", 128 | "- **Goal A**: total goals scored by Team A in that match; \n", 129 | "- **Goal B**: same as above, for Team B.\n" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "\n", 137 | "Your goal is to model **the skill of each player** based on information about the team they belonged to and the overall result achieved by that team.\n" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "\n", 145 | "The model to implement consists of the following structure: \n", 146 | "- $ \theta = (\theta_1, \theta_2, \dots, \theta_{62}) \in \mathbb{R}^{62} $: players' skills.\n", 147 | "- $M=54$: number of matches.\n", 148 | "- For each match $ i = 1, \dots, M=54$:\n", 149 | "  - $ A_i \subset \{1, \dots, N=62\} $: set of player IDs of team A in match $ i $.\n", 150 | "  - $ B_i \subset \{1, \dots, N=62\} $: set of player IDs of team B in match $ i $.\n", 151 | "  - $ y_i \in \mathbb{Z} $: observed outcome, i.e. 
goal difference between the two teams, defined as $ (\\text{goal}_A - \\text{goal}_B) $ in match $ i $.\n", 152 | "\n", 153 | "*(in this exercise, you are asked to follow the proposal of Karlis and Ntzoufras approach, that focuses on the goal difference in place of the individual goal counts of each team!)*\n" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "\n", 161 | "The model is defined as follows:\n", 162 | "\n", 163 | "- The \"strength\" of Team A, defined as the sum of the individual players' skills $\\theta_j$ composing the team in match $i = 1, \\dots, M=54$: \n", 164 | " $$\n", 165 | " s_A^{(i)} = \\sum_{j \\in A_i} \\theta_j\n", 166 | " $$\n", 167 | "\n", 168 | "- The \"strength\" of Team B, defined similarly as the sum of the individual players' skills $\\theta_j$ composing the team in match $i = 1, \\dots, M=54$: \n", 169 | " $$\n", 170 | " s_B^{(i)} = \\sum_{j \\in B_i} \\theta_j\n", 171 | " $$\n", 172 | "\n", 173 | "Specifically, the observed goal difference in match $i$ is modeled using a Skellam distribution: \n", 174 | "$$\n", 175 | "y_i \\sim \\text{Skellam}(\\lambda_A^{(i)}, \\lambda_B^{(i)}), \\quad \\text{where} \\quad \\lambda_A^{(i)} = \\exp(s_A^{(i)}), \\quad \\lambda_B^{(i)} = \\exp(s_B^{(i)})\n", 176 | "$$\n", 177 | "\n", 178 | "The **Skellam distribution** models the difference between two independent random variables: \n", 179 | "$$\n", 180 | "\\text{Skellam}(\\lambda_A, \\lambda_B) = \\text{Poisson}(\\lambda_A) - \\text{Poisson}(\\lambda_B)\n", 181 | "$$\n", 182 | "It is formally defined as: \n", 183 | "$$\n", 184 | "\\text{Skellam}(k; \\lambda_A, \\lambda_B) = e^{-(\\lambda_A + \\lambda_B)} \\left( \\frac{\\lambda_A}{\\lambda_B} \\right)^{k/2} I_{|k|}(2 \\sqrt{\\lambda_A \\lambda_B})\n", 185 | "$$\n", 186 | "\n", 187 | "for each $ k \\in \\mathbb{Z} $, and $ I_k $ is the modified Bessel function of the first kind of order $ k $.\n", 188 | "\n", 189 | "$$\n", 190 | "I_k(z) = \\sum_{m=0}^\\infty \\frac{1}{m! \\, \\Gamma(m + k + 1)} \\left( \\frac{z}{2} \\right)^{2m + k}\n", 191 | "$$\n", 192 | "\n", 193 | "where $\\Gamma$ is the Gamma function.\n" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 2, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "#import only relevant quantities as follows:\n", 203 | "\n", 204 | "import pandas as pd\n", 205 | "import ast\n", 206 | "import torch\n", 207 | "\n", 208 | "def preprocessing_dataset(dataset_path='ADSAI_football.csv'):\n", 209 | " football = pd.read_csv(dataset_path)\n", 210 | " football['Team A'] = football['Team A'].apply(ast.literal_eval)\n", 211 | " football['Team B'] = football['Team B'].apply(ast.literal_eval)\n", 212 | "\n", 213 | " max_player_id = max(\n", 214 | " max(p for team in football['Team A'] for p in team),\n", 215 | " max(p for team in football['Team B'] for p in team)\n", 216 | " )\n", 217 | "\n", 218 | " goal_diff = torch.tensor((football['Goal A'] - football['Goal B']).values, dtype=torch.int)\n", 219 | "\n", 220 | " teams_A = [torch.tensor(team) for team in football['Team A']]\n", 221 | " teams_B = [torch.tensor(team) for team in football['Team B']]\n", 222 | "\n", 223 | " return teams_A, teams_B, goal_diff, max_player_id\n", 224 | "\n", 225 | "\n", 226 | "teams_A, teams_B, goal_diff, max_player_id = preprocessing_dataset()\n", 227 | "\n" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "\n", 235 | "You are asked to: \n", 236 | "1. 
Find the MAP estimate for $ \\theta = (\\theta_1, \\theta_2, \\dots, \\theta_{62})$ choosing as `log_prior` function a (log-)Standard Normal distribution and as `log_likelihood` function the (log-)Skellam. Perform optimization using Gradient Descent. Use the MAP estimate to implement a Laplace approximation of the posterior, as done during the lab (reuse the function `compute_hessian(f, w)` of Notebook 6).\n", 237 | "\n", 238 | "**Important Hint:** The Skellam log-likelihood involves the modified Bessel function $I_k(z)$, which is non-differentiable in PyTorch if evaluated via scipy. To preserve differentiability, replace $\\log I_k(z)$ with a smooth approximation, such as an asymptotic expansion, to allow gradient-based optimization (for large $z$, for instance, $I_k(z) \\approx \\frac{e^z}{\\sqrt{2 \\pi z}}$, i.e. $\\log I_k(z) \\approx z - \\frac{1}{2}\\log(2 \\pi z)$).\n", 239 | "\n", 240 | "2. **(Useful for the next point, not strictly necessary for the previous one):** Implement your `Skellam` distribution, inheriting from `torch.distributions.Distribution`;\n", 241 | "3. Write the Pyro model corresponding to the problem depicted above assuming (again) that the `theta` values are a priori distributed as a Standard Normal;\n", 242 | "4. Perform inference on $ \\theta = (\\theta_1, \\theta_2, \\dots, \\theta_{62})$ values running an MCMC simulation using the `NUTS` kernel;\n", 243 | "5. Compare the `theta` values obtained by these two options using the `evaluate_performance` function given in this notebook.\n" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "\n", 253 | "# 1. Find the MAP estimate for $ \\theta = (\\theta_1, \\theta_2, \\dots, \\theta_{62})$ choosing as `log_prior` function a (log-)Standard Normal distribution and as `log_likelihood` function the (log-)Skellam. Perform optimization using Gradient Descent;\n", 254 | "\n", 255 | "def log_likelihood(teams_A, teams_B, goal_diff, theta):\n", 256 | "    # TODO\n", 257 | "    return ...\n", 258 | "\n", 259 | "def log_prior(theta):\n", 260 | "    # TODO\n", 261 | "    return ...\n", 262 | "\n", 263 | "def loss_function(theta):\n", 264 | "    # TODO \n", 265 | "    return ...\n", 266 | "\n", 267 | "def gradient_descent_optimization(loss_function, lr: float, n_iter: int, initial_guess: torch.Tensor):\n", 268 | "    # TODO\n", 269 | "    return ...\n", 270 | "\n", 271 | "def compute_hessian(f, w):\n", 272 | "    # TODO\n", 273 | "    return ...\n", 274 | "\n", 275 | "# here we want to obtain \n", 276 | "theta_MAP = ...\n", 277 | "posterior_cov = ...\n" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [ 286 | "#Visualize the marginal distribution of some thetas\n", 287 | "\n", 288 | "import matplotlib.pyplot as plt\n", 289 | "from torch.distributions import MultivariateNormal\n", 290 | "\n", 291 | "# Sample from the full posterior\n", 292 | "mvn = MultivariateNormal(loc=theta_MAP, covariance_matrix=torch.from_numpy(posterior_cov).float())\n", 293 | "posterior_samples = mvn.sample((1000,)) \n", 294 | "\n", 295 | "# Indices of thetas you want to visualize\n", 296 | "selected_indices = [0, 1, 2, 20] # Change these to the indices you're interested in\n", 297 | "\n", 298 | "# Plot histograms\n", 299 | "plt.figure(figsize=(10, 6))\n", 300 | "for i, idx in enumerate(selected_indices):\n", 301 | "    plt.subplot(2, 2, i + 1)\n", 302 | "    plt.hist(posterior_samples[:, idx].numpy(), bins=40, density=True, alpha=0.7)\n", 303 | "    plt.title(f\"Posterior of $\\\\theta_{{{idx}}}$\")\n", 304 | "    plt.xlabel(\"Value\")\n", 305 | 
" plt.ylabel(\"Density\")\n", 306 | "plt.tight_layout()\n", 307 | "plt.show()\n" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [ 316 | "# 2. Implement your `Skellam` distribution, inheriting from `torch.distributions.Distribution`;\n", 317 | "\n", 318 | "import torch\n", 319 | "import pyro\n", 320 | "\n", 321 | "class Skellam(torch.distributions.Distribution):\n", 322 | " arg_constraints = ...\n", 323 | "\n", 324 | " def __init__(self, lambdaA, lambdaB, validate_args = None):\n", 325 | " self.lambdaA = lambdaA\n", 326 | " self.lambdaB = lambdaB\n", 327 | " batch_shape = torch.broadcast_shapes(lambdaA.shape, lambdaB.shape)\n", 328 | " super().__init__(batch_shape, validate_args=validate_args)\n", 329 | "\n", 330 | " def sample(self, sample_shape=torch.Size()):\n", 331 | " # Approximate sampling: sample two Poisson and subtract\n", 332 | " # TODO\n", 333 | " return ...\n", 334 | " \n", 335 | " def log_prob(self, value):\n", 336 | " # Exact log probability\n", 337 | " return ...\n", 338 | " " 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | " \n", 348 | "# 3. Write the Pyro model corresponding to the problem depicted above assuming (again) the `theta` values being distributed initially as a Standard Normal;\n", 349 | "\n", 350 | "def model(goal_diff, teams_A, teams_B, max_player_id):\n", 351 | " # TODO\n", 352 | " ...\n", 353 | " " 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": null, 359 | "metadata": {}, 360 | "outputs": [], 361 | "source": [ 362 | " \n", 363 | "# 4. Perform inference on $ \\theta = (\\theta_1, \\theta_2, \\dots, \\theta_{62})$ values running a MCMC simulation using the `NUTS` kernel;\n", 364 | "\n", 365 | "from pyro.infer import MCMC, NUTS\n", 366 | "\n", 367 | "# TODO\n", 368 | "\n", 369 | "theta_MCMC = ..." 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [ 378 | "\n", 379 | "# 5. 
Compare the `theta_*` values obtained by these two options using the `evaluate_performance` function given in this notebook.\n", 380 | "\n", 381 | "import numpy as np\n", 382 | "import torch\n", 383 | "\n", 384 | "def predict_goal_diff_skellam(teamA_ids, teamB_ids, theta, n_sim=10_000):\n", 385 | "    \"\"\"\n", 386 | "    Predicts the goal difference (Skellam distribution) between two teams using theta.\n", 387 | "\n", 388 | "    Args:\n", 389 | "        teamA_ids (list): Indices of the players in team A.\n", 390 | "        teamB_ids (list): Indices of the players in team B.\n", 391 | "        theta (torch.Tensor): Tensor containing the skill levels of the players.\n", 392 | "        n_sim (int): Number of simulations to run.\n", 393 | "\n", 394 | "    Returns:\n", 395 | "        float: Mean predicted goal difference (A - B).\n", 396 | "    \"\"\"\n", 397 | "\n", 398 | "    # Calculate the total strength for each team (sum of player skill levels)\n", 399 | "    sA = theta[teamA_ids].sum()\n", 400 | "    sB = theta[teamB_ids].sum()\n", 401 | "\n", 402 | "    # Lambda parameters for Poisson distribution (expected goals)\n", 403 | "    lam_A = torch.exp(sA).item() # Team A's expected goal rate\n", 404 | "    lam_B = torch.exp(sB).item() # Team B's expected goal rate\n", 405 | "\n", 406 | "    # Simulate goals for each team using Poisson distribution\n", 407 | "    goals_A = np.random.poisson(lam_A, size=n_sim)\n", 408 | "    goals_B = np.random.poisson(lam_B, size=n_sim)\n", 409 | "\n", 410 | "    # Calculate the difference in goals (A - B)\n", 411 | "    diff = goals_A - goals_B\n", 412 | "\n", 413 | "    # Return the mean predicted difference\n", 414 | "    return diff.mean()\n", 415 | "\n", 416 | "\n", 417 | "def predict_goal_diff_laplace(teamA_ids, teamB_ids, theta_map, cov_matrix, n_sim=1000):\n", 418 | "    # Concatenate team A ids followed by team B ids (a player belongs to one team only), so the split by teamA_len below is correct\n", 419 | "    all_ids = torch.cat((torch.as_tensor(teamA_ids), torch.as_tensor(teamB_ids)))\n", 420 | "\n", 421 | "    # Extract subvector of means (mu_A and mu_B)\n", 422 | "    theta_sub = theta_map[all_ids].float()\n", 423 | "\n", 424 | "    # Extract submatrix of covariances\n", 425 | "    cov_sub = cov_matrix[np.ix_(all_ids, all_ids)] # numpy version\n", 426 | "    cov_sub = torch.from_numpy(cov_sub).float()\n", 427 | "\n", 428 | "    # Create multivariate normal from posterior\n", 429 | "    mvn = MultivariateNormal(loc=theta_sub, covariance_matrix=cov_sub)\n", 430 | "\n", 431 | "    # Sample from posterior\n", 432 | "    theta_samples = mvn.sample((n_sim,)) # shape: (n_sim, len(all_ids))\n", 433 | "\n", 434 | "    # Compute strength for each team in each sample\n", 435 | "    teamA_len = len(teamA_ids)\n", 436 | "    sA = theta_samples[:, :teamA_len].sum(dim=1)\n", 437 | "    sB = theta_samples[:, teamA_len:].sum(dim=1)\n", 438 | "\n", 439 | "    # Expected goals via Poisson lambdas\n", 440 | "    lam_A = torch.exp(sA)\n", 441 | "    lam_B = torch.exp(sB)\n", 442 | "\n", 443 | "    # Sample goals\n", 444 | "    goals_A = torch.poisson(lam_A)\n", 445 | "    goals_B = torch.poisson(lam_B)\n", 446 | "\n", 447 | "    # Goal differences\n", 448 | "    diff = goals_A - goals_B\n", 449 | "    return diff.mean()\n", 450 | "\n", 451 | "\n", 452 | "def evaluate_performance(theta_MAP, theta_MCMC, posterior_cov, teams_A, teams_B, goal_diff, n_sim=10_000):\n", 453 | "    \"\"\"\n", 454 | "    Evaluates the performance of different sets of theta values by comparing the Mean Bias Error (MBE)\n", 455 | "    and Mean Absolute Error (MAE) between predicted and actual goal differences.\n", 456 | "\n", 457 | "    Args:\n", 458 | "        theta_MAP, theta_MCMC (torch.Tensor): the two sets of skill estimates to evaluate; posterior_cov: the Laplace posterior covariance.\n", 459 | 
teams_A (list of lists): List of player IDs for team A in each match.\n", 460 | " teams_B (list of lists): List of player IDs for team B in each match.\n", 461 | " goal_diff (list): List of actual goal differences (A - B).\n", 462 | " n_sim (int): Number of simulations to run for each set of theta values.\n", 463 | "\n", 464 | " Returns:\n", 465 | " dict: Dictionary with MBE and MAE for each set of theta values.\n", 466 | " \"\"\"\n", 467 | " results = {}\n", 468 | "\n", 469 | " # Loop through each set of theta values\n", 470 | " \n", 471 | " predicted_diffs_MAP = [] # List to store predicted goal differences\n", 472 | " predicted_diffs_MCMC = []\n", 473 | " actual_diffs = goal_diff # Actual goal differences\n", 474 | "\n", 475 | " # Simulate the match results for each game\n", 476 | " for i in range(len(goal_diff)):\n", 477 | " teamA_ids = teams_A[i]\n", 478 | " teamB_ids = teams_B[i]\n", 479 | "\n", 480 | " # Predict the goal difference \n", 481 | " # Use the Laplace approximation if posterior_cov is provided\n", 482 | " \n", 483 | " predicted_MAP = predict_goal_diff_laplace(teamA_ids, teamB_ids, theta_MAP, posterior_cov, n_sim)\n", 484 | " predicted_MCMC = predict_goal_diff_skellam(teamA_ids, teamB_ids, theta_MCMC, n_sim)\n", 485 | " \n", 486 | " predicted_diffs_MAP.append(predicted_MAP)\n", 487 | " predicted_diffs_MCMC.append(predicted_MCMC)\n", 488 | "\n", 489 | " # Convert lists to numpy arrays for easier calculations\n", 490 | " predicted_diffs_MAP = np.array(predicted_diffs_MAP)\n", 491 | " predicted_diffs_MCMC = np.array(predicted_diffs_MCMC)\n", 492 | " actual_diffs = np.array(actual_diffs)\n", 493 | "\n", 494 | " # Compute MAE and MBE\n", 495 | " mae_MAP = np.mean(np.abs(predicted_diffs_MAP - actual_diffs)) # Mean Absolute Error\n", 496 | " mbe_MAP = (predicted_diffs_MAP - actual_diffs).mean() # Mean Bias Error\n", 497 | " mae_MCMC = np.mean(np.abs(predicted_diffs_MCMC - actual_diffs)) # Mean Absolute Error\n", 498 | " mbe_MCMC = (predicted_diffs_MCMC - actual_diffs).mean() # Mean Bias Error\n", 499 | "\n", 500 | " # Store results for the current theta set\n", 501 | " results = {'MAE_MAP': mae_MAP, 'MBE_MAP': mbe_MAP,\n", 502 | " 'MAE_MCMC': mae_MCMC, 'MBE_MCMC': mbe_MCMC}\n", 503 | "\n", 504 | " return results\n", 505 | "\n", 506 | "\n", 507 | "# Evaluate the performance of different theta values on the same dataset used to fix those values\n", 508 | "performance_results = evaluate_performance(theta_MCMC, theta_MAP, posterior_cov, teams_A, teams_B, goal_diff)\n", 509 | "\n", 510 | "# Print the results\n", 511 | "print(\"Performance Results:\")\n", 512 | "print(f\"MAP - MAE: {performance_results['MAE_MAP']}, MBE: {performance_results['MBE_MAP']}\")\n", 513 | "print(f\"MCMC - MAE: {performance_results['MAE_MCMC']}, MBE: {performance_results['MBE_MCMC']}\")" 514 | ] 515 | } 516 | ], 517 | "metadata": { 518 | "kernelspec": { 519 | "display_name": "general", 520 | "language": "python", 521 | "name": "python3" 522 | }, 523 | "language_info": { 524 | "codemirror_mode": { 525 | "name": "ipython", 526 | "version": 3 527 | }, 528 | "file_extension": ".py", 529 | "mimetype": "text/x-python", 530 | "name": "python", 531 | "nbconvert_exporter": "python", 532 | "pygments_lexer": "ipython3", 533 | "version": "3.12.8" 534 | } 535 | }, 536 | "nbformat": 4, 537 | "nbformat_minor": 4 538 | } 539 | -------------------------------------------------------------------------------- /04_exact_inference.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | 
"cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "1372465f", 6 | "metadata": {}, 7 | "source": [ 8 | "\"Open" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Exact inference with Belief Propagation\n", 16 | "\n", 17 | "This notebook is inspired from [Jessica Stringham's work](https://jessicastringham.net)" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "We are going to perform inference through the sum-product message passing, or belief propagation, on tree-like factor graphs (without any loop). We work only with discrete distributions and without using ad-hoc libraries, to better understand the algorithm." 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 1, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "import numpy as np" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "### Probability distributions\n", 41 | "\n", 42 | "First of all, we need to represent a discrete probability distribution and check that it is normalized.\n", 43 | "For example, we can represent a discrete conditional distribution $p(v_1 | h_1)$ with a 2D array, as:\n", 44 | "\n", 45 | "| | $h_1=a$ | $h_1=b$ | $h_1=c$|\n", 46 | "|---|-----|-----|----|\n", 47 | "| $v_1=0$ | 0.4 | 0.8 | 0.9|\n", 48 | "| $v_1=1$ | 0.6 | 0.2 | 0.1|\n", 49 | "\n", 50 | "We can build a class for the distributions containing the arrays and the labels of the axes\n" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 2, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "class Distribution():\n", 60 | " \"\"\"\"\n", 61 | " Discrete probability distributions, expressed using labeled arrays\n", 62 | " probs: array of probability values\n", 63 | " axes_labels: list of axes names\n", 64 | " \"\"\"\n", 65 | " def __init__(self, probs, axes_labels):\n", 66 | " self.probs = probs\n", 67 | " self.axes_labels = axes_labels\n", 68 | "\n", 69 | " def get_axes(self):\n", 70 | " #returns a dictionary with axes names and the corresponding coordinates\n", 71 | " return {name: axis for axis, name in enumerate(self.axes_labels)}\n", 72 | " \n", 73 | " def get_other_axes_from(self, axis_label):\n", 74 | " #returns a tuple containing all the axes except from axis_label\n", 75 | " return tuple(axis for axis, name in enumerate(self.axes_labels) if name != axis_label)\n", 76 | " \n", 77 | " def is_valid_conditional(self, variable_name):\n", 78 | " #variable_name is the name of the variable for which we are computing the distribution, e.g. in p(y|x) it is 'y'\n", 79 | " return np.all(np.isclose(np.sum(self.probs, axis=self.get_axes()[variable_name]), 1.0))\n", 80 | " \n", 81 | " def is_valid_joint(self):\n", 82 | " return np.all(np.isclose(np.sum(self.probs), 1.0))" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 3, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "name": "stdout", 92 | "output_type": "stream", 93 | "text": [ 94 | "Is p(v1|h1) a valid conditional distribution? True\n", 95 | "Is p(v1|h1) a valid joint distribution? False\n", 96 | "Is p(h1) a valid conditional distribution? True\n", 97 | "Is p(h1) a valid joint distribution? True\n", 98 | "Is p(v1|h1, h2) a valid conditional distribution? True\n", 99 | "Is p(v1|h1, h2) a valid joint distribution? 
False\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "#Let's see the previous distribution:\n", 105 | "\n", 106 | "p_v1_given_h1 = Distribution(np.array([[0.4, 0.8, 0.9], [0.6, 0.2, 0.1]]), ['v1', 'h1'])\n", 107 | "\n", 108 | "print('Is p(v1|h1) a valid conditional distribution? ', p_v1_given_h1.is_valid_conditional('v1'))\n", 109 | "print('Is p(v1|h1) a valid joint distribution? ', p_v1_given_h1.is_valid_joint())\n", 110 | "\n", 111 | "#Consider also a joint distribution and a conditional distribution with more than one 'given' variables\n", 112 | "\n", 113 | "p_h1 = Distribution(np.array([0.6, 0.3, 0.1]), ['h1'])\n", 114 | "\n", 115 | "print('Is p(h1) a valid conditional distribution? ', p_h1.is_valid_conditional('h1'))\n", 116 | "print('Is p(h1) a valid joint distribution? ', p_h1.is_valid_joint())\n", 117 | "\n", 118 | "p_v1_given_h0_h1 = Distribution(np.array([[[0.9, 0.2, 0.7], [0.3, 0.2, 0.5]],[[0.1, 0.8, 0.3], [0.7, 0.8, 0.5]]]), ['v1', 'h0', 'h1'])\n", 119 | "print('Is p(v1|h1, h2) a valid conditional distribution? ', p_v1_given_h0_h1.is_valid_conditional('v1'))\n", 120 | "print('Is p(v1|h1, h2) a valid joint distribution? ', p_v1_given_h0_h1.is_valid_joint())" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "We need to allow multiplications between distributions like $p(v_1|h_1,...,h_n) p(h_i)$, where $p(h_i)$ is a 1D array.\n", 128 | "To do it, we can exploit broadcasting. But first, we need to reshape $p(h_i)$ accordingly to the dimension $h_i$ of the distribution $p(v_1|h_1,...,h_n)$" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 4, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "def multiply(p_v_given_h, p_hi):\n", 138 | " ''' \n", 139 | " Compute the product of the distributions p(v|h1,..,hn)p(hi) where p(hi) is a 1D array\n", 140 | " '''\n", 141 | " #Get the axis corresponding to hi in the conditional distribution\n", 142 | " axis=p_v_given_h.get_axes()[next(iter(p_hi.get_axes()))]\n", 143 | "\n", 144 | " # Reshape p(hi) in order to exploit broadcasting. Consider also the case in which p(hi) is a scalar.\n", 145 | " dims = np.ones_like(p_v_given_h.probs.shape)\n", 146 | " dims[axis] = p_v_given_h.probs.shape[axis]\n", 147 | "\n", 148 | " if (p_hi.probs.shape != () ):\n", 149 | " reshaped_p_hi = p_hi.probs.reshape(dims)\n", 150 | " else:\n", 151 | " reshaped_p_hi = p_hi.probs\n", 152 | "\n", 153 | " return Distribution(p_v_given_h.probs*reshaped_p_hi, p_v_given_h.axes_labels)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 5, 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "name": "stdout", 163 | "output_type": "stream", 164 | "text": [ 165 | "[[0.24 0.24 0.09]\n", 166 | " [0.36 0.06 0.01]]\n", 167 | "True\n", 168 | "[[[0.54 0.06 0.07]\n", 169 | " [0.18 0.06 0.05]]\n", 170 | "\n", 171 | " [[0.06 0.24 0.03]\n", 172 | " [0.42 0.24 0.05]]]\n" 173 | ] 174 | } 175 | ], 176 | "source": [ 177 | "p_v1_h1 = multiply(p_v1_given_h1, p_h1)\n", 178 | "print(p_v1_h1.probs)\n", 179 | "print(p_v1_h1.is_valid_joint())\n", 180 | "\n", 181 | "p_v1_h1_given_h0 = multiply(p_v1_given_h0_h1, p_h1)\n", 182 | "print(p_v1_h1_given_h0.probs)" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "### Factor graphs\n", 190 | "\n", 191 | "Factor graphs are bipartite graphs, with variable nodes and factor nodes. Edges can only connect nodes of different type. 
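Each factor node represents one term of the factorized joint distribution, and it is connected exactly to the variables that appear in that term. 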
Consider for example:\n", 192 | "\n", 193 | "![factor_ex](imgs/factor_example.png)\n", 194 | "\n" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 6, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "class Node(object):\n", 204 | "    def __init__(self, name):\n", 205 | "        self.name = name\n", 206 | "        self.neighbors = []\n", 207 | "\n", 208 | "    def is_valid_neighbor(self, neighbor):\n", 209 | "        raise NotImplementedError()\n", 210 | "\n", 211 | "    def add_neighbor(self, neighbor):\n", 212 | "        assert self.is_valid_neighbor(neighbor)\n", 213 | "        self.neighbors.append(neighbor)\n", 214 | "\n", 215 | "\n", 216 | "class Variable(Node):\n", 217 | "    def is_valid_neighbor(self, factor):\n", 218 | "        return isinstance(factor, Factor) # Variables can only neighbor Factors\n", 219 | "\n", 220 | "\n", 221 | "class Factor(Node):\n", 222 | "    def is_valid_neighbor(self, variable):\n", 223 | "        return isinstance(variable, Variable) # Factors can only neighbor Variables\n", 224 | "\n", 225 | "    def __init__(self, name):\n", 226 | "        super(Factor, self).__init__(name)\n", 227 | "        self.data = None" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "We can build some parsing methods in order to create a factor graph from a string representing the factorization of the joint probability distribution." 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 8, 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [ 243 | "from collections import namedtuple\n", 244 | "    \n", 245 | "ParsedTerm = namedtuple('ParsedTerm', [\n", 246 | "    'term',\n", 247 | "    'var_name',\n", 248 | "    'given',\n", 249 | "])\n", 250 | "\n", 251 | "\n", 252 | "def _parse_term(term):\n", 253 | "    # Given a term like (a|b,c), returns a list of variables\n", 254 | "    # and conditioned-on variables\n", 255 | "    assert term[0] == '(' and term[-1] == ')'\n", 256 | "    term_variables = term[1:-1]\n", 257 | "\n", 258 | "    # Handle conditionals\n", 259 | "    if '|' in term_variables:\n", 260 | "        var, given = term_variables.split('|')\n", 261 | "        var = var.split(',')\n", 262 | "        given = given.split(',')\n", 263 | "    else:\n", 264 | "        var = term_variables\n", 265 | "        var = var.split(',')\n", 266 | "        given = []\n", 267 | "\n", 268 | "    return var, given\n", 269 | "\n", 270 | "\n", 271 | "def _parse_model_string_into_terms(model_string):\n", 272 | "    return [\n", 273 | "        ParsedTerm('p' + term, *_parse_term(term))\n", 274 | "        for term in model_string.split('p')\n", 275 | "        if term\n", 276 | "    ]\n", 277 | "\n", 278 | "def parse_model_into_variables_and_factors(model_string):\n", 279 | "    # Takes in a model_string such as p(h1)p(h2∣h1)p(v1∣h1)p(v2∣h2) and returns a\n", 280 | "    # dictionary of variable names to variables and a list of factors.\n", 281 | "    \n", 282 | "    # Split model_string into ParsedTerms\n", 283 | "    parsed_terms = _parse_model_string_into_terms(model_string)\n", 284 | "    \n", 285 | "    # First, extract all of the variables from the model_string (h1, h2, v1, v2). \n", 286 | "    # Each of these will be a new Variable that is referenced from Factors below.\n", 287 | "    variables = {}\n", 288 | "    for parsed_term in parsed_terms:\n", 289 | "        # if the variable name wasn't seen yet, add it to the variables dict\n", 290 | "        for term in parsed_term.var_name:\n", 291 | "            if term not in variables:\n", 292 | "                variables[term] = Variable(term)\n", 293 | "\n", 294 | "    # Now extract factors from the model. Each term (e.g. 
\"p(v1|h1)\") corresponds to \n", 295 | " # a factor. \n", 296 | " # Then find all variables in this term (\"v1\", \"h1\") and add the corresponding Variables\n", 297 | " # as neighbors to the new Factor, and this Factor to the Variables' neighbors.\n", 298 | " factors = []\n", 299 | " for parsed_term in parsed_terms:\n", 300 | " # This factor will be neighbors with all \"variables\" (left-hand side variables) and given variables\n", 301 | " \n", 302 | " new_factor = Factor(parsed_term.term)\n", 303 | " all_var_names = parsed_term.var_name + parsed_term.given\n", 304 | " for var_name in all_var_names:\n", 305 | " new_factor.add_neighbor(variables[var_name])\n", 306 | " variables[var_name].add_neighbor(new_factor)\n", 307 | " factors.append(new_factor)\n", 308 | "\n", 309 | " return factors, variables" 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": {}, 315 | "source": [ 316 | "We can combine factor nodes and variable nodes to create a factor graph and add a distribution to each factor node." 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 9, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "class PGM(object):\n", 326 | " def __init__(self, factors, variables):\n", 327 | " self._factors = factors\n", 328 | " self._variables = variables\n", 329 | "\n", 330 | " @classmethod\n", 331 | " def from_string(cls, model_string):\n", 332 | " factors, variables = parse_model_into_variables_and_factors(model_string)\n", 333 | " return PGM(factors, variables)\n", 334 | "\n", 335 | " def set_distributions(self, data):\n", 336 | " var_dims = {}\n", 337 | " for factor in self._factors:\n", 338 | " factor_data = data[factor.name]\n", 339 | "\n", 340 | " if set(factor_data.axes_labels) != set(v.name for v in factor.neighbors):\n", 341 | " missing_axes = set(v.name for v in factor.neighbors) - set(data[factor.name].axes_labels)\n", 342 | " raise ValueError(\"data[{}] is missing axes: {}\".format(factor.name, missing_axes))\n", 343 | " \n", 344 | " for var_name, dim in zip(factor_data.axes_labels, factor_data.probs.shape):\n", 345 | " if var_name not in var_dims:\n", 346 | " var_dims[var_name] = dim\n", 347 | " \n", 348 | " if var_dims[var_name] != dim:\n", 349 | " raise ValueError(\"data[{}] axes is wrong size, {}. Expected {}\".format(factor.name, dim, var_dims[var_name])) \n", 350 | " \n", 351 | " factor.data = data[factor.name]\n", 352 | " \n", 353 | " def variable_from_name(self, var_name):\n", 354 | " return self._variables[var_name]" 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "We can notice that, in the previous example, we can write the marginal as a combination of sums and products:\n", 362 | "\n", 363 | "$$p(x_5) = \\sum_{x_1, x_2, x_3, x_4}p(x_1, x_2, x_3, x_4, x_5) =\\\\ = \\sum_{x_3, x_4}f_3(x_3,x_4,x_5)\\bigg[\\sum_{x_1}f_1(x_1, x_3)\\bigg]\\bigg[\\sum_{x_2}f_2(x_2, x_3)\\bigg]$$\n", 364 | "\n", 365 | "and interpret them as messages flowing from factors to variables (including a summation) or from variables to factors (via multiplication)." 
366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 11, 371 | "metadata": {}, 372 | "outputs": [], 373 | "source": [ 374 | "class Messages(object):\n", 375 | " def __init__(self):\n", 376 | " self.messages = {}\n", 377 | " \n", 378 | " def _variable_to_factor_messages(self, variable, factor):\n", 379 | " # Take the product over all incoming factors into this variable except the variable\n", 380 | " # If there are no incoming messages, this is 1 (BASE CASE)\n", 381 | " incoming_messages = [self.factor_to_variable_message(neighbor_factor, variable) for neighbor_factor in variable.neighbors if neighbor_factor.name != factor.name]\n", 382 | " \n", 383 | " return np.prod(incoming_messages, axis=0)\n", 384 | " \n", 385 | " def _factor_to_variable_messages(self, factor, variable):\n", 386 | " #reinstantiate to obtain a deep copy\n", 387 | " factor_dist = Distribution(factor.data.probs, factor.data.axes_labels)\n", 388 | "\n", 389 | " for neighbor_variable in factor.neighbors:\n", 390 | " if neighbor_variable.name == variable.name:\n", 391 | " continue\n", 392 | " #Retrieve the incoming message and multiply the conditional distribution of the factor with the message\n", 393 | " incoming_message = self.variable_to_factor_messages(neighbor_variable, factor)\n", 394 | " factor_dist = multiply(factor_dist, Distribution(incoming_message, [neighbor_variable.name]))\n", 395 | "\n", 396 | " # Sum over the axes that aren't `variable`\n", 397 | " factor_dist = factor_dist.probs\n", 398 | " other_axes = factor.data.get_other_axes_from(variable.name)\n", 399 | " return np.squeeze(np.sum(factor_dist, axis=other_axes))\n", 400 | " \n", 401 | " def marginal(self, variable):\n", 402 | " # p(variable) is proportional to the product of incoming messages to variable.\n", 403 | " unnorm_p = np.prod([self.factor_to_variable_message(neighbor_factor, variable) for neighbor_factor in variable.neighbors], axis=0)\n", 404 | " return unnorm_p / np.sum(unnorm_p)\n", 405 | " \n", 406 | " def variable_to_factor_messages(self, variable, factor):\n", 407 | " message_name = (variable.name, factor.name)\n", 408 | " if message_name not in self.messages:\n", 409 | " self.messages[message_name] = self._variable_to_factor_messages(variable, factor)\n", 410 | " return self.messages[message_name]\n", 411 | " \n", 412 | " def factor_to_variable_message(self, factor, variable):\n", 413 | " message_name = (factor.name, variable.name)\n", 414 | " if message_name not in self.messages:\n", 415 | " self.messages[message_name] = self._factor_to_variable_messages(factor, variable)\n", 416 | " return self.messages[message_name] " 417 | ] 418 | }, 419 | { 420 | "cell_type": "markdown", 421 | "metadata": {}, 422 | "source": [ 423 | "We can try to build the following factor graph:\n", 424 | "\n", 425 | "![factor1](imgs/factor2.png)" 426 | ] 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": 12, 431 | "metadata": {}, 432 | "outputs": [], 433 | "source": [ 434 | "p_h1 = Distribution(np.array([[0.2], [0.8]]), ['h1'])\n", 435 | "p_h2_given_h1 = Distribution(np.array([[0.5, 0.2], [0.5, 0.8]]), ['h2', 'h1'])\n", 436 | "p_v1_given_h1 = Distribution(np.array([[0.6, 0.1], [0.4, 0.9]]), ['v1', 'h1'])\n", 437 | "p_v2_given_h2 = Distribution(p_v1_given_h1.probs, ['v2', 'h2'])\n", 438 | "\n", 439 | "pgm = PGM.from_string(\"p(h1)p(h2|h1)p(v1|h1)p(v2|h2)\")\n", 440 | "\n", 441 | "pgm.set_distributions({\n", 442 | " \"p(h1)\": p_h1,\n", 443 | " \"p(h2|h1)\": p_h2_given_h1,\n", 444 | " \"p(v1|h1)\": p_v1_given_h1,\n", 445 | 
" \"p(v2|h2)\": p_v2_given_h2,\n", 446 | "})" 447 | ] 448 | }, 449 | { 450 | "cell_type": "markdown", 451 | "metadata": {}, 452 | "source": [ 453 | "And compute the marginal distribution $p(v_2)$" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 13, 459 | "metadata": {}, 460 | "outputs": [ 461 | { 462 | "data": { 463 | "text/plain": [ 464 | "array([0.23, 0.77])" 465 | ] 466 | }, 467 | "execution_count": 13, 468 | "metadata": {}, 469 | "output_type": "execute_result" 470 | } 471 | ], 472 | "source": [ 473 | "pgm = PGM.from_string(\"p(h1)p(h2|h1)p(v1|h1)p(v2|h2)\")\n", 474 | "\n", 475 | "pgm.set_distributions({\n", 476 | " \"p(h1)\": p_h1,\n", 477 | " \"p(h2|h1)\": p_h2_given_h1,\n", 478 | " \"p(v1|h1)\": p_v1_given_h1,\n", 479 | " \"p(v2|h2)\": p_v2_given_h2,\n", 480 | "})\n", 481 | "\n", 482 | "m = Messages()\n", 483 | "m.marginal(pgm.variable_from_name('v2'))" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 14, 489 | "metadata": {}, 490 | "outputs": [ 491 | { 492 | "data": { 493 | "text/plain": [ 494 | "{('p(h1)', 'h1'): array([0.2, 0.8]),\n", 495 | " ('v1', 'p(v1|h1)'): 1.0,\n", 496 | " ('p(v1|h1)', 'h1'): array([1., 1.]),\n", 497 | " ('h1', 'p(h2|h1)'): array([0.2, 0.8]),\n", 498 | " ('p(h2|h1)', 'h2'): array([0.26, 0.74]),\n", 499 | " ('h2', 'p(v2|h2)'): array([0.26, 0.74]),\n", 500 | " ('p(v2|h2)', 'v2'): array([0.23, 0.77])}" 501 | ] 502 | }, 503 | "execution_count": 14, 504 | "metadata": {}, 505 | "output_type": "execute_result" 506 | } 507 | ], 508 | "source": [ 509 | "m.messages" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": 15, 515 | "metadata": {}, 516 | "outputs": [ 517 | { 518 | "data": { 519 | "text/plain": [ 520 | "array([0.2, 0.8])" 521 | ] 522 | }, 523 | "execution_count": 15, 524 | "metadata": {}, 525 | "output_type": "execute_result" 526 | } 527 | ], 528 | "source": [ 529 | "m.marginal(pgm.variable_from_name('v1'))" 530 | ] 531 | }, 532 | { 533 | "cell_type": "markdown", 534 | "metadata": {}, 535 | "source": [ 536 | "### Exercise 1\n", 537 | "\n", 538 | "(From Bayesian Reasoning and Machine Learning, David Barber) You live in a house with three rooms, labelled 1, 2, 3. There is a door between rooms 1 and 2 and another between rooms 2 and 3. One cannot directly pass between rooms 1 and 3 in one time-step. An annoying fly is buzzing from one room to another and there is some smelly cheese in room 1 which seems to attract the fly more. Using $x_t$ for which room the fly is in at time t, with $dom(x_t) = {1,2,3}$, the movement of the fly can be described by a transition:\n", 539 | "$p(x_{t+1} = i|x_t = j) = M_{ij}$\n", 540 | "\n", 541 | "where M is a transition matrix:\n", 542 | "\n", 543 | "$$\n", 544 | "\\begin{bmatrix}\n", 545 | "0.7 & 0.5 & 0 \\\\\n", 546 | "0.3 & 0.3 & 0.5 \\\\\n", 547 | "0 & 0.2 & 0.5 \\\\\n", 548 | "\\end{bmatrix}\n", 549 | "$$\n", 550 | "\n", 551 | "Given that the fly is in room 1 at time 1, what is the probability of room occupancy at time t = 5? Assume a Markov chain which is defined by the joint distribution\n", 552 | "\n", 553 | "$p(x_1, . . . 
, x_T ) = p(x_1) \\prod p(x_{t+1}|x_t)$\n", 554 | "\n", 555 | "We are asked to compute $p(x_5|x_1 = 1)$ which is given by\n", 556 | "$\\sum p(x_5|x_4)p(x_4|x_3)p(x_3|x_2)p(x_2|x_1 = 1)$\n", 557 | "\n", 558 | "\n" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": 19, 564 | "metadata": {}, 565 | "outputs": [ 566 | { 567 | "name": "stdout", 568 | "output_type": "stream", 569 | "text": [ 570 | "[0.5746 0.318 0.1074]\n" 571 | ] 572 | } 573 | ], 574 | "source": [ 575 | "pgm = PGM.from_string(\"p(x5|x4)p(x4|x3)p(x3|x2)p(x2|x1)p(x1)\")\n", 576 | "p_x5_given_x4 = Distribution(np.array([[0.7, 0.5, 0], [0.3, 0.3, 0.5], [0, 0.2, 0.5]]), ['x5', 'x4'])\n", 577 | "p_x4_given_x3 = Distribution(np.array([[0.7, 0.5, 0], [0.3, 0.3, 0.5], [0, 0.2, 0.5]]), ['x4', 'x3'])\n", 578 | "p_x3_given_x2 = Distribution(np.array([[0.7, 0.5, 0], [0.3, 0.3, 0.5], [0, 0.2, 0.5]]), ['x3', 'x2'])\n", 579 | "p_x2_given_x1 = Distribution(np.array([[0.7, 0.5, 0], [0.3, 0.3, 0.5], [0, 0.2, 0.5]]), ['x2', 'x1'])\n", 580 | "p_x1 = Distribution(np.array([1,0,0]), ['x1'])\n", 581 | "\n", 582 | "pgm.set_distributions({\n", 583 | " \"p(x5|x4)\": p_x5_given_x4,\n", 584 | " \"p(x4|x3)\": p_x4_given_x3,\n", 585 | " \"p(x3|x2)\": p_x3_given_x2,\n", 586 | " \"p(x2|x1)\": p_x2_given_x1,\n", 587 | " \"p(x1)\": p_x1,\n", 588 | "})\n", 589 | "\n", 590 | "m2 = Messages()\n", 591 | "print(m2.marginal(pgm.variable_from_name('x5')))\n" 592 | ] 593 | }, 594 | { 595 | "cell_type": "markdown", 596 | "metadata": {}, 597 | "source": [ 598 | "### Exercise 2: Hidden Markov Models\n", 599 | "\n", 600 | "Imagine you're trying to guess someone's mood without directly asking them or using brain electrodes. Instead, you observe their facial expressions, whether they're smiling or frowning, to make an educated guess.\n", 601 | "\n", 602 | "We assume moods can be categorized into two states: good and bad. When you meet someone for the first time, there's a 70% chance they're in a good mood and a 30% chance they're in a bad mood.\n", 603 | "\n", 604 | "If someone is in a good mood, there's an 80% chance they'll stay in a good mood and a 20% chance they'll switch to a bad mood over time. The same probabilities of switching the mood apply if they start in a bad mood.\n", 605 | "\n", 606 | "Lastly, when someone is in a good mood, they're 90% likely to smile and 10% likely to frown. Conversely, if they're in a bad mood, they have a 10% chance of smiling and a 90% chance of frowning.\n", 607 | "\n", 608 | "The transitions are summarized in the following graph.\n", 609 | "\n", 610 | "Your task is to use these probabilities to figure out the first and second hidden mood states (the probability that the first mood is good/bad and the probability that the second mood is good/bad) based on the observable facial expressions you see (imagine you see the sequence [smiling, frowning]).\n", 611 | "\n", 612 | "![factor1](imgs/mood.png)\n", 613 | "(image by Y. 
Natsume)" 614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": 20, 619 | "metadata": {}, 620 | "outputs": [ 621 | { 622 | "name": "stdout", 623 | "output_type": "stream", 624 | "text": [ 625 | "[0.88064516 0.11935484]\n" 626 | ] 627 | } 628 | ], 629 | "source": [ 630 | "pgm = PGM.from_string(\"p(z1)p(z2|z1)p(x1|z1)p(x2|z2)p(x1)p(x2)\")\n", 631 | "p_z1 = Distribution(np.array([0.7, 0.3]), ['z1'])\n", 632 | "p_x1 = Distribution(np.array([1, 0]), ['x1']) \n", 633 | "p_x2 = Distribution(np.array([0, 1]), ['x2'])\n", 634 | "p_z2_given_z1 = Distribution(np.array([[0.8, 0.2], [0.2, 0.8]]), ['z2', 'z1'])\n", 635 | "p_x1_given_z1 = Distribution(np.array([[0.9, 0.1], [0.1, 0.9]]), ['x1', 'z1'])\n", 636 | "p_x2_given_z2 = Distribution(np.array([[0.9, 0.1], [0.1, 0.9]]), ['x2', 'z2'])\n", 637 | "\n", 638 | "\n", 639 | "pgm.set_distributions({\n", 640 | " \"p(z1)\": p_z1,\n", 641 | " \"p(z2|z1)\": p_z2_given_z1,\n", 642 | " \"p(x1|z1)\": p_x1_given_z1,\n", 643 | " \"p(x2|z2)\": p_x2_given_z2,\n", 644 | " \"p(x1)\": p_x1,\n", 645 | " \"p(x2)\": p_x2,\n", 646 | "})\n", 647 | "\n", 648 | "m3 = Messages()\n", 649 | "print(m3.marginal(pgm.variable_from_name('z1')))" 650 | ] 651 | } 652 | ], 653 | "metadata": { 654 | "kernelspec": { 655 | "display_name": "general", 656 | "language": "python", 657 | "name": "python3" 658 | }, 659 | "language_info": { 660 | "codemirror_mode": { 661 | "name": "ipython", 662 | "version": 3 663 | }, 664 | "file_extension": ".py", 665 | "mimetype": "text/x-python", 666 | "name": "python", 667 | "nbconvert_exporter": "python", 668 | "pygments_lexer": "ipython3", 669 | "version": "3.12.8" 670 | } 671 | }, 672 | "nbformat": 4, 673 | "nbformat_minor": 2 674 | } 675 | -------------------------------------------------------------------------------- /02_numpy_pandas_sklearn/021_numpy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "73dc52ad", 6 | "metadata": {}, 7 | "source": [ 8 | "\"Open" 9 | ] 10 | }, 11 | { 12 | "attachments": {}, 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# Notebook 2.1: Introduction to `numpy`\n", 17 | "\n", 18 | "Probabilistic Machine Learning -- Spring 2025, UniTS\n" 19 | ] 20 | }, 21 | { 22 | "attachments": {}, 23 | "cell_type": "markdown", 24 | "metadata": { 25 | "id": "YneEBH1gDUgs" 26 | }, 27 | "source": [ 28 | "## What is `numpy`?\n", 29 | "\n", 30 | "\n", 31 | "\n", 32 | "\n", 33 | "\n", 34 | "NumPy (Numerical Python) is the fundamental package for scientific computing in Python, it provides support to multidimensional arrays and many mathematical functions that operate on them. These include \n", 35 | "- mathematical operations\n", 36 | "- linear algebra\n", 37 | "- basic statistical operations\n", 38 | "- random simulation\n", 39 | "\n", 40 | "and much more.\n", 41 | "\n", 42 | "Moreover, operations on arrays with NumPy are really fast, as these are based on C compiled code." 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "import numpy as np\n", 52 | "# This is the standard way to import it" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "## Arrays\n", 60 | "At the core of the NumPy package, is the `ndarray` (n-dimensional array) object. This encapsulates n-dimensional arrays of homogeneous data types. 
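For instance, `np.array([[1., 2.], [3., 4.]])` creates a 2-dimensional `ndarray` of `float64` values. 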
There are several important differences between NumPy arrays and the standard Python sequences:\n", 61 | "\n", 62 | "- Fixed size at creation\n", 63 | "- All elements have to be of the same type (e.g. float)\n", 64 | "- Support mathematical operations (e.g. summing two arrays)\n", 65 | "\n", 66 | "One way we can initialize NumPy arrays is from Python lists, using nested lists for two- or higher-dimensional data:" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 3, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "data": { 76 | "text/plain": [ 77 | "array([1, 2, 3, 4, 5, 6])" 78 | ] 79 | }, 80 | "execution_count": 3, 81 | "metadata": {}, 82 | "output_type": "execute_result" 83 | } 84 | ], 85 | "source": [ 86 | "a = np.array([1, 2, 3, 4, 5, 6])\n", 87 | "a" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 4, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/plain": [ 98 | "array([[ 1, 2, 3, 4],\n", 99 | " [ 5, 6, 7, 8],\n", 100 | " [ 9, 10, 11, 12]])" 101 | ] 102 | }, 103 | "execution_count": 4, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])\n", 110 | "a" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "A NumPy array is characterized by a type (`.dtype`) and a shape (`.shape`):" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 5, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "name": "stdout", 127 | "output_type": "stream", 128 | "text": [ 129 | "a:\n", 130 | " [1 2 3 4 5 6]\n", 131 | "type: int64\n", 132 | "shape: (6,)\n" 133 | ] 134 | } 135 | ], 136 | "source": [ 137 | "a = np.array([1, 2, 3, 4, 5, 6])\n", 138 | "\n", 139 | "print('a:\\n', a)\n", 140 | "print('type:', a.dtype)\n", 141 | "print('shape:', a.shape) " 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 6, 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "name": "stdout", 151 | "output_type": "stream", 152 | "text": [ 153 | "a:\n", 154 | " [[ 1. -2. 3.6 4. ]\n", 155 | " [ 5. 6. 7. 8. ]\n", 156 | " [ 9. 10. 1. 2.2]]\n", 157 | "type: float64\n", 158 | "shape: (3, 4)\n" 159 | ] 160 | } 161 | ], 162 | "source": [ 163 | "a = np.array([[1, -2, 3.6, 4], [5, 6.0, 7, 8], [9, 10, 1.0, 2.2]])\n", 164 | "\n", 165 | "print('a:\\n', a)\n", 166 | "print('type:', a.dtype)\n", 167 | "print('shape:', a.shape) " 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "In NumPy, dimensions are called **axes**. For example, the previous array with shape (3, 4) have two **axes**, the first **axis** has length 3, and the second **axis** has length 4." 
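, "\n", "For instance, the array above has `a.ndim` equal to 2 and `a.shape` equal to `(3, 4)`: the first axis indexes the 3 rows, the second one the 4 columns."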
175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "## Indexing and slicing" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "You can index and slice NumPy arrays in the same ways you can slice Python lists:" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 7, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "data": { 198 | "text/plain": [ 199 | "1" 200 | ] 201 | }, 202 | "execution_count": 7, 203 | "metadata": {}, 204 | "output_type": "execute_result" 205 | } 206 | ], 207 | "source": [ 208 | "a = np.array([1, 2, 3, 4, 5, 6])\n", 209 | "\n", 210 | "a[0] # accessing the first element" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 8, 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "data": { 220 | "text/plain": [ 221 | "6" 222 | ] 223 | }, 224 | "execution_count": 8, 225 | "metadata": {}, 226 | "output_type": "execute_result" 227 | } 228 | ], 229 | "source": [ 230 | "a[-1] # accessing the last element" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 9, 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/plain": [ 241 | "array([10, 2, 3, 4, 5, 6])" 242 | ] 243 | }, 244 | "execution_count": 9, 245 | "metadata": {}, 246 | "output_type": "execute_result" 247 | } 248 | ], 249 | "source": [ 250 | "# modifiying an element\n", 251 | "a[0] = 10\n", 252 | "a" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "Slicing refers to selecting a subset of elements of an array, the notation for selecting the elements of an array from `start` included to `stop` excluded is\n", 260 | "\n", 261 | "```array[start : stop]```\n", 262 | "\n", 263 | "or \n", 264 | "\n", 265 | "```array[start : stop : step]```\n", 266 | "\n", 267 | "if you want a step different from 1" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 10, 273 | "metadata": {}, 274 | "outputs": [ 275 | { 276 | "data": { 277 | "text/plain": [ 278 | "array([10, 2])" 279 | ] 280 | }, 281 | "execution_count": 10, 282 | "metadata": {}, 283 | "output_type": "execute_result" 284 | } 285 | ], 286 | "source": [ 287 | "a[0:2]" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 11, 293 | "metadata": {}, 294 | "outputs": [ 295 | { 296 | "data": { 297 | "text/plain": [ 298 | "array([2, 3, 4])" 299 | ] 300 | }, 301 | "execution_count": 11, 302 | "metadata": {}, 303 | "output_type": "execute_result" 304 | } 305 | ], 306 | "source": [ 307 | "a[1:-2]" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 12, 313 | "metadata": {}, 314 | "outputs": [ 315 | { 316 | "data": { 317 | "text/plain": [ 318 | "array([3, 4, 5, 6])" 319 | ] 320 | }, 321 | "execution_count": 12, 322 | "metadata": {}, 323 | "output_type": "execute_result" 324 | } 325 | ], 326 | "source": [ 327 | "a[2:] # implicitly goes to the end" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 13, 333 | "metadata": {}, 334 | "outputs": [ 335 | { 336 | "data": { 337 | "text/plain": [ 338 | "array([10, 2, 3, 4])" 339 | ] 340 | }, 341 | "execution_count": 13, 342 | "metadata": {}, 343 | "output_type": "execute_result" 344 | } 345 | ], 346 | "source": [ 347 | "a[:4] # implicitly starts from the beginning" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 14, 353 | "metadata": {}, 354 | "outputs": [ 355 | { 356 | 
"data": { 357 | "text/plain": [ 358 | "array([10, 3, 5])" 359 | ] 360 | }, 361 | "execution_count": 14, 362 | "metadata": {}, 363 | "output_type": "execute_result" 364 | } 365 | ], 366 | "source": [ 367 | "a[::2] # modify the step size" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": 15, 373 | "metadata": {}, 374 | "outputs": [ 375 | { 376 | "data": { 377 | "text/plain": [ 378 | "array([ 6, 5, 4, 3, 2, 10])" 379 | ] 380 | }, 381 | "execution_count": 15, 382 | "metadata": {}, 383 | "output_type": "execute_result" 384 | } 385 | ], 386 | "source": [ 387 | "a[::-1] # reverse the array" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "" 395 | ] 396 | }, 397 | { 398 | "cell_type": "markdown", 399 | "metadata": {}, 400 | "source": [ 401 | "With multidimensional arrays, you can specify an index/slice for every **axis**:" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 16, 407 | "metadata": {}, 408 | "outputs": [ 409 | { 410 | "data": { 411 | "text/plain": [ 412 | "array([[ 7, 8],\n", 413 | " [11, 12]])" 414 | ] 415 | }, 416 | "execution_count": 16, 417 | "metadata": {}, 418 | "output_type": "execute_result" 419 | } 420 | ], 421 | "source": [ 422 | "a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])\n", 423 | "\n", 424 | "a[1:3, -2:]" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": 17, 430 | "metadata": {}, 431 | "outputs": [ 432 | { 433 | "data": { 434 | "text/plain": [ 435 | "array([[ 5, 6, 7, 8],\n", 436 | " [ 9, 10, 11, 12]])" 437 | ] 438 | }, 439 | "execution_count": 17, 440 | "metadata": {}, 441 | "output_type": "execute_result" 442 | } 443 | ], 444 | "source": [ 445 | "# You don't need to specify the slice/index for all axes, the unspecified axes will considered as complete slices [:]\n", 446 | "a[1:3] # equivalent to a[1:3, :]" 447 | ] 448 | }, 449 | { 450 | "cell_type": "markdown", 451 | "metadata": {}, 452 | "source": [ 453 | "" 454 | ] 455 | }, 456 | { 457 | "cell_type": "markdown", 458 | "metadata": {}, 459 | "source": [ 460 | "## Arrays operations" 461 | ] 462 | }, 463 | { 464 | "cell_type": "markdown", 465 | "metadata": {}, 466 | "source": [ 467 | "One of the main features of NumPy are **vectorized** operations:" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": 18, 473 | "metadata": {}, 474 | "outputs": [], 475 | "source": [ 476 | "a = np.array([20, 30, 40, 50])\n", 477 | "b = np.array([1, 2, 3, 4])\n", 478 | "\n", 479 | "c = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 19, 485 | "metadata": {}, 486 | "outputs": [ 487 | { 488 | "data": { 489 | "text/plain": [ 490 | "array([ 41, 61, 81, 101])" 491 | ] 492 | }, 493 | "execution_count": 19, 494 | "metadata": {}, 495 | "output_type": "execute_result" 496 | } 497 | ], 498 | "source": [ 499 | "2*a + 1" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": 20, 505 | "metadata": {}, 506 | "outputs": [ 507 | { 508 | "data": { 509 | "text/plain": [ 510 | "array([21, 32, 43, 54])" 511 | ] 512 | }, 513 | "execution_count": 20, 514 | "metadata": {}, 515 | "output_type": "execute_result" 516 | } 517 | ], 518 | "source": [ 519 | "a + b # element-wise addition" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": 21, 525 | "metadata": {}, 526 | "outputs": [ 527 | { 528 | "data": { 529 | "text/plain": [ 530 | "array([ 400, 900, 1600, 2500])" 
531 | ] 532 | }, 533 | "execution_count": 21, 534 | "metadata": {}, 535 | "output_type": "execute_result" 536 | } 537 | ], 538 | "source": [ 539 | "a**2" 540 | ] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": 22, 545 | "metadata": {}, 546 | "outputs": [ 547 | { 548 | "data": { 549 | "text/plain": [ 550 | "array([2.99573227, 3.40119738, 3.68887945, 3.91202301])" 551 | ] 552 | }, 553 | "execution_count": 22, 554 | "metadata": {}, 555 | "output_type": "execute_result" 556 | } 557 | ], 558 | "source": [ 559 | "np.log(a)" 560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "execution_count": 23, 565 | "metadata": {}, 566 | "outputs": [ 567 | { 568 | "data": { 569 | "text/plain": [ 570 | "array([[ 2, 6, 12, 20],\n", 571 | " [ 30, 42, 56, 72],\n", 572 | " [ 90, 110, 132, 156]])" 573 | ] 574 | }, 575 | "execution_count": 23, 576 | "metadata": {}, 577 | "output_type": "execute_result" 578 | } 579 | ], 580 | "source": [ 581 | "c + c**2" 582 | ] 583 | }, 584 | { 585 | "cell_type": "markdown", 586 | "metadata": {}, 587 | "source": [ 588 | "## Statistics" 589 | ] 590 | }, 591 | { 592 | "cell_type": "code", 593 | "execution_count": 24, 594 | "metadata": {}, 595 | "outputs": [], 596 | "source": [ 597 | "a = np.array([20, 30, 40, 50, 12, 90, 23])" 598 | ] 599 | }, 600 | { 601 | "cell_type": "code", 602 | "execution_count": 25, 603 | "metadata": {}, 604 | "outputs": [ 605 | { 606 | "data": { 607 | "text/plain": [ 608 | "265" 609 | ] 610 | }, 611 | "execution_count": 25, 612 | "metadata": {}, 613 | "output_type": "execute_result" 614 | } 615 | ], 616 | "source": [ 617 | "np.sum(a)" 618 | ] 619 | }, 620 | { 621 | "cell_type": "code", 622 | "execution_count": 26, 623 | "metadata": {}, 624 | "outputs": [ 625 | { 626 | "data": { 627 | "text/plain": [ 628 | "37.857142857142854" 629 | ] 630 | }, 631 | "execution_count": 26, 632 | "metadata": {}, 633 | "output_type": "execute_result" 634 | } 635 | ], 636 | "source": [ 637 | "np.mean(a)" 638 | ] 639 | }, 640 | { 641 | "cell_type": "code", 642 | "execution_count": 27, 643 | "metadata": {}, 644 | "outputs": [ 645 | { 646 | "data": { 647 | "text/plain": [ 648 | "24.321821897385963" 649 | ] 650 | }, 651 | "execution_count": 27, 652 | "metadata": {}, 653 | "output_type": "execute_result" 654 | } 655 | ], 656 | "source": [ 657 | "np.std(a)" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": 28, 663 | "metadata": {}, 664 | "outputs": [ 665 | { 666 | "data": { 667 | "text/plain": [ 668 | "90" 669 | ] 670 | }, 671 | "execution_count": 28, 672 | "metadata": {}, 673 | "output_type": "execute_result" 674 | } 675 | ], 676 | "source": [ 677 | "np.max(a)" 678 | ] 679 | }, 680 | { 681 | "cell_type": "code", 682 | "execution_count": 29, 683 | "metadata": {}, 684 | "outputs": [ 685 | { 686 | "data": { 687 | "text/plain": [ 688 | "array([[ 1, 2, 3, 4],\n", 689 | " [ 5, 6, 7, 8],\n", 690 | " [ 9, 10, 11, 12]])" 691 | ] 692 | }, 693 | "execution_count": 29, 694 | "metadata": {}, 695 | "output_type": "execute_result" 696 | } 697 | ], 698 | "source": [ 699 | "b = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])\n", 700 | "b" 701 | ] 702 | }, 703 | { 704 | "cell_type": "code", 705 | "execution_count": 30, 706 | "metadata": {}, 707 | "outputs": [ 708 | { 709 | "data": { 710 | "text/plain": [ 711 | "78" 712 | ] 713 | }, 714 | "execution_count": 30, 715 | "metadata": {}, 716 | "output_type": "execute_result" 717 | } 718 | ], 719 | "source": [ 720 | "np.sum(b)" 721 | ] 722 | }, 723 | { 724 | "cell_type": "code", 725 | 
"execution_count": 31, 726 | "metadata": {}, 727 | "outputs": [ 728 | { 729 | "data": { 730 | "text/plain": [ 731 | "array([15, 18, 21, 24])" 732 | ] 733 | }, 734 | "execution_count": 31, 735 | "metadata": {}, 736 | "output_type": "execute_result" 737 | } 738 | ], 739 | "source": [ 740 | "# reduce the array along the specified axis\n", 741 | "# (3, 4) -> (4)\n", 742 | "np.sum(b, axis=0)" 743 | ] 744 | }, 745 | { 746 | "cell_type": "code", 747 | "execution_count": 32, 748 | "metadata": {}, 749 | "outputs": [ 750 | { 751 | "data": { 752 | "text/plain": [ 753 | "array([10, 26, 42])" 754 | ] 755 | }, 756 | "execution_count": 32, 757 | "metadata": {}, 758 | "output_type": "execute_result" 759 | } 760 | ], 761 | "source": [ 762 | "# (3, 4) -> (3)\n", 763 | "np.sum(b, axis=1)" 764 | ] 765 | }, 766 | { 767 | "cell_type": "markdown", 768 | "metadata": {}, 769 | "source": [ 770 | "\n", 771 | "\n", 772 | "" 773 | ] 774 | }, 775 | { 776 | "cell_type": "markdown", 777 | "metadata": {}, 778 | "source": [ 779 | "## Reject loops, embrace **vectorization**!" 780 | ] 781 | }, 782 | { 783 | "cell_type": "markdown", 784 | "metadata": {}, 785 | "source": [ 786 | "Vectorized operations are orders of magnitude faster than python loops.\n", 787 | "Unless strictly necessary, you should **avoid using loops** at all!\n", 788 | "\n", 789 | "Here there is a demonstration, let's say we want to find the maximum of a given very large array:" 790 | ] 791 | }, 792 | { 793 | "cell_type": "code", 794 | "execution_count": 33, 795 | "metadata": {}, 796 | "outputs": [ 797 | { 798 | "data": { 799 | "text/plain": [ 800 | "array([ 0, 1, 2, ..., 29999997, 29999998, 29999999])" 801 | ] 802 | }, 803 | "execution_count": 33, 804 | "metadata": {}, 805 | "output_type": "execute_result" 806 | } 807 | ], 808 | "source": [ 809 | "a = np.arange(30_000_000) # create an array with elements from 0 to n-1\n", 810 | "a" 811 | ] 812 | }, 813 | { 814 | "cell_type": "markdown", 815 | "metadata": {}, 816 | "source": [ 817 | "This is the inefficient way to do it:" 818 | ] 819 | }, 820 | { 821 | "cell_type": "code", 822 | "execution_count": 34, 823 | "metadata": {}, 824 | "outputs": [ 825 | { 826 | "data": { 827 | "text/plain": [ 828 | "29999999" 829 | ] 830 | }, 831 | "execution_count": 34, 832 | "metadata": {}, 833 | "output_type": "execute_result" 834 | } 835 | ], 836 | "source": [ 837 | "# inefficient way\n", 838 | "max_value = a[0] \n", 839 | "for i in a:\n", 840 | " if i>max_value:\n", 841 | " max_value = i\n", 842 | "max_value" 843 | ] 844 | }, 845 | { 846 | "cell_type": "markdown", 847 | "metadata": {}, 848 | "source": [ 849 | "The vectorized operation is way faster:" 850 | ] 851 | }, 852 | { 853 | "cell_type": "code", 854 | "execution_count": 35, 855 | "metadata": {}, 856 | "outputs": [ 857 | { 858 | "data": { 859 | "text/plain": [ 860 | "29999999" 861 | ] 862 | }, 863 | "execution_count": 35, 864 | "metadata": {}, 865 | "output_type": "execute_result" 866 | } 867 | ], 868 | "source": [ 869 | "np.max(a)" 870 | ] 871 | }, 872 | { 873 | "cell_type": "code", 874 | "execution_count": 36, 875 | "metadata": {}, 876 | "outputs": [], 877 | "source": [ 878 | "del a" 879 | ] 880 | }, 881 | { 882 | "cell_type": "markdown", 883 | "metadata": {}, 884 | "source": [ 885 | "## Reshaping" 886 | ] 887 | }, 888 | { 889 | "cell_type": "markdown", 890 | "metadata": {}, 891 | "source": [ 892 | "Sometimes it can be useful to reshape arrays:" 893 | ] 894 | }, 895 | { 896 | "cell_type": "code", 897 | "execution_count": 37, 898 | "metadata": {}, 899 | "outputs": [ 900 
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a.shape: (12,)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = np.array([1,2,3,4,5,6,7,8,9,10,11,12])\n",
    "print(\"a.shape:\", a.shape)\n",
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a.shape: (3, 4)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([[ 1,  2,  3,  4],\n",
       "       [ 5,  6,  7,  8],\n",
       "       [ 9, 10, 11, 12]])"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = a.reshape(3, 4)\n",
    "print(\"a.shape:\", a.shape)\n",
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a.shape: (3, 2, 2)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([[[ 1,  2],\n",
       "        [ 3,  4]],\n",
       "\n",
       "       [[ 5,  6],\n",
       "        [ 7,  8]],\n",
       "\n",
       "       [[ 9, 10],\n",
       "        [11, 12]]])"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = a.reshape(3, 2, 2)\n",
    "print(\"a.shape:\", a.shape)\n",
    "a"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Notice that the order of the elements is preserved: reshaping only regroups the same row-major sequence 1, 2, ..., 12 into different axes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1,  2],\n",
       "       [ 3,  4],\n",
       "       [ 5,  6],\n",
       "       [ 7,  8],\n",
       "       [ 9, 10],\n",
       "       [11, 12]])"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a.reshape(-1, 2) # -1 means the value is inferred from the length of the array and remaining dimensions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1],\n",
       "       [ 2],\n",
       "       [ 3],\n",
       "       [ 4],\n",
       "       [ 5],\n",
       "       [ 6],\n",
       "       [ 7],\n",
       "       [ 8],\n",
       "       [ 9],\n",
       "       [10],\n",
       "       [11],\n",
       "       [12]])"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a.reshape(-1, 1) # is this useful?"
   ]
  },
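  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It is! Many scientific libraries (scikit-learn estimators, for instance) expect a 2-D array with one row per observation, so a single feature stored as a 1-D array often has to be turned into a column exactly like this. A small sketch (the variable names `x` and `x_col` are just for illustration):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = np.arange(5)          # 5 values of one feature, shape (5,)\n",
    "x_col = x.reshape(-1, 1)  # 5 observations of 1 feature, shape (5, 1)\n",
    "print(x.shape, x_col.shape)"
   ]
  },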
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Very important convention**:\n",
    "\n",
    "When you have a dataset, the first dimension/axis is the number of observations. Many scientific functions and libraries take this for granted!\n",
    "\n",
    "For example, if I have a dataset of $n$ observations and each observation has $p$ features, then the array will have shape `(n, p)`."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Random"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "NumPy's `random` module contains functions for sampling from many distributions:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1.99638502,  0.08776601],\n",
       "       [-0.74125928,  0.71477195],\n",
       "       [ 0.94934134,  0.64139157]])"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.random.normal(loc=0, scale=1, size=(3, 2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([4, 0, 3, 1, 2, 1, 4])"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.random.binomial(n=10, p=0.2, size=7) # n is the number of trials, p is the probability of success"
   ]
  },
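  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Each run of the cells above produces different numbers. For reproducible experiments you can fix the random state; here is a minimal sketch using NumPy's `default_rng` Generator interface (the seed value 42 is arbitrary):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rng = np.random.default_rng(seed=42)     # seeded generator\n",
    "rng.normal(loc=0, scale=1, size=(3, 2))  # same draws on every run"
   ]
  },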
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Broadcasting"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We saw how to perform element-wise binary operations (e.g. the sum) on two or more arrays. Notice that we only did so between arrays of the same shape.\n",
    "\n",
    "Now consider the following array of shape `(4,2)`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0, 1],\n",
       "       [2, 3],\n",
       "       [4, 5],\n",
       "       [6, 7]])"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = np.arange(8).reshape(4, 2)\n",
    "a"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now let's say we want to add the following array to each row:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ -1, -10])"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b = np.array([-1, -10])\n",
    "b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2,)"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We could write a for loop that iteratively selects each row of the matrix and adds that array, but remember, we don't do that here!\n",
    "NumPy offers an alternative:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-1, -9],\n",
       "       [ 1, -7],\n",
       "       [ 3, -5],\n",
       "       [ 5, -3]])"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a + b.reshape(1,2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "What happened?\n",
    "\n",
    "Array `a` has shape `(4,2)` while array `b` has shape `(1,2)`.\n",
    "NumPy **expanded** array `b` to `(4,2)` by repeating it 4 times, then it computed the sum.\n",
    "\n",
    "Broadcasting is the mechanism that allows NumPy to perform operations on arrays of different shapes. Shapes are compared from the trailing (rightmost) dimension backwards: two dimensions are compatible when they are equal or when one of them is 1. If the dimensions are not compatible, you will get a `ValueError`."
   ]
  },
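  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick sketch of the failure case: shapes `(4, 2)` and `(3,)` cannot be aligned, because the trailing dimensions 2 and 3 differ and neither is 1, so the addition below raises a `ValueError` (the array `c` is introduced just for this demonstration):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "c = np.ones(3)  # shape (3,): incompatible with a's shape (4, 2)\n",
    "try:\n",
    "    a + c\n",
    "except ValueError as err:\n",
    "    print(err)"
   ]
  },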
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "row_vector:\n",
      " [[10 20 30]]\n",
      "col_vector:\n",
      " [[1]\n",
      " [2]\n",
      " [3]]\n",
      "row_vector + col_vector:\n",
      " [[11 21 31]\n",
      " [12 22 32]\n",
      " [13 23 33]]\n"
     ]
    }
   ],
   "source": [
    "# Another example\n",
    "row_vector = np.array([10, 20, 30]).reshape(1, 3) # shape: (1,3)\n",
    "col_vector = np.array([1, 2, 3]).reshape(3, 1)    # shape: (3,1)\n",
    "\n",
    "print('row_vector:\\n', row_vector)\n",
    "print('col_vector:\\n', col_vector)\n",
    "\n",
    "print('row_vector + col_vector:\\n', row_vector + col_vector) # (1,3) + (3,1) -> (3,3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Views and copies"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "When doing array operations, arrays are sometimes copied and sometimes not, depending on the operation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0., 0., 0., 0., 0.])"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = np.zeros(5) # equivalent to np.array([0, 0, 0, 0, 0])\n",
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a:  [77.  0.  0.  0.  0.]\n",
      "b:  [77.  0.  0.  0.]\n"
     ]
    }
   ],
   "source": [
    "b = a[:-1]  # slice: every element except the last one\n",
    "b[0] = 77\n",
    "print('a: ', a)\n",
    "print('b: ', b)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Just using the assignment operator `=` or slicing doesn't create a copy: `b` is a **view** on the same data as `a`!"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The same happens with reshaping.\n",
    "\n",
    "Other operations instead create a copy automatically:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a:  [0. 0. 0. 0. 0.]\n",
      "b:  [77.  1.  1.  1.  1.]\n"
     ]
    }
   ],
   "source": [
    "a = np.zeros(5)\n",
    "b = a + 1  # arithmetic creates a new array: modifying b leaves a untouched\n",
    "b[0] = 77\n",
    "print('a: ', a)\n",
    "print('b: ', b)"
   ]
  },
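  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "When in doubt, you can check whether two arrays share their underlying data with `np.shares_memory`. A quick diagnostic sketch, reusing the arrays defined above:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "view = a[:2]  # slicing -> a view on a's buffer\n",
    "print(np.shares_memory(a, view))  # True: same underlying data\n",
    "print(np.shares_memory(a, b))     # False: a + 1 allocated a new array"
   ]
  },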
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you just want to make a full, independent copy of an array, you can use the `.copy()` method."
   ]
  }
 ],
 "metadata": {
  "colab": {
   "include_colab_link": true,
   "provenance": []
  },
  "kernelspec": {
   "display_name": "general",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
--------------------------------------------------------------------------------