├── .DS_Store ├── Ch5 ├── .DS_Store ├── Central_Limit_Theorem.ipynb ├── Quantiles_Chebyshev_inequality.ipynb └── Lévy_alpha-stable_distribution.ipynb ├── Ch6 ├── .DS_Store ├── Covariance_matrix_vs_Scale_Matrix.ipynb ├── Marginal_vs_conditional_distribution_functions..ipynb └── Stress_Testing.ipynb ├── .gitignore ├── requirements.txt ├── README.md └── Ch8 └── Least_squares_solution.ipynb /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FinancialComputingUCL/DataDrivenModeling/HEAD/.DS_Store -------------------------------------------------------------------------------- /Ch5/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FinancialComputingUCL/DataDrivenModeling/HEAD/Ch5/.DS_Store -------------------------------------------------------------------------------- /Ch6/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FinancialComputingUCL/DataDrivenModeling/HEAD/Ch6/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/macos,windows,linux 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=macos,windows,linux 3 | 4 | ### Linux ### 5 | *~ 6 | 7 | # temporary files which can be created if a process still has a handle open of a deleted file 8 | .fuse_hidden* 9 | 10 | # KDE directory preferences 11 | .directory 12 | 13 | # Linux trash folder which might appear on any partition or disk 14 | .Trash-* 15 | 16 | # .nfs files are created when an open file is removed but is still being accessed 17 | .nfs* 18 | 19 | ### macOS ### 20 | # General 21 | .DS_Store 22 | .AppleDouble 23 | .LSOverride 24 | 25 | # Icon must end with two \r 26 | Icon 27 
| 28 | 29 | # Thumbnails 30 | ._* 31 | 32 | # Files that might appear in the root of a volume 33 | .DocumentRevisions-V100 34 | .fseventsd 35 | .Spotlight-V100 36 | .TemporaryItems 37 | .Trashes 38 | .VolumeIcon.icns 39 | .com.apple.timemachine.donotpresent 40 | 41 | # Directories potentially created on remote AFP share 42 | .AppleDB 43 | .AppleDesktop 44 | Network Trash Folder 45 | Temporary Items 46 | .apdisk 47 | 48 | ### macOS Patch ### 49 | # iCloud generated files 50 | *.icloud 51 | 52 | ### Windows ### 53 | # Windows thumbnail cache files 54 | Thumbs.db 55 | Thumbs.db:encryptable 56 | ehthumbs.db 57 | ehthumbs_vista.db 58 | 59 | # Dump file 60 | *.stackdump 61 | 62 | # Folder config file 63 | [Dd]esktop.ini 64 | 65 | # Recycle Bin used on file shares 66 | $RECYCLE.BIN/ 67 | 68 | # Windows Installer files 69 | *.cab 70 | *.msi 71 | *.msix 72 | *.msm 73 | *.msp 74 | 75 | # Windows shortcuts 76 | *.lnk 77 | 78 | .ipynb_checkpoints 79 | 80 | # End of https://www.toptal.com/developers/gitignore/api/macos,windows,linux -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | anyio==3.7.1 2 | appnope==0.1.3 3 | argon2-cffi==21.3.0 4 | argon2-cffi-bindings==21.2.0 5 | arrow==1.2.3 6 | asttokens==2.2.1 7 | async-lru==2.0.4 8 | attrs==23.1.0 9 | Babel==2.12.1 10 | backcall==0.2.0 11 | beautifulsoup4==4.12.2 12 | bleach==6.0.0 13 | certifi==2023.7.22 14 | cffi==1.15.1 15 | charset-normalizer==3.2.0 16 | comm==0.1.3 17 | contourpy==1.1.0 18 | cycler==0.11.0 19 | debugpy==1.6.7 20 | decorator==5.1.1 21 | defusedxml==0.7.1 22 | exceptiongroup==1.1.2 23 | executing==1.2.0 24 | fastjsonschema==2.18.0 25 | fonttools==4.41.1 26 | fqdn==1.5.1 27 | idna==3.4 28 | importlib-metadata==6.8.0 29 | importlib-resources==6.0.0 30 | ipykernel==6.25.0 31 | ipython==8.14.0 32 | isoduration==20.11.0 33 | jedi==0.19.0 34 | Jinja2==3.1.2 35 | json5==0.9.14 36 | 
jsonpointer==2.4 37 | jsonschema==4.18.4 38 | jsonschema-specifications==2023.7.1 39 | jupyter-events==0.7.0 40 | jupyter-lsp==2.2.0 41 | jupyter_client==8.3.0 42 | jupyter_core==5.3.1 43 | jupyter_server==2.7.0 44 | jupyter_server_terminals==0.4.4 45 | jupyterlab==4.0.3 46 | jupyterlab-pygments==0.2.2 47 | jupyterlab_server==2.24.0 48 | kiwisolver==1.4.4 49 | MarkupSafe==2.1.3 50 | matplotlib==3.7.2 51 | matplotlib-inline==0.1.6 52 | mistune==3.0.1 53 | nbclient==0.8.0 54 | nbconvert==7.7.3 55 | nbformat==5.9.2 56 | nest-asyncio==1.5.7 57 | notebook==7.0.1 58 | notebook_shim==0.2.3 59 | numpy==1.25.2 60 | overrides==7.3.1 61 | packaging==23.1 62 | pandas==2.0.3 63 | pandocfilters==1.5.0 64 | parso==0.8.3 65 | pexpect==4.8.0 66 | pickleshare==0.7.5 67 | Pillow==10.0.0 68 | platformdirs==3.10.0 69 | prometheus-client==0.17.1 70 | prompt-toolkit==3.0.39 71 | psutil==5.9.5 72 | ptyprocess==0.7.0 73 | pure-eval==0.2.2 74 | pycparser==2.21 75 | Pygments==2.15.1 76 | pyparsing==3.0.9 77 | python-dateutil==2.8.2 78 | python-json-logger==2.0.7 79 | pytz==2023.3 80 | PyYAML==6.0.1 81 | pyzmq==25.1.0 82 | referencing==0.30.0 83 | requests==2.31.0 84 | rfc3339-validator==0.1.4 85 | rfc3986-validator==0.1.1 86 | rpds-py==0.9.2 87 | scipy==1.11.1 88 | Send2Trash==1.8.2 89 | six==1.16.0 90 | sniffio==1.3.0 91 | soupsieve==2.4.1 92 | stack-data==0.6.2 93 | terminado==0.17.1 94 | tinycss2==1.2.1 95 | tomli==2.0.1 96 | tornado==6.3.2 97 | traitlets==5.9.0 98 | typing_extensions==4.7.1 99 | tzdata==2023.3 100 | uri-template==1.3.0 101 | urllib3==2.0.4 102 | wcwidth==0.2.6 103 | webcolors==1.13 104 | webencodings==0.5.1 105 | websocket-client==1.6.1 106 | zipp==3.16.2 107 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Probabilistic data-driven modeling 2 | 3 | This repo contains the code related to the book titled __Probabilistic data-driven 
modeling__, written by Tomaso Aste, professor of Complexity Science at University College London. The book introduces and guides the reader through a selection of methodologies and approaches to construct models from data. These data-driven approaches were originally developed in different fields, from statistics to complexity science. They are general procedures and tools that apply to any domain where models must be built from observational data. The code in the current repository is organised by Chapters and presents an applied perspective on modeling of real complex systems. 4 | 5 | ## How to 6 | 7 | In the following lines, we report the main steps to download, access and run the code provided in the current GitHub repo. 8 | 9 | #### Cloning the GitHub repository 10 | In order to clone the current GitHub repository, the user is required to have Git locally installed. If this is not the case, follow the instructions at https://github.com/git-guides/install-git. 11 | 12 | Once you have Git locally installed and correctly set up, clone this repository by typing `git clone https://github.com/FinancialComputingUCL/DataDrivenModeling.git`. 13 | 14 | #### Python installation 15 | All the provided code is written using the Python programming language. The user is required to have a Python version installed locally. If this is not the case, follow these steps to obtain it. 16 | - Access the URL: https://www.python.org/downloads/. 17 | - Download the latest stable version of the language (`3.11.4` at the time of writing). 18 | - Follow the prompted installation steps to correctly finalise the process. 19 | - Double check everything was correctly installed by typing `python --version` in a new Terminal. The reported Python version should coincide with the one you previously decided to download. 20 | 21 | #### Required packages' installation 22 | Using Python, packages can be managed in many different ways. 
We advise the user to use `virtualenv` to easily manage project-specific dependencies (i.e. packages). 23 | 24 | To create a new virtual environment, open a new Terminal in the chosen directory and follow these steps: 25 | - Type `which virtualenv` 26 | - _if_ `virtualenv not found` message is prompted: 27 | - Install `virtualenv` by typing `pip3 install virtualenv`. 28 | - Double check everything was correctly installed by typing the `which virtualenv` command. 29 | - _else_: 30 | - Skip to the next step. 31 | - Create a new virtual environment by typing `virtualenv <env_name>`. If you have multiple Python versions installed locally, be sure to specify the Python version to be used to create the new virtual environment (e.g. `virtualenv -p /usr/bin/python3.11.4 <env_name>`). 32 | - Source the newly created virtual environment by typing `source <env_name>/bin/activate`. 33 | - Install the packages required to run the code provided in the current GitHub repo by typing `pip3 install -r requirements.txt`. 34 | 35 | To temporarily deactivate your environment, type `deactivate`. 36 | To permanently delete your environment, type `sudo rm -rf <env_name>`. 37 | 38 | #### Running the code 39 | The majority of the code provided in the current GitHub repo is in the form of Jupyter Notebooks. Once you have completed all the previously listed steps, you can run each Notebook in the following way: 40 | - Access the Chapter's folder you are interested in (e.g., `cd ./Ch5`). 41 | - Type `jupyter notebook`. 42 | - Choose the `.ipynb` you are interested in and use the graphical interface to run all or specific cells. 
-------------------------------------------------------------------------------- /Ch6/Covariance_matrix_vs_Scale_Matrix.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "baf1360e", 6 | "metadata": {}, 7 | "source": [ 8 | "# Covariance matrix vs Scale matrix\n", 9 | "\n", 10 | "In linear algebra and statistics, the covariance matrix and the scale matrix (also known as variance-covariance matrix or dispersion matrix) are closely related concepts. The covariance matrix represents the covariance between multiple random variables, while the scale matrix contains the variances of individual variables along the diagonal and covariances between them in off-diagonal elements.\n", 11 | "\n", 12 | "The `covariance_matrix` function calculates the covariance matrix. To do this, we first center the data by subtracting the mean of each column from the corresponding column elements. Then we compute the covariance matrix by multiplying the transposed centered data by itself and dividing by `(num_samples - 1)` to obtain an unbiased estimator.\n", 13 | "\n", 14 | "The `scale_matrix` function calculates the scale matrix, which is equivalent to the variance-covariance matrix. We use NumPy's `cov` function with `rowvar=False` (each column is a variable) and `bias=True` (normalisation by `num_samples` instead of `num_samples - 1`) to compute the variance-covariance matrix for the given data.\n", 15 | "\n", 16 | "Finally, we print both matrices and check if they are equal within a small tolerance using `np.allclose`. With the same normalisation the two estimates would be identical up to floating-point error, which is why we compare them with `np.allclose` rather than testing for exact equality. Here, however, the normalisations differ, so the two matrices differ by a factor of `(num_samples - 1) / num_samples` and the check reports `False` at the default tolerances; loosening `rtol` makes the check pass." 
17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 4, 22 | "id": "5098098d", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import numpy as np" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 7, 32 | "id": "6f5f43a7", 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | "Covariance Matrix:\n", 40 | "[[ 1.07152674 -0.01672361 0.04491799 0.03792043 0.04564139]\n", 41 | " [-0.01672361 1.01288579 0.00360372 0.00975283 0.06587266]\n", 42 | " [ 0.04491799 0.00360372 1.00234635 -0.03316989 -0.00862546]\n", 43 | " [ 0.03792043 0.00975283 -0.03316989 0.9391796 -0.01612904]\n", 44 | " [ 0.04564139 0.06587266 -0.00862546 -0.01612904 0.94788399]]\n", 45 | "\n", 46 | "Scale Matrix (Variance-Covariance Matrix):\n", 47 | "[[ 1.07045521 -0.01670689 0.04487308 0.0378825 0.04559575]\n", 48 | " [-0.01670689 1.0118729 0.00360011 0.00974308 0.06580679]\n", 49 | " [ 0.04487308 0.00360011 1.00134401 -0.03313672 -0.00861683]\n", 50 | " [ 0.0378825 0.00974308 -0.03313672 0.93824042 -0.01611291]\n", 51 | " [ 0.04559575 0.06580679 -0.00861683 -0.01611291 0.94693611]]\n", 52 | "\n", 53 | "Are the Covariance Matrix and Scale Matrix equal? 
False\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "def generate_data(num_samples, num_features):\n", 59 | " # Generate random data with mean 0 and variance 1\n", 60 | " return np.random.randn(num_samples, num_features)\n", 61 | "\n", 62 | "def covariance_matrix(data):\n", 63 | " # Calculate the covariance matrix\n", 64 | " mean_centered_data = data - np.mean(data, axis=0)\n", 65 | " return np.dot(mean_centered_data.T, mean_centered_data) / (data.shape[0] - 1)\n", 66 | "\n", 67 | "def scale_matrix(data):\n", 68 | " # Calculate the scale matrix (variance-covariance matrix)\n", 69 | " return np.cov(data, rowvar=False, bias=True)\n", 70 | "\n", 71 | "def main():\n", 72 | " # Define the dimensions of the data\n", 73 | " num_samples = 1000\n", 74 | " num_features = 5\n", 75 | "\n", 76 | " # Generate random data\n", 77 | " data = generate_data(num_samples, num_features)\n", 78 | "\n", 79 | " # Calculate the covariance matrix and scale matrix\n", 80 | " cov_matrix = covariance_matrix(data)\n", 81 | " scale_matrix_result = scale_matrix(data)\n", 82 | "\n", 83 | " # Output the covariance matrix and scale matrix\n", 84 | " print(\"Covariance Matrix:\")\n", 85 | " print(cov_matrix)\n", 86 | "\n", 87 | " print(\"\\nScale Matrix (Variance-Covariance Matrix):\")\n", 88 | " print(scale_matrix_result)\n", 89 | "\n", 90 | " # Check if both matrices are equal (within a small tolerance). 
Try to tune rtol and atol to see the difference.\n", 91 | " matrices_equal = np.allclose(cov_matrix, scale_matrix_result, rtol=1e-05, atol=1e-08)\n", 92 | " print(\"\\nAre the Covariance Matrix and Scale Matrix equal?\", matrices_equal)\n", 93 | "\n", 94 | "if __name__ == \"__main__\":\n", 95 | " main()" 96 | ] 97 | } 98 | ], 99 | "metadata": { 100 | "kernelspec": { 101 | "display_name": "Python 3 (ipykernel)", 102 | "language": "python", 103 | "name": "python3" 104 | }, 105 | "language_info": { 106 | "codemirror_mode": { 107 | "name": "ipython", 108 | "version": 3 109 | }, 110 | "file_extension": ".py", 111 | "mimetype": "text/x-python", 112 | "name": "python", 113 | "nbconvert_exporter": "python", 114 | "pygments_lexer": "ipython3", 115 | "version": "3.8.12" 116 | } 117 | }, 118 | "nbformat": 4, 119 | "nbformat_minor": 5 120 | } 121 | -------------------------------------------------------------------------------- /Ch5/Central_Limit_Theorem.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "e9a36957", 6 | "metadata": {}, 7 | "source": [ 8 | "# Central Limit Theorem\n", 9 | "\n", 10 | "In this code, we generate multiple samples of random variables and calculate the mean of each sample. The means are then stored in the “sample_means” list. We plot a histogram of these sample means, which represents the empirical distribution of the means.\n", 11 | "\n", 12 | "The code also calculates the theoretical mean and standard deviation of the distribution of sample means based on the number of variables being summed. It then plots the corresponding normal distribution curve using these theoretical parameters.\n", 13 | "\n", 14 | "By running this code, you can observe how the empirical distribution of sample means converges towards a normal distribution, as predicted by the Central Limit Theorem. The histogram should resemble the shape of the normal distribution curve." 
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "id": "d0ae6803", 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "import numpy as np\n", 25 | "import matplotlib.pyplot as plt" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "id": "8aab2190", 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "image/png": "\n", 37 | "text/plain": [ 38 | "
" 39 | ] 40 | }, 41 | "metadata": { 42 | "needs_background": "light" 43 | }, 44 | "output_type": "display_data" 45 | } 46 | ], 47 | "source": [ 48 | "# Define the number of samples to draw\n", 49 | "num_samples = 1000\n", 50 | "\n", 51 | "# Define the number of random variables to sum\n", 52 | "num_variables = 10\n", 53 | "\n", 54 | "# Store the means of each sample\n", 55 | "sample_means = []\n", 56 | "\n", 57 | "# Generate samples and calculate means\n", 58 | "for _ in range(num_samples):\n", 59 | " # Generate a sample of random variables\n", 60 | " sample = np.random.rand(num_variables)\n", 61 | " \n", 62 | " # Calculate the mean of the sample\n", 63 | " sample_mean = np.mean(sample)\n", 64 | " \n", 65 | " # Store the sample mean\n", 66 | " sample_means.append(sample_mean)\n", 67 | "\n", 68 | "# Plot the histogram of sample means\n", 69 | "plt.hist(sample_means, bins=30, density=True, alpha=0.5, color='blue')\n", 70 | "\n", 71 | "# Calculate the theoretical mean and standard deviation\n", 72 | "theoretical_mean = 0.5\n", 73 | "theoretical_std = 1 / np.sqrt(12 * num_variables)\n", 74 | "\n", 75 | "# Generate a range of x values\n", 76 | "x = np.linspace(theoretical_mean - 3 * theoretical_std, theoretical_mean + 3 * theoretical_std, 100)\n", 77 | "\n", 78 | "# Calculate the corresponding y values using the normal distribution formula\n", 79 | "y = 1 / (np.sqrt(2 * np.pi * theoretical_std**2)) * np.exp(-(x - theoretical_mean)**2 / (2 * theoretical_std**2))\n", 80 | "\n", 81 | "# Plot the theoretical normal distribution curve\n", 82 | "plt.plot(x, y, color='red', linewidth=2)\n", 83 | "\n", 84 | "# Set plot labels and title\n", 85 | "plt.xlabel('Sample Mean')\n", 86 | "plt.ylabel('Probability Density')\n", 87 | "plt.title('Convergence towards Normal Distribution')\n", 88 | "\n", 89 | "# Display the plot\n", 90 | "plt.show()" 91 | ] 92 | } 93 | ], 94 | "metadata": { 95 | "kernelspec": { 96 | "display_name": "Python 3 (ipykernel)", 97 | "language": "python", 98 | 
"name": "python3" 99 | }, 100 | "language_info": { 101 | "codemirror_mode": { 102 | "name": "ipython", 103 | "version": 3 104 | }, 105 | "file_extension": ".py", 106 | "mimetype": "text/x-python", 107 | "name": "python", 108 | "nbconvert_exporter": "python", 109 | "pygments_lexer": "ipython3", 110 | "version": "3.8.12" 111 | } 112 | }, 113 | "nbformat": 4, 114 | "nbformat_minor": 5 115 | } 116 | -------------------------------------------------------------------------------- /Ch6/Marginal_vs_conditional_distribution_functions..ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "7a243bc4", 6 | "metadata": {}, 7 | "source": [ 8 | "# Marginal and conditional distribution functions.\n", 9 | "\n", 10 | "To demonstrate the comparison between marginal and conditional probability distribution functions, we'll create a simple example using Python and matplotlib. We'll consider two random variables, X and Y, and visualize their marginal and conditional probability distributions.\n", 11 | "\n", 12 | "The `marginal_prob_distribution` function calculates the marginal probability distribution of a random variable. It uses NumPy's histogram function to calculate the relative frequencies of different values in the data.\n", 13 | "\n", 14 | "The `conditional_prob_distribution` function calculates the conditional probability distribution of X given a specific value of Y. It filters the data to consider only those instances where Y equals the specified value and then calculates the relative frequencies using the `histogram` function.\n", 15 | "\n", 16 | "The comparison between the marginal and conditional probability distributions allows us to observe how the probabilities of individual outcomes change when we condition on specific values of another random variable. It helps in understanding the relationship between the two variables and their joint behavior." 
17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 5, 22 | "id": "2190e2ec", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import numpy as np\n", 27 | "import matplotlib.pyplot as plt" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 6, 33 | "id": "879f870a", 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "image/png": "\n", 39 | "text/plain": [ 40 | "
" 41 | ] 42 | }, 43 | "metadata": { 44 | "needs_background": "light" 45 | }, 46 | "output_type": "display_data" 47 | } 48 | ], 49 | "source": [ 50 | "def generate_data(num_samples):\n", 51 | " # Generate random data for variables X and Y\n", 52 | " X = np.random.randint(1, 7, num_samples) # Random integers from 1 to 6 (inclusive)\n", 53 | " Y = np.random.randint(1, 7, num_samples)\n", 54 | " return X, Y\n", 55 | "\n", 56 | "def marginal_prob_distribution(data):\n", 57 | " # Calculate the marginal probability distribution of a random variable\n", 58 | " return np.histogram(data, bins=np.arange(1, 8), density=True)[0]\n", 59 | "\n", 60 | "def conditional_prob_distribution(data_x, data_y, value_y):\n", 61 | " # Calculate the conditional probability distribution of X given a specific value of Y\n", 62 | " mask = data_y == value_y\n", 63 | " conditional_data = data_x[mask]\n", 64 | " return np.histogram(conditional_data, bins=np.arange(1, 8), density=True)[0]\n", 65 | "\n", 66 | "def main():\n", 67 | " # Define the number of samples\n", 68 | " num_samples = 10000\n", 69 | "\n", 70 | " # Generate random data for X and Y\n", 71 | " X, Y = generate_data(num_samples)\n", 72 | "\n", 73 | " # Calculate the marginal probability distributions of X and Y\n", 74 | " marginal_X = marginal_prob_distribution(X)\n", 75 | " marginal_Y = marginal_prob_distribution(Y)\n", 76 | "\n", 77 | " # Choose a specific value for Y to calculate the conditional probability distribution of X\n", 78 | " value_Y = 3\n", 79 | " conditional_X_given_Y = conditional_prob_distribution(X, Y, value_Y)\n", 80 | "\n", 81 | " # Plot the results\n", 82 | " plt.figure(figsize=(10, 4))\n", 83 | "\n", 84 | " plt.subplot(1, 2, 1)\n", 85 | " plt.bar(np.arange(1, 7), marginal_X, width=0.4, label=\"X\")\n", 86 | " plt.bar(np.arange(1, 7) + 0.4, marginal_Y, width=0.4, label=\"Y\")\n", 87 | " plt.xlabel(\"Values\")\n", 88 | " plt.ylabel(\"Probability\")\n", 89 | " plt.title(\"Marginal Probability Distribution\")\n", 90 | 
" plt.legend()\n", 91 | "\n", 92 | " plt.subplot(1, 2, 2)\n", 93 | " plt.bar(np.arange(1, 7), conditional_X_given_Y, width=0.4)\n", 94 | " plt.xlabel(\"Values of X (Conditional on Y=3)\")\n", 95 | " plt.ylabel(\"Probability\")\n", 96 | " plt.title(\"Conditional Probability Distribution of X given Y=3\")\n", 97 | "\n", 98 | " plt.tight_layout()\n", 99 | " plt.show()\n", 100 | "\n", 101 | "if __name__ == \"__main__\":\n", 102 | " main()" 103 | ] 104 | } 105 | ], 106 | "metadata": { 107 | "kernelspec": { 108 | "display_name": "Python 3 (ipykernel)", 109 | "language": "python", 110 | "name": "python3" 111 | }, 112 | "language_info": { 113 | "codemirror_mode": { 114 | "name": "ipython", 115 | "version": 3 116 | }, 117 | "file_extension": ".py", 118 | "mimetype": "text/x-python", 119 | "name": "python", 120 | "nbconvert_exporter": "python", 121 | "pygments_lexer": "ipython3", 122 | "version": "3.8.12" 123 | } 124 | }, 125 | "nbformat": 4, 126 | "nbformat_minor": 5 127 | } 128 | -------------------------------------------------------------------------------- /Ch8/Least_squares_solution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "03be8cf1", 6 | "metadata": {}, 7 | "source": [ 8 | "# Least squares solution of linear regression and correlation coefficients\n", 9 | "\n", 10 | "In this notebook we will underline the relationship between least squares solution of linear regression and correlation coefficients.\n", 11 | "\n", 12 | "### Linear Regression and Correlation Coefficients\n", 13 | "\n", 14 | "In linear regression analysis with two random variables $X$ and $Y$, we aim to compute coefficients that best represent their linear relationship:\n", 15 | "\n", 16 | "$ Y = b + \\beta X + \\epsilon $\n", 17 | "\n", 18 | "The objective is to minimize the variance of the error $ \\text{Var}(\\epsilon) $, and this approach is known as the least squares method. 
The error variance can be represented as:\n", 19 | "\n", 20 | "$ \\text{Var}(\\epsilon) = \\text{Var}(Y) + \\beta^2 \\text{Var}(X) - 2\\beta \\text{Cov}(X,Y) $\n", 21 | "\n", 22 | "Minimising this expression with respect to $\\beta$ gives $\\beta = \\text{Cov}(X,Y) / \\text{Var}(X)$; substituting this value back, we obtain:\n", 23 | "\n", 24 | "$ \\frac{\\text{Var}(\\epsilon)}{\\text{Var}(Y)} = (1 - \\text{Corr}(X, Y)^2) $\n", 25 | "\n", 26 | "### Coefficient of Determination ($R^2$)\n", 27 | "\n", 28 | "The result above shows that the squared correlation coefficient, $R^2$, is a measure of the goodness of fit of the linear model:\n", 29 | "\n", 30 | "$ R^2 = \\text{Corr}(X,Y)^2 $\n", 31 | "\n", 32 | "A high $R^2$ value, near 1, indicates that the linear model aptly describes the dependency between the variables. Conversely, when $R^2$ approaches 0, the linear model poorly describes the relationship between the variables." 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "id": "7b4e4b62", 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "import numpy as np\n", 43 | "import matplotlib.pyplot as plt" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 3, 49 | "id": "3b16b0d2", 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX4AAAEWCAYAAABhffzLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAA5P0lEQVR4nO2de3gV1dX/P4sYIAgmWDClwQq+WhS5Ey+F9n0TsVLvKbZWH2vBVqmvCviWUrH6a7VqwZf21Xqpl1a0VlpEULy2YoVoS9UKogVBihdUguIFEokECcn6/XEm8eRk5uTc5sycc9bnec7DObNnZq/ZGb6zZ+211xZVxTAMwygcugVtgGEYhpFdTPgNwzAKDBN+wzCMAsOE3zAMo8Aw4TcMwygwTPgNwzAKDBN+w3dE5KsisjFoO/IBEXlFRKrytT4jO5jwGxlDRDaLyHGx21X1b6o6JAibYhGRK0WkWUQaRaReRP4hIl8O2q5EUdUjVLU20+cVkVoR2e20S9vny9H1OW13b6brNrKPCb+Rt4jIPh5F96lqb6AfsAK434e6RURy7f/XxaraO+rzbNAGGf6QazemkYOISJWIbIn6vVlEfiQi/xKRBhG5T0R6RpWfLCIvRfXIR0SVzRaR10Vkp4isF5FvRJVNEZGVInK9iHwEXBnPLlXdCywAKkSkv3OOUhG5U0TeFZE6EblGRIqcsiIR+ZWIfCgib4rIxSKibQ8Yp9d8rYisBHYBB4vIYSLypIhsF5GNInJGlL0nOtew06nrR872fiLyqHP920Xkb20Pkei3KhHpISI3iMhW53ODiPSIbnMRmSki7zvXc24Kf7vNInKciHwd+Anwbedt4OVkz2WEBxN+IyjOAL4ODAZGAFMARGQ0MB/4AfA54Hbg4TZBA14HvgqUAlcB94rIgKjzHg28AZQD18YzQES6A98FPgJ2OJvvBvYChwCjgeOB85yy84ETgFHAGKDG5bTnAFOBPsAHwJPAH4EDgDOB34jIUGffO4EfqGofYBiw3Nk+E9gC9Heu4yeAW26Vy4FjHHtGAkcBV0SVf55IO1UA3wduEZG+3i3ijar+BfgFztuSqo5M5TxGODDhN4LiRlXdqqrbgUeIiBdERPN2VX1eVVtU9ffAp0QEDlW93zmuVVXvAzYREbw2tqrqTaq6V1WbPOo+Q0TqgSYiYv5NVd0rIuXAicAlqvqJqr4PXE9EsCHysPq1qm5R1R3AXJdz362qrzhvE18HNqvqXY49a4AlwLecfZuBoSKyn6ruUNUXo7YPAA5S1WZnjMRN+M8Gfq6q76vqB0QehOdElTc75c2q+jjQCMQba7nRecuoF5EX4+xn5Dgm/EZQvBf1fRfQ2/l+EDAzSoDqgQOBLwCIyHej3ED1RHrK/aLO9U4CdS9S1TIivel1wNiououBd6POfzuR3jqODdHnd6srettBwNEx13I2kZ44wOlEHjRvicjTUYPM84DXgGUi8oaIzPa4ji8Ab0X9fsvZ1sZHzgOojeh2dmO6qpY5nzFx9jNyHK/BL8MIineAa1W1k5tGRA4CfgtMAJ5V1RYReQmQqN0STjerqh+KyFRglYj80an7U6BfjGC28S4wMOr3gW6njbmWp1X1ax71vwCcJiLFwMXAIuBAVd1JxN0zU0SGActF5AVVfSrmFFuJPFxecX5/0dnmF5bKN0+wHr+RaYpFpGfUJ9nOxW+BC0TkaCcyZl8ROUlE+gD7EhGfDwCcwcph6RirqhuBJ4Afq+q7wDLgVyKyn4h0E5H/EJH/cnZfBMwQkQoRKQMu7eL0jwJfEpFzRKTY+RwpIoeLSHcROVtESlW1GfgYaHWu62QROUREBGgAWtrKYvgTcIWI9BeRfsBPAT/DLbcBgyT3opWMGOwPaGSax4n4zts+VyZzsKquIuJ3v5nIgOtrOAO/qroe+BXwLBERGg6szIDN84CpInIAkcHe7sB6p/7FRPztEHkoLQP+Bawhcq17iQiz27XsJDI4fCa
Rnvh7wHVA20D1OcBmEfkYuICIGwjgUOCvRHzyzwK/UdUVLlVcA6xy7FkLvOhs84u2sNePbAwgtxFbiMUwUkNETgBuU9WDgrbFMJLBevyGkSAiUuLE3u8jIhXAz4AHg7bLMJLFevyGkSAi0gt4GjiMiBvrMWCGqn4cqGGGkSQm/IZhGAWGuXoMwzAKjJyI4+/Xr58OGjQoqWM++eQT9t13X38MShOzLTXMtuQJq11gtqVKMratXr36Q1Xt36lAVUP/GTt2rCbLihUrkj4mW5htqWG2JU9Y7VI121IlGduAVeqiqebqMQzDKDBM+A3DMAoME37DMIwCIycGd91obm5my5Yt7N6927W8tLSUDRs2ZNmqxDDbUsNP23r27MnAgQMpLi725fyGESZyVvi3bNlCnz59GDRoEJFcVh3ZuXMnffr0CcCyrjHbUsMv21SVjz76iC1btjB48OCMn98wwkbOCv/u3bs9Rd8wkkFE+NznPscHH3wQtClGllm6po55T2xka30TXygrYdbEIdSMrgjaLN/xzccvIvOdtT7XRW37loi8IiKtIlKZgTrSPYVhAHYvFSJL19Rx2QNrqatvQoG6+iYue2AtS9fUBW2a7/g5uHs3kaXnolkHTAKe8bFewzCMLpn3xEaamjtm1G5qbmHeExsDsih7+Cb8qvoMsD1m2waNLHyRF2zZsoXTTjuNQw89lP/4j/9gxowZ7Nmzx3XfrVu38s1vfrPLc5544onU19enZM+VV17JL3/5S9ftFRUVjBo1ikMPPZRJkyaxfv36Ls939913s3Wrnws6GUZwbK13X5LZa3s+EVofv7Mk3lSA8vJyamtrO5SXlpayc+dOz+NbWlrilqeLqnLaaadx3nnnce+999LS0sL06dOZNWsW11zTcS2MvXv30qdPH+666y527twZ17b77rsPICXbP/30U4qLizsd++mnn3LhhRcyffp0AJYsWUJ1dTXPPfcc/fr167BvtG133nkngwcPDs1gr99/0927d3e6zxKlsbEx5WP9JKx2QfC2zR7Vyp6WzgubdS/qFrht8ciEbaEVflW9A7gDoLKyUquqqjqUb9iwIa4gxUaAZHoQ56mnnmLfffflv//7v9u33XzzzQwePJg5c+awaNEiHnjgARobG2lpaeH3v/89J598MuvWrWPbtm1MmzaNdevWMWTIELZu3cott9xCZWUlgwYNYtWqVTQ2NnLCCSfwla98hX/84x9UVFTw0EMPUVJSwm9/+1vuuOMO9uzZwyGHHMIf/vAHevXqRY8ePejRo0endondPmXKFJYvX87DDz/MjBkz+PnPf84jjzxCU1MTRx55JPPnz2fJkiWsWbOGqVOnUlJSwrPPPsu8efPa9xs3bhy33357Vn3jfkcc9ezZk9GjR6d0bG1tLbH3aBgIq10QvG31jo8/2t1TUlzEnEnD6d2wKa/brSAmcPkxiPPKK68wduzYDtv2228/vvjFL/Laa68B8OKLL7J48WKefvrpDvv97ne/o2/fvqxfv56rr76a1atXu9axadMmLrroIl555RXKyspYsmQJAJMmTeKFF17g5Zdf5vDDD+fOO+9M2v4xY8bw6quvAnDxxRfzwgsvsG7dOpqamnj00Uf55je/SWVlJQsWLOCll16ipKTEdT/DyFVqRlcwZ9JwKspKEKCirIQ5k4YXRFRPaHv8mSTeII6ff+Svfe1r7L///p22P/vss8ycOROAYcOGMWLECNfjBw8ezKhRowAYO3YsmzdvBmDdunVcccUV1NfX09jYyMSJE5O2TaPWYVixYgX/+7//y65du/joo48YNWoUp5xySqdjovfbvn07RxxxhOt+hpEr1IyuKAihj8XPcM4/EVkoeoiIbBGR74vIN0RkC/Bl4DERecKv+qPxYxBn6NChnXrqH3/8MW+//TaHHHIIQNppXXv06NH+vaioiL179wIRV83NN9/M2rVr+dnPfuY5ezkea9as4fDDD2f37t1ceOGFLF6
8mLVr1zJ58mTX88Xud/7556dUr2EYweNnVM9ZqjpAVYtVdaCq3qmqDzrfe6hquaom31VNgS+UlSS1PREmTJjArl27uOeee4DIwOPMmTOZMmUKvXr1invsMcccw6JFiwBYv349a9euTarunTt3MmDAAJqbm1mwYEHSti9ZsoRly5Zx1llntYt3v379aGxs5KGHHmrfr0+fPu2DqbH7LV68OOl6DcNInKVr6hg/dzmDZz/G+LnLMzq/oCB8/LMmDqGkuKjDtpLiImZNHJLyOUWEBx98kPvvv59DDz2UL33pS/Ts2ZNf/OIXXR573nnn8cEHHzB06FCuuOIKjjjiCEpLSxOu++qrr+boo49m/PjxHHbYYQkdc/3117eHc957770sX76c/v37U1ZWxvnnn8+wYcOYOHEiY8aMaT9mypQpXHDBBYwaNYoePXp02O/II49M2F7DMJLD78llObHmbmVlpa5atarDtg0bNnD44Yd7HuN3VE861NfX07NnT3r27Mnrr7/Occcdx8aNG+nevXsg9kRTiLl62ujqnopH0BEqXoTVLjDb4jF+7nLqXFzRFWUlXHtMt4RtE5HVqtopS0JBDO5CuAZxdu3axXHHHUdzczOqym9+85tQiL5hGOEg/rhk+ktCFozwh4k+ffoQ+wZjGIbRxhfKSlx7/OmMS0ZTED5+wzCMXMKPccloTPgNwzBCht+Ty8zVYxiGEUL8HJe0Hr9hGEaBYcKfBr179+607bbbbmuf1JUtqqqqGDJkCCNHjuTII4/kpZdeymr98Xj44YeZO3duRs41YMCATtuCaG/DyHXM1ZNhLrjgAl/Pr6qoKt26dXxmL1iwgMrKSu666y5mzZrFk08+mXZdLS0tFBUVdb1jHE499VROPfXUtG3xwu/2NgqbMM3/ySTW488w0YuhVFVVcemll3LUUUfxpS99ib/97W9ARFBnzZrFkUceyYgRI7j99tuBSJ7tCRMmMGbMGIYPH96ePmHz5s0MGTKE7373uwwbNox33nnHs/4vf/nL1NVFZvd98sknfO973+Ooo45i9OjR7efbtWsXZ5xxBkOHDuUb3/gGRx99dHt46YABA5g5cyYjR47k2Wef5d577+Woo45i1KhR/OAHP6ClpYWWlhamTJnCsGHDGD58ONdffz0AN954I0OHDmXEiBGceeaZQGQxl4svvrj9Oo499lhGjBjBhAkTePvtt4HIDOHp06czbtw4Dj744KTSQaTT3oYRj3xemjE/evyXXAIx7o2SlhZIp7c6ahTccEMaRkXYu3cv//znP3n88ce56qqr+Otf/8o999xDaWkpL7zwAp9++injx4/n+OOP58ADD+TBBx9kv/3248MPP+SYY45p7y1v2rSJ3//+9xxzzDFx6/vLX/5CTU0NANdeey3HHnss8+fPp76+nqOOOorjjjuOW2+9tT0t9Lp169ozgELkYXH00Ufzq1/9ig0bNnDdddexcuVKiouLufDCC1mwYAFHHHEEdXV1rFsXWU65bcWwuXPn8uabb9KjRw/XVcSmTZvG5MmTmTx5MvPnz2f69OksXboUgHfffZe///3vvPrqq5x66qkJrVaWaHvfeeedru09ePDglOowCoOulmbM5TeB/BD+EDNp0iSgY1rl5cuXs379+vaebUNDA5s2bWLgwIH85Cc/4ZlnnqFbt27U1dWxbds2AA466KC4on/22WezZ88eGhsb2338y5Yt4+GHH27vEe/evZu3336bv//978yYMQPonBa6qKiI008/HYgsNrN69er2vDxNTU0ccMABnHLKKbzxxhtMmzaNk046ieOPPx6AESNGcPbZZ1NTU9P+8Inm2Wef5YEHHgDgnHPO4cc//nF7WU1NDd26dWPo0KHt15wKbu29bNky/vWvf3VqbxP+zJGPLhGv2bNtPf+2h0LbbyBnrjk/hN+lZ94UkpwzbamVo9Mqqyo33XRTpzz6d999Nx988AGrV6+muLiYQYMGtWfF7CrF84I
FCxg7diyzZs1i2rRpPPDAA6gqS5YsYciQxCd99OzZs92vr6pMnjyZOXPmdNrv5Zdf5oknnuC2225j0aJFzJ8/n8cee4xnnnmGRx55hGuvvTaprKPRKajTyR+VTHsbmWFpzEpWuSiEbnjNni0Syej6HkE8NM3HHwATJkzg1ltvpbm5GYB///vffPLJJzQ0NHDAAQdQXFzMihUreOutt5I6r4hw9dVX89xzz/Hqq68yceJEbrrppnYhXbNmDQDjx49PKC30hAkTWLx4Me+//z4A27dv56233uLDDz+ktbWV008/nWuuuYYXX3yR1tZW3nnnHaqrq7nuuutoaGigsbGxw/nGjRvHwoULgciD6qtf/WpS15cqEydOdG1vIzN05RLJVbxmz7Z4dExSWd8jqHGE/OjxB8SuXbsYOHBg++8f/vCHCR03efJk3nvvPcaMGYOq0r9/f5YuXcrZZ5/NKaecwvDhw6msrEw45XI0JSUlzJw5k3nz5nHzzTdzySWXMGLECFpbWxk8eDCPPvooF154IZMnT2bo0KEcdthhnmmhhw4dyjXXXMPxxx9Pa2srxcXF3HLLLZSUlHDuuefS2hpZqHrOnDm0tLTwne98h4aGBlSV6dOnU1ZW1uF8N910E+eeey7z5s2jf//+3HXXXUldW6rtfd5557F58+ZO7W1kBj8WOgoDbb3u2N74vCc2ZiyPTlCrAxZMWuYwEbRtLS0tNDc3u6aFDtq2eFha5uTJhl3xUgivnH2s53FhbTOIb1usaws+W6Q9WbEePPsx3BRYgDfnnpS0bZ3O45GW2c+lF+eLyPsisi5q2/4i8qSIbHL+7etX/YY3u3bt4itf+QojR47kG9/4hqWFNtLC74RiYSOTeXT8WB0wEfx09dwN3AxET6ucDTylqnNFZLbz+1IfbTBcsLTQRibxconk8sBuV2Qqj86siUNc3x78fmj6Jvyq+oyIDIrZfBpQ5Xz/PVBLGsKvqohIqocbRju54PIMM2Fa6CiXCOqh6auP3xH+R1V1mPO7XlXLnO8C7Gj77XLsVGAqQHl5+di2aJA2evfuTXl5OaWlpa7in4l0A35htqWGX7apKg0NDWzbtq1TJFKiNDY2uuZuCpqw2gVmW6okY1t1dbWrjz8w4Xd+71DVLv38boO7zc3NbNmypT3OPZbdu3fTs2fPNKz3D7MtNfy0rWfPngwcOJDi4uKUjg/rQGVY7QKzLVUyMbib7XDObSIyQFXfFZEBwPupnqi4uDjuzMva2lpGjx6d6ul9xWxLjTDbZoSHfJxFnGmyLfwPA5OBuc6/D2W5fsMw8piwzSIO60PIz3DOPwHPAkNEZIuIfJ+I4H9NRDYBxzm/DcMwMkKYZhGHObunn1E9Z3kUTfCrTsMwCpswzSIOalZuIliuHsMw8oagJkS5ES+75/i5ywPt+ZvwG4aRNwQ1i3jpmjrGz13O4NmPtYt6vIdNl26fe+4BkchnxYqM22vCbxiGL7iJod9kMp1Conj58qsP69/pIRRNp7GH1laYOTMi9pMnf7Z93LiM22zZOQ0jxwlj5EiQ0TXZnkXs5ctf8eoHzJk03DObJzjuoE2b4Etf6ljQty+sXg0+LRZkPX7DyGHCGjkSpugav4k3oFwzuoKVs4+lwsXtM/OZP/DmdSd3FP2qKti5E7Zv9030wXr8hpHThDVyJEzRNX7jtVJXtI8/Ohnb5utOdj/Rnj2Q4szxZDHhN4wcJqwCm4gY5guJZNisGV1BzZiBbodDAAkCzdVjGDlMmMIXoymkHP1xB5Q/+uiz6JwoNpcN4PAr/szSF7cEYrP1+A0jhwkqn3tXFFqO/k4DynfdBWO+12m/G7/8bf7vP8+J/AjQJWfCbxg5TJgFtiBz9H/uc5GB2RhOnHIj68sP7rQ9KJecCb9h5DgFKbBhw2tBqL17oaiIhrnLIURjHubjNwwjpwliohgQGZR18d+3l6mCs3BQ2MY8TPgNw8hZApnHsG5dROy7uchnm+D
HEMSM4niYq8cwjJwlq/MYzjgD7r+/8/bp0+HXv+7y8DC55Ez4DcPIWbIyj8HLf79+PRx+eObqySIm/IZh5Cy+ThTzEvwAJlxlmkCEX0RmAOcDAvxWVW8Iwg7DMHKP6KR0pSXFFBcJzS2fiXFag6Z790JxMVVuZXkg+G1kfXBXRIYREf2jgJHAySJySLbtMAwj94gdzK1vagaFvr2K0xs0ffjhSA/fLVeOx4BtLhNEj/9w4HlV3QUgIk8Dk4D/DcAWwzByCLfB3OZWpVf3fVjz0+OTP6GHO+edb32LAxctSsXEnEA0y08yETkceAj4MtAEPAWsUtVpMftNBaYClJeXj124cGFS9TQ2NtK7d++M2JxpzLbUMNuSJ6x2QWq2ra1r8CwbXlGa8Hmqqqtdt//j/vvZ069fJ9vqm5rZ1rCbPS2tdC/qRnlpT8pKspNJM5Zk2q26unq1qlbGbs+68AOIyPeBC4FPgFeAT1X1Eq/9KysrddWqVUnVUVtbS1VVVRpW+ofZlhpmW/KE1S5Izbbxc5e7DuYWifCrM0Z27eJJcMA22rbYRWUgMo6QqTj8ZBfSSabdRMRV+AOZwKWqd6rqWFX9T2AH8O8g7DAMI7dwmwEL0KLqPXFrx46uZ9jGwc9FZYJaSCcQ4ReRA5x/v0jEv//HIOwwDCO3aJsBW+Qi4p3EePbsiNjvv3/nEyUxYOvnXIGgVioLKmXDEhFZDzwCXKSq9QHZYRiGT/iVQ6dmdAWtHqK9tb7ps979ddd1LLzyypQidPxc8yCohXQCieNX1a8GUa9hGNnB78XW3SZueS5p2NAA++0X19ZYH3tZVLmfax4EtVKZJWkzDCPj+O3CiPb1b77uZHfRb+vddyH6bj72+qbm9n38TLAWVNZOS9lgGEbG8duFUbPPdmquOcG9MAlXjtcDaltDc4dtfiVYC2ohHRN+w8gyyYbv5SK+uTDKy+H9993LUghN93oQ7WlpTfpcqRJE1k5z9RhGFgkqfC/bZNyF0TZgGyv6t92WVkoFrwdR96L8lsb8vjrDCBlBhe/5hVfkTs3oCk4fW9EedlkkwuljU+jZesXfNzdHxP4HP0jLfq8HVHlpz7TOG3bM1WMYWSSo8D0/iBe5A7BkdR0tTk+8RZUlq+uoPGj/rsVf1X11q7ayJOzryqXm5WMva9iUcD25iAm/YWSRoML3/KCrt5ekV8a691445xz3siRdOcmEk7r52Gtr81v4zdVjGFkkbItup0O8t5ek3mza3Dluop+i/z7fXGqZxoTfMLJI2BbdTod4M1oTme1aVV3t7r9fujTtHPj55FLzA3P1GEaWCdOi2+nQ1YxWzzKPDJmDLn00kvXyi8OpcbalGvqaTy41PzDhNwwjJRKZfNRWNmjfbqz46QlwTefzDLr00fbv0eMA6aR98DPNQj5gwm8YeUhbT/nMA3dy+dzlvk0Si/f2UjO6gprfXgu33upaftO9S/nV2s4S1OaOieen7+pagpoRmyuY8BtGntGhp3xg5hOkJYTXgifQ7rvvvvAR1+I2d0y6fvp8can5gQ3uGkaeEWhEi9eEqzVrOg3Ylpf2jBvh5Gc65ELHhN8w8oxAIlq6WuFq1KhORWUlxa4RTvDZEouxZzQ/fWYwV49h5BlZi2h5+2046CD3sgRDMWPdMbEDugqI82+F+ekzRlBLL/6PiLwiIutE5E8ikt+JMQwji/g+SeyggyK9ezfRTzP+3s1N1Sb6K2cfa6KfIbLe4xeRCmA6MFRVm0RkEXAmcHe2bTGMfCE23v30sRWsePUDYGfmespeA7Zjx8KqVemd28EmXmWHoFw9+wAlItIM9AK2BmSHYeQ8bvHuS1bXMWfScMoaNjHt7Kr0KvAS/G3b4IAD0jt3DKUlxR1Wv2rDBnQzi2gar2UpVyoyA7gWaAKWqerZLvtMBaYClJeXj124cGFSdTQ2NtK7d+8MWJt5zLbUMNvc2fjeTteFQ7oXdaOit6RsV1V1tev2pY8vo6y
kOKVzRhPbZvVNzWzZ0USsJgnCwP1LMlJnqrb5RX1TM9sadrOnpZXuRd0oL+3Z5XUmY1t1dfVqVa2M3Z514ReRvsAS4NtAPXA/sFhV7/U6prKyUlcl+SpZW1tLVVVV6ob6iNmWGvlkWyZX4Ro8+zHc/hcLcNfX902uzVasgGOPdS1qm2FbUlyUkfxCsW3WFskTS99exaz56fFp1ZUs2bjXYt/UILG2TcY2EXEV/iAGd48D3lTVD1S1GXgAGBeAHYYRCJlehSsj8e5t4Zguoj/o0kdd0ypkGi8/fv2uzq6ffCDI+RZBCP/bwDEi0ktEBJgAbAjADsMIhEz/h08riscr/n7aNFBlcJTgR+PHYGuhTdgKciA768Kvqs8Di4EXgbWODXdk2w7DCIpM/4dPKdWzl+Dv2RMJx7zxRqBrMfZaejEVZk0cQnG3jjYVd5O8nbAV5IMukKgeVf0Z8LMg6jaMoPFjglVCeWlSWNIwXpbLdLJnehL7LIqT8ifXCTKDqKVsMIwsk/VVuG64IdK7dxP9LiZcxXubyLTLat4TG2lu6WhLc4vm7apZQS7KYykbDCPLZC1lsAhVXmVJRPN5vU1k2mVViJO3gsogasJvGAHg6394rwlX994LZ3eaMpMymXZZ2apZ2cNcPYaRL3SVITODog+Zd1nl00L0Ycd6/IbvZHKykhFDYyP06eNephqZ7ONT1Zl2WdmqWdnDhN/wFV8iPwz45jdhyRL3sizOxs+0y8pWzcoOJvyGr6SzbqrhQgJLGhpGV5iP3/CVQozU8AUP//2KBX9OOwd+mMjkhDDDG+vxG75ikRpp4tHDb0+YtgHmrKnLi7cncwtmD+vx5yi50jOySI0UeP11zx5+thKmBUGgi8QXGNbjz0HC3jPyWg0q1yI1sh6NVFoKH3/sXqbK4NmPuRbli9vM3ILZw4Q/BwnzgGm81aCCti0Zsvpw9RqwPeQQ2LSp/We+u83y/frChLl6cpAw94zy5XU9K9fhNeFq27bIYG2U6EP+u83y/frChPX4c5Aw94zC/FBKBl+vw6uH30VkTr5PcMr36wsTnsIvIo8DF6rq5uyZYyRCkOlcuyLMD6VkyPh1PP44nHSSe1kGEqblC/l+fWEhnqvnLmCZiFwuItlb5TiHyVakTZDpXLsiX17XM3EdS9fUfebOcRP9EMXf50qUmJEZPHv8qnq/iPwZ+H/AKhH5A9AaVf5/qVQoIkOA+6I2HQz8VFVvSOV8YSHbkTZh7Rnly+t62tchQo3L5rdO+zYHLV2YMTszQdijxIzM05WPfw/wCdAD6EOU8KeKqm4ERgGISBFQBzyY7nmDJsyRNtkmrA+lZEnpOjz894f8aCl7i/ahoqyElRmwLVnihabavVt4xPPxfx34P+BhYIyq7vKh/gnA66r6lg/nzir5MqhZCGQ8Pl+Vqupq16JBMYuVB3E/dNWjt3u38IjX478c+JaqvuJj/WcCf/Lx/FkjXwY185FooS/rVUzj7r00t0Z862m5NWbNgl/+0rVo/JynQnM/dNWjt3u38BANaHBJRLoDW4EjVHWbS/lUYCpAeXn52IULk/OLNjY20rt370yYmhD1Tc3U7WiiNao9u4lQ0beEspKOY+PZti0Z8s02t7+LG92LujHk8x557WPw6t0D1K5Y4Vmv1/3gJ42NjbzZ0OJZPryiNDBb8+1eyxbJ2FZdXb1aVStjtwcp/KcBF6nq8V3tW1lZqatWrUrq/LW1tVRVVaVoXWok6kIIwrZEyTfbxs9d7tqbjUWAN+d6hFu27+QRf3/TTdQOG9bJtiuWruVPz79DiypFIpx19IFcUzM8McMzRG1tLZc/1+raBhVlJaycfSwQzGI5+XavZYtkbBMRV+EPcgLXWeSJm6eNfBnUzCcS9VPHdWskMuGqtrZD0dI1dSxZXUeLs0+LKktW11F50P4Zu0cSFetE5n3YvVtYBJKyQUT2Bb4GPBBE/UZ2CTJGPBE/tWt8/scfd72GbRz8Tvn
QNmBbV9+E8tlYhVvbhnnehxEMgfT4VfUT4HNB1G1kl6BjxN16u8VFwr7d96GhqblzT/mII2D9eveTJeEW9TtSJtkQTOvRG9FYrh7DV4KOEU94IlaGlzT0O1LGQjCNdDDhN3wlDAIVt7frJfgrV8K4cSnXmUo+pWQGWC0E00gHS8ts+IqXEAUuUF3579MQfUjer56Mzx7yJyeSEQzW4zd8JVSZRF95BYYNcy/zIaw5Gb96Kj77tuNyOSeSEQwm/IavhEKgMuy/94NUXGI2YGukigm/4TuBCZSX4JeUwC4/Uk+ljvnsjWxiPn4jVGQk5t/Lf//ee5EefshEH8xnb2QX6/EboSHtmP8UlzQMA6FwiRkFgwm/0Ykg8rZAijH/998PZ5zhXpYDgh+N+eyNbGGuHqMD9U3NSYUVZhKvgcy6+ibGz13O2rqGz9w/be4cN9EP0ZKGhhFGTPiNDmxr2O1rjpl4eA1kCrQPfK68bAI1YwZ23unMM0Mh+LZ2rZELmKvH6MCellbc+gPZmGnrFvMvgAKbrzvZ/aDmZtgnHLdx0HmJDCNRwvE/xggN3YvcXwKzEVYYO8BZsV8P/n7511z3HXzpo13nz48iG+MWQeclMoxEMeE3OlBe2pOS4pbAZtrWjK6g5s45cMstruVta9hWJPEg8qMn3ulBMrKFrfV7XPe1xGlG2DAfv9GBspLi4HK3tw3Yuoj+oEsf5aZ7lwLJP4gynRvfLa9O3Y4mSj2WKbRJWEbYsB5/DuK32yLrYYVe8fc33wwXXcTSNXVUPLER2ElFCteb6Qyhbg+SVlVEIg+lUOQlMow4mPDnGHk1gJjghKu2B1FtbS3Tzq5KuppMp0PwemDU72rm+m+PysociKDmWhj5QSDCLyJlwO+AYUSCNr6nqs8GYUuukfMDiDt2wP77u5f5FIqZ6Qyh8R4k2XhbyquHvxEIQfn4fw38RVUPA0YCGwKyI+cIw8ImKdG/f6SH7yb6LvH3mYyHz/Sas255dbqJZM2l4/d6vkb+k/Uev4iUAv8JTAFQ1T2AeziE0Ymgszgm7WJIISWyV492zrgi1/0TIZM9cbe8OhV9W7LW287Zh78RGkSzPNNRREYBdwDrifT2VwMznAXYo/ebCkwFKC8vH7tw4cKk6mlsbKR3796ZMDnjpGNbfVMzdTuaaI36u3UToaJvCWUeUSWZsq2ruuubmtnWsJs9La1M+06N6zlevOkmPvZaDMVh43s7nYlkHRnQC/r1LU38YrJINu83r/bpXtSNIZ/vE5hdyWK2pUYytlVXV69W1crY7UEIfyXwHDBeVZ8XkV8DH6vq//M6prKyUletWpVUPbW1tVRVVaVsp5+DZ7lq2/i5y13fNtoibS57YC0brjnB/cRJ3GeDZz+G294zh+9l2tmnJXyebJLu3zQZYt+IIDJm4ea+yqZdyWK2pUYytomIq/AHMbi7Bdiiqs87vxcDswOww5OwD54l4rbw4+Hg5Uros2kDNZdNoMalbPycp1g5+9ik6vFyZ3nNKg4Ctwlc2cJSOBvpknXhV9X3ROQdERmiqhuBCUTcPqEh1yNn/HpwxQqyZ/4cPpthKyn4nb2icMpLuyd9Lj9wa9+6HS0sXVOXtfvDUjgb6RBUHP80YIGIdAfeAM4NyA5Xcn3wzK8HV1funF3FPRj6wyUdtqUy6OzVoy1r2JS80T7gNYErun0tzt4IM4EIv6q+BHTyO4WFoCNn0sWvB1fNmIGu7pwx0xawvVfnQdd0YuXderS1teEQ/q7aN+yuQsOwmbsuZHrCj9/E9i5LnQibWFJ+cHmEZLa5czrt7tSVr73crjoGue4qNPIfE34XcmnwzK13WVwkFHcTmls/i41J+sH10ENQU+Nepsr4ucvBI8In2cHcXMOtYxA9gSsbrkJzJRnpYMLvQa4Mnrn1LptblL69iunVfZ+khaGqutq7MCokM8i3oqBFr6sJXH67Cs2VZKSLCX+WiBar2aNaqc9QBEi8hGFrfnp
84ifymmE7YwbccEOnzUG9FYVF9GI7BrW1te3f/X4omivJSBcTfp+IFvqyXsU07t7b7nrZ09KaMbFKu3fpJfgJLGkYxFtRLoie3w/FXI86M4LHhN8HYnulO3Z1HmjNlFil1LtsbYUi97w3tStWhHbGIuSO6Pn5UMz1qDMjeMIzFTKPcOuVupEJsUoq8+SVV0Z6+G6i75IhM4x4iVshiZ5bdtAwR50Z4aOge/x+DRImKuiZEqsue5cpZMgMK7kWausHuRR1ZoSTghV+PwcJvV7Fo8mKWHkJ/qJF8K1v+Vu3T/glekFHCiVLrkSdGeGkYIXfz0FCt15pcZGwb/d9aGhqpntRt7gLgaQtQgkuaZgoYRPFTIteWCKFDCNbFKzw+zlI2FWvtLa2lqo4op+SCH38MZR65KpPw51TCKKYC5FChpFJClb4/Y6MSLVX6iVCMxe9zP/c91LnHndVFTz9tPvJMuC/LwRRzJVIIcPIFAUb1RPWyAgvsWlRRYn0uGctfjnizhFxF/0MRugUgihapJBRaBSs8Gd6Ae5M0ZXYbL7uZDb94qRO259a9FdfQjKTEcVMLpCeTcLaCTAMvyhYVw+EIzIiduC0+rD+LFld18m94rXoSVuGzIrXhQk+2Jdo+GQujwVYeKRRaBSU8KcSneJnRIubWC5ZXcfpYytY8eoHFL/5OrV3THU9NjYlsl+ul0RFMdfHAsLQCTCMbBGI8IvIZmAn0ALsdVsMONOk0iP1uxfrJZY/Pmsc1+xudD3GKwe+n/7oRESxEMYCDCNfCLLHX62qH2arslR6pH73YmNF0cud895Xj+P0Ey9ja30TfWMSvkE4/NGWP8YwcoeCcfWk0iP1uxfbJpaei5bX10NpKZ8HVkZtDtuEKrBUCoaRS4gGkKtFRN4EdgAK3K6qd7jsMxWYClBeXj524cKFSdXR2NhI7969239vfG8ne1paO+3XvagbQz7fx/UcqRyTjG1ei54sfXwZZSXFKZ8/HWLbLRnqm5rZ1rCbPS2tCIKidC/qRnlpz4SuJ/p4t+PSsc1vwmpbWO0Csy1VkrGturp6tZsrPage/1dUtU5EDgCeFJFXVfWZ6B2ch8EdAJWVlZpsquDa2toO6YXrY/z1EOmRzpk03HMWbSrHdMnTT8Mpp7gWjZ/zVMq990y9BcS2Wyp2dG6zFuZMGhrXnqVr6rjsqbU0NXejLco49rh0bfOTsNoWVrvAbEuVTNgWiPCrap3z7/si8iBwFPBM/KPSI5WQvYyG+ZWUwO7d7mXOW9dK99IuCVMoZarjIrkeFWQYuUTWhV9E9gW6qepO5/vxwM+zUXcqIXtph/l5JUy7+Wa46KLUzxtFmEQz3rhIvLcSiwoyjOwRRI+/HHhQIoK4D/BHVf1LAHb4i5fgt7RQ+8wzcV/VknXbhEk0vaJ7ynoVx30rsaggw8geWU/ZoKpvqOpI53OEql6bbRu86CrlwNI1dYz++TIGzX6MQbMfY9RVyzru09r6WQ6d2HO/uIXxc55i8E/+zMb3dnqmM2hz29TVN7Xn5rnsgbVx0x+EKdeMV/oDVTzfSuIdZ1FBhpF5CjZXTyzxBHfpmjpGXbWMS+57qcP6ufVNzcy6/2We/9Xv4i5puPTFLR3O3bbYupuYx3PbeBEm0fTKgdTQ1HndYfjsrSSsuZMMIx8pmDj+rvAS3CsffoVP97a6rqHrGX/fp08kP34X53bzwafitglbrhm3cZF5T2zs0pVjaRMMIzuY8Dt4CWu9S0/VU/BXrYKxYxM+t9v2VH3dYRdNm+BlGOEhb1099U3NSaUITsQfvvm6k11Ff/ycpyIhmS6iH+/cbtvD5LbJJObKMYzwkJc9/qVr6qjb0URdfURAE4lr9+qR9pYWXrjaOyVycTdhXheinExvN2xum0wS9rcSwygU8lL45z2xkTMP7JiKoqu49ljB/Xr9a9x6+yWu+7ZlyCwrKebKU4/oUsxiz93VYusmkIZh+EleCv/W+iY40H17vBj
5mtEV1Fx1ETz0UKdjf3H6j/jtIVV8oayEG1LogUeLebzF1g3DMPwmL4U/4jvf2Wl7aUmcSUTHjYDt2zufrKEB9tuPnwA/8dFmwzCMbJGXg7uzJg6hW8wkqpLiIkQ6TyLacM0J1IwZ2Fn029av3W8/v801DMPIKnnZ468ZXcHS99ZTUVbUwaXzP/e9BMA+LXt57Zc1nQ/cf3/46KOs2prLhHFdAMMwuiYvhR8iA68rZ1d12HbLQy/y5FWndtr3qjMu42f3/SJLluUHYcoIahhGcuSt8Hdg0yYYOZInmzpOjBp/wXy29xvAnEnDAzIsdwlTRlDDMJIjv4V/61ao6ChCG8+9mO8dWsPWht18oayEOeaeSIkwZQQ1DCM58lv477vvs+9//COcdRZDSH3BE+MzLI2yYeQueRnV086MGbBzZyQ656yzgrYmr8jX1BKGUQjkd4+/WzcI6YLJuU4+p5YwjHwnMOEXkSJgFVCnqh7pLo0wY6klDCM3CbLHPwPYAOT1DCm3WPeyoI0yDKOgCcTHLyIDgZOA3wVRf7bwWtXLLce/YRhGtghqcPcG4MdAa0D1ZwWvWPdtDbsDssgwDANEVbveK5MVipwMnKiqF4pIFfAjNx+/iEwFpgKUl5ePXbhwYVL1NDY20jvggd21dQ2u28tL4ID9SzNaV31TM9sadrOnpZXuRd0oL+1JWUlx0ucJQ7t5YbYlT1jtArMtVZKxrbq6erWqVsZuD0L45wDnAHuBnkR8/A+o6ne8jqmsrNRVq1YlVU9tbS1VVVVpWJo+4+cud411v2xUKz8485SM1RObPgEioZWprHAVhnbzwmxLnrDaBWZbqiRjm4i4Cn/WXT2qepmqDlTVQcCZwPJ4op/LeMW6l5f2zGg98dInGIZhxJLfcfwB4xXrXtawKaP1WPoEwzCSIVDhV9VaoDZIG/zGLda9tjazwm/pEwzDSIb8TtlQIFj6BMMwksFcPXmApU8wDCMZTPjzBEufYBhGopirxzAMo8Aw4TcMwygwzNWTJrbguGEYuYYJfxrYguOGYeQi5upJA5sxaxhGLmI9fodUXDY2Y9YwjFzEevx4581fuqYu7nFeM2NtxqxhGGHGhJ/UXTY2Y9YwjFzEXD2k7rLJxIxZiwoyDCPbmPCTXpKzdGbMWlSQYRhBYK4egnPZWFSQYRhBYD1+gktyZlFBhmEEgQm/QxBJziyPvmEYQWCungCxqCDDMIIg6z1+EekJPAP0cOpfrKo/y7YdYcDy6BuGEQRBuHo+BY5V1UYRKQb+LiJ/VtXnArAlcCyPvmEY2Sbrwq+qCjQ6P4udj2bbDsMwjEJFIjqc5UpFioDVwCHALap6qcs+U4GpAOXl5WMXLlyYVB2NjY307t07A9ZmHrMtNcy25AmrXWC2pUoytlVXV69W1cpOBaoa2AcoA1YAw+LtN3bsWE2WFStWJH1MtjDbUsNsS56w2qVqtqVKMrYBq9RFUwON6lHVekf4vx6kHYZhGIVE1oVfRPqLSJnzvQT4GvBqtu0wDMMoVLLu4xeREcDvgSIiD55FqvrzLo75AHgryar6AR+mZKT/mG2pYbYlT1jtArMtVZKx7SBV7R+7MZDB3WwgIqvUbVAjBJhtqWG2JU9Y7QKzLVUyYZvN3DUMwygwTPgNwzAKjHwW/juCNiAOZltqmG3JE1a7wGxLlbRty1sfv2EYhuFOPvf4DcMwDBdM+A3DMAqMnBR+Efm6iGwUkddEZLZLeQ8Ruc8pf15EBkWVXeZs3ygiEwOw7Ycisl5E/iUiT4nIQVFlLSLykvN5OADbpojIB1E2nBdVNllENjmfyVm26/oom/4tIvVRZX632XwReV9E1nmUi4jc6Nj+LxEZE1XmZ5t1ZdfZjj1rReQfIjIyqmyzs/0lEVmVSbsStK1KRBqi/m4/jSqLey9kwbZZUXatc+6v/Z0y39pNRA4UkRWONrwiIjNc9sncveaWxyHMHyITv14HDga6Ay8DQ2P2uRC4zfl
+JnCf832os38PYLBznqIs21YN9HK+/3ebbc7vxoDbbQpws8ux+wNvOP/2db73zZZdMftPA+Zno82c8/8nMAZY51F+IvBnQIBjgOf9brME7RrXVh9wQptdzu/NQL8A26wKeDTde8EP22L2PQVYno12AwYAY5zvfYB/u/z/zNi9los9/qOA11T1DVXdAywETovZ5zQis4MBFgMTRESc7QtV9VNVfRN4zTlf1mxT1RWqusv5+RwwMIP1p2VbHCYCT6rqdlXdATxJ5vIrJWvXWcCfMlR3l6jqM8D2OLucBtyjEZ4DykRkAP62WZd2qeo/nHohu/dZIm3mRTr3qB+2Ze1eU9V3VfVF5/tOYAMQu1BHxu61XBT+CuCdqN9b6NxA7fuo6l6gAfhcgsf6bVs03yfyBG+jp4isEpHnRKQmg3YlY9vpzmvkYhE5MMlj/bQLxy02GFgetdnPNksEL/v9vteSIfY+U2CZiKyWSPrzIPiyiLwsIn8WkSOcbaFpMxHpRUQ8l0Rtzkq7ScQ1PRp4PqYoY/eaLbYeECLyHaAS+K+ozQepap2IHAwsF5G1qvp6Fs16BPiTqn4qIj8g8tZ0bBbr74oziSzV2RK1Leg2CzUiUk1E+L8StfkrTpsdADwpIq86PeFs8SKRv1ujiJwILAUOzWL9iXAKsFJVo98OfG83EelN5GFziap+nMlzR5OLPf464MCo3wOdba77iMg+QCnwUYLH+m0bInIccDlwqqp+2rZdVeucf98Aaok89bNmm6p+FGXP74CxiR7rp11RnEnMq7fPbZYIXvb7fa91iUQSIv4OOE1VP2rbHtVm7wMPkll3Z5eo6seq2uh8fxwoFpF+hKDNooh3r/nSbhJZinYJsEBVH3DZJXP3mh8DFX5+iLylvEHklb9tAOiImH0uouPg7iLn+xF0HNx9g8wO7iZi22giA1iHxmzvC/RwvvcDNpHBga0EbRsQ9f0bwHP62eDRm46NfZ3v+2fLLme/w4gMrkm22iyqnkF4D1SeRMcBt3/63WYJ2vVFImNY42K27wv0ifr+D+DrWW6zz7f9HYmI59tO+yV0L/hpm1NeSmQcYN9stZtz/fcAN8TZJ2P3WkYbNFsfIqPb/yYioJc7235OpAcN0BO437nx/wkcHHXs5c5xG4ETArDtr8A24CXn87CzfRyw1rnZ1wLfD8C2OcArjg0rgMOijv2e056vAedm0y7n95XA3JjjstFmfwLeBZqJ+E6/D1wAXOCUC3CLY/taoDJLbdaVXb8DdkTdZ6uc7Qc77fWy87e+PIA2uzjqPnuOqIeT272QTducfaYQCQKJPs7XdiPiilPgX1F/sxP9utcsZYNhGEaBkYs+fsMwDCMNTPgNwzAKDBN+wzCMAsOE3zAMo8Aw4TcMwygwTPgNI0mcTIpvRmVt7Ov8HhSwaYaRECb8hpEkqvoOcCsw19k0F7hDVTcHZpRhJIHF8RtGCjjT61cD84HzgVGq2hysVYaRGJakzTBSQFWbRWQW8BfgeBN9I5cwV49hpM4JRKb/DwvaEMNIBhN+w0gBERkFfI1Isqz/cRbEMIycwITfMJLEWc3tViI5098G5gG/DNYqw0gcE37DSJ7zgbdV9Unn92+Aw0Xkv+IcYxihwaJ6DMMwCgzr8RuGYRQYJvyGYRgFhgm/YRhGgWHCbxiGUWCY8BuGYRQYJvyGYRgFhgm/YRhGgfH/ARz0ilRp1EJDAAAAAElFTkSuQmCC\n", 55 | "text/plain": [ 56 | "
" 57 | ] 58 | }, 59 | "metadata": { 60 | "needs_background": "light" 61 | }, 62 | "output_type": "display_data" 63 | }, 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "Correlation Coefficient (Corr(X,Y)): 0.8770824028342054\n", 69 | "Coefficient of Determination (R^2): 0.7692735413614232\n", 70 | "Variance of Residuals: 0.8065845639670531\n", 71 | "Expected Variance of Residuals based on R^2: 0.8065845639670501\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "# Set a random seed for reproducibility\n", 77 | "np.random.seed(42)\n", 78 | "\n", 79 | "# Let's start generating a synthetic dataset\n", 80 | "# --------------------------------\n", 81 | "\n", 82 | "# Generate random values for X\n", 83 | "X = 2 * np.random.rand(100, 1)\n", 84 | "# Generate Y values with a linear relationship to X\n", 85 | "# For this example, we'll assume a true relationship of Y = 4 + 3X + Gaussian noise\n", 86 | "Y = 4 + 3 * X + np.random.randn(100, 1)\n", 87 | "\n", 88 | "# Let's implement the Linear Regression Algorithm\n", 89 | "# --------------------------------------------\n", 90 | "\n", 91 | "# Calculate the covariance between X and Y\n", 92 | "cov_XY = np.mean(X * Y) - np.mean(X) * np.mean(Y)\n", 93 | "\n", 94 | "# Calculate the variance of X\n", 95 | "var_X = np.var(X)\n", 96 | "\n", 97 | "# Determine the coefficient β using the relationship\n", 98 | "# β = Cov(X,Y) / Var(X)\n", 99 | "beta = cov_XY / var_X\n", 100 | "\n", 101 | "# Determine the intercept b\n", 102 | "# b = E(Y) - β * E(X)\n", 103 | "b = np.mean(Y) - beta * np.mean(X)\n", 104 | "\n", 105 | "# Predict values of Y using the obtained b and β\n", 106 | "Y_pred = b + beta * X\n", 107 | "\n", 108 | "# Let's calculate the correlation coefficient, R^2, and relate it to the variance of the errors\n", 109 | "# ------------------------------------------------------------------------------------------\n", 110 | "\n", 111 | "# Calculate the correlation coefficient Corr(X,Y)\n", 112 | 
"correlation_coefficient = np.corrcoef(X.squeeze(), Y.squeeze())[0, 1]\n", 113 | "\n", 114 | "# Calculate R^2\n", 115 | "R_squared = correlation_coefficient**2\n", 116 | "\n", 117 | "# Calculate the residuals (errors)\n", 118 | "residuals = Y - Y_pred\n", 119 | "\n", 120 | "# Calculate the variance of the residuals\n", 121 | "var_residuals = np.var(residuals)\n", 122 | "\n", 123 | "# Calculate the expected variance of the residuals based on R^2\n", 124 | "expected_var_residuals = np.var(Y) * (1 - R_squared)\n", 125 | "\n", 126 | "# Let's finally plot the results and print the calculated values\n", 127 | "# ---------------------------------------------------\n", 128 | "\n", 129 | "# Plot the original data and the linear regression line\n", 130 | "plt.scatter(X, Y, label=\"Original Data\")\n", 131 | "plt.plot(X, Y_pred, color=\"red\", label=\"Linear Regression Line\")\n", 132 | "plt.xlabel(\"X\")\n", 133 | "plt.ylabel(\"Y\")\n", 134 | "plt.title(\"Linear Regression Fit\")\n", 135 | "plt.legend()\n", 136 | "plt.grid(True)\n", 137 | "plt.show()\n", 138 | "\n", 139 | "# Print the results\n", 140 | "print(f\"Correlation Coefficient (Corr(X,Y)): {correlation_coefficient}\")\n", 141 | "print(f\"Coefficient of Determination (R^2): {R_squared}\")\n", 142 | "print(f\"Variance of Residuals: {var_residuals}\")\n", 143 | "print(f\"Expected Variance of Residuals based on R^2: {expected_var_residuals}\")" 144 | ] 145 | } 146 | ], 147 | "metadata": { 148 | "kernelspec": { 149 | "display_name": "Python 3 (ipykernel)", 150 | "language": "python", 151 | "name": "python3" 152 | }, 153 | "language_info": { 154 | "codemirror_mode": { 155 | "name": "ipython", 156 | "version": 3 157 | }, 158 | "file_extension": ".py", 159 | "mimetype": "text/x-python", 160 | "name": "python", 161 | "nbconvert_exporter": "python", 162 | "pygments_lexer": "ipython3", 163 | "version": "3.8.12" 164 | } 165 | }, 166 | "nbformat": 4, 167 | "nbformat_minor": 5 168 | } 169 | 
-------------------------------------------------------------------------------- /Ch5/Quantiles_Chebyshev_inequality.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "b7c17cbf", 6 | "metadata": {}, 7 | "source": [ 8 | "# Quantiles and Chebyshev's inequality\n", 9 | "\n", 10 | "The provided code consists of two functions: `chebyshev_bound`, which calculates lower and upper bounds using Chebyshev's inequality for a given dataset and confidence level, and `plot_quantile_comparison`, which generates a plot to compare quantiles of different random variables. The `chebyshev_bound` function calculates the sample mean and standard deviation of the data, and then determines the bounds based on the confidence level using Chebyshev's inequality. Specifically, it sets `k = 1 / sqrt(confidence)` and returns `mean - k * std` and `mean + k * std`; because Chebyshev's inequality bounds the probability of falling outside this interval by `1 / k^2`, the `confidence` argument acts as the maximum tail probability rather than as the coverage level. The `plot_quantile_comparison` function sorts the data, computes the matching theoretical quantiles with the inverse cumulative distribution function (`norm.ppf`) of a normal distribution whose mean and standard deviation equal those of the sample, and plots the quantiles against the sorted data for each random variable. The code generates samples from three distributions (normal, uniform, and exponential) and plots this comparison for each; `chebyshev_bound` is defined but not called in this notebook. The resulting plot allows visual comparison of quantile behavior across the random variables. The x-axis represents the sorted data, while the y-axis displays the fitted normal quantiles, so a sample that is close to normal traces an approximately straight line. Each curve in the plot corresponds to a different random variable, with the legend providing the corresponding label. The grid facilitates accurate quantile comparisons." 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 4, 16 | "id": "d0f84ce6", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import numpy as np\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "from scipy.stats import norm" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 5, 28 | "id": "ffb80a74", 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "image/png": "\n", 34 | "text/plain": [ 35 | "
" 36 | ] 37 | }, 38 | "metadata": { 39 | "needs_background": "light" 40 | }, 41 | "output_type": "display_data" 42 | } 43 | ], 44 | "source": [ 45 | "def chebyshev_bound(data, confidence):\n", 46 | " mean = np.mean(data)\n", 47 | " std = np.std(data)\n", 48 | " k = 1 / np.sqrt(confidence)\n", 49 | " lower_bound = mean - k * std\n", 50 | " upper_bound = mean + k * std\n", 51 | " return lower_bound, upper_bound\n", 52 | "\n", 53 | "def plot_quantile_comparison(data_list, labels):\n", 54 | " plt.figure(figsize=(8, 6))\n", 55 | " for data, label in zip(data_list, labels):\n", 56 | " sorted_data = np.sort(data)\n", 57 | " n = len(sorted_data)\n", 58 | " probabilities = np.arange(1, n + 1) / n\n", 59 | " quantiles = norm.ppf(probabilities, loc=np.mean(data), scale=np.std(data))\n", 60 | " plt.plot(sorted_data, quantiles, label=label)\n", 61 | " plt.xlabel('Data')\n", 62 | " plt.ylabel('Quantiles')\n", 63 | " plt.title('Comparison of Quantiles for Various Random Variables')\n", 64 | " plt.legend()\n", 65 | " plt.grid(True)\n", 66 | " plt.show()\n", 67 | "\n", 68 | "# Generate various random variables for comparison\n", 69 | "np.random.seed(42) # For reproducibility\n", 70 | "size = 1000 # Number of samples for each random variable\n", 71 | "\n", 72 | "normal_data = np.random.normal(loc=0, scale=1, size=size)\n", 73 | "uniform_data = np.random.uniform(low=-1, high=1, size=size)\n", 74 | "exponential_data = np.random.exponential(scale=1, size=size)\n", 75 | "\n", 76 | "# Calculate the quantiles for each random variable\n", 77 | "data_list = [normal_data, uniform_data, exponential_data]\n", 78 | "labels = ['Normal', 'Uniform', 'Exponential']\n", 79 | "\n", 80 | "# Plot the comparison of quantiles using Chebyshev's inequality\n", 81 | "plot_quantile_comparison(data_list, labels)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "id": "83d38be3", 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [] 91 | } 92 | ], 93 | "metadata": { 94 | 
"kernelspec": { 95 | "display_name": "Python 3 (ipykernel)", 96 | "language": "python", 97 | "name": "python3" 98 | }, 99 | "language_info": { 100 | "codemirror_mode": { 101 | "name": "ipython", 102 | "version": 3 103 | }, 104 | "file_extension": ".py", 105 | "mimetype": "text/x-python", 106 | "name": "python", 107 | "nbconvert_exporter": "python", 108 | "pygments_lexer": "ipython3", 109 | "version": "3.8.12" 110 | } 111 | }, 112 | "nbformat": 4, 113 | "nbformat_minor": 5 114 | } 115 | -------------------------------------------------------------------------------- /Ch5/Lévy_alpha-stable_distribution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "7a901ab7", 6 | "metadata": {}, 7 | "source": [ 8 | "# Lévy alpha-stable distribution\n", 9 | "\n", 10 | "In this code, we perform multiple experiments with Lévy alpha-stable distributed random variables and calculate the histogram of outcomes for each experiment. Additionally, we plot the corresponding theoretical normal distribution curve based on the mean and standard deviation of the outcomes.\n", 11 | "\n", 12 | "The Lévy alpha-stable distribution is a family of probability distributions that includes the Gaussian (normal) distribution as a special case. However, unlike the normal distribution, which satisfies the Central Limit Theorem and converges to a normal distribution for sums of random variables, the Levy-stable distribution does not always follow this pattern. 
The Central Limit Theorem states that the sum of a large number of independent and identically distributed random variables with finite variance tends to be approximately normally distributed, regardless of the shape of the original distribution. Lévy alpha-stable distributions with alpha below 2 have infinite variance, so the theorem does not apply to them.\n", 13 | "\n", 14 | "- For alpha = 2 (Gaussian distribution), beta = 0:\n", 15 | " - The histogram should resemble a bell-shaped curve, similar to the shape of the theoretical normal distribution curve (in red).\n", 16 | " - The mean and standard deviation of the outcomes should match closely with the theoretical values, and the histogram should be well-centered around the mean.\n", 17 | "- For alpha ≠ 2 and beta = 0:\n", 18 | " - The histogram may not closely resemble a normal distribution, even though the theoretical curve is still shown.\n", 19 | " - The shape of the histogram will be influenced by the stability parameter alpha, and it might exhibit heavier tails or skewness.\n", 20 | "- For beta ≠ 0 (and alpha below 2; when alpha = 2 the distribution is Gaussian and beta has no effect):\n", 21 | " - The histogram might show asymmetry (skewness) and thicker tails compared to the theoretical normal distribution curve.\n", 22 | " - The presence of a non-zero skewness parameter (beta) in the Lévy-stable distribution can lead to a significant departure from a normal distribution.\n", 23 | "- For alpha ≠ 2 and beta ≠ 0:\n", 24 | " - The histogram might display more pronounced deviations from a normal distribution, especially when both alpha and beta differ from their standard values.\n", 25 | " - Depending on the values of alpha and beta, the Lévy-stable distribution can exhibit various features such as fat tails, heavy skewness, and long tails."
26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "id": "a530b273", 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import numpy as np\n", 36 | "import matplotlib.pyplot as plt\n", 37 | "from scipy.stats import levy_stable, norm" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "id": "dba963ec", 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "image/png": "\n", 49 | "text/plain": [ 50 | "
" 51 | ] 52 | }, 53 | "metadata": { 54 | "needs_background": "light" 55 | }, 56 | "output_type": "display_data" 57 | } 58 | ], 59 | "source": [ 60 | "def generate_histogram(num_experiments, alpha, beta, subplot_position):\n", 61 | " # Perform the experiments\n", 62 | " outcomes = levy_stable.rvs(alpha, beta, scale=1, size=num_experiments)\n", 63 | "\n", 64 | " # Create a subplot\n", 65 | " plt.subplot(subplot_position)\n", 66 | "\n", 67 | " # Plot the histogram of outcomes\n", 68 | " plt.hist(outcomes, bins=50, density=True, alpha=0.5, color='blue')\n", 69 | "\n", 70 | " # Get mean and standard deviation of outcomes\n", 71 | " mu, std = np.mean(outcomes), np.std(outcomes)\n", 72 | "\n", 73 | " # Generate a range of x values\n", 74 | " x = np.linspace(mu - 3*std, mu + 3*std, 100)\n", 75 | "\n", 76 | " # Calculate the corresponding y values using the normal distribution\n", 77 | " y = norm.pdf(x, mu, std)\n", 78 | "\n", 79 | " # Plot the theoretical normal distribution curve\n", 80 | " plt.plot(x, y, color='red', linewidth=2)\n", 81 | "\n", 82 | " # Set plot title\n", 83 | " plt.title(f'Alpha: {alpha}, Beta: {beta}')\n", 84 | "\n", 85 | "# Set the number of experiments\n", 86 | "num_experiments = 1000\n", 87 | "\n", 88 | "# Create a 2x2 grid of subplots\n", 89 | "fig, axs = plt.subplots(2, 2, figsize=(10, 8))\n", 90 | "\n", 91 | "# Generate histograms for different alphas and betas\n", 92 | "generate_histogram(num_experiments, 1.5, 0, 221)\n", 93 | "generate_histogram(num_experiments, 1.5, 0.5, 222)\n", 94 | "generate_histogram(num_experiments, 2, -0.5, 223)\n", 95 | "generate_histogram(num_experiments, 2, 0, 224)\n", 96 | "\n", 97 | "# Set common labels\n", 98 | "fig.text(0.5, 0, 'Outcome', ha='center', va='center')\n", 99 | "fig.text(0, 0.5, 'Probability Density', ha='center', va='center', rotation='vertical')\n", 100 | "\n", 101 | "# Adjust spacing\n", 102 | "plt.tight_layout()\n", 103 | "\n", 104 | "# Display the plots\n", 105 | "plt.show()\n" 106 | ] 107 | }, 
108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "id": "3e2e64b0", 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [] 115 | } 116 | ], 117 | "metadata": { 118 | "kernelspec": { 119 | "display_name": "Python 3 (ipykernel)", 120 | "language": "python", 121 | "name": "python3" 122 | }, 123 | "language_info": { 124 | "codemirror_mode": { 125 | "name": "ipython", 126 | "version": 3 127 | }, 128 | "file_extension": ".py", 129 | "mimetype": "text/x-python", 130 | "name": "python", 131 | "nbconvert_exporter": "python", 132 | "pygments_lexer": "ipython3", 133 | "version": "3.8.12" 134 | } 135 | }, 136 | "nbformat": 4, 137 | "nbformat_minor": 5 138 | } 139 | -------------------------------------------------------------------------------- /Ch6/Stress_Testing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "123a65b5", 6 | "metadata": {}, 7 | "source": [ 8 | "## Visualizing Systemic Risk in a Complex System with Stress using Python and Matplotlib\n", 9 | "\n", 10 | "To understand the systemic risk in a complex system and the effects of stress on this system, we'll employ Python and matplotlib for a visual demonstration. Our system is represented using two interrelated variables, making use of a bivariate normal distribution to demonstrate their relationship.\n", 11 | "\n", 12 | "1. **System Representation**:\n", 13 | " - The complex system is represented by a bivariate normal distribution with specified means and covariance. Here, the mean vector `mean` denotes the average values of the two random variables, and the covariance matrix `cov` signifies how these variables interact with each other.\n", 14 | "\n", 15 | "2. **Modeling Stress**:\n", 16 | " - Stress on the system is modeled using the `apply_stress` function. This function shifts the mean values, simulating a disturbance. 
By adjusting the `stress_factor` parameter, we can control the intensity of the stress; the same `stress_factor` is added to every component of the mean, while the covariance matrix is left unchanged.\n", 17 | "\n", 18 | "3. **Visualization**:\n", 19 | " - Using matplotlib, the original distribution (before stress) is plotted using blue contours, demonstrating the system's behavior in its natural state.\n", 20 | " - After stress is applied, the disturbed distribution is visualized with red contours, allowing for a comparison between the system's behavior before and after the stress.\n", 21 | " \n", 22 | "Through this visualization, one can observe how the introduction of stress affects the systemic risk in the system. The shift between the blue and red contours visually captures the change in the joint probability density of the system's variables, demonstrating the resilience or vulnerability of the system to disturbances." 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "id": "4aa9dd65", 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import numpy as np\n", 33 | "import matplotlib.pyplot as plt\n", 34 | "from scipy.stats import multivariate_normal" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 6, 40 | "id": "dab82ed7", 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "data": { 45 | "image/png": "\n", 46 | "text/plain": [ 47 | "
" 48 | ] 49 | }, 50 | "metadata": { 51 | "needs_background": "light" 52 | }, 53 | "output_type": "display_data" 54 | } 55 | ], 56 | "source": [ 57 | "# Set random seed for reproducibility\n", 58 | "np.random.seed(42)\n", 59 | "\n", 60 | "# 1. Define the multivariate normal distribution for the system variables\n", 61 | "# We will assume two variables for simplicity\n", 62 | "mean = [0, 0] # Mean of the two variables\n", 63 | "cov = [[1, 0.5], [0.5, 1]] # Covariance matrix showing some correlation between the variables\n", 64 | "\n", 65 | "# Create a grid of (x, y) coordinates\n", 66 | "x, y = np.mgrid[-3:3:.01, -3:3:.01]\n", 67 | "pos = np.dstack((x, y))\n", 68 | "\n", 69 | "# 2. Define a function to \"stress\" the system. Here, stress is modeled as a change in mean\n", 70 | "def apply_stress(original_mean, stress_factor):\n", 71 | " return [m + stress_factor for m in original_mean]\n", 72 | "\n", 73 | "# 3. Plot the distributions before and after stress\n", 74 | "fig, ax = plt.subplots()\n", 75 | "\n", 76 | "# Before stress\n", 77 | "rv = multivariate_normal(mean, cov)\n", 78 | "ax.contourf(x, y, rv.pdf(pos), levels=25, alpha=0.6, cmap=\"Blues\")\n", 79 | "\n", 80 | "# After stress\n", 81 | "stressed_mean = apply_stress(mean, 1) # Apply a stress factor of 1 to change the mean\n", 82 | "rv_stressed = multivariate_normal(stressed_mean, cov)\n", 83 | "ax.contourf(x, y, rv_stressed.pdf(pos), levels=25, alpha=0.6, cmap=\"Reds\")\n", 84 | "\n", 85 | "ax.set_xlabel('Variable 1')\n", 86 | "ax.set_ylabel('Variable 2')\n", 87 | "ax.set_title('Effect of Stress on System')\n", 88 | "ax.legend(['Before Stress', 'After Stress'])\n", 89 | "\n", 90 | "plt.show()" 91 | ] 92 | } 93 | ], 94 | "metadata": { 95 | "kernelspec": { 96 | "display_name": "Python 3 (ipykernel)", 97 | "language": "python", 98 | "name": "python3" 99 | }, 100 | "language_info": { 101 | "codemirror_mode": { 102 | "name": "ipython", 103 | "version": 3 104 | }, 105 | "file_extension": ".py", 106 | "mimetype": 
"text/x-python", 107 | "name": "python", 108 | "nbconvert_exporter": "python", 109 | "pygments_lexer": "ipython3", 110 | "version": "3.8.12" 111 | } 112 | }, 113 | "nbformat": 4, 114 | "nbformat_minor": 5 115 | } 116 | --------------------------------------------------------------------------------