├── .gitignore ├── run-lab.sh ├── img ├── ML-map.png ├── Bayes_Theorem.jpg ├── id3_algorithm.png ├── categorical_tree.dia ├── categorical_tree.png ├── cross_validation.dia ├── cross_validation.png ├── ML-map.dot └── random_variable.svg ├── Pipfile ├── tools ├── pd_helpers.py ├── hw.csv ├── stats.py ├── venn.py └── plots.py ├── Homework.ipynb ├── README.md ├── Lab09-Exercises.ipynb ├── Lab05-Exercises.ipynb ├── Lab12-Exercises.ipynb ├── Lab04-Exercises.ipynb ├── Lab13-Exercises.ipynb ├── Lab11-Exercises.ipynb ├── Lab14-Exercises.ipynb ├── Lab07-Exercises.ipynb ├── Lab01-Exercises.ipynb ├── Lab06-Exercises.ipynb ├── Lab02-Exercises.ipynb ├── Lab05.ipynb ├── Lab03-Exercises.ipynb ├── Lab06.ipynb ├── extras └── Lab-EM-Exercises.ipynb └── Lab10-Exercises.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | *-Solutions.ipynb 3 | -------------------------------------------------------------------------------- /run-lab.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python3 -m pipenv run jupyter-lab 3 | -------------------------------------------------------------------------------- /img/ML-map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spantiru/companion-lab/HEAD/img/ML-map.png -------------------------------------------------------------------------------- /img/Bayes_Theorem.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spantiru/companion-lab/HEAD/img/Bayes_Theorem.jpg -------------------------------------------------------------------------------- /img/id3_algorithm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spantiru/companion-lab/HEAD/img/id3_algorithm.png -------------------------------------------------------------------------------- /img/categorical_tree.dia: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spantiru/companion-lab/HEAD/img/categorical_tree.dia -------------------------------------------------------------------------------- /img/categorical_tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spantiru/companion-lab/HEAD/img/categorical_tree.png -------------------------------------------------------------------------------- /img/cross_validation.dia: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spantiru/companion-lab/HEAD/img/cross_validation.dia -------------------------------------------------------------------------------- /img/cross_validation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spantiru/companion-lab/HEAD/img/cross_validation.png -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | 8 | [packages] 9 | jupyterlab = "*" 10 | matplotlib = "*" 11 | matplotlib-venn = "*" 12 | pandas = "*" 13 | scikit-learn = "*" 14 | 15 | [requires] 16 | python_version = "3.12" 17 | 
-------------------------------------------------------------------------------- /tools/pd_helpers.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def apply_counts(df: pd.DataFrame, count_col: str): 4 | """ Denormalise a dataframe that has a count column by 5 | repeating each row as many times as its value in 6 | count_col, then dropping count_col. """ 7 | feats = [c for c in df.columns if c != count_col] 8 | return pd.concat([ 9 | pd.DataFrame([list(r[feats])] * r[count_col], columns=feats) 10 | for i, r in df.iterrows() 11 | ], ignore_index=True) -------------------------------------------------------------------------------- /tools/hw.csv: -------------------------------------------------------------------------------- 1 | Lab,Problems 2 | 1,"7, 14" 3 | 1,"2, 16" 4 | 1,"5, 8" 5 | 1,"3, 15" 6 | 1,"12, 13" 7 | 2,"3, 5" 8 | 2,"2, 4" 9 | 2,"1, 3" 10 | 3,"1, 4" 11 | 3,"2, 5" 12 | 3,"3, 4" 13 | 4,"1, 3" 14 | 4,"2, 3" 15 | 5,"1, 2" 16 | 6,"1, 3" 17 | 6,"2, 4" 18 | 6,"3, 5" 19 | 6,"4, 6" 20 | 7,1 21 | 7,2 22 | 7,3 23 | 7,4 24 | 7,5 25 | 9,"1, 2" 26 | 10,"1, 2" 27 | 10,"3, 4" 28 | 10,"5, 6" 29 | 11,"2, 4" 30 | 11,"1, 3" 31 | 11,"2, 3" 32 | 12,"1, 3" 33 | 12,"2, 3" 34 | 13,"1, 4" 35 | 13,"2, 5" 36 | 14,"4, 5" 37 | 14,"1, 3" 38 | 39 | -------------------------------------------------------------------------------- /tools/stats.py: -------------------------------------------------------------------------------- 1 | from typing import Set, Any 2 | from dataclasses import dataclass 3 | 4 | def probability(A: Set[Any], omega: Set[Any]): 5 | """ Probability for a uniform distribution 6 | in a finite space""" 7 | return len(A) / len(omega) 8 | 9 | 10 | @dataclass(frozen=True) 11 | class WeightedOutcome: 12 | """ Class adding a weight to any outcome. """ 13 | weight: float 14 | 15 | 16 | def probability_weighted(A: Set[WeightedOutcome], 17 | omega: Set[WeightedOutcome]): 18 | """ Probability in a finite space of weighted outcomes: 19 | the total weight of the event A divided by 20 | the total weight of omega. """ 21 | A_weight = sum((o.weight for o in A)) 22 | omega_weight = sum((o.weight for o in omega)) 23 | return A_weight / omega_weight -------------------------------------------------------------------------------- /tools/venn.py: -------------------------------------------------------------------------------- 1 | from matplotlib_venn import venn2, venn2_circles 2 | import matplotlib.pyplot as plt 3 | 4 | omega = set(['10', '11', '01', '00']) 5 | A=set(['10', '11']) 6 | B=set(['11', '01']) 7 | 8 | def plot_venn(highlights): 9 | """ Plot a venn diagram with two intersecting sets A and B 10 | and highlight any combination of A, B and omega.
""" 11 | to_hide = set(['10', '11', '01'])-set(highlights) 12 | figure = plt.figure(figsize=(4, 3)) 13 | ax=plt.gca() 14 | ax.text(0.7, 0.5, r'$\Omega$', fontsize=16) 15 | if '00' in highlights: 16 | figure.patch.set_facecolor('grey') 17 | subsets={'10': 1, '01': 1, '11': 1} 18 | v = venn2(subsets, set_labels = ('A', 'B'), alpha=1) 19 | for p in to_hide: 20 | v.get_patch_by_id(p).set_color('w') 21 | v.get_patch_by_id(p).set_alpha(1) 22 | for p in {'10', '11', '01'}: 23 | v.get_label_by_id(p).set_text('') 24 | venn2_circles(subsets) 25 | plt.show() -------------------------------------------------------------------------------- /Homework.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "044dc450-8772-43ec-92ef-8ad6ed454b13", 6 | "metadata": {}, 7 | "source": [ 8 | "# What is my homework?" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "67163e92-9054-4df2-afa9-5e00f9ce5171", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "from hashlib import sha1\n", 20 | "\n", 21 | "def what_is_my_homework(email: str, lab_no: int):\n", 22 | " \"\"\"\n", 23 | " What is my assigned homework for the given lab?\n", 24 | " \n", 25 | " Print a message that displays the assigned homework for a specific\n", 26 | " email and a specific lab.\n", 27 | " \n", 28 | " Parameters\n", 29 | " ----------\n", 30 | " email : string\n", 31 | " The email you provided at the beginning of the semester.\n", 32 | " lab_no : int\n", 33 | " The lab number for which to assign homework.\n", 34 | " \n", 35 | " Examples\n", 36 | " --------\n", 37 | " >>> what_is_my_homework('my_email@example.com', 1)\n", 38 | " Your homework for lab 1: 1, 7.\n", 39 | " \"\"\"\n", 40 | " df = pd.read_csv(\"tools/hw.csv\")\n", 41 | " if lab_no not in df['Lab'].unique():\n", 42 | " print(\"No homework assigned for this lab!\")\n", 43 | " return\n", 44 | " df_lab = df[df['Lab'] == lab_no]\n", 45 | " key = ('2024'+email+str(lab_no)).encode(\"utf-8\")\n", 46 | " idx = int(sha1(key).hexdigest(), 16) % 2**10\n", 47 | " hw = df_lab.sample(n=1, random_state=idx).iloc[0, 1]\n", 48 | " print(f\"Your homework for lab {lab_no}: {hw}.\")\n", 49 | "\n", 50 | "what_is_my_homework('my_email@example.com', 1)" 51 | ] 52 | } 53 | ], 54 | "metadata": { 55 | "kernelspec": { 56 | "display_name": "Python 3 (ipykernel)", 57 | "language": "python", 58 | "name": "python3" 59 | }, 60 | "language_info": { 61 | "codemirror_mode": { 62 | "name": "ipython", 63 | "version": 3 64 | }, 65 | "file_extension": ".py", 66 | "mimetype": "text/x-python", 67 | "name": "python", 68 | "nbconvert_exporter": "python", 69 | "pygments_lexer": "ipython3", 70 | "version": "3.12.3" 71 | } 72 | }, 73 | "nbformat": 4, 74 | "nbformat_minor": 5 75 | } 76 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/spantiru/companion-lab/master) 2 | 3 | # JupyterLab for the Machine Learning seminar 4 | 5 | Ștefan Panțiru, Faculty of Computer Science, "Alexandru Ioan Cuza" University Iași 6 | 7 | To run this lab, either click the `launch binder` button above, or run the code on your machine following the instructions below. 
8 | 9 | ## Contents 10 | * [Lab01 - Elementary Notions in Probability and Statistics](Lab01.ipynb) 11 | * [Lab02 - Decision Trees (part1)](Lab02.ipynb) 12 | * [Lab03 - Decision Trees (part2)](Lab03.ipynb) 13 | * [Lab04 - Decision Trees (part3)](Lab04.ipynb) 14 | * [Lab05 - Naive Bayes (part1)](Lab05.ipynb) 15 | * [Lab06 - Naive Bayes (part2)](Lab06.ipynb) 16 | * [Lab07 - Maximum Likelihood Estimation](Lab07.ipynb) 17 | * Week 8 - Midterm Exam 18 | * [Lab09 - Logistic Regression](Lab09.ipynb) 19 | * [Lab10 - k-Nearest Neighbour](Lab10.ipynb) 20 | * [Lab11 - AdaBoost](Lab11.ipynb) 21 | * [Lab12 - Hierarchical Clustering](Lab12.ipynb) 22 | * [Lab13 - k-Means (part1)](Lab13.ipynb) 23 | * [Lab14 - k-Means (part2)](Lab14.ipynb) 24 | 25 | ## Useful resources: 26 | * [ML Homework](https://forms.gle/EmPiJqABNR7MZWgf8) 27 | * [Register for Piazza](https://forms.gle/KQP3pwRQxvqxVvLz6) 28 | * [Resources uploaded to Piazza](https://piazza.com/info.uaic.ro/spring2024/ml2024f/resources) 29 | * [Python official documentation](https://docs.python.org/3.12/library/index.html) 30 | * [Scikit-learn library](https://scikit-learn.org/stable/getting_started.html) - machine learning library for Python 31 | * [Scipy statistical functions](https://docs.scipy.org/doc/scipy/reference/stats.html) 32 | * [Pandas library](https://pandas.pydata.org/docs/reference/index.html) - library providing data structures and analysis tools for Python 33 | 34 | ## Running locally 35 | 36 | This lab uses Python 3.12 and `pipenv`, so make sure they are available on your system. 37 | 38 | ```bash 39 | $ python3 --version 40 | Python 3.12.3 41 | $ python3 -m pipenv --version 42 | pipenv, version 2023.12.1 43 | ``` 44 | 45 | Clone the repository using git: 46 | 47 | ```bash 48 | $ git clone https://github.com/spantiru/companion-lab.git 49 | ``` 50 | 51 | Inside the project folder, create the pipenv environment: 52 | 53 | ```bash 54 | $ cd companion-lab 55 | $ python3 -m pipenv install # Might take a few seconds 56 | ``` 57 | 58 | Run `jupyter-lab`, which should start in your default browser: 59 | 60 | ```bash 61 | $ python3 -m pipenv run jupyter-lab 62 | ``` 63 | -------------------------------------------------------------------------------- /img/ML-map.dot: -------------------------------------------------------------------------------- 1 | digraph G { 2 | rankdir=LR; // Left to right layout 3 | splines=false; // Make arrows straight lines 4 | 5 | // Global node styling for a professional look 6 | node [ 7 | shape=box, 8 | style=filled, 9 | fillcolor=lightblue, 10 | color=black, // Black border 11 | fontname="Helvetica", 12 | fontsize=10, 13 | penwidth=0.8 14 | ]; 15 | 16 | // Global edge styling 17 | edge [color=gray, arrowsize=0.8, penwidth=0.8]; 18 | 19 | // Define the structure 20 | "Machine Learning" -> "Supervised Learning"; 21 | "Machine Learning" -> "Unsupervised Learning"; 22 | 23 | // Linear and Non-linear split under Supervised Learning 24 | "Supervised Learning" -> "Linear"; 25 | "Supervised Learning" -> "Non-Linear"; 26 | 27 | // Linear Supervised Learning Algorithms 28 | "Linear" -> "Linear Regression"; 29 | "Linear" -> "Logistic Regression"; 30 | "Linear" -> "Naive Bayes"; 31 | "Linear" -> "Support Vector Machines (SVM)"; 32 | 33 | // Highlighted Logistic Regression 34 | "Logistic Regression" [fontcolor=red]; 35 | 36 | // Highlighted Naive Bayes 37 | "Naive Bayes" [fontcolor=red]; 38 | 39 | // Non-Linear Supervised Learning Algorithms 40 | "Non-Linear" -> "Decision Trees"; 41 | "Non-Linear" -> "k-Nearest Neighbors 
(k-NN)"; 42 | "Non-Linear" -> "Neural Networks"; 43 | "Non-Linear" -> "AdaBoost"; 44 | "Non-Linear" -> "ARIMA"; // Time series forecasting 45 | "Non-Linear" -> "XGBoost"; 46 | 47 | // Highlighted Decision Trees 48 | "Decision Trees" [fontcolor=red]; 49 | 50 | // Highlighted k-NN 51 | "k-Nearest Neighbors (k-NN)" [fontcolor=red]; 52 | 53 | // Highlighted AdaBoost 54 | "AdaBoost" [fontcolor=red]; 55 | 56 | // Unsupervised Learning split into Clustering and Dimensionality Reduction 57 | "Unsupervised Learning" -> "Clustering"; 58 | "Unsupervised Learning" -> "Dimensionality Reduction"; 59 | 60 | // Clustering Algorithms 61 | "Clustering" -> "K-means"; 62 | "Clustering" -> "Hierarchical Clustering"; 63 | "Clustering" -> "Gaussian Mixture Models (GMM)"; 64 | 65 | // Highlighted K-means 66 | "K-means" [fontcolor=red]; 67 | 68 | // Highlighted Hierarchical Clustering 69 | "Hierarchical Clustering" [fontcolor=red]; 70 | 71 | // Dimensionality Reduction Algorithms 72 | "Dimensionality Reduction" -> "Principal Component Analysis (PCA)"; 73 | "Dimensionality Reduction" -> "t-SNE"; 74 | } 75 | 76 | -------------------------------------------------------------------------------- /Lab09-Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "c6b704f5-a47c-4124-8be4-a27e13ca3503", 6 | "metadata": {}, 7 | "source": [ 8 | "# Logistic Regression\n", 9 | "\n", 10 | "## Exercise 1\n", 11 | "\n", 12 | "For the dataset below:\n", 13 | "\n", 14 | "1. Plot the decision surface of the Logistic Regression algorithm.\n", 15 | "2. Calculate the CVLOO error for Logistic Regression.\n", 16 | "3. Plot the decision surface of the ID3 algorithm (with entropy and no pruning).\n", 17 | "4. Calculate the CVLOO error for ID3." 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "id": "d0a7eabd-0aa1-4809-b58d-00de769ca895", 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "from sklearn.datasets import make_moons\n", 28 | "\n", 29 | "X, y = make_moons(n_samples=200, noise=0.2, random_state=42)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "id": "415d3052-6ae3-47c3-aa6e-8b7fec9a901c", 35 | "metadata": {}, 36 | "source": [ 37 | "## Exercise 2\n", 38 | "\n", 39 | "Given the dataset below, implement the gradient ascent formula from the lab. Starting from an initial $w=(0, 0, 0)$, apply 10 gradient ascent steps with $\\eta = 0.01$. What are the values of $w$ after the 10 steps? 
\n", 40 | "\n", 41 | "_Note: The component $x_0 = 1$ was already added to the dataset, so $w$ and $X$ have the same number of dimensions._" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "id": "20b07227-d6a6-4c6c-8253-1ea8ded7b496", 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "from sklearn.datasets import make_blobs\n", 52 | "import numpy as np\n", 53 | "\n", 54 | "X, y = make_blobs(n_samples=200, cluster_std=3, centers=2, random_state=42)\n", 55 | "\n", 56 | "def add_intercept(X):\n", 57 | " \"\"\"Add 1 as the first column of X\"\"\"\n", 58 | " return np.hstack((np.ones((len(X), 1)), X))\n", 59 | "\n", 60 | "X = add_intercept(X)" 61 | ] 62 | } 63 | ], 64 | "metadata": { 65 | "kernelspec": { 66 | "display_name": "Python 3 (ipykernel)", 67 | "language": "python", 68 | "name": "python3" 69 | }, 70 | "language_info": { 71 | "codemirror_mode": { 72 | "name": "ipython", 73 | "version": 3 74 | }, 75 | "file_extension": ".py", 76 | "mimetype": "text/x-python", 77 | "name": "python", 78 | "nbconvert_exporter": "python", 79 | "pygments_lexer": "ipython3", 80 | "version": "3.12.3" 81 | } 82 | }, 83 | "nbformat": 4, 84 | "nbformat_minor": 5 85 | } 86 | -------------------------------------------------------------------------------- /Lab05-Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "a90491ec-1f9f-479a-95b9-fd697241ae99", 6 | "metadata": {}, 7 | "source": [ 8 | "# Naive Bayes" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "5c43b866-e21d-4112-bb13-d7212ccefc62", 14 | "metadata": {}, 15 | "source": [ 16 | "## Exercise 1\n", 17 | "\n", 18 | "Given the following dataset, with input attributes $A$, $B$, and $C$ and target attribute $Y$, predict the entry $A=0, B=0, C=1$ using `BernoulliNB(alpha=1e-10)` and `predict_proba()` then manually calculate the probabilities using the formulas." 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 3, 24 | "id": "d44a550d-e8a9-4df8-9c74-6fdaa470e9f6", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import pandas as pd\n", 29 | "d = pd.DataFrame({'A': [0, 0, 1, 0, 1, 1, 1],\n", 30 | " 'B': [0, 1, 1, 0, 1, 0, 1],\n", 31 | " 'C': [1, 0, 0, 1, 1, 0, 0],\n", 32 | " 'Y': [0, 0, 0, 1, 1, 1, 1]})" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "3f18f1dd-f66d-45c9-abb9-5f80a4c95ddf", 38 | "metadata": {}, 39 | "source": [ 40 | "## Exercise 2\n", 41 | "\n", 42 | "Consider two random variables $X_1$ and $X_2$ and a label $Y$ assigned to each instance as in the dataset `d` created below.\n", 43 | "\n", 44 | "1. Classify the instance $X_1=0,X_2=0$ using Naive Bayes.\n", 45 | "\n", 46 | "1. According to Naive Bayes, what is the probability of this classification?\n", 47 | "\n", 48 | "1. How many probabilities are estimated by the model (check the `class_log_prior_` and `feature_log_prob_` attributes)?\n", 49 | "\n", 50 | "1. How many probabilities would be estimated by the model if there were $n$ features instead of 2?" 
51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 4, 56 | "id": "282a321a-de22-41ec-85c7-12833b244a65", 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "import pandas as pd\n", 61 | "from tools.pd_helpers import apply_counts\n", 62 | "\n", 63 | "d_grouped = pd.DataFrame({\n", 64 | "    'X1': [0, 0, 1, 1, 0, 0, 1, 1],\n", 65 | "    'X2': [0, 0, 0, 0, 1, 1, 1, 1],\n", 66 | "    'C' : [2, 18, 4, 1, 4, 1, 2, 18],\n", 67 | "    'Y' : [0, 1, 0, 1, 0, 1, 0, 1]})\n", 68 | "d = apply_counts(d_grouped, 'C')" 69 | ] 70 | } 71 | ], 72 | "metadata": { 73 | "kernelspec": { 74 | "display_name": "Python 3 (ipykernel)", 75 | "language": "python", 76 | "name": "python3" 77 | }, 78 | "language_info": { 79 | "codemirror_mode": { 80 | "name": "ipython", 81 | "version": 3 82 | }, 83 | "file_extension": ".py", 84 | "mimetype": "text/x-python", 85 | "name": "python", 86 | "nbconvert_exporter": "python", 87 | "pygments_lexer": "ipython3", 88 | "version": "3.12.3" 89 | } 90 | }, 91 | "nbformat": 4, 92 | "nbformat_minor": 5 93 | } 94 | -------------------------------------------------------------------------------- /Lab12-Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 1\n", 8 | "Agglomerative clustering on a 2d dataset\n", 9 | "\n", 10 | "Considering the points (-4, -2), (-3, -2), (-2, -2), (-1, -2), (1, -1), (1, 1), (2, 3), (3, 2), (3, 4), (4, 3):\n", 11 | "1. create a scatter plot using `pyplot`;\n", 12 | "1. create the dendrogram using `AgglomerativeClustering` with single-linkage and then color the scatter plot using the best 4 clusters;\n", 13 | "1. create the dendrogram using `AgglomerativeClustering` with complete-linkage and then color the scatter plot using the best 4 clusters;\n", 14 | "1. what is the difference in behaviour between the two types of linkage? What shapes do they tend to give to the clusters?" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# Exercise 2\n", 22 | "\"Natural\" clusters\n", 23 | "\n", 24 | "Given the dataset {0, 4, 5, 20, 25, 39, 43, 44}:\n", 25 | "1. find the natural clusters using agglomerative clustering with single-linkage and plot the clusters using a scatter plot;\n", 26 | "1. find the natural clusters using agglomerative clustering with average-linkage and plot the clusters using a scatter plot." 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "# Exercise 3\n", 34 | "\n", 35 | "For each of the following two datasets `d1` and `d2`:\n", 36 | "1. plot the points using `pyplot` and highlight (by using different colours for the points) the 2 clusters found by agglomerative clustering using single linkage;\n", 37 | "1. plot the points using `pyplot` and highlight (by using different colours for the points) the 2 clusters found by agglomerative clustering using average linkage."
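Not part of the original notebook — a hedged starter sketch for Exercise 3, shown for the circles dataset only (`d1`, rebuilt as in the next cell); `d2` is handled the same way:

```python
# Starter sketch: 2 clusters with single vs. average linkage on d1.
from sklearn.cluster import AgglomerativeClustering
from sklearn import datasets as ds
import matplotlib.pyplot as plt
import numpy as np

np.random.seed(0)
X1, _ = ds.make_circles(n_samples=1500, factor=.5, noise=.05)

for linkage in ('single', 'average'):
    labels = AgglomerativeClustering(n_clusters=2,
                                     linkage=linkage).fit_predict(X1)
    plt.scatter(X1[:, 0], X1[:, 1], c=labels, s=5)
    plt.title(f'{linkage} linkage')
    plt.show()
```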
38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 1, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "from sklearn import datasets as ds\n", 47 | "import numpy as np\n", 48 | "import pandas as pd\n", 49 | "\n", 50 | "np.random.seed(0)\n", 51 | "X1, _ = ds.make_circles(n_samples=1500, factor=.5, noise=.05)\n", 52 | "X2, _ = ds.make_blobs(n_samples=1500,\n", 53 | "                      cluster_std=[1.0, 2.5, 0.5],\n", 54 | "                      random_state=170)\n", 55 | "\n", 56 | "d1 = pd.DataFrame(X1, columns=['X1', 'X2'])\n", 57 | "d2 = pd.DataFrame(X2, columns=['X1', 'X2'])" 58 | ] 59 | } 60 | ], 61 | "metadata": { 62 | "kernelspec": { 63 | "display_name": "Python 3 (ipykernel)", 64 | "language": "python", 65 | "name": "python3" 66 | }, 67 | "language_info": { 68 | "codemirror_mode": { 69 | "name": "ipython", 70 | "version": 3 71 | }, 72 | "file_extension": ".py", 73 | "mimetype": "text/x-python", 74 | "name": "python", 75 | "nbconvert_exporter": "python", 76 | "pygments_lexer": "ipython3", 77 | "version": "3.12.3" 78 | } 79 | }, 80 | "nbformat": 4, 81 | "nbformat_minor": 4 82 | } 83 | -------------------------------------------------------------------------------- /Lab04-Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Exercise 1\n", 8 | "\n", 9 | "Given the following dataset with two input random variables $X_1$ and $X_2$ and a target variable $Y$, we want to compare two extreme decision tree algorithms:\n", 10 | "\n", 11 | "* OVERFIT will build a full standard ID3 decision tree, with no pruning;\n", 12 | "* UNDERFIT will make no splits at all, always having a single node (which is both root and decision).\n", 13 | "\n", 14 | "1. Plot the full OVERFIT tree.\n", 15 | "1. What is the CVLOO error for OVERFIT?\n", 16 | "1. What is the CVLOO error for UNDERFIT?" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import pandas as pd\n", 26 | "d = pd.DataFrame({'X1': [1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8],\n", 27 | "                   'X2': [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2],\n", 28 | "                   'Y' : [0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]})" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## Exercise 2\n", 36 | "\n", 37 | "Suppose we learned a decision tree from a training set with binary output values (either 0 or 1). We find that for a leaf node $l$, \n", 38 | "\n", 39 | "* there are $M$ training examples falling into it (labeled either 0 or 1); \n", 40 | "* its entropy is $H$. \n", 41 | "\n", 42 | "1. Create a graph using `matplotlib` that shows the entropy $H$ as a function of the proportion of 1s in $M$. The proportion should be on the $x$ axis (from 0 to 1), while the entropy should be on the $y$ axis.\n", 43 | "1. Create a simple algorithm which takes as input $M$ and $H$ and outputs the number of training examples misclassified by leaf node $l$.\n" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## Exercise 3\n", 51 | "\n", 52 | "Given the dataset below:\n", 53 | "1. plot the points and the labels using `matplotlib.pyplot.scatter`;\n", 54 | "1. train a regular decision tree, then plot its decision surface;\n", 55 | "1. 
create a new dataset with 1000 random points with coordinates between 0 and 10, which the diagonal line $X1 = X2$ perfectly separates in two classes. See [numpy.random.random_sample](https://numpy.org/doc/stable/reference/random/generated/numpy.random.random_sample.html#numpy.random.random_sample) for easily generating random numbers between 0 and 1.\n", 56 | "1. train a regular decision tree, then plot its decision surface on the new dataset." 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 2, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "import pandas as pd\n", 66 | "d = pd.DataFrame({'X1': [1, 2, 3, 3, 3, 4, 5, 5, 5],\n", 67 | " 'X2': [2, 3, 1, 2, 4, 4, 1, 2, 4],\n", 68 | " 'Y': [1, 1, 0, 0, 0, 0, 1, 1, 0]})" 69 | ] 70 | } 71 | ], 72 | "metadata": { 73 | "kernelspec": { 74 | "display_name": "Python 3 (ipykernel)", 75 | "language": "python", 76 | "name": "python3" 77 | }, 78 | "language_info": { 79 | "codemirror_mode": { 80 | "name": "ipython", 81 | "version": 3 82 | }, 83 | "file_extension": ".py", 84 | "mimetype": "text/x-python", 85 | "name": "python", 86 | "nbconvert_exporter": "python", 87 | "pygments_lexer": "ipython3", 88 | "version": "3.12.3" 89 | } 90 | }, 91 | "nbformat": 4, 92 | "nbformat_minor": 4 93 | } 94 | -------------------------------------------------------------------------------- /tools/plots.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from matplotlib.colors import ListedColormap 4 | from scipy.spatial import Voronoi, voronoi_plot_2d 5 | import matplotlib.pyplot as plt 6 | from scipy.cluster.hierarchy import dendrogram 7 | import matplotlib as mpl 8 | 9 | def add_ellipses(gmm, ax, colors): 10 | """ Draw 2d ellipses, on a given axis, corresponding to the 11 | covariances of the GMM. """ 12 | for n, color in enumerate(colors): 13 | if gmm.covariance_type == 'full': 14 | covariances = gmm.covariances_[n][:2, :2] 15 | elif gmm.covariance_type == 'tied': 16 | covariances = gmm.covariances_[:2, :2] 17 | elif gmm.covariance_type == 'diag': 18 | covariances = np.diag(gmm.covariances_[n][:2]) 19 | elif gmm.covariance_type == 'spherical': 20 | covariances = np.eye(gmm.means_.shape[1]) * gmm.covariances_[n] 21 | v, w = np.linalg.eigh(covariances) 22 | u = w[0] / np.linalg.norm(w[0]) 23 | angle = np.arctan2(u[1], u[0]) 24 | angle = 180 * angle / np.pi # convert to degrees 25 | v = 2. * np.sqrt(2.) * np.sqrt(v) 26 | ell = mpl.patches.Ellipse(gmm.means_[n, :2], v[0], v[1], 27 | angle=180 + angle, color=color) 28 | ell.set_clip_box(ax.bbox) 29 | ell.set_alpha(0.5) 30 | ax.add_artist(ell) 31 | ax.set_aspect('equal', 'datalim') 32 | 33 | def plot_dendrogram(model, **kwargs): 34 | # Create linkage matrix and then plot the dendrogram 35 | 36 | # create the counts of samples under each node 37 | counts = np.zeros(model.children_.shape[0]) 38 | n_samples = len(model.labels_) 39 | for i, merge in enumerate(model.children_): 40 | current_count = 0 41 | for child_idx in merge: 42 | if child_idx < n_samples: 43 | current_count += 1 # leaf node 44 | else: 45 | current_count += counts[child_idx - n_samples] 46 | counts[i] = current_count 47 | 48 | linkage_matrix = np.column_stack([model.children_, model.distances_, 49 | counts]).astype(float) 50 | 51 | # Plot the corresponding dendrogram 52 | dendrogram(linkage_matrix, **kwargs) 53 | 54 | def plot_decision_surface(clas, X, Y): 55 | """Plot a decision surface for 2 classes. 
""" 56 | # step size in the mesh 57 | h = .02 58 | # Create color maps 59 | cmap_light = ListedColormap(['lightgreen', 'lightcoral']) 60 | cmap_bold = ListedColormap(['green','red']) 61 | # Plot the decision boundary. For that, we will assign a color to each 62 | # point in the mesh [x_min, x_max]x[y_min, y_max]. 63 | x_min, x_max = X['X1'].min() - 1, X['X1'].max() + 1 64 | y_min, y_max = X['X2'].min() - 1, X['X2'].max() + 1 65 | xx, yy = np.meshgrid(np.arange(x_min, x_max, h), 66 | np.arange(y_min, y_max, h)) 67 | Z = clas.predict(pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=['X1', 'X2'])) 68 | 69 | # Put the result into a color plot 70 | Z = Z.reshape(xx.shape) 71 | fig, ax = plt.subplots(figsize=(6, 6)) 72 | plt.pcolormesh(xx, yy, Z, cmap=cmap_light, shading='auto') 73 | 74 | # Plot also the training points 75 | plt.scatter(X['X1'], X['X2'], c=Y, cmap=cmap_bold, s=20) 76 | plt.xlabel('X1') 77 | plt.ylabel('X2') 78 | plt.xlim(xx.min(), xx.max()) 79 | plt.ylim(yy.min(), yy.max()) 80 | plt.title("Classification") 81 | plt.show() 82 | 83 | def plot_decision_surface_knn(knn, X, Y, voronoi=False): 84 | """Plot a decision surface for 2 classes, optionally 85 | overlaying the voronoi diagram. """ 86 | # step size in the mesh 87 | h = .02 88 | # Create color maps 89 | cmap_light = ListedColormap(['lightgreen', 'lightcoral']) 90 | cmap_bold = ListedColormap(['green','red']) 91 | # Plot the decision boundary. For that, we will assign a color to each 92 | # point in the mesh [x_min, x_max]x[y_min, y_max]. 93 | x_min, x_max = X['X1'].min() - 1, X['X1'].max() + 1 94 | y_min, y_max = X['X2'].min() - 1, X['X2'].max() + 1 95 | xx, yy = np.meshgrid(np.arange(x_min, x_max, h), 96 | np.arange(y_min, y_max, h)) 97 | Z = knn.predict(pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=['X1', 'X2'])) 98 | 99 | # Put the result into a color plot 100 | Z = Z.reshape(xx.shape) 101 | fig, ax = plt.subplots(figsize=(6, 6)) 102 | plt.pcolormesh(xx, yy, Z, cmap=cmap_light, shading='auto') 103 | 104 | if voronoi: 105 | vor = Voronoi(X) 106 | voronoi_plot_2d(vor, show_points=False, ax=ax) 107 | # Plot also the training points 108 | plt.scatter(X['X1'], X['X2'], c=Y, cmap=cmap_bold, s=20) 109 | plt.xlim(xx.min(), xx.max()) 110 | plt.ylim(yy.min(), yy.max()) 111 | plt.title("k-NN Classification") 112 | plt.show() -------------------------------------------------------------------------------- /Lab13-Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# k-Means\n", 8 | "\n", 9 | "## Exercise 1\n", 10 | "k-Means on two-dimensional data with cluster separators\n", 11 | "\n", 12 | "For the dataset below, plot the clusters and the centroids of the k-means algorithm for each iteration until convergence. The initial centroids will be the points A, D and G (therefore the algorithm will find 3 clusters). Include in each plot the Voronoi diagram for the centroids, to highlight the cluster separation." 
13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "import pandas as pd\n", 22 | "data = {\n", 23 | "    'A':[2, 10], 'B':[2, 5], 'C':[8, 4], 'D':[5, 8], \n", 24 | "    'E':[7, 5], 'F':[6, 4], 'G':[1, 2], 'H':[4, 9]\n", 25 | "}\n", 26 | "d = pd.DataFrame.from_dict(data, orient='index', columns=['X', 'Y'])" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "## Exercise 2\n", 34 | "k-Means on two-dimensional data\n", 35 | "\n", 36 | "For the dataset below and the initial centroids A, D and G, independently implement the k-Means algorithm (i.e. do not use the one from `sklearn`) and plot the clusters and centroids for each iteration." 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 2, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "import pandas as pd\n", 46 | "data = {\n", 47 | "    'A':[2, 10], 'B':[2, 5], 'C':[8, 4], 'D':[5, 8], \n", 48 | "    'E':[7, 5], 'F':[6, 4], 'G':[1, 2], 'H':[4, 9]\n", 49 | "}\n", 50 | "d = pd.DataFrame.from_dict(data, orient='index', columns=['X', 'Y'])" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "## Exercise 3\n", 58 | "k-Means on an external dataset with starting centroids\n", 59 | "\n", 60 | "Apply k-means on this [two-dimensional dataset](https://profs.info.uaic.ro/~ciortuz/ML.ex-book/res/CMU.2004f.TM+AM.HW3.pr5.cl.dat) using these [starting centroids](https://profs.info.uaic.ro/~ciortuz/ML.ex-book/res/CMU.2004f.TM+AM.HW3.pr5.init.dat). Plot the clusters and centroids after each iteration until convergence. What is unusual about the first iteration?" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## Exercise 4\n", 68 | "Anisotropically distributed data\n", 69 | "\n", 70 | "Run the k-means algorithm for the datasets `d1` and `d2` with $k=3$ and the default parameters. \n", 71 | "1. Plot the resulting clusters.\n", 72 | "1. Which clusters look more 'natural' and why?" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 3, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "from sklearn.datasets import make_blobs\n", 82 | "import numpy as np\n", 83 | "\n", 84 | "n_samples = 1500\n", 85 | "random_state = 170\n", 86 | "X, y = make_blobs(n_samples=n_samples, random_state=random_state)\n", 87 | "# Anisotropically distributed data\n", 88 | "transformation = [[0.60834549, -0.63667341], [-0.40887718, 0.85253229]]\n", 89 | "anis = np.dot(X, transformation)\n", 90 | "# Compare these datasets\n", 91 | "d1, d2 = X, anis" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "## Exercise 5\n", 99 | "k-means and noise\n", 100 | "\n", 101 | "Consider the dataset `d` below and the two sets of starting centroids `c1` and `c2`.\n", 102 | "\n", 103 | "1. Run k-means ($k=3$ and the default parameters), first starting with `c1` and then starting with `c2`. (You might want to also use `n_init=1` to prevent a warning.)\n", 104 | "1. Plot the resulting clusters for each of the two runs.\n", 105 | "1. In which of the two runs do the clusters look more 'natural', and why?"
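A hedged sketch for Exercise 5, rebuilding `d`, `c1` and `c2` exactly as in the next cell and running one k-means fit per starting set:

```python
# Sketch for Exercise 5: one KMeans run per set of starting centroids.
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
import numpy as np

d, _ = make_blobs(n_samples=1500, random_state=110)
d = np.append(d, [[-10, 15]], axis=0)          # the noise point
c1 = np.array([[-6, 2], [-10, 15], [3, 3]])
c2 = np.array([[-10, 3], [-2, 2], [3, 3]])

for init in (c1, c2):
    km = KMeans(n_clusters=3, init=init, n_init=1).fit(d)
    plt.scatter(d[:, 0], d[:, 1], c=km.labels_, s=5)
    plt.scatter(*km.cluster_centers_.T, marker='x', c='black', s=80)
    plt.show()
```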
106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 4, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "from sklearn.datasets import make_blobs\n", 115 | "import numpy as np\n", 116 | "\n", 117 | "n_samples = 1500\n", 118 | "random_state = 110\n", 119 | "d, _ = make_blobs(n_samples=n_samples, random_state=random_state)\n", 120 | "# Dataset\n", 121 | "d = np.append(d, [[-10, 15]], axis=0)\n", 122 | "# Starting centroids\n", 123 | "c1 = np.array([[-6, 2], [-10, 15], [3, 3]])\n", 124 | "c2 = np.array([[-10, 3], [-2, 2], [3, 3]])" 125 | ] 126 | } 127 | ], 128 | "metadata": { 129 | "kernelspec": { 130 | "display_name": "Python 3 (ipykernel)", 131 | "language": "python", 132 | "name": "python3" 133 | }, 134 | "language_info": { 135 | "codemirror_mode": { 136 | "name": "ipython", 137 | "version": 3 138 | }, 139 | "file_extension": ".py", 140 | "mimetype": "text/x-python", 141 | "name": "python", 142 | "nbconvert_exporter": "python", 143 | "pygments_lexer": "ipython3", 144 | "version": "3.12.3" 145 | } 146 | }, 147 | "nbformat": 4, 148 | "nbformat_minor": 4 149 | } 150 | -------------------------------------------------------------------------------- /Lab11-Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 1\n", 8 | "AdaBoost on a uni-dimensional array\n", 9 | "\n", 10 | "Given the dataset below and the AdaBoost algorithm using the usual decision stumps as weak learners:\n", 11 | "\n", 12 | "1. Plot the dataset using `pyplot`.\n", 13 | "2. Draw the decision surface corresponding to the first weak learner.\n", 14 | "3. What are the values of $\epsilon_1$ (training error of the first decision stump) and $\alpha_1$ (the \"weight\" of the vote of the first decision stump)?\n", 15 | "4. What will be the updated weights of the training instances, after the first update?\n", 16 | "5. Draw the decision surface after adding the second weak learner." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import pandas as pd\n", 26 | "d = pd.DataFrame({\n", 27 | "    'X': [-1, -0.7, -0.4, -0.1, 0.2, 0.5, 0.8],\n", 28 | "    'Y': [1, 1, 1, -1, -1, -1, 1]\n", 29 | "})\n", 30 | "X, Y = d[['X']], d['Y']" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "# Exercise 2\n", 38 | "AdaBoost on a two-dimensional array\n", 39 | "\n", 40 | "Given the dataset below and the AdaBoost algorithm using the usual decision stumps as weak learners:\n", 41 | "1. Plot the dataset using `pyplot`.\n", 42 | "2. Draw the decision surface corresponding to the first weak learner as chosen by `AdaBoostClassifier` with the default `base_estimator`.\n", 43 | "3. Show why AdaBoost chose that learner, by plotting the decision surface of all the candidates and their corresponding error rate."
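A hedged sketch for Exercise 2: fit AdaBoost for a single round and inspect the stump it selected (dataset re-created from the next cell; `random_state=0` is an assumption):

```python
# Sketch for Exercise 2: the first weak learner picked by AdaBoost.
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
import pandas as pd

d = pd.DataFrame({'X1': [1, 2, 2.75, 3.25, 4, 5],
                  'X2': [1, 2, 1.25, 2.75, 2.25, 3.5],
                  'Y':  [1, 1, -1, 1, -1, -1]})
X, Y = d[['X1', 'X2']], d['Y']

ada = AdaBoostClassifier(n_estimators=1, random_state=0).fit(X, Y)
stump = ada.estimators_[0]   # the default weak learner is a depth-1 tree
plot_tree(stump, feature_names=['X1', 'X2'])
plt.show()
```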
44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 2, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "import pandas as pd\n", 53 | "d = pd.DataFrame({\n", 54 | " 'X1': [1, 2, 2.75, 3.25, 4, 5],\n", 55 | " 'X2': [1, 2, 1.25, 2.75, 2.25, 3.5],\n", 56 | " 'Y': [1, 1, -1, 1, -1, -1]\n", 57 | "})\n", 58 | "X, Y = d[['X1', 'X2']], d['Y']" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "# Exercise 3\n", 66 | "AdaBoost vs ID3\n", 67 | "\n", 68 | "Given the dataset below:\n", 69 | "1. Plot the dataset using `pyplot`.\n", 70 | "2. Compare the training error of the AdaBoost algorithm (using the usual decision stumps as weak learners) and the ID3 algorithm.\n", 71 | "2. Compare the CVLOO error of the AdaBoost algorithm (using the usual decision stumps as weak learners) and the ID3 algorithm." 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 3, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "from scipy.stats import norm\n", 81 | "import pandas as pd\n", 82 | "import numpy as np\n", 83 | "x_red = norm.rvs(0, 1, 100, random_state=1)\n", 84 | "y_red = norm.rvs(0, 1, 100, random_state=2)\n", 85 | "x_green = norm.rvs(1, 1, 100, random_state=3)\n", 86 | "y_green = norm.rvs(1, 1, 100, random_state=4)\n", 87 | "d = pd.DataFrame({\n", 88 | " 'X1': np.concatenate([x_red,x_green]),\n", 89 | " 'X2': np.concatenate([y_red,y_green]),\n", 90 | " 'Y': [1]*100+[0]*100\n", 91 | "})\n", 92 | "X, Y = d[['X1', 'X2']], d['Y']" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "## Exercise 4\n", 100 | "Finding the optimum number of weak learners\n", 101 | "\n", 102 | "For the dataset below:\n", 103 | "1. plot the points using `pyplot.scatter`;\n", 104 | "1. plot a line chart using `pyplot.plot` that shows the training error and the CVLOO error of AdaBoost using between 1 and 15 weak learners.\n", 105 | "1. What is the best number of weak learners in this case?" 
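A hedged sketch for Exercise 4 (dataset re-created from the next cell; note the CVLOO loop refits AdaBoost 200 times per point, so it is slow):

```python
# Sketch for Exercise 4: training vs. CVLOO error over 1..15 weak learners.
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import cross_val_score, LeaveOneOut
import matplotlib.pyplot as plt
from scipy.stats import norm
import numpy as np
import pandas as pd

x_red = norm.rvs(0, 1, 100, random_state=1)
y_red = norm.rvs(0, 1, 100, random_state=2)
x_green = norm.rvs(1, 1, 100, random_state=3)
y_green = norm.rvs(1, 1, 100, random_state=4)
d = pd.DataFrame({'X1': np.concatenate([x_red, x_green]),
                  'X2': np.concatenate([y_red, y_green]),
                  'Y': [1] * 100 + [0] * 100})
X, Y = d[['X1', 'X2']], d['Y']

ns = range(1, 16)
train_err, cv_err = [], []
for n in ns:
    ada = AdaBoostClassifier(n_estimators=n, random_state=0)
    train_err.append(1 - ada.fit(X, Y).score(X, Y))
    cv_err.append(1 - cross_val_score(ada, X, Y, cv=LeaveOneOut()).mean())

plt.plot(ns, train_err, label='training error')
plt.plot(ns, cv_err, label='CVLOO error')
plt.xlabel('number of weak learners')
plt.legend()
plt.show()
```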
106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 4, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "from scipy.stats import norm\n", 115 | "import pandas as pd\n", 116 | "import numpy as np\n", 117 | "x_red = norm.rvs(0, 1, 100, random_state=1)\n", 118 | "y_red = norm.rvs(0, 1, 100, random_state=2)\n", 119 | "x_green = norm.rvs(1, 1, 100, random_state=3)\n", 120 | "y_green = norm.rvs(1, 1, 100, random_state=4)\n", 121 | "d = pd.DataFrame({\n", 122 | "    'X1': np.concatenate([x_red,x_green]),\n", 123 | "    'X2': np.concatenate([y_red,y_green]),\n", 124 | "    'Y': [1]*100+[0]*100\n", 125 | "})\n", 126 | "X, Y = d[['X1', 'X2']], d['Y']" 127 | ] 128 | } 129 | ], 130 | "metadata": { 131 | "kernelspec": { 132 | "display_name": "Python 3 (ipykernel)", 133 | "language": "python", 134 | "name": "python3" 135 | }, 136 | "language_info": { 137 | "codemirror_mode": { 138 | "name": "ipython", 139 | "version": 3 140 | }, 141 | "file_extension": ".py", 142 | "mimetype": "text/x-python", 143 | "name": "python", 144 | "nbconvert_exporter": "python", 145 | "pygments_lexer": "ipython3", 146 | "version": "3.12.3" 147 | } 148 | }, 149 | "nbformat": 4, 150 | "nbformat_minor": 4 151 | } 152 | -------------------------------------------------------------------------------- /Lab14-Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# k-Means\n", 8 | "\n", 9 | "## Exercise 1\n", 10 | "Intra-cluster cohesion increase\n", 11 | "\n", 12 | "For the dataset `d` and `start_centroids` below, show that the $J$ criterion (or inertia) monotonically decreases for each successive iteration of the algorithm by plotting it on a line chart using `pyplot.plot`." 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "from sklearn.datasets import make_blobs\n", 22 | "import numpy as np\n", 23 | "import pandas as pd\n", 24 | "\n", 25 | "n_samples = 1500\n", 26 | "random_state = 170\n", 27 | "X, y = make_blobs(n_samples=n_samples, random_state=random_state)\n", 28 | "d = pd.DataFrame(X, columns=['X1', 'X2'])\n", 29 | "start_centroids = np.array([[0, 0.1], [0, 0.2], [0, 0.3]])" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "## Exercise 2\n", 37 | "Intra-cluster cohesion calculation\n", 38 | "\n", 39 | "Given the dataset `d` and `start_centroids` below:\n", 40 | "1. Run the k-means algorithm from `sklearn` on the dataset `d` and plot the resulting clusters using a scatterplot.\n", 41 | "1. Print the intra-cluster cohesion as computed by the algorithm for the resulting clusters.\n", 42 | "1. Independently calculate the intra-cluster cohesion for the resulting clusters (it should match the one computed by the algorithm)."
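A hedged sketch for parts 2 and 3 of Exercise 2: compare `inertia_` with an independent computation of $J$ (data and `start_centroids` rebuilt as in the next cell):

```python
# Sketch for Exercise 2: KMeans' inertia_ vs. a manual J computation.
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import numpy as np
import pandas as pd

X, _ = make_blobs(n_samples=1500, random_state=110)
d = pd.DataFrame(X, columns=['X1', 'X2'])
start_centroids = np.array([[0, 0.1], [0, 0.2], [0, 0.3]])

km = KMeans(n_clusters=3, init=start_centroids, n_init=1).fit(d)
print('inertia_ :', km.inertia_)

# J = sum of squared distances from each point to its assigned centroid.
diffs = d.to_numpy() - km.cluster_centers_[km.labels_]
print('manual J :', (diffs ** 2).sum())
```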
43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "from sklearn.datasets import make_blobs\n", 52 | "import numpy as np\n", 53 | "import pandas as pd\n", 54 | "\n", 55 | "n_samples = 1500\n", 56 | "random_state = 110\n", 57 | "X, y = make_blobs(n_samples=n_samples, random_state=random_state)\n", 58 | "d = pd.DataFrame(X, columns=['X1', 'X2'])\n", 59 | "start_centroids = np.array([[0, 0.1], [0, 0.2], [0, 0.3]])" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "## Exercise 3\n", 67 | "k-means++\n", 68 | "\n", 69 | "Show that _k-means++_ provides better centroid initialisation by comparing the average value of $J$ (i.e. the `inertia_` attribute) for _random_ initialisations with _k-means++_ initialisations. More specifically:\n", 70 | "* on the dataset `d` below, run the k-means algorithm 1000 times using _random_ initialisation and record the `inertia_` attribute. Plot the histogram of the recorded values and print their mean;\n", 71 | "* repeat the process using _k-means++_;\n", 72 | "* Which method performs better?\n", 73 | "\n", 74 | "For the `KMeans` function, make sure to always use the parameters `max_iter=1, n_init=1, algorithm='full', n_clusters=3, random_state=None` to emphasize the effect." 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 3, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "from sklearn.datasets import make_blobs\n", 84 | "import numpy as np\n", 85 | "import pandas as pd\n", 86 | "\n", 87 | "n_samples = 1500\n", 88 | "random_state = 100\n", 89 | "X, y = make_blobs(n_samples=n_samples, random_state=random_state)\n", 90 | "d = pd.DataFrame(X, columns=['X1', 'X2'])" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "## Exercise 4\n", 98 | "k-means++ calculation\n", 99 | "\n", 100 | "Given a dataset containing the points A(-1, 0), B(1, 0), C(0, 1), D(3, 0) and E(3, 1):\n", 101 | "1. Plot the points using a scatterplot.\n", 102 | "1. Consider the k-means algorithm with $k=2$ and `random` initialisation: during the initial centroid selection (before the first iteration), if the first centroid was chosen at random to be the point A, what is the probability that the next centroid will be chosen from the set {B, C}?\n", 103 | "1. Calculate the same probability, but this time for `k-means++` instead of `random`." 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "## Exercise 5\n", 111 | "The best value for $k$\n", 112 | "\n", 113 | "For the dataset `d` below, find the number of clusters for the k-means algorithm using the \"elbow\" method. Make sure to plot the points using a scatterplot and the line chart for the $J$ criterion."
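A hedged sketch for Exercise 5's elbow curve (dataset rebuilt as in the next cell; the range of candidate $k$ values is an assumption):

```python
# Sketch for Exercise 5: plot J (inertia_) against k and look for the elbow.
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
import pandas as pd

X, _ = make_blobs(n_samples=1500, centers=6, random_state=160)
d = pd.DataFrame(X, columns=['X1', 'X2'])

ks = range(1, 11)
J = [KMeans(n_clusters=k, n_init=10, random_state=0).fit(d).inertia_
     for k in ks]
plt.plot(ks, J, marker='o')
plt.xlabel('k')
plt.ylabel('J (inertia)')
plt.show()
```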
114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 4, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "from sklearn.datasets import make_blobs\n", 123 | "import numpy as np\n", 124 | "import pandas as pd\n", 125 | "\n", 126 | "n_samples = 1500\n", 127 | "random_state = 160\n", 128 | "X, y = make_blobs(n_samples=n_samples, centers=6, random_state=random_state)\n", 129 | "d = pd.DataFrame(X, columns=['X1', 'X2'])" 130 | ] 131 | } 132 | ], 133 | "metadata": { 134 | "kernelspec": { 135 | "display_name": "Python 3 (ipykernel)", 136 | "language": "python", 137 | "name": "python3" 138 | }, 139 | "language_info": { 140 | "codemirror_mode": { 141 | "name": "ipython", 142 | "version": 3 143 | }, 144 | "file_extension": ".py", 145 | "mimetype": "text/x-python", 146 | "name": "python", 147 | "nbconvert_exporter": "python", 148 | "pygments_lexer": "ipython3", 149 | "version": "3.12.3" 150 | } 151 | }, 152 | "nbformat": 4, 153 | "nbformat_minor": 4 154 | } 155 | -------------------------------------------------------------------------------- /Lab07-Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Estimating the parameters of a distribution\n", 8 | "\n", 9 | "## Exercise 1\n", 10 | "Linked Bernoulli distributions\n", 11 | "\n", 12 | "Consider two coins. The probability of getting \"heads\" is $p$ for the first coin and $2p$ for the second. We then toss the first coin 5 times and the second 10 times, as simulated in the code below:" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "name": "stdout", 22 | "output_type": "stream", 23 | "text": [ 24 | "X: [0 1 0 0 0]\n", 25 | "Y: [1 1 1 1 1 1 1 0 1 1]\n" 26 | ] 27 | } 28 | ], 29 | "source": [ 30 | "from scipy.stats import bernoulli\n", 31 | "p = 0.3\n", 32 | "X = bernoulli.rvs(p, size=5, random_state=1)\n", 33 | "Y = bernoulli.rvs(2*p, size=10, random_state=2)\n", 34 | "print('X:', X) # 1 = heads; 0 = tails\n", 35 | "print('Y:', Y)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "1. Plot the log-likelihood of the data as a function of $\\hat{p}$. Note that the data consists of both `X` and `Y`, so the likelihood function becomes $L(\\hat{p} | X,Y)$\n", 43 | "1. Experimentally determine the MLE estimation for $p$ corresponding to the observations in `X` and `Y`.\n", 44 | "1. Analytically determine the MLE estimation for $p$." 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "## Exercise 2\n", 52 | "Poisson distribution\n", 53 | "\n", 54 | "A call centre keeps track of the number of phone calls received every day. 
In order to accurately plan the resources, the number of calls for the last 100 days is modelled as a random variable $X$ following a Poisson distribution with an unknown parameter $\lambda$.\n", 55 | "\n", 56 | "We will simulate this for $\lambda = 4$ using the `poisson.rvs` function:" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 2, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/plain": [ 67 | "array([2, 2, 3, 4, 4, 6, 1, 5, 3, 5])" 68 | ] 69 | }, 70 | "execution_count": 2, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": [ 76 | "from scipy.stats import poisson\n", 77 | "lambda_ = 4\n", 78 | "X = poisson.rvs(lambda_, size=100, random_state=1)\n", 79 | "X[:10] # Calls received in the first 10 days" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "1. Plot the histogram of the data.\n", 87 | "1. Plot the log-likelihood of the data as a function of $\hat{\lambda}$.\n", 88 | "1. Experimentally determine the MLE estimation for $\lambda$ corresponding to the observations in `X`.\n", 89 | "1. Analytically determine the MLE estimation for $\lambda$." 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "## Exercise 3\n", 97 | "Uniform distribution\n", 98 | "\n", 99 | "Consider a hashing function that returns a number in the interval $[-w, w]$ for any file. Any value in that interval is equally likely to appear, so the hash values follow a uniform distribution $U(-w,w)$. We can simulate the hashes for 100 files using the `uniform.rvs` function:" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 3, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "array([-1.65955991, 4.40648987, -9.9977125 ])" 111 | ] 112 | }, 113 | "execution_count": 3, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "from scipy.stats import uniform\n", 120 | "w = 10\n", 121 | "X = uniform.rvs(-w, 2*w, size=100, random_state=1)\n", 122 | "X[:3] # The first 3 hashes" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "1. Plot the histogram of the data.\n", 130 | "1. Experimentally determine the MLE estimation for $w$ given the observations in `X`.\n", 131 | "1. Analytically determine the MLE estimation for $w$." 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "## Exercise 4\n", 139 | "Exponential distribution\n", 140 | "\n", 141 | "Seismologists are tracking the time interval between consecutive major earthquakes.
They noticed that it follows an exponential distribution $Exp(\lambda)$.\n", 142 | "\n", 143 | "To simulate the observed intervals between 100 earthquakes that occur on average once per year, we can use the `expon.rvs` function:" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 4, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "data": { 153 | "text/plain": [ 154 | "array([5.39605837e-01, 1.27412525e+00, 1.14381359e-04])" 155 | ] 156 | }, 157 | "execution_count": 4, 158 | "metadata": {}, 159 | "output_type": "execute_result" 160 | } 161 | ], 162 | "source": [ 163 | "from scipy.stats import expon \n", 164 | "lambda_ = 1 # Once per year, on average\n", 165 | "X = expon.rvs(scale=1/lambda_, size=100, random_state=1)\n", 166 | "X[:3] # The first 3 intervals " 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "1. Plot the histogram of the data.\n", 174 | "1. Experimentally determine the MLE estimation for $\lambda$ corresponding to the observations in `X`.\n", 175 | "1. Analytically determine the MLE estimation for $\lambda$." 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "## Exercise 5\n", 183 | "Variance of a Gaussian distribution\n", 184 | "\n", 185 | "Consider a random variable $X$ representing the size of pollen grains, following a normal (Gaussian) distribution with known mean 0 and a variance $\sigma^2$, formally written as $X \sim N(0, \sigma^2)$. Load the observations for this variable as a `numpy` array, by running the following code:" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 5, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "from sklearn.datasets import fetch_openml\n", 195 | "pollen = fetch_openml('pollen', version=1, as_frame=False, parser='auto')\n", 196 | "X = pollen.data[:,1]" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "1. Plot the histogram of the data.\n", 204 | "1. Experimentally find the value of $\hat{\sigma}^2_\text{MLE}$ by testing candidates in the interval $[1, 10]$. Note that since the dataset is quite large, calculating the likelihood of the data can quickly result in an underflow on most systems. Try using the log-likelihood instead.\n", 205 | "1. Analytically find the estimator $\hat{\sigma}^2_\text{MLE}$ and apply the resulting formula to the dataset."
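A hedged sketch for parts 2 and 3 of Exercise 5: grid-search the log-likelihood over candidate variances and compare with the analytic estimator $\hat{\sigma}^2_\text{MLE} = \frac{1}{n}\sum_i x_i^2$, which holds here because the mean is known to be 0 (the grid resolution is an assumption):

```python
# Sketch for Exercise 5: experimental vs. analytic MLE of the variance.
from sklearn.datasets import fetch_openml
from scipy.stats import norm
import numpy as np

pollen = fetch_openml('pollen', version=1, as_frame=False, parser='auto')
X = pollen.data[:, 1]

sigmas2 = np.linspace(1, 10, 200)          # candidate variances
loglik = [norm.logpdf(X, loc=0, scale=np.sqrt(s2)).sum() for s2 in sigmas2]
print('grid MLE     :', sigmas2[np.argmax(loglik)])
print('analytic MLE :', np.mean(X ** 2))   # (1/n) * sum of squares
```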
206 | ] 207 | } 208 | ], 209 | "metadata": { 210 | "kernelspec": { 211 | "display_name": "Python 3 (ipykernel)", 212 | "language": "python", 213 | "name": "python3" 214 | }, 215 | "language_info": { 216 | "codemirror_mode": { 217 | "name": "ipython", 218 | "version": 3 219 | }, 220 | "file_extension": ".py", 221 | "mimetype": "text/x-python", 222 | "name": "python", 223 | "nbconvert_exporter": "python", 224 | "pygments_lexer": "ipython3", 225 | "version": "3.12.3" 226 | } 227 | }, 228 | "nbformat": 4, 229 | "nbformat_minor": 4 230 | } 231 | -------------------------------------------------------------------------------- /Lab01-Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Elementary Notions in Probability" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Exercise 1*\n", 15 | "(Events, implementation)\n", 16 | "\n", 17 | "Illustrate DeMorgan's laws using the `plot_venn()` function and standard Python set operations:\n", 18 | "1. $\neg (A\cup B) = \neg A \cap \neg B$\n", 19 | "1. $\neg (A\cap B) = \neg A \cup \neg B$" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "from tools.venn import A, B, omega, plot_venn\n", 29 | "# First law\n", 30 | "\n", 31 | "# Second law" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Exercise 2\n", 39 | "(Product of sample spaces, implementation)\n", 40 | "\n", 41 | "Two dice are thrown simultaneously. Calculate the probability that the sum is 11." 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "from itertools import product\n", 51 | "\n", 52 | "# Code here" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "## Exercise 3\n", 60 | "(Conditional probabilities, implementation)\n", 61 | "\n", 62 | "The event S represents the sum of two dice. What is the probability that S=11 knowing that S is a prime?" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 3, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "from itertools import product\n", 72 | "\n", 73 | "# Code here" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "## Exercise 4* - Monty Hall problem\n", 81 | "(Bayes' theorem, implementation and analysis)\n", 82 | "\n", 83 | "Suppose you are in a game show and you're given the choice of three doors; behind one is a car, behind the others, goats. You pick door no. 1, but don't open it. The game host (who knows what is behind each door) then opens a door which always has a goat (in this case opens door no. 2) and asks you whether you still want to open door no. 1 or switch to door no. 3. \n", 84 | "\n", 85 | "What are the probabilities of finding the car in the two cases?\n", 86 | "\n", 87 | "1. Create a Python simulation for 1000 games to estimate the answer.\n", 88 | "2. Find the answer using the `tools.stats.probability_weighted` function (see [this approach](http://web.mit.edu/neboat/Public/6.042/probabilityintro.pdf) for constructing the sample space).\n", 89 | "3. Find the answer mathematically by applying Bayes' theorem and the law of total probability."
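A hedged sketch for part 1 of the Monty Hall exercise — a plain `random` simulation of the stated game; parts 2 and 3 are left as in the exercise:

```python
# Sketch for Exercise 4, part 1: estimate the win probability by simulation.
import random

def play(switch, n_games=1000):
    wins = 0
    for _ in range(n_games):
        car = random.randrange(3)
        pick = random.randrange(3)
        # The host opens a door that is neither the pick nor the car,
        # so it always hides a goat.
        opened = next(d for d in range(3) if d not in (pick, car))
        if switch:
            pick = next(d for d in range(3) if d not in (pick, opened))
        wins += (pick == car)
    return wins / n_games

print('stay  :', play(switch=False))   # expected around 1/3
print('switch:', play(switch=True))    # expected around 2/3
```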
90 | ]
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {},
95 | "source": [
96 | "## Exercise 5\n",
97 | "(Probabilities, analysis)\n",
98 | "\n",
99 | "Using the definition of the probability, prove that:\n",
100 | "\n",
101 | "1. $P(\\neg A) = 1-P(A)$\n",
102 | "1. $A \\subseteq B \\Rightarrow P(A) \\leq P(B)$\n",
103 | "\n",
104 | "## Exercise 6*\n",
105 | "(Probabilities, analysis)\n",
106 | "\n",
107 | "Using the definition of the probability, prove that:\n",
108 | "\n",
109 | "1. $P(A \\setminus B) = P(A) - P(A \\cap B)$\n",
110 | "1. $P(A \\cup B) = P(A) + P(B) - P(A \\cap B)$"
111 | ]
112 | },
113 | {
114 | "cell_type": "markdown",
115 | "metadata": {},
116 | "source": [
117 | "## Exercise 7\n",
118 | "\n",
119 | "(Independent events, analysis)\n",
120 | "\n",
121 | "Two soldiers A and B are doing target practice. The probability that soldier A misses is 1/5. The probability that soldier B misses is 1/2. The probability that both miss at the same time is 1/10.\n",
122 | "\n",
123 | "1. Are the two events independent?\n",
124 | "1. What is the probability that at least one of the soldiers misses?\n",
125 | "1. What is the probability that exactly one of the soldiers misses?"
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {},
131 | "source": [
132 | "## Exercise 8\n",
133 | "(Independent events, implementation)\n",
134 | "\n",
135 | "Consider the event space corresponding to two tosses of a fair coin, and the events A \"heads on toss 1\", B \"heads on toss 2\" and C \"the two tosses are equal\". Using the `tools.stats.probability` function, find out whether:\n",
136 | "\n",
137 | "1. events A and B are independent;\n",
138 | "1. events A and C are independent."
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": 4,
144 | "metadata": {},
145 | "outputs": [],
146 | "source": [
147 | "from tools.stats import probability\n",
148 | "\n",
149 | "# Code here"
150 | ]
151 | },
152 | {
153 | "cell_type": "markdown",
154 | "metadata": {},
155 | "source": [
156 | "# Elementary Notions in Statistics"
157 | ]
158 | },
159 | {
160 | "cell_type": "markdown",
161 | "metadata": {},
162 | "source": [
163 | "## Exercise 9*\n",
164 | "(Random variables, implementation)\n",
165 | "\n",
166 | "Give an example of a real phenomenon modelled by the following discrete distributions and plot an illustrative pmf for that phenomenon using `matplotlib` and `scipy.stats` functions:\n",
167 | "\n",
168 | "1. binomial;\n",
169 | "2. geometric."
170 | ]
171 | },
172 | {
173 | "cell_type": "markdown",
174 | "metadata": {},
175 | "source": [
176 | "## Exercise 10*\n",
177 | "(Random variables, implementation)\n",
178 | "\n",
179 | "Give an example of a real phenomenon modelled by the following continuous distributions and plot an illustrative pdf for that phenomenon using `matplotlib` and `scipy.stats` functions:\n",
180 | "\n",
181 | "1. gamma;\n",
182 | "2. Pareto."
183 | ]
184 | },
185 | {
186 | "cell_type": "markdown",
187 | "metadata": {},
188 | "source": [
189 | "## Exercise 11*\n",
190 | "\n",
191 | "(Random variables, implementation)\n",
192 | "\n",
193 | "Suppose you measure the temperature on 10 consecutive days with a thermometer that has a small random error. \n",
194 | "\n",
195 | "1. What is the mean temperature, knowing that the mean error is +1°C and the measurements are those in the variable $Y$ below?\n",
196 | "2. A second thermometer with a Fahrenheit scale ($T_{(°F)} = T_{(°C)} × 1.8 + 32$) measures the temperature in a different region. 
The variance measured by this thermometer (in Fahrenheit) is 8. Where is the temperature more stable: in your region, or in the region measured by the second thermometer?"
197 | ]
198 | },
199 | {
200 | "cell_type": "code",
201 | "execution_count": 1,
202 | "metadata": {},
203 | "outputs": [],
204 | "source": [
205 | "Y = [21, 20, 22, 23, 20, 19, 19, 18, 19, 20]\n",
206 | "\n",
207 | "# Your code here"
208 | ]
209 | },
210 | {
211 | "cell_type": "markdown",
212 | "metadata": {},
213 | "source": [
214 | "## Exercise 12\n",
215 | "(Random variable, implementation)\n",
216 | "\n",
217 | "Let $S$ be the outcome of a random variable describing the sum of two dice thrown independently.\n",
218 | "\n",
219 | "1. Plot the probability distribution of $S$.\n",
220 | "1. Determine $E[S]$ and $Var(S)$."
221 | ]
222 | },
223 | {
224 | "cell_type": "markdown",
225 | "metadata": {},
226 | "source": [
227 | "## Exercise 13\n",
228 | "(Random variable, conceptual)\n",
229 | "\n",
230 | "The probability distribution of a discrete random variable $X$ is given by\n",
231 | "\n",
232 | "$P(X=-1)=1/5, P(X=0)=2/5, P(X=1)=2/5$.\n",
233 | "\n",
234 | "1. Compute $E[X]$.\n",
235 | "1. Give the probability distribution of $Y=X^2$ and compute $E[Y]$ using the distribution of $Y$.\n",
236 | "1. Determine $E[X^2]$ using the change-of-variable formula. Check your answer against the answer in 2.\n",
237 | "1. Determine $Var(X)$."
238 | ]
239 | },
240 | {
241 | "cell_type": "markdown",
242 | "metadata": {},
243 | "source": [
244 | "## Exercise 14\n",
245 | "(binomial distribution, applied)\n",
246 | "\n",
247 | "A sailor is trying to walk on a slippery deck, but because of the movements of the ship, he can make exactly one step every second, either forward (with probability $p=0.5$) or backward (with probability $1-p=0.5$). Using the `scipy.stats.binom` package, determine the probability that the sailor is in position +8 after 16 seconds."
248 | ]
249 | },
250 | {
251 | "cell_type": "markdown",
252 | "metadata": {},
253 | "source": [
254 | "## Exercise 15\n",
255 | "(geometric distribution, applied)\n",
256 | "\n",
257 | "In order to finish a board game, a player must get an exact 3 on a regular die. Using the `scipy.stats.geom` package, determine how many tries it will take, on average, to win the game. What are the best and worst cases?"
258 | ]
259 | },
260 | {
261 | "cell_type": "markdown",
262 | "metadata": {},
263 | "source": [
264 | "## Exercise 16\n",
265 | "(gamma distribution, applied)\n",
266 | "\n",
267 | "The grades from an exam roughly follow a Gamma distribution with parameters $k=9$ (shape parameter) and $\\theta=0.5$ (scale parameter). Using the `scipy.stats.gamma` package, determine what percentage of students will pass the exam, if the minimum score is 3."
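The `scipy.stats` distributions used in Exercises 14-16 share a common interface (`pmf`/`pdf`, `cdf`, `sf`, `mean`). A minimal illustration with arbitrary parameters, deliberately not the ones from the exercises:

```python
from scipy.stats import binom, geom, gamma

binom.pmf(3, n=10, p=0.5)      # P(X = 3) for 10 fair Bernoulli trials
geom.mean(1/6)                 # expected number of trials until the first success
gamma.sf(2.0, a=4, scale=1.0)  # P(X > 2), i.e. 1 - CDF at 2
```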
268 | ] 269 | } 270 | ], 271 | "metadata": { 272 | "kernelspec": { 273 | "display_name": "Python 3 (ipykernel)", 274 | "language": "python", 275 | "name": "python3" 276 | }, 277 | "language_info": { 278 | "codemirror_mode": { 279 | "name": "ipython", 280 | "version": 3 281 | }, 282 | "file_extension": ".py", 283 | "mimetype": "text/x-python", 284 | "name": "python", 285 | "nbconvert_exporter": "python", 286 | "pygments_lexer": "ipython3", 287 | "version": "3.12.3" 288 | } 289 | }, 290 | "nbformat": 4, 291 | "nbformat_minor": 4 292 | } 293 | -------------------------------------------------------------------------------- /Lab06-Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Naive Bayes" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Exercise 1\n", 15 | "\n", 16 | "Learned probabilities\n", 17 | "\n", 18 | "Given the following run of the Naive Bayes algorithm without smoothing:" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "name": "stdout", 28 | "output_type": "stream", 29 | "text": [ 30 | "[-1.09861229 -0.40546511]\n", 31 | "[[-2.87682072e-01 -1.38629436e+00 -1.38629436e+00]\n", 32 | " [-2.51052925e+01 -1.24997790e-11 -6.93147181e-01]]\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "import pandas as pd\n", 38 | "from sklearn.naive_bayes import BernoulliNB\n", 39 | "\n", 40 | "# Create the training set\n", 41 | "features = ['study', 'free', 'money']\n", 42 | "target = 'is_spam'\n", 43 | "messages = pd.DataFrame(\n", 44 | "[(1, 0, 0, 0),\n", 45 | "(0, 0, 1, 0),\n", 46 | "(1, 0, 0, 0),\n", 47 | "(1, 1, 0, 0)] +\n", 48 | "[(0, 1, 0, 1)] * 4 +\n", 49 | "[(0, 1, 1, 1)] * 4,\n", 50 | "columns=features+[target])\n", 51 | "\n", 52 | "# Create the prediction set\n", 53 | "X = messages[features]\n", 54 | "y = messages[target]\n", 55 | "cl = BernoulliNB(alpha=1e-10).fit(X, y)\n", 56 | "\n", 57 | "print(cl.class_log_prior_)\n", 58 | "print(cl.feature_log_prob_)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "1. Write a function that independently calculates the value of the `class_log_prior_` attribute without smoothing using only `messages` as parameter. (These are the natural logarithms of class probabilities $P(v_j)$).\n", 66 | "2. Write a function that independently calculates the value of the `feature_log_prob_` attribute without smoothing using only `messages` as parameter. (These are the natural logarithms of attribute probabilities $P(a_i|v_j)$)." 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "## Exercise 2\n", 74 | "Expected error rate in training\n", 75 | "\n", 76 | "Consider a binary classification problem with features $X_1$ and $X_2$ and label $Y$. The two features are assumed to be conditionally independent with respect to $Y$ . The prior probabilities $P(Y=0)$ and $P(Y=1)$ are both equal to 0.5. The conditional probabilities are:\n", 77 | "\n", 78 | "\n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | "
| P(X1\|Y) | Y=0 | Y=1 |
|----------|-----|-----|
| X1=0     | 0.7 | 0.2 |
| X1=1     | 0.3 | 0.8 |
\n", 93 | "\n", 94 | "\n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | "
| P(X2\|Y) | Y=0 | Y=1 |
|----------|-----|-----|
| X2=0     | 0.9 | 0.5 |
| X2=1     | 0.1 | 0.5 |
\n", 109 | "\n", 110 | "
\n",
110 | "\n",
111 | "\n",
112 | "1. Generate a `DataFrame` with 1000 entries and three columns `['x1', 'x2', 'y']`, according to the description above, using the `bernoulli.rvs` function from `scipy`.\n",
113 | "1. After training on the DataFrame above, predict every combination of values for $X_1$ and $X_2$.\n",
114 | "1. Calculate the average error rate on the training dataset.\n",
115 | "1. Create a new attribute $X_3$ as a copy of $X_2$. What is the new average error rate on the training dataset?"
116 | ]
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "metadata": {},
121 | "source": [
122 | "## Exercise 3\n",
123 | "Joint Bayes\n",
124 | "\n",
125 | "Considering the dataset below:"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": 2,
131 | "metadata": {},
132 | "outputs": [],
133 | "source": [
134 | "import pandas as pd\n",
135 | "from tools.pd_helpers import apply_counts\n",
136 | "\n",
137 | "d = pd.DataFrame({'X1': [0, 0, 1, 1, 0, 0, 1, 1],\n",
138 | "                  'X2': [0, 0, 0, 0, 1, 1, 1, 1],\n",
139 | "                  'C' : [2, 18, 4, 1, 4, 1, 2, 18],\n",
140 | "                  'Y' : [0, 1, 0, 1, 0, 1, 0, 1]})\n",
141 | "d=apply_counts(d, 'C')"
142 | ]
143 | },
144 | {
145 | "cell_type": "markdown",
146 | "metadata": {},
147 | "source": [
148 | "1. Implement a simple version of the Joint Bayes algorithm by creating the `BernoulliJB` class, similar to `BernoulliNB` from `scikit`, but only implement the `fit(X,y)` and `predict_proba(X)` methods, without smoothing.\n",
149 | "1. How many probabilities are estimated by the Joint Bayes algorithm?\n",
150 | "1. What are the probability estimates for the instance $X_1 = 0$, $X_2 = 0$ calculated by `predict_proba(X)` from `BernoulliJB`?\n",
151 | "1. What are the predicted probabilities of Naive Bayes (using `predict_proba(X)` from `BernoulliNB`) without smoothing for this instance?"
152 | ]
153 | },
154 | {
155 | "cell_type": "markdown",
156 | "metadata": {},
157 | "source": [
158 | "## Exercise 4\n",
159 | "Measuring the naivety assumption\n",
160 | "\n",
161 | "Consider a simple text classification problem that only considers two words: $w_1$ and $w_2$. The label $y$ will only be 1 if $w_1$ is present and $w_2$ is not, so the label is effectively the function $w_1 \\land \\lnot w_2$.\n",
162 | "\n",
163 | "The `correlated_df` function below will return such a dataset, with 10,000 entries and the columns `['w1', 'w2', 'y']`. The parameter `corr` specifies approximately how much correlation should exist between `w1` and `w2`."
164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 3, 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "name": "stdout", 173 | "output_type": "stream", 174 | "text": [ 175 | "Correlation: PearsonRResult(statistic=np.float64(0.469833135691177), pvalue=np.float64(0.0))\n" 176 | ] 177 | } 178 | ], 179 | "source": [ 180 | "from scipy.stats import bernoulli\n", 181 | "from scipy.stats import pearsonr\n", 182 | "size = 10000\n", 183 | "\n", 184 | "def correlated_df(corr):\n", 185 | " w1 = bernoulli.rvs(0.5, size=size, random_state=1)\n", 186 | " d = pd.DataFrame({'w1': w1})\n", 187 | " mask = bernoulli.rvs(corr, size=size, random_state=2)\n", 188 | " random = bernoulli.rvs(0.5, size=size, random_state=3)\n", 189 | " d['w2'] = d['w1'] & mask | random & ~mask\n", 190 | " d['mask'] = mask\n", 191 | " d['random'] = random\n", 192 | " d['y'] = d['w1'] & ~ d['w2']\n", 193 | " return d\n", 194 | "\n", 195 | "d = correlated_df(0.5)\n", 196 | "\n", 197 | "# Check that the correlation is indeed close to 0.5\n", 198 | "print(\"Correlation: \", pearsonr(d['w1'], d['w2']))" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "1. With the function above, create a line chart using `matplotlib` that shows how the correlation affects the training error of Naive Bayes (no smoothing).\n", 206 | "1. Using the function above, create a line chart that shows how the correlation affects the training error of a decision tree classifier (see the `DecisionTreeClassifier` class from `sklearn`)." 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "## Exercise 5\n", 214 | "Average error rate\n", 215 | "\n", 216 | "Given the function $Y = (A \\land B) \\lor \\neg(B \\lor C)$ where $A$, $B$ and $C$ are independent binary random variables, each of which having 50% chance of being 0 and 50% chance of being 1.\n", 217 | "\n", 218 | "1. Generate a DataFrame with 1000 entries and four columns `A`, `B`, `C` and `Y`, according to the description above, using the `bernoulli.rvs` function from `scipy`.\n", 219 | "1. Calculate the error rate for Naive Bayes on the training dataset.\n", 220 | "1. What is the average error rate on this training dataset for the Joint Bayes algorithm? (Note that you don't have to actually build the algorithm, just provide a theoretical justification.)" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "## Exercise 6\n", 228 | "Text classification\n", 229 | "\n", 230 | "A news company would like to automatically sort the news articles related to sport from those related to politics. 
They are using 8 key words ($w_1,...,w_8)$ and have annotated several articles in each category for training:"
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "execution_count": 4,
236 | "metadata": {},
237 | "outputs": [],
238 | "source": [
239 | "import pandas as pd\n",
240 | "\n",
241 | "features = [f'w{i}' for i in range(1,9)]\n",
242 | "\n",
243 | "politics=pd.DataFrame([\n",
244 | "(1, 0, 1, 1, 1, 0, 1, 1),\n",
245 | "(0, 0, 0, 1, 0, 0, 1, 1),\n",
246 | "(1, 0, 0, 1, 1, 0, 1, 0),\n",
247 | "(0, 1, 0, 0, 1, 1, 0, 1),\n",
248 | "(0, 0, 0, 1, 1, 0, 1, 1),\n",
249 | "(0, 0, 0, 1, 1, 0, 0, 1)],\n",
250 | "columns=features)\n",
251 | "\n",
252 | "sport=pd.DataFrame([\n",
253 | "(1, 1, 0, 0, 0, 0, 0, 0),\n",
254 | "(0, 0, 1, 0, 0, 0, 0, 0),\n",
255 | "(1, 1, 0, 1, 0, 0, 0, 0),\n",
256 | "(1, 1, 0, 1, 0, 0, 0, 1),\n",
257 | "(1, 1, 0, 1, 1, 0, 0, 0),\n",
258 | "(0, 0, 0, 1, 0, 1, 0, 0),\n",
259 | "(1, 1, 1, 1, 1, 0, 1, 0)],\n",
260 | "columns=features)"
261 | ]
262 | },
263 | {
264 | "cell_type": "markdown",
265 | "metadata": {},
266 | "source": [
267 | "According to Naive Bayes (without smoothing), what is the probability that the document `x = (1, 0, 0, 1, 1, 1, 1, 0)` is about politics?"
268 | ]
269 | }
270 | ],
271 | "metadata": {
272 | "kernelspec": {
273 | "display_name": "Python 3 (ipykernel)",
274 | "language": "python",
275 | "name": "python3"
276 | },
277 | "language_info": {
278 | "codemirror_mode": {
279 | "name": "ipython",
280 | "version": 3
281 | },
282 | "file_extension": ".py",
283 | "mimetype": "text/x-python",
284 | "name": "python",
285 | "nbconvert_exporter": "python",
286 | "pygments_lexer": "ipython3",
287 | "version": "3.12.3"
288 | }
289 | },
290 | "nbformat": 4,
291 | "nbformat_minor": 4
292 | }
293 |
-------------------------------------------------------------------------------- /img/random_variable.svg: --------------------------------------------------------------------------------
1 | [SVG markup stripped from this dump. The figure illustrates a random variable for a coin toss: the sample space {Heads, Tails} is mapped to the values {+1, -1}, which are in turn mapped to probabilities. Labels: "Sample Space", "Random Variable", "Probability", "Domain of random variable", "Range of random variable / Domain of probability mass function", "Range of probability mass function".]
-------------------------------------------------------------------------------- /Lab02-Exercises.ipynb: --------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Introduction to Information Theory\n",
8 | "\n",
9 | "## Exercise 1\n",
10 | "\n",
11 | "(entropy, implementation)\n",
12 | "\n",
13 | "Consider two fair dice with 6 sides each.\n",
14 | "\n",
15 | "1. Print the probability distribution of the sum $S$ of the numbers obtained by throwing the two dice.\n",
16 | "1. What is the information content in bits of the events $S=2$, $S=11$, $S=5$, $S=7$?\n",
17 | "1. Calculate the entropy of $S$.\n",
18 | "1. Let's say you throw the dice one at a time, and the first die shows 4. What is the entropy of $S$ after this observation? Was any information gained/lost in the process of observing the outcome of the first die toss? If so, calculate how much information (in bits) was lost or gained."
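A small pair of helpers in the spirit of this lab (a sketch; the function names are illustrative and not part of the `tools` package). `entropy` computes $H(Y)$ in bits and `information_gain` computes $IG(Y;A) = H(Y) - H(Y|A)$; both could be applied directly to the datasets in the exercises below.

```python
import numpy as np
import pandas as pd

def entropy(y: pd.Series) -> float:
    """Shannon entropy in bits; value_counts only yields non-zero probabilities."""
    p = y.value_counts(normalize=True)
    return float(-(p * np.log2(p)).sum())

def information_gain(a: pd.Series, y: pd.Series) -> float:
    """IG(Y; A) = H(Y) - sum over values v of P(A=v) * H(Y | A=v)."""
    h_cond = sum(w * entropy(y[a == v])
                 for v, w in a.value_counts(normalize=True).items())
    return entropy(y) - h_cond
```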
19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## Exercise 2\n", 26 | "\n", 27 | "(information gain, implementation or analysis)\n", 28 | "\n", 29 | "Given the dataset below, calculate the information gain for the target variable 'Edible' and each feature ('Weight', 'Smell', 'Spots', 'Smooth'):" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 1, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | " Weight Smell Spots Smooth Edible\n", 42 | "A 1 0 0 0 1\n", 43 | "B 1 0 1 0 1\n", 44 | "C 0 1 0 1 1\n", 45 | "D 0 0 0 1 0\n", 46 | "E 1 1 1 0 0\n", 47 | "F 1 0 1 1 0\n", 48 | "G 1 0 0 1 0\n", 49 | "H 0 1 0 0 0\n" 50 | ] 51 | } 52 | ], 53 | "source": [ 54 | "import pandas as pd\n", 55 | "features = ['Weight', 'Smell', 'Spots', 'Smooth', 'Edible']\n", 56 | "mushrooms = pd.DataFrame([\n", 57 | " (1, 0, 0, 0, 1),\n", 58 | " (1, 0, 1, 0, 1),\n", 59 | " (0, 1, 0, 1, 1),\n", 60 | " (0, 0, 0, 1, 0),\n", 61 | " (1, 1, 1, 0, 0),\n", 62 | " (1, 0, 1, 1, 0),\n", 63 | " (1, 0, 0, 1, 0),\n", 64 | " (0, 1, 0, 0, 0)\n", 65 | "],\n", 66 | "index=['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'],\n", 67 | "columns=features)\n", 68 | "print(mushrooms)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "## Exercise 3\n", 76 | "\n", 77 | "(entropy and information gain, implementation or analysis)\n", 78 | "\n", 79 | "The following code simulates the season results for football team F:" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 2, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/html": [ 90 | "
\n", 91 | "\n", 104 | "\n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | "
\n", 236 | "
" 237 | ], 238 | "text/plain": [ 239 | " opponent stadium result\n", 240 | "0 Team A Home Win\n", 241 | "1 Team A Away Draw\n", 242 | "2 Team B Home Draw\n", 243 | "3 Team B Away Win\n", 244 | "4 Team C Home Loss\n", 245 | "5 Team C Away Loss\n", 246 | "6 Team D Home Loss\n", 247 | "7 Team D Away Draw\n", 248 | "8 Team E Home Win\n", 249 | "9 Team E Away Win\n", 250 | "10 Team A Home Draw\n", 251 | "11 Team A Away Loss\n", 252 | "12 Team B Home Draw\n", 253 | "13 Team B Away Win\n", 254 | "14 Team C Home Loss\n", 255 | "15 Team C Away Draw\n", 256 | "16 Team D Home Win\n", 257 | "17 Team D Away Draw\n", 258 | "18 Team E Home Draw\n", 259 | "19 Team E Away Win" 260 | ] 261 | }, 262 | "execution_count": 2, 263 | "metadata": {}, 264 | "output_type": "execute_result" 265 | } 266 | ], 267 | "source": [ 268 | "from itertools import product\n", 269 | "import pandas as pd\n", 270 | "import random\n", 271 | "random.seed(1)\n", 272 | "opponents = ['Team '+chr(ord('A') + i) for i in range(5)]\n", 273 | "stadiums = ['Home', 'Away']\n", 274 | "games = pd.DataFrame(list(product(opponents, stadiums))*2,\n", 275 | " columns=['opponent', 'stadium'])\n", 276 | "games['result'] = random.choices([\"Win\", \"Loss\", \"Draw\"],\n", 277 | " k=len(games))\n", 278 | "games" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "1. What is the entropy of the `result` $H(result)$ (ignoring all other variables)?\n", 286 | "1. What are the average conditional entropies $H(result | stadium)$ and $H(result | opponent)$?\n", 287 | "1. Which of the two variables is more important in deciding the result of a game? Answer this question by calculating the information gain for the two variables: $IG(result; stadium)$ and $IG(result;opponent)$." 288 | ] 289 | }, 290 | { 291 | "cell_type": "markdown", 292 | "metadata": {}, 293 | "source": [ 294 | "# Exercise 4\n", 295 | "\n", 296 | "(entropy, implementation or analysis)\n", 297 | "\n", 298 | "Consider the random variable $C$ \"a person has a cold\" and the random variable $T$ \"outside temperature\". The joint distribution of the two variables is given below:" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 3, 304 | "metadata": {}, 305 | "outputs": [ 306 | { 307 | "data": { 308 | "text/html": [ 309 | "
\n", 310 | "\n", 323 | "\n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | "
\n",
347 | "
348 | ],
349 | "text/plain": [
350 | "       T_Sunny  T_Rainy  T_Snowy\n",
351 | "C_No      0.30     0.20      0.1\n",
352 | "C_Yes     0.05     0.15      0.2"
353 | ]
354 | },
355 | "execution_count": 3,
356 | "metadata": {},
357 | "output_type": "execute_result"
358 | }
359 | ],
360 | "source": [
361 | "import pandas as pd\n",
362 | "d = pd.DataFrame({'T_Sunny': [0.3, 0.05], \n",
363 | "                  'T_Rainy': [0.2, 0.15], \n",
364 | "                  'T_Snowy': [0.1, 0.2]}, \n",
365 | "                 index=['C_No', 'C_Yes'])\n",
366 | "d"
367 | ]
368 | },
369 | {
370 | "cell_type": "markdown",
371 | "metadata": {},
372 | "source": [
373 | "1. Plot the pmf of $C$ and $T$.\n",
374 | "1. Calculate $H(C)$, $H(T)$.\n",
375 | "1. Calculate $H(C|T)$, $H(T|C)$. Does the temperature (T) reduce the uncertainty regarding someone having a cold (C)?"
376 | ]
377 | },
378 | {
379 | "cell_type": "markdown",
380 | "metadata": {},
381 | "source": [
382 | "# Exercise 5\n",
383 | "\n",
384 | "(decision tree, implementation)\n",
385 | "\n",
386 | "Consider the Boolean expression $A \\lor (B \\land C)$. The corresponding truth table can be generated with:"
387 | ]
388 | },
389 | {
390 | "cell_type": "code",
391 | "execution_count": 4,
392 | "metadata": {},
393 | "outputs": [],
394 | "source": [
395 | "from itertools import product\n",
396 | "X = [list(c) for c in product([0,1], repeat=3)]\n",
397 | "y = [A or (B and C) for A, B, C in X]"
398 | ]
399 | },
400 | {
401 | "cell_type": "markdown",
402 | "metadata": {},
403 | "source": [
404 | "1. Fit a decision tree classifier on the truth table above and visualise the resulting tree. Make sure to use entropy as the splitting criterion.\n",
405 | "1. Is the tree above optimal? Can you find a decision tree with fewer levels or nodes that correctly represents this function?"
406 | ]
407 | }
408 | ],
409 | "metadata": {
410 | "kernelspec": {
411 | "display_name": "Python 3 (ipykernel)",
412 | "language": "python",
413 | "name": "python3"
414 | },
415 | "language_info": {
416 | "codemirror_mode": {
417 | "name": "ipython",
418 | "version": 3
419 | },
420 | "file_extension": ".py",
421 | "mimetype": "text/x-python",
422 | "name": "python",
423 | "nbconvert_exporter": "python",
424 | "pygments_lexer": "ipython3",
425 | "version": "3.12.3"
426 | }
427 | },
428 | "nbformat": 4,
429 | "nbformat_minor": 4
430 | }
431 |
-------------------------------------------------------------------------------- /Lab05.ipynb: --------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "0de207cb-ff2c-498a-9e5b-f649ee51ee40",
6 | "metadata": {},
7 | "source": [
8 | "# Naive Bayes\n",
9 | "\n",
10 | "Here is a step-by-step explanation of the algorithm: https://youtu.be/O2L2Uv9pdDA\n",
11 | "\n",
12 | "Bayesian classifiers, and in particular the naive Bayes classifier, are a family of probabilistic classification algorithms particularly suited to problems like text classification.\n",
13 | "\n",
14 | "When to use it:\n",
15 | "\n",
16 | "* The target function $f$ takes values from a finite set $V=\\{v_1,...,v_k\\}$\n",
17 | "* A moderate or large training data set is available\n",
18 | "* The attributes $a_1, a_2, ..., a_n$ that describe instances are conditionally independent with respect to the given classification:\n",
19 | "\n",
20 | "$$P(a_1,a_2,...,a_n|v_j)=\\prod_i P(a_i|v_j)$$\n",
21 | "\n",
22 | "The most probable value of $f(x)$ is:\n",
23 | "\n",
24 | "\\begin{align}\n",
25 | "v_{MAP} &= \\mbox{argmax}_{v_j \\in V}P(v_j|a_1,a_2,...,a_n) \\\\\n",
26 | "        &= \\mbox{argmax}_{v_j \\in 
V}\\frac{P(a_1,a_2,...,a_n|v_j)P(v_j)}{P(a_1,a_2,...,a_n)}\\\\\n", 27 | " &= \\mbox{argmax}_{v_j \\in V} P(a_1,a_2,...,a_n|v_j)P(v_j)\\\\\n", 28 | " &= \\mbox{argmax}_{v_j \\in V} \\prod_i P(a_i|v_j)P(v_j)\n", 29 | "\\end{align}\n", 30 | "\n", 31 | "where MAP stands for [_maximum a posteriori probability_](https://en.wikipedia.org/wiki/Maximum_a_posteriori_estimation).\n", 32 | "\n", 33 | "As an example, let's consider a simplified dataset of only 12 messages, 8 of which are spam. For each message, only consider the words \"study\", \"free\" and \"money\":" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 14, 39 | "id": "167d846e-9dc3-4db9-a485-7417639ba786", 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "data": { 44 | "text/html": [ 45 | "
\n", 46 | "\n", 59 | "\n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | "
\n", 156 | "
" 157 | ], 158 | "text/plain": [ 159 | " study free money is_spam\n", 160 | "0 1 0 0 0\n", 161 | "1 0 0 1 0\n", 162 | "2 1 0 0 0\n", 163 | "3 1 1 0 0\n", 164 | "4 0 1 0 1\n", 165 | "5 0 1 0 1\n", 166 | "6 0 1 0 1\n", 167 | "7 0 1 0 1\n", 168 | "8 0 1 1 1\n", 169 | "9 0 1 1 1\n", 170 | "10 0 1 1 1\n", 171 | "11 0 1 1 1" 172 | ] 173 | }, 174 | "execution_count": 14, 175 | "metadata": {}, 176 | "output_type": "execute_result" 177 | } 178 | ], 179 | "source": [ 180 | "import pandas as pd\n", 181 | "features = ['study', 'free', 'money']\n", 182 | "target = 'is_spam'\n", 183 | "messages = pd.DataFrame(\n", 184 | " [(1, 0, 0, 0),\n", 185 | " (0, 0, 1, 0),\n", 186 | " (1, 0, 0, 0),\n", 187 | " (1, 1, 0, 0)] +\n", 188 | " [(0, 1, 0, 1)] * 4 +\n", 189 | " [(0, 1, 1, 1)] * 4,\n", 190 | "columns=features+[target])\n", 191 | "messages" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "id": "6ac9f768-9995-46ac-97fa-7ff8a203fe18", 197 | "metadata": {}, 198 | "source": [ 199 | "Given this labelled dataset, a common requirement is to classify a new message, for which the label is unknown. For example, the message \"money for psychology study\", can be encoded as:" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 15, 205 | "id": "40095ef3-b498-4cda-ac71-02ff1d8309c1", 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "data": { 210 | "text/html": [ 211 | "
\n", 212 | "\n", 225 | "\n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | "
\n",
243 | "
244 | ],
245 | "text/plain": [
246 | "   study  free  money\n",
247 | "0      1     0      1"
248 | ]
249 | },
250 | "execution_count": 15,
251 | "metadata": {},
252 | "output_type": "execute_result"
253 | }
254 | ],
255 | "source": [
256 | "new_messages = pd.DataFrame(\n",
257 | "    [(1, 0, 1)],\n",
258 | "columns = features)\n",
259 | "new_messages"
260 | ]
261 | },
262 | {
263 | "cell_type": "markdown",
264 | "id": "39069082-c9c6-4616-8010-e8ab0472b430",
265 | "metadata": {},
266 | "source": [
267 | "Using the [`BernoulliNB`](https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.BernoulliNB.html) learner from `sklearn`, we can train a regular Naive Bayes classifier with:"
268 | ]
269 | },
270 | {
271 | "cell_type": "code",
272 | "execution_count": 16,
273 | "id": "414e66f3-266c-454f-9816-e6f58e7ce6eb",
274 | "metadata": {},
275 | "outputs": [],
276 | "source": [
277 | "from sklearn.naive_bayes import BernoulliNB\n",
278 | "X = messages[features]\n",
279 | "y = messages[target]\n",
280 | "cl = BernoulliNB().fit(X, y)"
281 | ]
282 | },
283 | {
284 | "cell_type": "markdown",
285 | "id": "535272ac-bd0a-47ba-a4e4-fd1dc61534f9",
286 | "metadata": {},
287 | "source": [
288 | "and then predict the class of the new message with:"
289 | ]
290 | },
291 | {
292 | "cell_type": "code",
293 | "execution_count": 17,
294 | "id": "f573c707-9cc6-477e-bd03-e793f7970c12",
295 | "metadata": {},
296 | "outputs": [
297 | {
298 | "data": {
299 | "text/plain": [
300 | "array([0])"
301 | ]
302 | },
303 | "execution_count": 17,
304 | "metadata": {},
305 | "output_type": "execute_result"
306 | }
307 | ],
308 | "source": [
309 | "cl.predict(new_messages)"
310 | ]
311 | },
312 | {
313 | "cell_type": "markdown",
314 | "id": "0f9816ca-bafb-4783-8f30-2e6c13910339",
315 | "metadata": {},
316 | "source": [
317 | "The prediction is 0, so this message is not considered to be spam.\n",
318 | "\n",
319 | "In order to see the probabilities of each class, not just the most probable class, we can do:"
320 | ]
321 | },
322 | {
323 | "cell_type": "code",
324 | "execution_count": 18,
325 | "id": "f62ed56b-8b13-49eb-89ad-d84d6f1ddcda",
326 | "metadata": {},
327 | "outputs": [
328 | {
329 | "data": {
330 | "text/plain": [
331 | "array([[0.93676815, 0.06323185]])"
332 | ]
333 | },
334 | "execution_count": 18,
335 | "metadata": {},
336 | "output_type": "execute_result"
337 | }
338 | ],
339 | "source": [
340 | "cl.predict_proba(new_messages)"
341 | ]
342 | },
343 | {
344 | "cell_type": "markdown",
345 | "id": "913e30a6-3b89-473a-8870-15c80dfce98e",
346 | "metadata": {},
347 | "source": [
348 | "To see the classes corresponding to these probabilities, we can look at the `classes_` attribute:"
349 | ]
350 | },
351 | {
352 | "cell_type": "code",
353 | "execution_count": 19,
354 | "id": "253b4ada-f66d-47dc-9ab2-90970732453c",
355 | "metadata": {},
356 | "outputs": [
357 | {
358 | "data": {
359 | "text/plain": [
360 | "array([0, 1])"
361 | ]
362 | },
363 | "execution_count": 19,
364 | "metadata": {},
365 | "output_type": "execute_result"
366 | }
367 | ],
368 | "source": [
369 | "cl.classes_"
370 | ]
371 | },
372 | {
373 | "cell_type": "markdown",
374 | "id": "88fa8e99-511c-4f81-82aa-b5b90092b158",
375 | "metadata": {},
376 | "source": [
377 | "which means the first probability is for class '0', while the second probability is for class '1'."
378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "id": "b2de02a2-c4ef-4f40-aae1-df216bc47cb0", 383 | "metadata": {}, 384 | "source": [ 385 | "Some of the most useful attributes provided by this learner are:\n", 386 | "\n", 387 | "* `classes_` Class labels known to the classifier;\n", 388 | "* `class_count_` Number of samples encountered for each class during fitting;\n", 389 | "* `class_log_prior_` Natural logarithm of the probability of each class (smoothed);\n", 390 | "* `feature_count_` Number of samples encountered for each (class, feature) during fitting;\n", 391 | "* `feature_log_prob_` Empirical log probability of features given a class, $P(a_i|v_j)$." 392 | ] 393 | }, 394 | { 395 | "cell_type": "markdown", 396 | "id": "c2da7df2-a186-4156-a41b-e4c61ee85f31", 397 | "metadata": {}, 398 | "source": [ 399 | "---\n", 400 | "**Give it a try!**\n", 401 | "\n", 402 | "The datasets `X_art` and `y_art` below describe 6 news articles. `X_art` holds the frequency of words while `y_art` holds the topic of the article. `X_new_art` is meant to represent a new article, for which we don't know the topic. What is the probability that this article is about weather?" 403 | ] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "execution_count": 20, 408 | "id": "463d5f94-09cf-448d-8cc4-7a2a3cc107ef", 409 | "metadata": {}, 410 | "outputs": [], 411 | "source": [ 412 | "import pandas as pd\n", 413 | "import numpy as np\n", 414 | "rng = np.random.RandomState(1)\n", 415 | "cols = [f'word_{i}' for i in range(100)]\n", 416 | "\n", 417 | "X_art = pd.DataFrame(rng.randint(5, size=(6, 100)), columns=cols)\n", 418 | "y_art = pd.Series(np.array(['politics', 'economy', 'weather', 'sports', 'sports', 'culture']))\n", 419 | "X_new_art = pd.DataFrame(X_art[2:3])\n", 420 | "\n", 421 | "# Your code here" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "id": "1b9c4e41-17bf-4073-9e7e-f475037be204", 427 | "metadata": {}, 428 | "source": [ 429 | "Expected result: 0.99999998.\n", 430 | "\n", 431 | "---" 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "id": "a301cdcb-619e-4e55-beea-3a3e577642cf", 437 | "metadata": {}, 438 | "source": [ 439 | "## Prior probabilities" 440 | ] 441 | }, 442 | { 443 | "cell_type": "markdown", 444 | "id": "b2b734f2-b36e-4e36-a709-419dbba3b874", 445 | "metadata": {}, 446 | "source": [ 447 | "By default, the probabilities of the two classes (spam and non-spam) are determined from the dataset. In the results above, the prior probability of 'spam' is considered to be $8/12$, so approximately 0.67. If, however, we want to tweak the prediction to be more conservative and label less messages as spam, then we can directly specify the probability of spam to a lower value such as 0.1:" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": 21, 453 | "id": "4563b8b7-02e3-4dd0-a5e0-2ff149f044b6", 454 | "metadata": {}, 455 | "outputs": [ 456 | { 457 | "data": { 458 | "text/plain": [ 459 | "array([[0.99626401, 0.00373599]])" 460 | ] 461 | }, 462 | "execution_count": 21, 463 | "metadata": {}, 464 | "output_type": "execute_result" 465 | } 466 | ], 467 | "source": [ 468 | "cl = BernoulliNB(class_prior=[0.9,0.1]).fit(X, y)\n", 469 | "cl.predict_proba(new_messages)" 470 | ] 471 | }, 472 | { 473 | "cell_type": "markdown", 474 | "id": "fae83845-f220-4496-ae78-69b98490deed", 475 | "metadata": {}, 476 | "source": [ 477 | "As expected, the computed probability that the message is spam has decreased, from around 0.06 to 0.0037." 
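The numbers above can be sanity-checked directly from the fitted attributes listed earlier (a quick sketch using the objects already defined in this notebook; `np.exp` undoes the natural logarithm, and refitting without `class_prior` restores the default data-driven priors):

```python
import numpy as np

cl = BernoulliNB().fit(X, y)        # default: priors estimated from the data
print(np.exp(cl.class_log_prior_))  # approx. [0.33, 0.67], i.e. 4/12 and 8/12
# pair each class label with its predicted probability for the new message
print(dict(zip(cl.classes_, cl.predict_proba(new_messages)[0])))
```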
478 | ]
479 | }
480 | ],
481 | "metadata": {
482 | "kernelspec": {
483 | "display_name": "Python 3 (ipykernel)",
484 | "language": "python",
485 | "name": "python3"
486 | },
487 | "language_info": {
488 | "codemirror_mode": {
489 | "name": "ipython",
490 | "version": 3
491 | },
492 | "file_extension": ".py",
493 | "mimetype": "text/x-python",
494 | "name": "python",
495 | "nbconvert_exporter": "python",
496 | "pygments_lexer": "ipython3",
497 | "version": "3.12.3"
498 | }
499 | },
500 | "nbformat": 4,
501 | "nbformat_minor": 5
502 | }
503 |
-------------------------------------------------------------------------------- /Lab03-Exercises.ipynb: --------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Decision Trees\n",
8 | "## Exercise 1 \n",
9 | "Ternary classification\n",
10 | "\n",
11 | "The following code creates a small dataset with two attributes and a target\n",
12 | "variable with three possible values:"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 1,
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "import pandas as pd\n",
22 | "X = pd.DataFrame({'X1': [1, 1, 1, 1, 0, 0],\n",
23 | "                  'X2': [1, 1, 1, 0, 0, 0]})\n",
24 | "Y = pd.Series([1, 1, 2, 3, 2, 3])"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {},
30 | "source": [
31 | "1. Calculate the information gain for `X1` and `X2` relative to `Y`.\n",
32 | "1. Based on these calculations, what attribute will be used for the first node of the ID3 tree?\n",
33 | "1. Learn the entire tree and classify the instance `{'X1': 0, 'X2': 1}`."
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "## Exercise 2\n",
41 | "ID3 as a \"greedy\" algorithm\n",
42 | "\n",
43 | "The following code creates a dataset with features `A, B, C` and target variable `Y`:"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": 2,
49 | "metadata": {},
50 | "outputs": [],
51 | "source": [
52 | "import pandas as pd\n",
53 | "X = pd.DataFrame({'A': [1, 1, 0, 0],\n",
54 | "                  'B': [1, 0, 1, 0],\n",
55 | "                  'C': [0, 1, 1, 1]})\n",
56 | "Y = pd.Series([0, 1, 1, 0])"
57 | ]
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {},
62 | "source": [
63 | "1. Find the decision tree using ID3. Is it _consistent_ with the training data (does it have 100% accuracy)?\n",
64 | "1. Is there a shallower decision tree consistent with the above data? If so, what logical concept does it represent?"
65 | ]
66 | },
67 | {
68 | "cell_type": "markdown",
69 | "metadata": {},
70 | "source": [
71 | "## Exercise 3\n",
72 | "\n",
73 | "Titanic dataset\n",
74 | "\n",
75 | "The table below shows a few statistics on the survivors of the Titanic:"
\n", 87 | "\n", 100 | "\n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | "
\n", 178 | "
" 179 | ], 180 | "text/plain": [ 181 | " Class Gender Age Passengers Survivors\n", 182 | "0 Upper Male Child 5 5\n", 183 | "1 Upper Male Adult 175 57\n", 184 | "2 Upper Female Child 1 1\n", 185 | "3 Upper Female Adult 144 140\n", 186 | "4 Lower Male Child 59 24\n", 187 | "5 Lower Male Adult 1492 281\n", 188 | "6 Lower Female Child 44 27\n", 189 | "7 Lower Female Adult 281 176" 190 | ] 191 | }, 192 | "execution_count": 3, 193 | "metadata": {}, 194 | "output_type": "execute_result" 195 | } 196 | ], 197 | "source": [ 198 | "import pandas as pd\n", 199 | "titanic = pd.DataFrame([\n", 200 | " ('Upper', 'Male', 'Child', 5, 5),\n", 201 | " ('Upper', 'Male', 'Adult', 175, 57),\n", 202 | " ('Upper', 'Female', 'Child', 1, 1),\n", 203 | " ('Upper', 'Female', 'Adult', 144, 140),\n", 204 | " ('Lower', 'Male', 'Child', 59, 24),\n", 205 | " ('Lower', 'Male', 'Adult', 1492, 281),\n", 206 | " ('Lower', 'Female', 'Child', 44, 27),\n", 207 | " ('Lower', 'Female', 'Adult', 281, 176)\n", 208 | "],\n", 209 | "columns=['Class', 'Gender', 'Age', 'Passengers', 'Survivors'])\n", 210 | "titanic" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "1. We want to build a decision tree to predict the target variable `Y` (survived) based on variables `C` (class), `G` (gender) and `A` (age). Using information gain, determine which of the three variables will be used in the root node.\n", 218 | "2. What is the training accuracy of the decision tree consisting only of the root node above?\n", 219 | "3. If you were to build the full tree using all attributes, what would be the training accuracy? Note that you don’t have to actually build the full tree!" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "## Exercise 4\n", 227 | "Exoplanets, one-hot encoding\n", 228 | "\n", 229 | "Given a dataset with data regarding 800 exoplanets, fit a decision tree to find how well `Size` and `Orbit` describe if a planet is habitable. In other words, find the training accuracy of a decision tree model that uses those two variables to predict `Habitable` and also print the resulting tree." 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 4, 235 | "metadata": {}, 236 | "outputs": [ 237 | { 238 | "data": { 239 | "text/html": [ 240 | "
\n", 241 | "\n", 254 | "\n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | "
\n", 332 | "

800 rows × 3 columns

\n", 333 | "
" 334 | ], 335 | "text/plain": [ 336 | " Big Orbit Habitable\n", 337 | "0 Big Near Yes\n", 338 | "1 Big Near Yes\n", 339 | "2 Big Near Yes\n", 340 | "3 Big Near Yes\n", 341 | "4 Big Near Yes\n", 342 | ".. ... ... ...\n", 343 | "795 Small Far No\n", 344 | "796 Small Far No\n", 345 | "797 Small Far No\n", 346 | "798 Small Far No\n", 347 | "799 Small Far No\n", 348 | "\n", 349 | "[800 rows x 3 columns]" 350 | ] 351 | }, 352 | "execution_count": 4, 353 | "metadata": {}, 354 | "output_type": "execute_result" 355 | } 356 | ], 357 | "source": [ 358 | "from tools.pd_helpers import apply_counts\n", 359 | "exoplanets = pd.DataFrame([\n", 360 | " ('Big', 'Near', 'Yes', 20),\n", 361 | " ('Big', 'Far', 'Yes', 170),\n", 362 | " ('Small', 'Near', 'Yes', 139),\n", 363 | " ('Small', 'Far', 'Yes', 45),\n", 364 | " ('Big', 'Near', 'No', 130),\n", 365 | " ('Big', 'Far', 'No', 30),\n", 366 | " ('Small', 'Near', 'No', 11),\n", 367 | " ('Small', 'Far', 'No', 255)\n", 368 | "],\n", 369 | "columns=['Big', 'Orbit', 'Habitable', 'Count'])\n", 370 | "exoplanets = apply_counts(exoplanets, 'Count')\n", 371 | "exoplanets" 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": {}, 377 | "source": [ 378 | "## Exercise 5\n", 379 | "\n", 380 | "Exoplanets, continuous variable\n", 381 | "\n", 382 | "Given a dataset with 9 exoplanets for which we know the `Temperature` as well as the target variable `Habitable`.\n", 383 | "\n", 384 | "1. Find the training accuracy of a decision tree that predicts `Habitable` using `Temperature` and print the resulting tree.\n", 385 | "1. Independently calculate the split points that the algorithm will use for `Temperature` and check it against the generated tree. (The solution does not need to be general, can be \"hard-coded\" for this dataset.)\n", 386 | "1. Independently calculate the entropy of the root node of the generated tree." 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": 5, 392 | "metadata": {}, 393 | "outputs": [ 394 | { 395 | "data": { 396 | "text/html": [ 397 | "
\n", 398 | "\n", 411 | "\n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | "
\n", 467 | "
" 468 | ], 469 | "text/plain": [ 470 | " Temperature Habitable\n", 471 | "0 205 0\n", 472 | "1 205 0\n", 473 | "2 260 1\n", 474 | "3 380 1\n", 475 | "4 205 0\n", 476 | "5 260 1\n", 477 | "6 260 1\n", 478 | "7 380 0\n", 479 | "8 380 0" 480 | ] 481 | }, 482 | "execution_count": 5, 483 | "metadata": {}, 484 | "output_type": "execute_result" 485 | } 486 | ], 487 | "source": [ 488 | "exoplanets = pd.DataFrame([\n", 489 | " (205, 0),\n", 490 | " (205, 0),\n", 491 | " (260, 1),\n", 492 | " (380, 1),\n", 493 | " (205, 0),\n", 494 | " (260, 1),\n", 495 | " (260, 1),\n", 496 | " (380, 0),\n", 497 | " (380, 0)\n", 498 | "],\n", 499 | "columns=['Temperature', 'Habitable'])\n", 500 | "exoplanets" 501 | ] 502 | } 503 | ], 504 | "metadata": { 505 | "kernelspec": { 506 | "display_name": "Python 3 (ipykernel)", 507 | "language": "python", 508 | "name": "python3" 509 | }, 510 | "language_info": { 511 | "codemirror_mode": { 512 | "name": "ipython", 513 | "version": 3 514 | }, 515 | "file_extension": ".py", 516 | "mimetype": "text/x-python", 517 | "name": "python", 518 | "nbconvert_exporter": "python", 519 | "pygments_lexer": "ipython3", 520 | "version": "3.12.3" 521 | } 522 | }, 523 | "nbformat": 4, 524 | "nbformat_minor": 4 525 | } 526 | -------------------------------------------------------------------------------- /Lab06.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Naive Bayes" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Measuring accuracy\n", 15 | "\n", 16 | "Just as for the Decision Tree algorithm, accuracy in this case is defined as the proportion of correctly classified instances.\n", 17 | "For _training accuracy_ we can use the dedicated score function:" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "data": { 27 | "text/plain": [ 28 | "0.9166666666666666" 29 | ] 30 | }, 31 | "execution_count": 1, 32 | "metadata": {}, 33 | "output_type": "execute_result" 34 | } 35 | ], 36 | "source": [ 37 | "# Prepare the data as usual\n", 38 | "import pandas as pd\n", 39 | "features = ['study', 'free', 'money']\n", 40 | "target = 'is_spam'\n", 41 | "messages = pd.DataFrame(\n", 42 | " [(1, 0, 0, 0),\n", 43 | " (0, 0, 1, 0),\n", 44 | " (1, 0, 0, 0),\n", 45 | " (1, 1, 0, 0)] +\n", 46 | " [(0, 1, 0, 1)] * 4 +\n", 47 | " [(0, 1, 1, 1)] * 4,\n", 48 | "columns=features+[target])\n", 49 | "messages\n", 50 | "\n", 51 | "# Fit a Naive Bayes classifier\n", 52 | "from sklearn.naive_bayes import BernoulliNB\n", 53 | "X = messages[features]\n", 54 | "y = messages[target]\n", 55 | "cl = BernoulliNB().fit(X, y)\n", 56 | "\n", 57 | "# Measure accuracy on the training set\n", 58 | "cl.score(X, y)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "Again, this is the same as predicting the training set and then counting how many\n", 66 | "predictions actually match the labels:" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 2, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "data": { 76 | "text/plain": [ 77 | "np.float64(0.9166666666666666)" 78 | ] 79 | }, 80 | "execution_count": 2, 81 | "metadata": {}, 82 | "output_type": "execute_result" 83 | } 84 | ], 85 | "source": [ 86 | "y_hat = cl.predict(X)\n", 87 | "(y_hat == y).mean()" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | 
"metadata": {}, 93 | "source": [ 94 | "Similarly, _cross-validation leave one out_ (_CVLOO_) works in the same way as for Decision Trees:" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 3, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "CVLOO scores: [1. 0. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1.]\n", 107 | "Mean CVLOO score: 0.8333333333333334\n" 108 | ] 109 | } 110 | ], 111 | "source": [ 112 | "from sklearn.model_selection import LeaveOneOut, cross_val_score\n", 113 | "from statistics import mean\n", 114 | "loo = LeaveOneOut()\n", 115 | "scores = cross_val_score(cl, X, y, cv=loo)\n", 116 | "print(\"CVLOO scores:\", scores)\n", 117 | "print(\"Mean CVLOO score: \", mean(scores))" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "## Correlated features\n", 125 | "\n", 126 | "The naive assumption in the naive Bayes algorithm is that the features are independent. We can see what happens when this assumption is not true by adding a new feature to our dataset that is highly correlated with an existing one.\n", 127 | "\n", 128 | "For example, if we also consider the word ”school”, which appears roughly in the same message as the word ”study” and we want to predict \"study money\" we end up with the following dataset:" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 4, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "data": { 138 | "text/html": [ 139 | "
\n", 140 | "\n", 153 | "\n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | "
\n", 250 | "
" 251 | ], 252 | "text/plain": [ 253 | " study free money school\n", 254 | "0 1 0 0 1\n", 255 | "1 0 0 1 0\n", 256 | "2 1 0 0 1\n", 257 | "3 1 1 0 1\n", 258 | "4 0 1 0 0\n", 259 | "5 0 1 0 0\n", 260 | "6 0 1 0 0\n", 261 | "7 0 1 0 0\n", 262 | "8 0 1 1 0\n", 263 | "9 0 1 1 0\n", 264 | "10 0 1 1 0\n", 265 | "11 0 1 1 0" 266 | ] 267 | }, 268 | "execution_count": 4, 269 | "metadata": {}, 270 | "output_type": "execute_result" 271 | } 272 | ], 273 | "source": [ 274 | "new_messages = pd.DataFrame(\n", 275 | " [(1, 0, 1)],\n", 276 | "columns = features)\n", 277 | "\n", 278 | "new_messages_dep = new_messages.copy()\n", 279 | "new_messages_dep['school'] = new_messages['study']\n", 280 | "X_dep = X.copy()\n", 281 | "X_dep['school'] = X['study']\n", 282 | "X_dep" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "Now let's compare the prediction when \"school\" is used and when \"school\" is not used as a training word. When the dependent variable is present:" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 5, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "data": { 299 | "text/plain": [ 300 | "array([[0.98997649, 0.01002351]])" 301 | ] 302 | }, 303 | "execution_count": 5, 304 | "metadata": {}, 305 | "output_type": "execute_result" 306 | } 307 | ], 308 | "source": [ 309 | "# Correlated variable is present\n", 310 | "cl = BernoulliNB().fit(X_dep, y)\n", 311 | "cl.predict_proba(new_messages_dep)" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "And when it is not present:" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 6, 324 | "metadata": {}, 325 | "outputs": [ 326 | { 327 | "data": { 328 | "text/plain": [ 329 | "array([[0.93676815, 0.06323185]])" 330 | ] 331 | }, 332 | "execution_count": 6, 333 | "metadata": {}, 334 | "output_type": "execute_result" 335 | } 336 | ], 337 | "source": [ 338 | "# Correlated variable not present\n", 339 | "cl = BernoulliNB().fit(X, y)\n", 340 | "cl.predict_proba(new_messages)" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "The probability of being spam decreased to 0.01 from 0.063 since the presence of the word ”school” also contributes toward this new probability. This can lead to incorrect predictions since ”school” brings no new information as long as ”study” is already considered, so should not have changed the prediction.\n", 348 | "\n", 349 | "We can compare this with the behaviour of ID3. 
When the dependent variable is present:" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 7, 355 | "metadata": {}, 356 | "outputs": [ 357 | { 358 | "data": { 359 | "text/plain": [ 360 | "array([[1., 0.]])" 361 | ] 362 | }, 363 | "execution_count": 7, 364 | "metadata": {}, 365 | "output_type": "execute_result" 366 | } 367 | ], 368 | "source": [ 369 | "# Correlated variable is present\n", 370 | "from sklearn import tree\n", 371 | "dt = tree.DecisionTreeClassifier(criterion='entropy').fit(X_dep, y)\n", 372 | "dt.predict_proba(new_messages_dep)" 373 | ] 374 | }, 375 | { 376 | "cell_type": "markdown", 377 | "metadata": {}, 378 | "source": [ 379 | "And when it is not present:" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": 8, 385 | "metadata": {}, 386 | "outputs": [ 387 | { 388 | "data": { 389 | "text/plain": [ 390 | "array([[1., 0.]])" 391 | ] 392 | }, 393 | "execution_count": 8, 394 | "metadata": {}, 395 | "output_type": "execute_result" 396 | } 397 | ], 398 | "source": [ 399 | "# Correlated variable not present\n", 400 | "dt = tree.DecisionTreeClassifier(criterion='entropy').fit(X, y)\n", 401 | "dt.predict_proba(new_messages)" 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "metadata": {}, 407 | "source": [ 408 | "As expected, ID3 is not influenced by the cloned variable: once the tree splits on \"study\", the identical \"school\" column adds no further information gain." 409 | ] 410 | }, 411 | { 412 | "cell_type": "markdown", 413 | "metadata": {}, 414 | "source": [ 415 | "## The Laplace estimator\n", 416 | "\n", 417 | "Apart from the naivety assumption, there is another phenomenon affecting the predictions of Naive Bayes when rare words are encountered. For instance, we saw that the message \"study money\" is predicted as non-spam with 93% probability, which seems reasonable. But what if we need to predict the message \"study money university\", and the word \"university\" has never been seen in a spam message before, only in regular messages?\n", 418 | "\n", 419 | "If we look at the naive Bayes formula $ v_{MAP} = \\mbox{argmax}_{v_j \\in V} P(v_j)\\prod_i P(a_i|v_j)$, we can see that all the terms are multiplied, while the term $P(\\mbox{university}|\\mbox{spam})$ is equal to 0, which makes the entire probability of \"spam\" become exactly 0.\n", 420 | "\n", 421 | "This is unrealistic since there are many rare or misspelled words which can turn a prediction to 0 instantly, completely cancelling the effect of all the other words.\n", 422 | "\n", 423 | "The Python implementation, however, doesn't seem to give a 0% chance of spam:" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 9, 429 | "metadata": {}, 430 | "outputs": [ 431 | { 432 | "data": { 433 | "text/plain": [ 434 | "array([[0.98015192, 0.01984808]])" 435 | ] 436 | }, 437 | "execution_count": 9, 438 | "metadata": {}, 439 | "output_type": "execute_result" 440 | } 441 | ], 442 | "source": [ 443 | "# Prepare the data as usual\n", 444 | "import pandas as pd\n", 445 | "features = ['study', 'free', 'money', 'university']\n", 446 | "target = 'is_spam'\n", 447 | "messages = pd.DataFrame(\n", 448 | " [(1, 0, 0, 0, 0),\n", 449 | " (0, 0, 1, 1, 0),\n", 450 | " (1, 0, 0, 0, 0),\n", 451 | " (1, 1, 0, 0, 0)] +\n", 452 | " [(0, 1, 0, 0, 1)] * 4 +\n", 453 | " [(0, 1, 1, 0, 1)] * 4,\n", 454 | "columns=features+[target])\n", 455 | "messages\n", 456 | "\n", 457 | "# Fit a Naive Bayes classifier\n", 458 | "from sklearn.naive_bayes import BernoulliNB\n", 459 | "X = messages[features]\n", 460 | "y = messages[target]\n", 461 | "cl = BernoulliNB().fit(X, y)\n",
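"# Note: 'university' occurs only in non-spam messages above, so its spam count is 0\n",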
462 | "\n", 463 | "# Predict the message \"study money university\"\n", 464 | "new_messages = pd.DataFrame(\n", 465 | " [(1, 0, 1, 1)],\n", 466 | "columns = features)\n", 467 | "cl.predict_proba(new_messages)" 468 | ] 469 | }, 470 | { 471 | "cell_type": "markdown", 472 | "metadata": {}, 473 | "source": [ 474 | "We can see the prediction for \"study money university\" is about 1.98%, even if \"university\" never shows up in a spam message. This is because `BernoulliNB()` uses a technique called _additive smoothing_ or _Laplace smoothing_, controlled through the `alpha` parameter. It essentially adds a constant value (by default 1) to each variable count, as if each variable was seen once for every value of the target. This makes sure that no probability becomes exactly 0. The Python implementation does not allow us to completely disable the estimator, but we can see what happens if we set it to something very close to 0:" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": 10, 480 | "metadata": {}, 481 | "outputs": [ 482 | { 483 | "data": { 484 | "text/plain": [ 485 | "array([[1.00000000e+00, 5.55545733e-32]])" 486 | ] 487 | }, 488 | "execution_count": 10, 489 | "metadata": {}, 490 | "output_type": "execute_result" 491 | } 492 | ], 493 | "source": [ 494 | "cl = BernoulliNB(alpha=1e-10).fit(X, y)\n", 495 | "cl.predict_proba(new_messages)" 496 | ] 497 | }, 498 | { 499 | "cell_type": "markdown", 500 | "metadata": {}, 501 | "source": [ 502 | "By disabling the Laplace smoothing, the predictions are now 100% and 0% respectively." 503 | ] 504 | }, 505 | { 506 | "cell_type": "markdown", 507 | "metadata": {}, 508 | "source": [ 509 | "## Joint Bayes\n", 510 | "\n", 511 | "The independence assumption of naive Bayes could theoretically be dropped, creating an algorithm usually named _Joint Bayes_, which is simply looking for the value \n", 512 | "\n", 513 | "$$ v_{MAP} = \\mbox{argmax}_{v_j \\in V} P(a_1,a_2,...,a_n|v_j)P(v_j) $$\n", 514 | "\n", 515 | "Due to the exponential size of the model, this algorithm is not very practical, and consequently not available in the `scikit` library.\n", 516 | "\n", 517 | "Since the attributes are no longer considered independent, the conditional probability $P(a_1,a_2,...,a_n|v_j)$ can no longer be transformed to $\\prod_i P(a_i|v_j)P(v_j)$. If applied to a text dataset for instance, $n$ would be equal the number of words in a language and the model would have to learn all possible combinations of words in spam and non-spam, so $2 \\times 2^n = 2^{n+1}$. Since English has about $n=170,000$ words in current use, we can see how this approach quickly becomes unfeasable." 
518 | ] 519 | } 520 | ], 521 | "metadata": { 522 | "kernelspec": { 523 | "display_name": "Python 3 (ipykernel)", 524 | "language": "python", 525 | "name": "python3" 526 | }, 527 | "language_info": { 528 | "codemirror_mode": { 529 | "name": "ipython", 530 | "version": 3 531 | }, 532 | "file_extension": ".py", 533 | "mimetype": "text/x-python", 534 | "name": "python", 535 | "nbconvert_exporter": "python", 536 | "pygments_lexer": "ipython3", 537 | "version": "3.12.3" 538 | } 539 | }, 540 | "nbformat": 4, 541 | "nbformat_minor": 4 542 | } 543 | -------------------------------------------------------------------------------- /extras/Lab-EM-Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Expectation-maximisation algorithm\n", 8 | "\n", 9 | "## Exercise 1\n", 10 | "\n", 11 | "Recreate the step-by-step example from the lab so that the algorithm also estimates the variance $\\sigma^2$. \n", 12 | "\n", 13 | "The maximisation step for $\\sigma^2_j$ is\n", 14 | "\n", 15 | "$$ \\sigma^2_j \\leftarrow \\frac{\\sum_{i=1}^{m}E[z_{ij}](x_i-\\mu_j)^2}{\\sum_{i=1}^{m}E[z_{ij}]} $$\n", 16 | "\n", 17 | "Use the same data, same starting hypothesis and perform at least 3 iterations." 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Exercise 2\n", 25 | "\n", 26 | "Recreate the step-by-step example from the lab, but also using the fact that $\\mu_1=\\mu_2-8$.\n", 27 | "\n", 28 | "Use the same data, same starting hypothesis and perform at least 3 iterations." 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## Exercise 3\n", 36 | "\n", 37 | "Recreate the step-by-step example from the lab, but instead of the means, estimate the variances, while the means are fixed at $\\mu_1 = \\mu_2 = 15$.\n", 38 | "\n", 39 | "The maximisation step for $\\sigma^2_j$ is\n", 40 | "\n", 41 | "$$ \\sigma^2_j \\leftarrow \\frac{\\sum_{i=1}^{m}E[z_{ij}](x_i-\\mu_j)^2}{\\sum_{i=1}^{m}E[z_{ij}]} $$\n", 42 | "\n", 43 | "Use the same data, same starting hypothesis and perform at least 3 iterations." 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## Exercise 4\n", 51 | "\n", 52 | "Recreate the step-by-step example from the lab, but also using the fact that $\\mu_1=\\frac{\\mu_2}{2}$.\n", 53 | "\n", 54 | "Use the same data, same starting hypothesis and perform at least 3 iterations." 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## Exercise 5\n", 62 | "\n", 63 | "The dataset below is generated by two overlapping Gaussian distributions in an \"X\" shape. Use the `GaussianMixture` algorithm from `sklearn` to identify the two clusters and visualise the result. Make sure you read the [API documentation](https://scikit-learn.org/stable/modules/generated/sklearn.mixture.GaussianMixture.html)."
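, "\n", "_A minimal call pattern, for orientation only (the parameters are described in the linked documentation): `GaussianMixture(n_components=2).fit(d)` followed by `.predict(d)` gives one cluster label per row._"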
64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 1, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "data": { 73 | "image/png": "[base64 PNG omitted: scatter plot of the generated X-shaped dataset]", 74 | "text/plain": [ 75 | "<Figure size 640x480 with 1 Axes>
" 76 | ] 77 | }, 78 | "metadata": {}, 79 | "output_type": "display_data" 80 | } 81 | ], 82 | "source": [ 83 | "from sklearn.datasets import make_blobs\n", 84 | "import numpy as np\n", 85 | "\n", 86 | "n_samples = 500\n", 87 | "random_state = 10\n", 88 | "X, y = make_blobs(n_samples=n_samples, centers=[(0, 0)], random_state=random_state)\n", 89 | "# Anisotropically distributed data\n", 90 | "transformation1 = [[0.60834549, -0.63667341], [-0.40887718, 0.85253229]]\n", 91 | "transformation2 = [[0.60834549, 0.63667341], [0.40887718, 0.85253229]]\n", 92 | "anis1 = np.dot(X, transformation1)\n", 93 | "anis2 = np.dot(X, transformation2)\n", 94 | "d = np.vstack([anis1, anis2])\n", 95 | "\n", 96 | "from sklearn.cluster import KMeans\n", 97 | "import matplotlib.pyplot as plt\n", 98 | "from matplotlib.colors import ListedColormap\n", 99 | "\n", 100 | "plt.scatter(d[:,0], d[:,1])\n", 101 | "plt.show()" 102 | ] 103 | } 104 | ], 105 | "metadata": { 106 | "kernelspec": { 107 | "display_name": "Python 3 (ipykernel)", 108 | "language": "python", 109 | "name": "python3" 110 | }, 111 | "language_info": { 112 | "codemirror_mode": { 113 | "name": "ipython", 114 | "version": 3 115 | }, 116 | "file_extension": ".py", 117 | "mimetype": "text/x-python", 118 | "name": "python", 119 | "nbconvert_exporter": "python", 120 | "pygments_lexer": "ipython3", 121 | "version": "3.12.3" 122 | } 123 | }, 124 | "nbformat": 4, 125 | "nbformat_minor": 4 126 | } 127 | -------------------------------------------------------------------------------- /Lab10-Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# k-Nearest Neighbours" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Exercise 1\n", 15 | "\n", 16 | "Ternary classification\n", 17 | "\n", 18 | "Given three points R=(-1, 0), G=(0, 1), B=(1, 0), colored in red, green and blue, respectively. Using `matplotlib`, create a plot that shows the decision surface of the 1-NN classifier using these three points as a dataset with three different classes, similart to the way `plot_decision_surface_knn` works for two classes (the function is located in `tools/plots.py`)." 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## Exercise 2\n", 26 | "Accuracy comparison with ID3\n", 27 | "\n", 28 | "Considering the dataset below:\n", 29 | "1. plot the points using the `scatter` function from `matplotlib`;\n", 30 | "1. compare the training accuracy of 1-NN and ID3;\n", 31 | "1. compare the CVLOO accuracy of 1-NN and ID3." 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 1, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "import pandas as pd\n", 41 | "d = pd.DataFrame({\n", 42 | " 'X1': [1, 2, 4, 5, 7],\n", 43 | " 'X2': [2, 1, 5, 4, 3],\n", 44 | " 'Y': [0, 0, 0, 1, 1]\n", 45 | "})\n", 46 | "X, Y = d[['X1', 'X2']], d['Y']" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "## Exercise 3\n", 54 | "1-NN decision boundary\n", 55 | "\n", 56 | "Using the code below, change the value of the `boundary` variable such that it correctly represents the decision boundary for 1-NN between the two classes of points. You can specify hard-coded values (you don't have to calculate them in Python)." 
57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 2, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "image/png": "
Xcxvv/1Gx44defnll7n11ltPc7djJo/HQ2JiIpdeemlgdWSnFUN5MhVnGDZsWIkv09phxVCeTLquc/fcr/lmj9/9oeExTOrfxraroNIyFRQUkJiYyNChQ3G73bZaMZQnU/FraOjQoYSEhJTqbqdMRUVFtGzZkry8PMoiODiYzMxMGjVqpMTKLjc3l8jISHO+d22YTKtWrYyXXnqpxNjjjz9uxMTElOvxeXl5BmDk5OSYrVZtpKenG/Xr1zeGDh1q/P7774Hx1NRUo3PnzkZ0dLSRn59voWHFKCoqMpYsWWIUFRVZrVJpVM9QVFRkJCQsMZ5a9rPRZsYXRpsZXxjzVv5mtVaFqA1zoJr/k08+aQCl/tM0zZg+fbrVihUiJyfHAIy8vLwqb8v0rx4cO3bstNPSOJ1OU/e92o2OHTvy+eefs27dOtq3b0+vXr3o1q0b8fHxgb8OzznnHKs1BcXQNLhv8Ln8ddC5ADyx9FfmnWheEYTSeOCBB5g2bRrg/9x1uVyBC6BOnDiROXPmWGhnLabvxrz55pv55ptveP311+natSupqalMmTKFW265pVy/aBW7MYs5cuQIixYtYvXq1TidTi677DKuuuqqwMFtVTAMA6/Xi8vlUqILrTRUz3CyP8B/vtnGC99uA+DB4Z24/eIOVuqVi9o0B6r5b9u2jbfffpvdu3fTtGlTJk6cSPfu3a3WqjBmdmOaXuzy8/OZOXMmCQkJZGVlERUVxZgxY3jkkUcICgo66+NVLnbFGIq1LJ+K6v6gfobS/P+TuJXnTxS8GcM6cecl9i54tXEOVEP1DGYWO9N3Y4aGhvLcc8+RkZFBQUEBv/32G0888US5Ct3JqHianmK8Xi9JSUnKZlDdH9TPUJr/fUM7Mm2I/3tUc77cwivfbbdKr1zUxjlQDdUz1IkrlQuCcDrThnRk+tCOADz9ZTovJ9m74AmCXZBiJwiKMXXwefy/S/0F75mv0nnxxK5NQRDKRopdNVHcWKAqqvuD+hnO5H/PoPO4/7IYAP4vcWugecVu1OY5UIXakMEM5OKtgqAwr3y3nae/9J+hYtqQ85g2pKPFRoJgHnXi4q0qfy9P13WysrKUzaC6P6ifobz+d11yLg8M7wTAc99s4z+JW2tCr1zUlTmwM6pnMNPbtsXu1EtUqITP5yM5OVnZDKr7g/oZKuJ/x8UdePBEwXv+2208m7jVFtePrEtzYFdUz2Cmt22LnSAI5ef2izvw8IjOALxgo4InCHZBip0g1BImX9Sef4z0F7wXV2zn31+nS8EThBPYttip+G3/YjRNU/aMBaC+P6ifobL+t/2lPTMv7wLAy0m/8cxX1hW8ujoHdkL1DGZ6SzemINRC3vxxJ7M+/wXwH9ObMUzdK54LdRfpxrQ5uq6TkZGhbAbV/UH9DFX1nzSgHY9d4V/hvfb9bzy1fEuNr/Dq+hzYAdUzSDemzfH5fKSlpSmbQXV/UD+DGf43D2jHP6/qCsDrK3cwu4YLnsyB9aieQboxBUEoFxP6teXxEwVv7sodPLn0V2laEeokUuwEoZYzvl9bnri6GwDzf9jJ419IwRPqHrYtdiofTNc0jYiICGUzqO4P6mcw2/+mvm148hp/wXvjx53884tfqr3gyRxYj+oZpBtTEIRK8X5KJg9+shmAm/u35dEruij7QSjUfupEN6aqB1TB775lyxZlM6juD+pnqC7/Mb2jeera7gC8tep3Hvvs52pb4ckcWI/qGepEg4qqrbLgd09PT1c2g+r+oH6G6vQf3Tuap6/rgabB28kZPPJp9RQ8mQPrUT1DnfjqgSAI1ceoXq2Zc6Lgvbs6g5mf/oSu2+qIhiCYihQ7QaijjOrZOrDCe291Jv+QgifUYmxb7BwO26qdFYfDQXR0tLIZVPcH9TPUlP8NPVvz7+tj0TRYtCaTh5eYV/BkDqxH9Qxmeks3piAIfLJhN3/7aCOGAaN7teZf13TH4ZAuTcFapBvT5vh8PlJTU5XNoLo/qJ+hpv2vPb8Vz46KxaHB4rW7ePCTzVVe4ckcWI/qGaQb0+bouk5mZqayGVT3B/UzWOF/TXwr/nNjHA4NPli3ixkfb6pSwZM5sB7VM5jp7TJtS4IgKM9VcS0BuO+DND5avxsDmHNdD5yyS1NQHCl2giCU4Kq4ljg0jWkfpPHf9bsxDHj6eil4gtrYttip2j0EfveYmBhlM6juD+pnsNr/itgoNA3uXZzGxxt2YxgGz9wQW6GCZ3WGqqK6P6ifQboxBUGoEZZu2sfUxan4dINr4lvy7woWPEGoCnWiG9Pr9VqtUGm8Xi+rVq1SNoPq/qB+Brv4j+zRgpfGxONyaCSk7mH6h2l4feVrGrBLhsqiuj+on8FMb9sWO5stOCuEYRhkZ2crm0F1f1A/g538h3dvwUtj/QXv07S9TP9wY7kKnp0yVAbV/UH9DGZ627bYCYJgH4Z1a8FLY8/H5dD4bONe7itnwRMEuyDFThCEcjGsW3NeGXc+bqfG5xv3cu8H5d+lKQhWY9ti53Q6rVaoNE6nk7i4OGUzqO4P6mewq/+lXZvzyrgLcDs1lm7ax72L0/CUUfDsmqG8qO4P6mcw01u6MQVBqDDf/HKAOxeux+MzGN6tOS+MicfttO3fzoKiSDemzfF6vaxYsULZDKr7g/oZ7O4/pEszXrvpAoKcDpb/tJ+/Lko9bYVn9wxnQ3V/UD+DdGPaHMMwyM/PVzaD6v6gfgYV/Ad3bsbr4/0F78uf93PPog0Uef8seCpkOBOq+4P6GaQbs46SmprKxIkTiYyMJDw8nKFDh7JkyRJlX8iC+gzsFMnrEy4gyOXgq58PcOd763jplVfp0aMH4eHhTJo0iQceeIDdu3dbrSrUcaql2O3Zs4ebbrqJJk2aEBISQvfu3Vm3bl11PFWd4cMPP6R3796sXLmS22+/nQceeIBjx45xzTXXcO+990rBEyxjYEwkc0+s8L7dks0/v93LT79s4dixYxw6dIjnn3+e7t27s3HjRqtVhTqM6Q0qhw4dIj4+noEDB3LnnXcSERHBtm3b6NChAx06dDjr44sPSB46dIjw8HAz1WoMXdfJycmhadOmppzbbe/evbRv357rrruOt99+G5frz1Oavvbaa9x5553897//5brrrqvyc4H5/lagegYV/Sf8fTbf+WJwuIM5tm0N2Z/OBp//mIvT6SQ6Oprt27crk0fFOTgV1TPk5ubSqFEjUxpUTC92DzzwAD/++CP/+9//KvV46cY8nVmzZvHMM8+wZ88ewsLCTrv94osvRtM0vvvuu5qXEwSgoKCA5s2bU9S4PRHXzvQXvO0pZC/5V6DgASxfvpxhw4ZZaCqohJn1wPSrHnz22Wdcdtll3HDDDXz//fe0bNmSu+66i8mTJ5d6/8LCQgoLCwM/Hz58GIBjx44REhIC+M987XQ68fl8JS7mVzzu9XpL7MZzOp04HI4yxz0eTwmH4pXSqZ0/ZY273W50XS9xFV1N03C5XOi6zvHjx1mxYgWDBg0iKCgIl8tVpnt5Mq1Zs4ZBgwaVWugArr76ah566KESuaq
SyePxsGLFCgYPHkxISEiZWauSCap3njweD0lJSQwZMqTEX7Qnz5OdMxmGwddff82gQYNwu92B8bO99qzK9Ouvv/rfu4fTyP74n0Rc9wj1z+1NxDUPk53wL/B5cLlcrF69msGDB5fIatdMxe+DQYMGBT6LzPqMqKlMxRkuu+wyTsXsz73qyHRybagqphe7HTt28OqrrzJ9+nQeeugh1q5dy9SpUwkKCmLixImn3X/27NnMmjXrtPGkpCTq168PQHR0NPHx8WzatInMzMzAfWJiYujUqRMpKSlkZ2cHxuPi4mjTpg0rV64kPz8/MN6vXz8iIyP5+uuvS0zkwIEDCQkJYdmyZSUcRowYQUFBAUlJSYExl8vFyJEjycnJITk5OTAeGhrKoEGD2LVrF2lpaQAkJiYSERFB//792bZtG+np6YH7VyTTwYMHz/hXTUFBAQ6Ho4S/GZlWrVrF4MGDS2QCTMkE1T9PXq+XgwcPsnbt2sB4afNkx0xDhw7F5/ORmJhYIlN5X3s1nWnnzp2B8eMZG08UvJnU79CLyGseJivhSQzD/4F4clY7Zyqep8TExGr7jKiJTMXU1OeemZnatm17Wo7KYvpuzKCgIHr27MmqVasCY1OnTmXt2rUlfknFlLaya926Nfv27aNJkyaAvVYMUL6VXWJiIkOHDjVlZVf8x8POnTtp3bp1CRdd1+nWrRudOnXigw8+MCWTx+MhMTGRSy+9VOmVXWJiIsOGDVN2Zbd8+XKGDh2qxMrO5/PRvn179u3bF7i9XnQPIq5/BIe7HgU71pH1yZOkJP9IXFxciax2zVT8Gho6dKjSK7vExERGjBjBqaiwssvNzSUyMtKeuzFbtGhBly5dSox17tyZjz/+uNT7BwcHExwcfNq42+0u8SYH/2SWdvqYkxs2yjN+6nYrM+5wOEo94OtwOAL3d7vdAYey3MuT6eabb+bJJ5/k+uuvJyEhgaioKMC/q3f69Ols2bKF+fPnl+pZlUzFDmVlrUqm8oybNU8VmT+7ZCouiqW9D8702rMqk9vtZsaMGUybNi0wfjxzE1kfzSLy+kcJad+TmFv/j+5x5+N2n/6cdsx08v9rmnbaeFn3P5u7HTJVdrymM5nZVGN6e86AAQNKLFsBtm7dSps2bSq0nbImTAVcLhcDBw40LUPDhg1ZtmwZO3fupE2bNowYMYJRo0bRsmVL5s+fz/z58+nfv78pzwXm+1uB6hlU9J86dSp33XUX8Of717PnZ7L++xh4izjeqD2T31nHcY/vDFuxDyrOwamonsFMb9N3Y65du5b+/fsza9YsRo0aRUpKCpMnT2bu3LmMGzfurI8v7r7Jzc0tsyHD7hiGgdfrxeVyBf4iNIPc3Fzeeecdli1bRlFREb169WLKlCnl+kpHRagu/5pE9Qwq+69Zs4Z58+axdetWwsLCGDt2LNEXDGLye6kcK/Jx4blNmTehJyFB9j45scpzUIzqGfLy8ggPDzenO9+oBj7//HOjW7duRnBwsNGpUydj7ty55X5sXl6eARg5OTnVoVYjFBUVGUuWLDGKioqsVqkUqvsbhvoZVPc3jNMzrNlx0Og8c7nRZsYXxth5ycaxQq/FhmemNs6BauTk5BiAkZeXV+VtVcu3DC+//HI2b97M8ePH+fXXX8v82oEgCHWH3u0a8/YtvWkQ5OTH7Qe55a21HCtS8wTFgnqo95V6QRCUpVfbPwte8g4peELNIcVOEIQapWfbxrxza2/OCXaxescfTHpTCp5Q/dj24q3SoGIdqvuD+hlU94ezZ9iQeYiJC1LIL/TSu11j3ry5Fw2C7dM1WBfmwO6Y2aAiK7tqoqCgwGqFKqG6P6ifQXV/OHOG86Mb8c6tvQkNdpGy07/CO1porxVebZ+DuoRti52qV9YFv3tSUpKyGVT3B/UzqO4P5csQH92Id2/rQ2g9Fym//8HNb6ZwxCYFr67MgZ2pE1cqFwShbhDXOpz3bvUXvLW/H2LiGynkH/ec/YGCUAGk2AmCYDmxrcNZeFsfGtZzsT5DCp5gPlLsqglVT89TjOr+oH4G1f2hYhl6tApn4W19CQtxsyEzlwlvpHDY4oJX1+agNmPbbky5eKsg1E1+2pPHuPlryCvwENc6nHdu7U3DeqWfrFio3ZhZD2y7sjv58g+qoes6WVlZymZQ3R/Uz6C6P1Q+Q7eWYSy8rQ/h9d2k7cpl/IIU8gpqfoVXl+fALpjpbdtid/K1kVTD5/ORnJysbAbV/UH9DKr7Q9UynFzwNu7KZcKCNTVe8Or6HNgBM71tW+wEQajbdI0KY9FtfWlU383G3XmMX7CGvGPStCJUDil2giDYli5RDVk0uS+NGwSxaXceN0nBEyqJbYudiqe2KUbTNEJDQ5XNoLo/qJ9BdX8wL0PnFg1ZNLkPjRsEsXlPHuMWrCb3WJFJlmUjc2A9ZnpLN6YgCEqQvj+fsfNWc/BoEV1aNGThbX1o1CDIai2hGpFuTJuj6zoZGRnKZlDdH9TPoLo/mJ8hpnko70/pS9Nzgvhl32HGzV/DoaPVt8KTObAe6ca0OT6fj7S0NGUzqO4P6mdQ3R+qJ0PHZqG8P7kvTc8J5pd9hxk7fw1/VFPBkzmwHunGFAShznJes1AWT+lD03OC+XXfYf+uzSOFVmsJNkeKnSAIynFuZCiLp/QlIjSYLfvzGTd/jRQ84YzYttip2j0EfveIiAhlM6juD+pnUN0fqj/DuZHnsHhKXyJPFLyx89aQY2LBkzmwHunGFARBOMGO7COMmbeaA4cLOS/yHBZN9q/4BPWpE92Yqh5QBb/7li1blM2guj+on0F1f6i5DO0jzmHxlH40axjMtqwjjJ23muz8qq/wZA6sp040qKjaKgt+9/T0dGUzqO4P6mdQ3R9qNkO7pg1YPKUfzRvWY1uWf6WXlX+8StuUObCeOvHVA0EQhIrgL3h9aRFWj+1ZRxgzdzVZh6tW8ITagxQ7QRBqDW1PFLyosHr8ln2U0fOk4Al+bFvsHA7bqp0Vh8NBdHS0shlU9wf1M6juD9ZlaNPEv0uzZXgIO7KPMnruag5UouDJHFiPmd7SjSkIQq1k1x/HGD13NXtyC2jXtAHvT+5L87B6VmsJFUC6MW2Oz+cjNTVV2Qyq+4P6GVT3B+sztG5cn8VT+tIyPISdOUcZPTeZfXkF5X681f5moHoG6ca0Obquk5mZqWwG1f1B/Qyq+4M9MhQXvFaNQvj9oH+ltze3fAXPDv5VRfUM0o0pCIJQTooLXuvGIWQc/HPXplC3kGInCEKtp1Wj+iye0o/oxvXJ/OMYo+cmS8GrY9i22KnaPQR+95iYGGUzqO4P6mdQ3R/sl6FleAiLp/QlunF9dv1RwOi5yew+dKzM+9vNvzKonkG6MQVBECrJvrwCRs9dTcbBY7RqFML7k/vSunF9q7WEUqgT3Zher9dqhUrj9XpZtWqVshlU9wf1M6juD/bN0CIshA+m9KNtk/rsPuQvfLv+OH2FZ1f/iqB6BjO9bVvsbLbgrBCGYZCdna1sBtX9Qf0MqvuDvTM0D6vH4in9aNe0AXtySy
94dvYvL6pnMNPbtsVOEAShOvEXvL60P1Hwbnw9mcyDZR/DE9RGip0g2IicnBzmzJnDX/7yF+677z4mTpzIjz/+aLVWraVZwxMFL6IBe/OOc+PcZDIOHrVaS6gGqr3YPfXUU2iaxrRp0yr0OKfTWT1CNYDT6SQuLk7ZDKr7g5oZNm7cSNeuXXn00Udp06YNw4cPZ+3atVx44YXMmDFDuV1RqsxBZMN6LJ7clw4RDdiXd5zRc1fze85RZfzPhOoZzPSu1m7MtWvXMmrUKBo2bMjAgQN57rnnzvoY6cYU6iKFhYWce+65REZGsnTpUpo3bw74zyDx3HPP8be//Y2FCxcyduxYi01rL1n5xxk7bw3bs47QvGE93p/Sl3ZNG1itVadRohvzyJEjjBs3jnnz5tGoUaMKP17V7iHwu69YsULZDKr7g3oZPv74Y3bv3s17770XKHTg/57R9OnTGTZsGM8++6yFhhVHtTmIDK3H+5P7cl7kOew/fJzRryez6PNvlPEvDdXm4FTM9HaZtqVTuPvuuxk5ciRDhgzhiSeeKPN+hYWFFBYWBn4+fPgwAEVFRXg8HsD/hnc6nfh8vhLnSise93q9JXbxOJ1OHA5HmePF2y3G5fL/Gk79xZY17na70XW9xElKNU3D5XKh6zpFRUXk5+dTVFQU2E5Z7nbM5PF4yM/Px+PxBDKVltXOmYoz+Hy+Ets+eZ7slOnbb78lNjaWzp07Uxpjxoxh4sSJ5Ofn06BBgzJfe3bKdPL7wDCMSr+fajJTkwZuFk3uy9h5yWzLOspTKQaxsbl0ad2kVHe7Zyp+HxiGUe2fe9WRycwTQVdLsVu8eDEbNmxg7dq1Z73v7NmzmTVr1mnjSUlJ1K/v/6JndHQ08fHxbNq0iczMzMB9YmJi6NSpEykpKWRnZwfG4+LiaNOmDStXriQ/Pz8w3q9fPyIjI/n6669LTOTAgQMJCQlh2bJlJRxGjBhBQUEBSUlJgTGXy8XIkSPJyckhOTk5MB4aGsqgQYPYtWsXaWlpACQmJhIREUH//v3Ztm0b6enpgfurkGnVqlUMHjy4RCZAmUwABw8eLPE6LG2e7JApMzPzjGeLKL7NMIxyvfbskCkxMREg8N+qvp9qMtOt7Y7yfL7BvgKNMfNW8+6knnRp3cT0z4iayFRMTX3umZmpbdu2p+WoLKYfs9u1axc9e/YkMTGRHj16AHDJJZcQFxdX6jG70lZ2rVu3Zt++fTRp4v9ryk4rBjj7XzjHjx8nMTGRoUOHEhQUZPtV0KmZPB4PiYmJXHrppYSEhNhuxVDelV1iYiLDhg0rUUTsugp64403uO2229i2bRvnnnsup3LVVVexc+dONm3aZIsVQ3kyFRQUBN4HbrfbViuG8mQ6kHuUG179gX3HNCJCg3n/tj60aVzyenh2z1T8PhgxYgSnosLKLjc3l8jISFOO2Zle7JYsWcI111xToovG5/OhaRoOh4PCwsIzdtgUH5A8dOgQ4eHhZqrVGLquk5OTQ9OmTZU8J53q/qBehoKCAtq1a8e5557LF198EXjtG4bBggULmDx5MgsWLOCWW26xVrQCqDYHp6LrOtsy93Hvkt/Ysj+fpucEs3hKH86NDLVardyoPge5ubk0atTInsUuPz+fjIyMEmOTJk2iU6dOzJgxg27dup3x8dKNKdRVVq9ezfDhw/H5fIwaNYqIiAi++uorUlNTueOOO3jllVfQNM1qzTrHH0eLGDd/Db/uO0zTc4L8TSzN1Cl4KmPrbszQ0FC6detW4l+DBg1o0qTJWQvdyZy6a0QlPB4PS5cuVTaD6v6gZoa+ffuyefNmpk6dyqpVq3j77beJiopi6dKlShY6FefgZIr9Q4M0Ft3Why4tGpJzpIjRc1ez9cDpx8TsSG2YA7NQb12rCKq2+hajuj+omaFVq1Y88cQTbNy4kVdffZWEhARGjBihXKErRsU5OJli/0YNglh4Wx+6RjXk4NEixsxdTfp+NQqe6nNgFjVS7L777rtyfaFcEATBrhQXvG4tTxS8eavZsv+w1VpCOZGVnSAIQjkJrx/Ewlv70r1lGH+cWOH9uk8KngrY9uKtubm5hIWFWa1TKQzDID8/n9DQUCV3P6nuD+pnUN0f1M9wJv+8Yx7Gv7GGTbvzaFTfzcLb+tIlyn4NdarPQV5eHuHh4fZsUBH8hISEWK1QJVT3B/UzqO4P6mcoyz+svpt3b+1DbKswDh3zMHb+an7em1fDduVD9TkwC9sWO5UPqnq9XpYtW6ZsBtX9Qf0MqvuD+hnO5h8W4ubd2/oQ2zqc3GMexs1fw0977FXwasMcmIVti50gCILdaVjPzbu39ibOxgVP8CPFThAEoQoUF7z46HDyCjyMnbeazbul4NkNKXaCIAhVJLSem3du6c350eEcPu5l3PzVbNqda7WWcBLSjVkNGIaB1+vF5XIp2QGluj+on0F1f1A/Q2X8jxR6ufmNFNZlHCK0nov3bvUf07MK1edAujEVoKCgwGqFKqG6P6ifQXV/UD9DRf3PCXbx1i296dW2EfnHvdw0fw1pu3KrR66cqD4HZmHbYqdq9xD43ZOSkpTNoLo/qJ9BdX9QP0Nl/c8JdvHmpN70btuY/EIv4+evITXzUDVZnpnaMAdmYdtiJwiCoCr+gteL3u1OFLwFKazPsKbgCX6k2AmCIFQDDYJdvDWpF33aNeZIoZeJb6SwPuMPq7XqLFLsqoniq/2qiur+oH4G1f1B/QxV9a8f5F/h9W3vL3gTFqSw7veaLXiqz4FZ2LYbUy7eKghCbaGgyMetb69l1W8HaRDkPNHE0thqLdtj64u3moWu61YrVBpd18nKylI2g+r+oH4G1f1B/Qxm+ocEOVkwsRcDzm3C0SIfE99IIWVn9a/wasMcmIVti53P57NaodL4fD6Sk5OVzaC6P6ifQXV/UD+D2f4hQU7mT+jFhec25ViRj5vfTGHNjoOmbLssasMcmIVti50gCEJtIyTIyfyJPfnLecUFby2rq7ngCX6k2AmCINQg9dxO5k3oyUUdIyjw+Jj05lqSf5OCV93YttipeGqbYjRNU/ZiiaC+P6ifQXV/UD9DdfrXczuZO/4CLi4ueG+lsGp7junPUxvmwLRtSTemIAiCNRz3+LjjvfV8l55NPbfjRBNLU6u1bIN0Y9ocXdfJyMhQNoPq/qB+BtX9Qf0MNeFfz+3k9fEXMDAmguMenVveWssP28xb4dWGOTAL2xY7VbuHwO+elpambAbV/UH9DKr7g/oZaso/2OXktfEXMKhTJIVenVvfXsv/tmWbsu3aMAdmYdtiJwiCUFcIdjl59abzGdK5uOCt4/ut5hQ8wY8UO0EQBBsQ7HLyyrgLGNK5GUVencnvrOO79CyrtWoNti12qnYPgd89IiJC2Qyq+4P6GVT3B/UzWOEf5HLwyrjzGdrFX/CmvLuepCoUvNowB6ZtS7oxBUEQ7EWRV+ev72/gq58PEOR0+JtYOkVarVXj1IluTFUPqILffcuWLcpmUN0f1M+guj+on8FK/yCXg5fGns+wrs0p8unc/u56vv31Q
IW3UxvmwCxsW+xUbZUFv3t6erqyGVT3B/UzqO4P6mew2t/tdPDi2HiGd/MXvDveW883v1Ss4FmdoarUia8eCIIg1HXcTgcvjIlnZPcWeHwGdy5cT2IFC57gR4qdIAiCjXE7HTw/Oo6RPfwF766F6/n65/1WaymHbYudw2FbtbPicDiIjo5WNoPq/qB+BtX9Qf0MdvJ3OR08f2McV8RGnSh4G/jyp7MXPDtlqAxmeks3piAIgiJ4fTrTP9zIZxv34nJovDQ2nmHdWlitVW1IN6bN8fl8pKamKptBdX9QP4Pq/qB+Bjv6u5wOnh0Vy1VxUXh1g7sXpbJ8874y72/HDBVBujFtjq7rZGZmKptBdX9QP4Pq/qB+Brv6+wteHNfEt8SnG9zzfipLN5Ve8OyaobyY6e0ybUuCIAhCjeB0aPz7hlg04JPUPUxdnIqBweU9oqxWsy1S7ARBEBTE6dB45oZYNE3j4w27uXdxGoYBV8RKwSsN2+7GVLV7CPzuMTExymZQ3R/Uz6C6P6ifQQV/p0Pj6et7cP0FrfDpBvcuTuXTtD2B21XIcCZs3Y05e/ZsPvnkE7Zs2UJISAj9+/dnzpw5xMTElOvx0o0pCIJQMXTd4IFPNvHhut04NPjPjXFcFdfSaq0qY+tuzO+//567776b1atXk5iYiMfj4dJLL+Xo0aMV2o7X6zVbrcbwer2sWrVK2Qyq+4P6GVT3B/UzqOTvcGg8dW0PbuzZGt2A+z5IIyF1t1IZSsNMb9OP2X355Zclfn7rrbeIjIxk/fr1XHTRReXejs2+/lchDMMgOztb2Qyq+4P6GVT3B/UzqObvcGjMvrY7mgaL1+7ibx9uxOv1EaRQhlMx07vaG1Ty8vIAaNy4cam3FxYWUlhYGPj58OHDAHg8HjweD+Dfb+t0OvH5fCVaUYvHvV5viV+K0+nE4XCUOV683WJcLv+v4dS/Isoad7vd6Lpe4jsgmqbhcrnQdT2wfY/HExgvy92OmYq35fV6z5jVzpmKH3vyfJzsbvdMxY899f5ne+3ZKdPJ74MzZbVrppP9zf6MqM5M/7yiM4Zh8MG63fz9458Y20FjBKe/llTIpMxXD3RdZ9q0aQwYMIBu3bqVep/Zs2cza9as08aTkpKoX78+ANHR0cTHx7Np0yYyMzMD94mJiaFTp06kpKSQnf3nJezj4uJo06YNK1euJD8/PzDer18/IiMj+frrr0tM5MCBAwkJCWHZsmUlHEaMGEFBQQFJSUmBMZfLxciRI8nJySE5OTkwHhoayqBBg9i1axdpaWkAJCYmEhERQf/+/dm2bRvp6emB+6uQadWqVQwePLhEJkCZTAAHDx5k7dq1gfHS5smOmYYOHQr4X0MnZyrva88OmYrdi/9b1feTVZkSExOr7TOiOjL98MP/6OvKZ3czBz8ecLDoNwfdN+whNOfnGvncMzNT27ZtMYtqPV3YnXfeyfLly/nhhx9o1apVqfcpbWXXunVrsrKyCA8PB+y1YoDyrez27NlDy5YtcTqdtloxlCeTruvs2bOHVq1aERQUZLsVQ3ky6brO/v37admyZYlt22HFUJ5MDoeDXbt20aJFixIdaXZdBZWWqaioKPA+cDgctloxlCdT8fugZcuWBAUFlepu50y6bvDYF7/y/trdaBr86+quXH/+n00rKqzs8vPzadKkiSkNKtVW7O655x4+/fRTVq5cSbt27cr9OOnGFARBMAfDMHj0s595JzkDTYOnru3Ojb2irdYqN7buxjQMg3vuuYeEhARWrFhRoUJ3Mqp2D4HffcWKFcpmUN0f1M+guj+on0F1f/CfW/Licw4wvm80hgEzPt7M4pTMsz/QJti6G/Puu+9m0aJFfPrpp4SGhrJ/v/8yFGFhYYSEhJR7O6p2D4HfPT8/X9kMqvuD+hlU9wf1M6juD/4MR44cYeaIv+B0OHhr1e888MlmdAPG9rH/Cs/M373pK7tXX32VvLw8LrnkElq0aBH498EHH5j9VIIgCEI50DSNR6/owqQBbQF4KGEzC9dkWCtVw5i+slP5ryBBEITaiqZpPHJ5FxyaxoIfdvJwwk/oBozv28ZqtRrBthdvPXToUKAbUzV0XScnJ4emTZsqeU461f1B/Qyq+4P6GVT3h9IzGIbBk0t/Zf4POwF4/KqujO/X1kLLssnNzaVRo0b27sasLNKNKQiCUL0YhsHs5VuYu3IHALOu7MrE/m2tlSoFW3djmsWp391RCY/Hw9KlS5XNoLo/qJ9BdX9QP4Pq/lB2Bk3TeHB4J26/uD0Aj372M2/9uNMKxTNi5u/etsVOdVRuVwb1/UH9DKr7g/oZVPeHsjNomsYDwzpxx8UdAHjs81944wf7FTyzkGInCIJQR9E0jRnDYrjrEn/B++cXvzD/fzsstqoepNgJgiDUYTRN4/7LYrhn4LkAPLH011pZ8GzboJKbm0tYWJjVOpWi+MuooaGhaJpmtU6FUd0f1M+guj+on0F1f6hYBsMweDZxKy+u2A7AwyM6M/mi9jWhWSZ5eXmEh4fX7gYV1anI2WLsiOr+oH4G1f1B/Qyq+0P5M2iaxvShHZk6+DwAnlz2K69//1t1qtUoti12Kh8Y9nq9LFu2TNkMqvuD+hlU9wf1M6juDxXPUFzw7j1R8GYv38Kr31lX8Mz83du22AmCIAjWcN/Qjkwb4i94c77cwivfbbfYqOpIsRMEQRBOY9qQjkwf2hGAp79M5+UktQueFDtBEAShVKYOPo//d6m/4D3zVTovfrvNYqPKI92YJpKfn8/ChQtZs2YNDoeDyy67jGuuuQa32221WoUwDAOv14vL5VK6C03lDKr7g/oZVPcH8zK8nLSdZ75KByjRxFLdSDemDfn222+Jjo7m7rvv5pdffmHdunXceOONdO7cmfT0dKv1KkxBQYHVClVG9Qyq+4P6GVT3B3My3D3wXP4+LAaAZxO38tw3W6u8zZrGtsVOpQ6o9PR0rrzySvr06cPvv//OmjVr2LhxIxs3biQ4OJhLL72UI0eOWK1ZbrxeL0lJSUrNwamonkF1f1A/g+r+YG6Guy45lweGdwLguW+28Z/E6i940o1pM55//nkaNmzIJ598QuvWrQPjPXr04PPPP2f37t0sXLjQQkNBEISqc8fFHXjwRMF7/tttPJu4VZlrmEqxM4ElS5Ywfvx46tevf9pt7du3Z8iQISxZsqTmxQRBEEzm9os78PCIzgC8oFDBk2JnAgUFBTRp0qTM25s0aaLcvn+Xy/SL2Nc4qmdQ3R/Uz6C6P1RPhskXtecfI/0F78UV2/n31+m2L3i27cZU6eKtF110ES6XixUrVpx2W1FREdHR0dxwww28+OKLFtgJgiBUDwt+2MnjX/wCwF2XdOD+y2JM7VytExdv1XXdaoVyc+edd5KUlMSHH35YYtwwDP75z39y4MAB7rjjDovsKo6u62RlZSk1B6eiegbV/UH9DKr7Q/VnuPXCdjx6RRcAXvnuN+Z8ae4Kz0xv2xY7n89ntUK5ufHGGxk3bhyjR4/m+uuvZ+HChSxYsICBAwfy5JNPMnv2bLp2
7Wq1Zrnx+XwkJycrNQenonoG1f1B/Qyq+0PNZJg0oB2PnSh4r33/G08t32JawTPTW/0d0jbA4XDwzjvvMGDAAF588UU+/vhjAAYMGEBCQgJXX321tYKCIAjVyM0D2uFwaDzy6c+8vnIHBvDg8E62+jK+FDuTcDgc3Hnnndxxxx0cOnSIb775RsmzpwiCIFSGCf3aogEzP/2ZuSt3oOsGD4/sbJuCZ9vdmHb5BVUUTdNo2LAhTZs2VTqDyhesBPUzqO4P6mdQ3R9qPsP4fm154upuAMz/YSePf/FrlXZpmukt3ZiCIAiCqSxck8HDCT8BMGlAWx65vEulCpd0Y9ocXdfJyMhQNoPq/qB+BtX9Qf0MqvuDdRnG9WnD7Gu7A/Dmj78z6/NfKrXCk25Mm+Pz+UhLS1M2g+r+oH4G1f1B/Qyq+4O1Gcb0juapEwXvrVW/89hnP1e44JnpbdtiJwiCIKjN6N7RPH1dDzQN3k7O4JFPK17wzEKKnSAIglBtjOrVmjknCt67qzOY+elP6HrNFzzbFjvVO6AiIiKUzaC6P6ifQXV/UD+D6v5gnwyjerbmmetj0TR4b3VmuQuedGMKgiAIyvHx+t38v/9uxDD8x/SevLobDkfZBa1OdGOqflB4y5YtymZQ3R/Uz6C6P6ifQXV/sF+G6y5oxbOjYnFo8H5KJg8lbD7jCq9ONKio3u6bnp6ubAbV/UH9DKr7g/oZVPcHe2a4Jr4Vz46Kw6HB4rW7ePCTsgtenfjqgSAIglA7uTq+Jf+50V/wPli3ixkfb6r2phUpdoIgCEKNc1VcS54bHY9Dg4/W7+bvH2/CV40Fz7YngnY41K3DDoeD6OhoZTOo7g/qZ1DdH9TPoLo/2D/DlbFRaMC0D9L47/rdGAY8fX0PnCeaVsz0lm5MQRAEwVK+2LSXexen4dMNro1vyTM3xOJ0aGp0Y7788su0bduWevXq0adPH1JSUir0eLt0D1UGn89HamqqshlU9wf1M6juD+pnUN0f1MlweY8oXhwTj9Oh8UnqHv7fRxvx6Yb9uzE/+OADpk+fzqOPPsqGDRuIjY3lsssuIysrq9zbsFP3UEXRdZ3MzExlM6juD+pnUN0f1M+guj+olWFE9xa8NCYel0MjIXUP0z9Mo8jjNW371VLsnn32WSZPnsykSZPo0qULr732GvXr1+eNN96ojqcTBEEQagHDu7fgpbH+gvdp2l5mfvqLads2vUGlqKiI9evX8+CDDwbGHA4HQ4YMITk5+bT7FxYWUlhYGPg5Ly8PgD/++KPE451OJz6fr8RfKMXjXq+3xMlFnU4nDoejzHGPx1PCweXy/xq8Xm+5xt1uN7qul1hia5qGy+VC13WOHz/OsWPHOHjwIEFBQbhcrjLd7ZjJ4/Fw7Ngx/vjjD0JCQsrMaudMxRlyc3NLHOQ+eZ7snMkwjMBr6OSr3Z/ttWenTAUFBSUyVPb9ZFWm4tfQwYMHCQkJKdXd7pmKMxw+fJhTMftzz6xMvVoE8dTI9vz9k59YlpoDYMrJo00vdjk5Ofh8Ppo1a1ZivFmzZmzZsuW0+8+ePZtZs2adNt6xY0ez1QRBEAQFOXjwIGFhYVXahuVfPXjwwQeZPn164Ofc3FzatGlDZmZmlcNZxeHDh2ndujW7du1SsqNUdX9QP4Pq/qB+BtX9Qf0MeXl5REdH07hx4ypvy/Ri17RpU5xOJwcOHCgxfuDAAZo3b37a/YODgwkODj5tPCwsTMnJOZmGDRsqnUF1f1A/g+r+oH4G1f1B/QxmfN/O9AaVoKAgLrjgAr799tvAmK7rfPvtt/Tr18/spxMEQRCEs1ItuzGnT5/OxIkT6dmzJ7179+a5557j6NGjTJo0qTqeThAEQRDOSLUUuxtvvJHs7GweeeQR9u/fT1xcHF9++eVpTSulERwczKOPPlrqrk1VUD2D6v6gfgbV/UH9DKr7g/oZzPS33enCBEEQBMFs7Hl2UEEQBEEwESl2giAIQq1Hip0gCIJQ65FiJwiCINR6bFfsqnppICuZPXs2vXr1IjQ0lMjISK6++mrS09Ot1qo0Tz31FJqmMW3aNKtVys2ePXu46aabaNKkCSEhIXTv3p1169ZZrVVufD4fM2fOpF27doSEhNChQwcef/xxU84NWB2sXLmSK664gqioKDRNY8mSJSVuNwyDRx55hBYtWhASEsKQIUPYtm2bNbJlcKYMHo+HGTNm0L17dxo0aEBUVBQTJkxg79691gmfwtnm4GTuuOMONE3jueeeqzG/8lCeDL/++itXXnklYWFhNGjQgF69epGZmVnu57BVsTPj0kBW8v3333P33XezevVqEhMT8Xg8XHrppRw9etRqtQqzdu1aXn/9dXr06GG1Srk5dOgQAwYMwO12s3z5cn755Rf+7//+j0aNGlmtVm7mzJnDq6++yksvvcSvv/7KnDlzePrpp3nxxRetViuVo0ePEhsby8svv1zq7U8//TQvvPACr732GmvWrKFBgwZcdtllHD9+vIZNy+ZMGY4dO8aGDRuYOXMmGzZs4JNPPiE9PZ0rr7zSAtPSOdscFJOQkMDq1auJioqqIbPyc7YMv/32GxdeeCGdOnXiu+++Y9OmTcycOZN69eqV/0kMG9G7d2/j7rvvDvzs8/mMqKgoY/bs2RZaVZ6srCwDML7//nurVSpEfn6+cd555xmJiYnGxRdfbNx7771WK5WLGTNmGBdeeKHVGlVi5MiRxi233FJi7NprrzXGjRtnkVH5AYyEhITAz7quG82bNzeeeeaZwFhubq4RHBxsvP/++xYYnp1TM5RGSkqKARgZGRk1I1UByvLfvXu30bJlS+Onn34y2rRpY/znP/+pcbfyUlqGG2+80bjpppuqtF3brOyKLw00ZMiQwNiZLg2kAsWXKzLjJKY1yd13383IkSNLzIUKfPbZZ/Ts2ZMbbriByMhI4uPjmTdvntVaFaJ///58++23bN26FYCNGzfyww8/MHz4cIvNKs7OnTvZv39/iddRWFgYffr0UfY9Df73taZphIeHW61SLnRdZ/z48dx///107drVap0Ko+s6S5cupWPHjlx22WVERkbSp0+fM+6uLQ3bFLszXRpo//79FllVHl3XmTZtGgMGDKBbt25W65SbxYsXs2HDBmbPnm21SoXZsWMHr776Kueddx5fffUVd955J1OnTuXtt9+2Wq3cPPDAA4wePZpOnTrhdruJj49n2rRpjBs3zmq1ClP8vq0t72mA48ePM2PGDMaMGaPMiZXnzJmDy+Vi6tSpVqtUiqysLI4cOcJTTz3FsGHD+Prrr7nmmmu49tpr+f7778u9Hcsv8VNbufvuu/npp5/44YcfrFYpN7t27eLee+8lMTGxYvvCbYKu6/Ts2ZN//etfAMTHx/PTTz/x2muvMXHiRIvtyseHH37IwoULWbRoEV27diUtLY1p06YRFRWlTIbaisfjYdSoURiGwauvvmq1TrlYv349zz//PBs2bEDTNKt1KkXxRV6vuuoq7rvvPgDi4uJYtWoVr732GhdffHG5tmOblV1FLw1kZ+655x6
++OILkpKSaNWqldU65Wb9+vVkZWVx/vnn43K5cLlcfP/997zwwguBqw7bmRYtWtClS5cSY507d65Qx5bV3H///YHVXffu3Rk/fjz33Xefkivt4vdtbXhPFxe6jIwMEhMTlVnV/e9//yMrK4vo6OjAezojI4O//e1vtG3b1mq9ctG0aVNcLleV39u2KXa14dJAhmFwzz33kJCQwIoVK2jXrp3VShVi8ODBbN68mbS0tMC/nj17Mm7cONLS0nA6nVYrnpEBAwac9lWPrVu30qZNG4uMKs6xY8dOu3aX0+kM/HWrEu3ataN58+Yl3tOHDx9mzZo1yryn4c9Ct23bNr755huaNGlitVK5GT9+PJs2bSrxno6KiuL+++/nq6++slqvXAQFBdGrV68qv7dttRtT9UsD3X333SxatIhPP/2U0NDQwHGJsLAwQkJCLLY7O6GhoacdX2zQoAFNmjRR4rjjfffdR//+/fnXv/7FqFGjSElJYe7cucydO9dqtXJzxRVX8OSTTxIdHU3Xrl1JTU3l2Wef5ZZbbrFarVSOHDnC9u3bAz/v3LmTtLQ0GjduTHR0NNOmTeOJJ57gvPPOo127dsycOZOoqCiuvvpq66RP4UwZWrRowfXXX8+GDRv44osv8Pl8gfd148aNCQoKsko7wNnm4NTi7Ha7ad68OTExMTWtWiZny3D//fdz4403ctFFFzFw4EC+/PJLPv/8c7777rvyP0mVejmrgRdffNGIjo42goKCjN69exurV6+2WqncAKX+e/PNN61WqzQqffXAMAzj888/N7p162YEBwcbnTp1MubOnWu1UoU4fPiwce+99xrR0dFGvXr1jPbt2xsPP/ywUVhYaLVaqSQlJZX6mp84caJhGP6vH8ycOdNo1qyZERwcbAwePNhIT0+3VvoUzpRh586dZb6vk5KSrFY3DOPsc3AqdvzqQXkyLFiwwDj33HONevXqGbGxscaSJUsq9BxyiR9BEASh1mObY3aCIAiCUF1IsRMEQRBqPVLsBEEQhFqPFDtBEASh1iPFThAEQaj1SLETBEEQaj1S7ARBEIRajxQ7QRAEodYjxU4QBEGo9UixEwRBEGo9UuwEQRCEWo8UO0EQBKHW8/8BNkrNzuATZWUAAAAASUVORK5CYII=", 67 | "text/plain": [ 68 | "
" 69 | ] 70 | }, 71 | "metadata": {}, 72 | "output_type": "display_data" 73 | } 74 | ], 75 | "source": [ 76 | "import pandas as pd\n", 77 | "from scipy.spatial import Voronoi, voronoi_plot_2d\n", 78 | "d = pd.DataFrame({\n", 79 | " 'X1': [2, 2, 3, 6, 8, 10, 10, 13, 14],\n", 80 | " 'X2': [1, 8, 6, 10, 4, 6, 14, 13, 8],\n", 81 | " 'Y': [0, 0, 0, 0, 0, 1, 1, 1, 1]\n", 82 | "})\n", 83 | "X, Y = d[['X1', 'X2']], d['Y']\n", 84 | "\n", 85 | "# Correct this line\n", 86 | "boundary=pd.DataFrame([(0,16),(16,0)], columns=['x','y'])\n", 87 | "\n", 88 | "import matplotlib.pyplot as plt\n", 89 | "c= ['white' if l == 0 else 'black' for l in Y]\n", 90 | "fig, ax = plt.subplots(figsize=(5, 5))\n", 91 | "ax.set_axisbelow(True)\n", 92 | "plt.scatter(X['X1'], X['X2'], color=c, edgecolor='k')\n", 93 | "plt.xlim(0, 16)\n", 94 | "plt.ylim(0, 16)\n", 95 | "plt.plot(boundary['x'],boundary['y'])\n", 96 | "plt.grid(linestyle='dashed')\n", 97 | "plt.show()" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "## Exercise 4\n", 105 | "Finding optimum value for $k$\n", 106 | "\n", 107 | "For the dataset below:\n", 108 | "1. plot the points using `pyplot.scatter`;\n", 109 | "1. plot a line chart using `pyplot.plot` that shows the training error and the CVLOO error of k-NN for all values of $k$ between 1 and 15. \n", 110 | "1. What is the best value of $k$ in this case?" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 3, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "from scipy.stats import norm\n", 120 | "import pandas as pd\n", 121 | "import numpy as np\n", 122 | "x_red = norm.rvs(0, 1, 100, random_state=1)\n", 123 | "y_red = norm.rvs(0, 1, 100, random_state=2)\n", 124 | "x_green = norm.rvs(1, 1, 100, random_state=3)\n", 125 | "y_green = norm.rvs(1, 1, 100, random_state=4)\n", 126 | "d = pd.DataFrame({\n", 127 | " 'X1': np.concatenate([x_red,x_green]),\n", 128 | " 'X2': np.concatenate([y_red,y_green]),\n", 129 | " 'Y': [1]*100+[0]*100\n", 130 | "})\n", 131 | "X, Y = d[['X1', 'X2']], d['Y']" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "## Exercise 5\n", 139 | "Large number of attributes\n", 140 | "\n", 141 | "Considering the following dataset (attributes in the `X` variable and labels in the `Y` variable):\n", 142 | "1. Measure the CVLOO accuracy for 5-NN;\n", 143 | "1. Measure the CVLOO accuracy for ID3;\n", 144 | "1. What is the reason for the difference?\n", 145 | "1. What can be done to improve the accuracy of 5-NN? If something can be done, what is the new accuracy?" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 4, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "from scipy.stats import norm\n", 155 | "import pandas as pd\n", 156 | "from statistics import median\n", 157 | "\n", 158 | "def new_var(n):\n", 159 | " return norm.rvs(0, 1, 100, random_state=n)\n", 160 | "\n", 161 | "d = {f'X{i}':new_var(i) for i in range(10)}\n", 162 | "m = median(X['X2'])\n", 163 | "\n", 164 | "X = pd.DataFrame(d)\n", 165 | "Y = pd.Series([0 if i < m else 1 for i in X['X2']])" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "## Exercise 6\n", 173 | "Comparison between k-NN and ID3\n", 174 | "\n", 175 | "Create a simple two-dimensional dataset on which the CVLOO accuracy is at least 75% for 3-NN, but less than 50% for ID3 (with entropy and no prunning). 
Plot the dataset using `matplotlib.pyplot` and calculate the CVLOO accuracy for 3-NN and ID3 for confirmation." 176 | ] 177 | } 178 | ], 179 | "metadata": { 180 | "kernelspec": { 181 | "display_name": "Python 3 (ipykernel)", 182 | "language": "python", 183 | "name": "python3" 184 | }, 185 | "language_info": { 186 | "codemirror_mode": { 187 | "name": "ipython", 188 | "version": 3 189 | }, 190 | "file_extension": ".py", 191 | "mimetype": "text/x-python", 192 | "name": "python", 193 | "nbconvert_exporter": "python", 194 | "pygments_lexer": "ipython3", 195 | "version": "3.12.3" 196 | } 197 | }, 198 | "nbformat": 4, 199 | "nbformat_minor": 4 200 | } 201 | --------------------------------------------------------------------------------
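
The three exercises above all hinge on the same CVLOO (leave-one-out cross-validation) loop, which the notebook leaves for the reader to write. Below is a minimal sketch of that loop, assuming scikit-learn (already declared in the Pipfile) and reusing the Exercise 4 seeds; the helper name cvloo_accuracy is introduced here purely for illustration and is not part of the repository. Note that sklearn's DecisionTreeClassifier is CART-based, so criterion='entropy' with its default lack of pruning only approximates ID3.

    # Illustrative sketch only, not a file from this repository.
    import numpy as np
    import pandas as pd
    from scipy.stats import norm
    from sklearn.model_selection import LeaveOneOut, cross_val_score
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.tree import DecisionTreeClassifier

    # Rebuild the Exercise 4 dataset with the same seeds as the notebook cell.
    x_red = norm.rvs(0, 1, 100, random_state=1)
    y_red = norm.rvs(0, 1, 100, random_state=2)
    x_green = norm.rvs(1, 1, 100, random_state=3)
    y_green = norm.rvs(1, 1, 100, random_state=4)
    X = pd.DataFrame({'X1': np.concatenate([x_red, x_green]),
                      'X2': np.concatenate([y_red, y_green])})
    Y = pd.Series([1] * 100 + [0] * 100)

    def cvloo_accuracy(model, X, Y):
        """Mean accuracy over n folds, each fold holding out one sample."""
        return cross_val_score(model, X, Y, cv=LeaveOneOut()).mean()

    # Exercise 4: training error vs. CVLOO error for k = 1..15.
    for k in range(1, 16):
        knn = KNeighborsClassifier(n_neighbors=k)
        train_err = 1 - knn.fit(X, Y).score(X, Y)   # resubstitution error
        cvloo_err = 1 - cvloo_accuracy(knn, X, Y)   # leave-one-out error
        print(f'k={k:2d}  training error={train_err:.3f}  CVLOO error={cvloo_err:.3f}')

    # Exercises 5 and 6 compare the same two model families with the same helper:
    print('5-NN CVLOO accuracy:', cvloo_accuracy(KNeighborsClassifier(n_neighbors=5), X, Y))
    print('tree CVLOO accuracy:', cvloo_accuracy(DecisionTreeClassifier(criterion='entropy'), X, Y))

With k = 1 the training error is zero by construction (each point is its own nearest neighbour) while the CVLOO error stays high; the gap between those two curves is what Exercise 4 asks you to read off the chart.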