├── .gitignore ├── README ├── data_visualization ├── hex_map_of_states.ipynb ├── matplotlib_football_marker.ipynb └── overlapping_shapes_alpha.ipynb ├── data_wrangling └── get_api_data.ipynb ├── football ├── afcon_example.ipynb ├── another_positional_heatmap.ipynb ├── circle_arc.ipynb ├── comet_lines.ipynb ├── complicated_heatmap_example.ipynb ├── complicated_placement.ipynb ├── corner_heatmap.ipynb ├── goal_heatmap.ipynb ├── grid_no_pad.ipynb ├── heatmaps_complicated.ipynb ├── kloppy_wyscout.ipynb ├── multiple_pizza_plots.ipynb ├── offset_lines.ipynb ├── outliers_movers.ipynb ├── pass_sonar.ipynb ├── pitch_of_radars.ipynb ├── positional_binning.ipynb ├── read_f24.ipynb ├── reduce_points_ramer_douglas_peucker.ipynb ├── rotate_and_count.ipynb ├── scrape_fbref.ipynb ├── sonofacorner_congested_zones.ipynb ├── statsbomb_duckdb │ ├── 360_v1.py │ ├── 360_visible_v1.py │ ├── competition_v4.py │ ├── events_freeze_v7.py │ ├── events_related_v7.py │ ├── events_tactics_v7.py │ ├── events_v7.py │ ├── lineup_v4.py │ └── match_v5.py └── statsbomb_parser.ipynb ├── modelling └── simulate_test_data.ipynb ├── modelling_from_scratch ├── Nadaraya-Watson_estimator.ipynb ├── k-nearest_neighbours.ipynb ├── linear_regression.ipynb ├── logistic_regression.ipynb └── ridge_regression.ipynb ├── neural_networks └── hard-mish.ipynb ├── pysport ├── 01_get_f24_data.ipynb ├── 02_get_fbref_data.ipynb ├── 03_turn_f24_data_to_actions.ipynb ├── 04_what_is_mplsoccer_slide.ipynb ├── 05_penalty_analysis.ipynb ├── 06_good_features_of_mplsoccer_slide.ipynb ├── data │ ├── f24 │ │ └── README.md │ └── fbref │ │ └── README.md ├── old_trafford_google_earth.png └── pysport_presentation.pdf └── simulation ├── simulate_car_wash.ipynb ├── simulate_composition_method.ipynb ├── simulate_correlated_random_variables.ipynb ├── simulate_estimate_of_pi.ipynb ├── simulate_exponential_random_variable_from_uniform.ipynb ├── simulate_linearly_related_random_variables.ipynb ├── simulate_normal_random_variable_from_uniform.ipynb ├── simulate_poisson_random_variable_from_uniform.ipynb └── simulate_random_variables.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # ignore binaries for programs and plugins 2 | *.exe 3 | *.dll 4 | *.so 5 | *.dylib 6 | 7 | # ignore desktop.ini 8 | desktop.ini 9 | 10 | # ignore jupyter notebook checkpoints 11 | .ipynb_checkpoints 12 | */.ipynb_checkpoints/* 13 | 14 | # ignore plans 15 | *PLAN.txt -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | # Data science notes 2 | Notes from my data science escapades in Python. 3 | -------------------------------------------------------------------------------- /data_visualization/overlapping_shapes_alpha.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "overlapping_shapes_alpha.ipynb", 7 | "provenance": [], 8 | "authorship_tag": "ABX9TyPRT2m2a1XQw1KIsIwt3HL8", 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "language_info": { 16 | "name": "python" 17 | } 18 | }, 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "id": "view-in-github", 24 | "colab_type": "text" 25 | }, 26 | "source": [ 27 | "\"Open" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 1, 33 | "metadata": { 34 | "colab": { 35 | "base_uri": "https://localhost:8080/", 36 | "height": 287 37 | }, 38 | "id": "o2UuURO81XqE", 39 | "outputId": "cb300235-aa51-4798-9329-ddc621a203f8" 40 | }, 41 | "outputs": [ 42 | { 43 | "output_type": "execute_result", 44 | "data": { 45 | "text/plain": [ 46 | "" 47 | ] 48 | }, 49 | "metadata": {}, 50 | "execution_count": 1 51 | }, 52 | { 53 | "output_type": "display_data", 54 | "data": { 55 | "image/png": "\n", 56 | "text/plain": [ 57 | "
" 58 | ] 59 | }, 60 | "metadata": { 61 | "needs_background": "light" 62 | } 63 | } 64 | ], 65 | "source": [ 66 | "import numpy as np\n", 67 | "import matplotlib.pyplot as plt\n", 68 | "from matplotlib.patches import Circle, PathPatch\n", 69 | "from matplotlib.path import Path\n", 70 | "\n", 71 | "def get_path(patch):\n", 72 | " \"\"\" get the real path from a patch\"\"\"\n", 73 | " path = patch.get_path()\n", 74 | " transform = patch.get_patch_transform()\n", 75 | " return transform.transform_path(path)\n", 76 | "\n", 77 | "# get the paths\n", 78 | "path1 = get_path(Circle(xy=(0, 0), radius=0.5))\n", 79 | "path2 = get_path(Circle((0.5, 0.5), 0.5, alpha=0.5))\n", 80 | "\n", 81 | "# create a patch from the combination of the two paths\n", 82 | "path = Path(np.concatenate([path1.vertices, path2.vertices]),\n", 83 | " np.concatenate([path1.codes, path2.codes]))\n", 84 | "pathpatch = PathPatch(path, facecolor='blue', alpha=0.2)\n", 85 | "\n", 86 | "# plot the combined shape\n", 87 | "fig,ax = plt.subplots()\n", 88 | "ax.set_xlim(-2, 2)\n", 89 | "ax.set_ylim(-2, 2)\n", 90 | "ax.set_aspect(1)\n", 91 | "ax.add_patch(pathpatch)" 92 | ] 93 | } 94 | ] 95 | } -------------------------------------------------------------------------------- /football/complicated_placement.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "complicated_placement.ipynb", 7 | "provenance": [], 8 | "authorship_tag": "ABX9TyNFhPIFb7DmFn6aCUlDViGU", 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "language_info": { 16 | "name": "python" 17 | } 18 | }, 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "id": "view-in-github", 24 | "colab_type": "text" 25 | }, 26 | "source": [ 27 | "\"Open" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 1, 33 | "metadata": { 34 | "colab": { 35 | "base_uri": "https://localhost:8080/" 36 | }, 37 | "id": "-4KGrxkfqM7q", 38 | "outputId": "25d14543-0899-48f6-c0e3-609287363878" 39 | }, 40 | "outputs": [ 41 | { 42 | "output_type": "stream", 43 | "name": "stdout", 44 | "text": [ 45 | "Requirement already satisfied: mplsoccer in /usr/local/lib/python3.7/dist-packages (1.0.5)\n", 46 | "Requirement already satisfied: seaborn in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (0.11.2)\n", 47 | "Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (1.4.1)\n", 48 | "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (4.6.3)\n", 49 | "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (3.2.2)\n", 50 | "Requirement already satisfied: pillow in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (7.1.2)\n", 51 | "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (1.1.5)\n", 52 | "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (1.19.5)\n", 53 | "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplsoccer) (1.3.2)\n", 54 | "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplsoccer) (3.0.6)\n", 55 | "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplsoccer) (0.11.0)\n", 56 | "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplsoccer) (2.8.2)\n", 57 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib->mplsoccer) (1.15.0)\n", 58 | "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->mplsoccer) (2018.9)\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "!pip install mplsoccer" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "source": [ 69 | "import matplotlib.pyplot as plt\n", 70 | "from mplsoccer import Pitch" 71 | ], 72 | "metadata": { 73 | "id": "gmSuXwanqbOX" 74 | }, 75 | "execution_count": 2, 76 | "outputs": [] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "source": [ 81 | "p = Pitch()" 82 | ], 83 | "metadata": { 84 | "id": "zdblfJN9rOTC" 85 | }, 86 | "execution_count": 3, 87 | "outputs": [] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "source": [ 92 | "figwidth, figheight = 16, 9\n", 93 | "fig = plt.figure(figsize=(figwidth, figheight))\n", 94 | "blank_axes = fig.add_axes((0, 0, 1, 1))\n", 95 | "#blank_axes.axis('off')\n", 96 | "pitch_height = 0.2\n", 97 | "pitch_width = pitch_height * p.ax_aspect * figheight / figwidth\n", 98 | "ax = fig.add_axes((0.1, 0.1, pitch_width, pitch_height))\n", 99 | "p.draw(ax=ax)\n", 100 | "ax2 = fig.add_axes((0.6, 0.6, pitch_width, pitch_height))\n", 101 | "p.draw(ax=ax2)\n" 102 | ], 103 | "metadata": { 104 | "colab": { 105 | "base_uri": "https://localhost:8080/", 106 | "height": 700 107 | }, 108 | "id": "9NrqO9jyqO2V", 109 | "outputId": "bff820ae-5efc-423a-f6a1-b2e155d92475" 110 | }, 111 | "execution_count": 4, 112 | "outputs": [ 113 | { 114 | "output_type": "display_data", 115 | "data": { 116 | "image/png": "\n", 117 | "text/plain": [ 118 | "
" 119 | ] 120 | }, 121 | "metadata": { 122 | "needs_background": "light" 123 | } 124 | } 125 | ] 126 | } 127 | ] 128 | } -------------------------------------------------------------------------------- /football/grid_no_pad.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "grid_no_pad.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "authorship_tag": "ABX9TyPkXol+Hqx2YA3pxGVxqEe2", 10 | "include_colab_link": true 11 | }, 12 | "kernelspec": { 13 | "name": "python3", 14 | "display_name": "Python 3" 15 | }, 16 | "language_info": { 17 | "name": "python" 18 | } 19 | }, 20 | "cells": [ 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "id": "view-in-github", 25 | "colab_type": "text" 26 | }, 27 | "source": [ 28 | "\"Open" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "colab": { 36 | "base_uri": "https://localhost:8080/" 37 | }, 38 | "id": "TPp-57en1irq", 39 | "outputId": "5132e4b3-5298-4103-fae7-e5c778de04e7" 40 | }, 41 | "outputs": [ 42 | { 43 | "output_type": "stream", 44 | "name": "stdout", 45 | "text": [ 46 | "Collecting mplsoccer\n", 47 | " Downloading mplsoccer-1.0.5.tar.gz (57 kB)\n", 48 | "\u001b[?25l\r\u001b[K |█████▊ | 10 kB 19.4 MB/s eta 0:00:01\r\u001b[K |███████████▍ | 20 kB 23.5 MB/s eta 0:00:01\r\u001b[K |█████████████████ | 30 kB 23.2 MB/s eta 0:00:01\r\u001b[K |██████████████████████▊ | 40 kB 25.5 MB/s eta 0:00:01\r\u001b[K |████████████████████████████▍ | 51 kB 26.5 MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 57 kB 3.2 MB/s \n", 49 | "\u001b[?25hRequirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (3.2.2)\n", 50 | "Requirement already satisfied: seaborn in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (0.11.2)\n", 51 | "Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (1.4.1)\n", 52 | "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (1.1.5)\n", 53 | "Requirement already satisfied: pillow in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (7.1.2)\n", 54 | "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (1.19.5)\n", 55 | "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (4.6.3)\n", 56 | "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplsoccer) (2.8.2)\n", 57 | "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplsoccer) (0.11.0)\n", 58 | "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplsoccer) (1.3.2)\n", 59 | "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplsoccer) (3.0.6)\n", 60 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib->mplsoccer) (1.15.0)\n", 61 | "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->mplsoccer) (2018.9)\n", 62 | "Building wheels for collected packages: mplsoccer\n", 63 | " Building wheel for mplsoccer (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 64 | " Created wheel for mplsoccer: filename=mplsoccer-1.0.5-py3-none-any.whl size=62946 sha256=176c25a24eda48ae8f8ddb1de842d3bfb9dd4dbcb78b97e7f3fb5b8983724609\n", 65 | " Stored in directory: /root/.cache/pip/wheels/35/71/46/5f3df8e696517b395ed75971c4d09f5854310efc09d364a9ca\n", 66 | "Successfully built mplsoccer\n", 67 | "Installing collected packages: mplsoccer\n", 68 | "Successfully installed mplsoccer-1.0.5\n" 69 | ] 70 | } 71 | ], 72 | "source": [ 73 | "!pip install mplsoccer" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "source": [ 79 | "from mplsoccer import VerticalPitch" 80 | ], 81 | "metadata": { 82 | "id": "ECPz3va41rMK" 83 | }, 84 | "execution_count": null, 85 | "outputs": [] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "source": [ 90 | "p = VerticalPitch(pitch_type='opta', pad_top=-120, pad_left=-15, pad_right=-15)" 91 | ], 92 | "metadata": { 93 | "id": "qDk64FET1w1a" 94 | }, 95 | "execution_count": null, 96 | "outputs": [] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "source": [ 101 | "grid_width, grid_height = p.calculate_grid_dimensions(nrows=3, ncols=3, figwidth=16, figheight=9, max_grid=0.9, space=0)" 102 | ], 103 | "metadata": { 104 | "id": "pKuC93iK3uAA" 105 | }, 106 | "execution_count": null, 107 | "outputs": [] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "source": [ 112 | "fig, ax = p.grid(nrows=3, ncols=3, space=0, endnote_height=0, title_height=0, grid_width=grid_width, grid_height=grid_height, figheight=9)" 113 | ], 114 | "metadata": { 115 | "colab": { 116 | "base_uri": "https://localhost:8080/", 117 | "height": 180 118 | }, 119 | "id": "C91vD3Hg2AUl", 120 | "outputId": "6f103d4d-ee3f-4e55-bfbd-22a9164b144c" 121 | }, 122 | "execution_count": null, 123 | "outputs": [ 124 | { 125 | "output_type": "display_data", 126 | "data": { 127 | "image/png": "\n", 128 | "text/plain": [ 129 | "
" 130 | ] 131 | }, 132 | "metadata": {} 133 | } 134 | ] 135 | } 136 | ] 137 | } -------------------------------------------------------------------------------- /football/kloppy_wyscout.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from kloppy import WyscoutSerializer\n", 10 | "import requests\n", 11 | "import zipfile\n", 12 | "import os" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "# kloppy serializer\n", 22 | "serializer = WyscoutSerializer()" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "def download_url(url, save_path, chunk_size=128, json=False):\n", 32 | " '''Souce: https://stackoverflow.com/questions/9419162/download-returned-zip-file-from-url '''\n", 33 | " r = requests.get(url, stream=True)\n", 34 | " if json:\n", 35 | " r.encoding = 'unicode-escape'\n", 36 | " with open(save_path, 'wb') as fd:\n", 37 | " for chunk in r.iter_content(chunk_size=chunk_size):\n", 38 | " fd.write(chunk)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# saves in the current directory the Wyscout events.zip file\n", 48 | "save_path = os.path.join('events.json')\n", 49 | "download_url('https://ndownloader.figshare.com/files/14464685', save_path)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "# extract the zip files in the current directory (e.g. events_World_Cup.json)\n", 59 | "extract_path = os.path.join('')\n", 60 | "with zipfile.ZipFile(save_path, 'r') as zip_ref:\n", 61 | " zip_ref.extractall(extract_path)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "# currently not working\n", 71 | "with open('events_World_Cup.json', \"rb\") as event_data:\n", 72 | " dataset = serializer.deserialize(inputs={'event_data': event_data})" 73 | ] 74 | } 75 | ], 76 | "metadata": { 77 | "kernelspec": { 78 | "display_name": "Python 3", 79 | "language": "python", 80 | "name": "python3" 81 | }, 82 | "language_info": { 83 | "codemirror_mode": { 84 | "name": "ipython", 85 | "version": 3 86 | }, 87 | "file_extension": ".py", 88 | "mimetype": "text/x-python", 89 | "name": "python", 90 | "nbconvert_exporter": "python", 91 | "pygments_lexer": "ipython3", 92 | "version": "3.7.9" 93 | } 94 | }, 95 | "nbformat": 4, 96 | "nbformat_minor": 4 97 | } 98 | -------------------------------------------------------------------------------- /football/scrape_fbref.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "scrape_fbref.ipynb", 7 | "provenance": [], 8 | "authorship_tag": "ABX9TyM5/QSze4o6cPLxZnYWR98H", 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "language_info": { 16 | "name": "python" 17 | } 18 | }, 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "id": "view-in-github", 24 | "colab_type": "text" 25 | }, 26 | "source": [ 27 | "\"Open" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 1, 33 | "metadata": { 34 | "id": "xUucUiEAmqqm" 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "import requests\n", 39 | "from bs4 import BeautifulSoup\n", 40 | "import pandas as pd\n", 41 | "import numpy as np\n", 42 | "\n", 43 | "def get_soup(url):\n", 44 | " headers = {'User-Agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) '\n", 45 | " 'Chrome/39.0.2171.95 Safari/537.36')}\n", 46 | " r = requests.get(url, headers=headers)\n", 47 | " r.encoding = 'unicode-escape'\n", 48 | " return BeautifulSoup(r.content, 'html.parser')\n", 49 | "\n", 50 | "def get_data_from_table(table, data_type, skip_rows):\n", 51 | " \"\"\"Helper method to get the data from a table. \"\"\"\n", 52 | " # https://stackoverflow.com/questions/42285417/how-to-preserve-links-when-scraping-a-table-with-beautiful-soup-and-pandas\n", 53 | " if data_type == 'title':\n", 54 | " data = [[td.a.get('title') if td.find('a') else ''.join(td.stripped_strings) for td in row.find_all('td')]\n", 55 | " for row in table.find_all('tr')]\n", 56 | " if data_type == 'link':\n", 57 | " data = [[td.a['href'] if td.find('a') else ''.join(td.stripped_strings) for td in row.find_all('td')]\n", 58 | " for row in table.find_all('tr')]\n", 59 | " else:\n", 60 | " data = [[td.a.string if td.find('a') else ''.join(td.stripped_strings) for td in row.find_all('td')]\n", 61 | " for row in table.find_all('tr')] \n", 62 | " \n", 63 | " data = [d for d in data if len(d)!=0][0::skip_rows]\n", 64 | " \n", 65 | " return data\n", 66 | "\n", 67 | "def get_fbref_big5(url):\n", 68 | " soup = get_soup(url)\n", 69 | " df = pd.read_html(str(soup))[0]\n", 70 | " \n", 71 | " # column names - collapse the multiindex\n", 72 | " col1 = list(df.columns.get_level_values(0))\n", 73 | " col1 = ['' if c[:7]=='Unnamed' else c.replace(' ', '_').lower() for c in col1]\n", 74 | " col2 = list(df.columns.get_level_values(1))\n", 75 | " col2 = [c.replace(' ', '_').lower() for c in col2]\n", 76 | " cols = [f'{c}_{col2[i]}' if c != '' else col2[i] for i, c in enumerate(col1)]\n", 77 | " df.columns = cols\n", 78 | " \n", 79 | " # remove lines that are the header row repeated\n", 80 | " df = df[df.rk != 'Rk'].copy()\n", 81 | " \n", 82 | " # add the url for the player profile and match logs\n", 83 | " # https://stackoverflow.com/questions/42285417/how-to-preserve-links-when-scraping-a-table-with-beautiful-soup-and-pandas\n", 84 | " parsed_table = soup.find_all('table')[0]\n", 85 | " data = [[td.a['href'] if td.find('a') else ''.join(td.stripped_strings) for td in row.find_all('td')]\n", 86 | " for row in parsed_table.find_all('tr')]\n", 87 | " data = [d for d in data if len(d)!=0]\n", 88 | " match_log = [d[-1] for d in data]\n", 89 | " player_profile = [d[0] for d in data]\n", 90 | " df['match_link'] = match_log\n", 91 | " df['player_link'] = player_profile\n", 92 | " \n", 93 | " # remove players who haven't played a minute from the playing time table\n", 94 | " if 'playing_time_mp' in df.columns:\n", 95 | " df = df[df.playing_time_mp != '0'].copy()\n", 96 | " df.reset_index(drop=True, inplace=True)\n", 97 | " df['rk'] = df.index + 1\n", 98 | " \n", 99 | " # drop the matches column\n", 100 | " df.drop('matches', axis='columns', inplace=True)\n", 101 | "\n", 102 | " # columns to numeric columns\n", 103 | " df[df.columns[6:-2]] = df[df.columns[6:-2]].apply(pd.to_numeric, errors='coerce', axis='columns')\n", 104 | " return df" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "source": [ 110 | "url = 'https://fbref.com/en/comps/Big5/keepersadv/players/Big-5-European-Leagues-Stats'" 111 | ], 112 | "metadata": { 113 | "id": "Xet6H4pZnDXE" 114 | }, 115 | "execution_count": 2, 116 | "outputs": [] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "source": [ 121 | "df_gk = get_fbref_big5(url)" 122 | ], 123 | "metadata": { 124 | "id": "FdOe3aqZnci5" 125 | }, 126 | "execution_count": 3, 127 | "outputs": [] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "source": [ 132 | "df_gk.head()" 133 | ], 134 | "metadata": { 135 | "colab": { 136 | "base_uri": "https://localhost:8080/", 137 | "height": 477 138 | }, 139 | "id": "hyIRd04BnfpC", 140 | "outputId": "1acbe9b6-0532-40f1-e35a-2af9be1b5316" 141 | }, 142 | "execution_count": 4, 143 | "outputs": [ 144 | { 145 | "output_type": "execute_result", 146 | "data": { 147 | "text/plain": [ 148 | " rk player nation pos squad comp age \\\n", 149 | "0 1 Julen Agirrezabala es ESP GK Athletic Club es La Liga NaN \n", 150 | "1 2 Doğan Alemdar tr TUR GK Rennes fr Ligue 1 NaN \n", 151 | "2 3 Alisson br BRA GK Liverpool eng Premier League NaN \n", 152 | "3 4 Alphonse Areola fr FRA GK West Ham eng Premier League NaN \n", 153 | "4 5 Kepa Arrizabalaga es ESP GK Chelsea eng Premier League NaN \n", 154 | "\n", 155 | " born 90s goals_ga ... goal_kicks_launch% goal_kicks_avglen \\\n", 156 | "0 2000.0 4.0 5.0 ... 38.5 38.3 \n", 157 | "1 2002.0 5.0 4.0 ... 86.2 57.7 \n", 158 | "2 1992.0 26.0 18.0 ... 43.5 39.1 \n", 159 | "3 1993.0 1.0 1.0 ... 71.4 53.2 \n", 160 | "4 1994.0 4.0 2.0 ... 28.6 30.2 \n", 161 | "\n", 162 | " crosses_opp crosses_stp crosses_stp% sweeper_#opa sweeper_#opa/90 \\\n", 163 | "0 35.0 2.0 5.7 5.0 1.25 \n", 164 | "1 46.0 1.0 2.2 7.0 1.40 \n", 165 | "2 166.0 17.0 10.2 38.0 1.46 \n", 166 | "3 13.0 1.0 7.7 0.0 0.00 \n", 167 | "4 31.0 1.0 3.2 5.0 1.25 \n", 168 | "\n", 169 | " sweeper_avgdist match_link \\\n", 170 | "0 17.3 /en/players/a2c1a8d3/matchlogs/2021-2022/keepe... \n", 171 | "1 14.3 /en/players/9e17ccff/matchlogs/2021-2022/keepe... \n", 172 | "2 17.6 /en/players/7a2e46a8/matchlogs/2021-2022/keepe... \n", 173 | "3 7.0 /en/players/2f965a72/matchlogs/2021-2022/keepe... \n", 174 | "4 16.2 /en/players/28d596a0/matchlogs/2021-2022/keepe... \n", 175 | "\n", 176 | " player_link \n", 177 | "0 /en/players/a2c1a8d3/Julen-Agirrezabala \n", 178 | "1 /en/players/9e17ccff/Dogan-Alemdar \n", 179 | "2 /en/players/7a2e46a8/Alisson \n", 180 | "3 /en/players/2f965a72/Alphonse-Areola \n", 181 | "4 /en/players/28d596a0/Kepa-Arrizabalaga \n", 182 | "\n", 183 | "[5 rows x 36 columns]" 184 | ], 185 | "text/html": [ 186 | "\n", 187 | "
\n", 188 | "
\n", 189 | "
\n", 190 | "\n", 203 | "\n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | "
rkplayernationpossquadcompageborn90sgoals_ga...goal_kicks_launch%goal_kicks_avglencrosses_oppcrosses_stpcrosses_stp%sweeper_#opasweeper_#opa/90sweeper_avgdistmatch_linkplayer_link
01Julen Agirrezabalaes ESPGKAthletic Clubes La LigaNaN2000.04.05.0...38.538.335.02.05.75.01.2517.3/en/players/a2c1a8d3/matchlogs/2021-2022/keepe.../en/players/a2c1a8d3/Julen-Agirrezabala
12Doğan Alemdartr TURGKRennesfr Ligue 1NaN2002.05.04.0...86.257.746.01.02.27.01.4014.3/en/players/9e17ccff/matchlogs/2021-2022/keepe.../en/players/9e17ccff/Dogan-Alemdar
23Alissonbr BRAGKLiverpooleng Premier LeagueNaN1992.026.018.0...43.539.1166.017.010.238.01.4617.6/en/players/7a2e46a8/matchlogs/2021-2022/keepe.../en/players/7a2e46a8/Alisson
34Alphonse Areolafr FRAGKWest Hameng Premier LeagueNaN1993.01.01.0...71.453.213.01.07.70.00.007.0/en/players/2f965a72/matchlogs/2021-2022/keepe.../en/players/2f965a72/Alphonse-Areola
45Kepa Arrizabalagaes ESPGKChelseaeng Premier LeagueNaN1994.04.02.0...28.630.231.01.03.25.01.2516.2/en/players/28d596a0/matchlogs/2021-2022/keepe.../en/players/28d596a0/Kepa-Arrizabalaga
\n", 353 | "

5 rows × 36 columns

\n", 354 | "
\n", 355 | " \n", 365 | " \n", 366 | " \n", 403 | "\n", 404 | " \n", 428 | "
\n", 429 | "
\n", 430 | " " 431 | ] 432 | }, 433 | "metadata": {}, 434 | "execution_count": 4 435 | } 436 | ] 437 | } 438 | ] 439 | } -------------------------------------------------------------------------------- /football/statsbomb_duckdb/360_v1.py: -------------------------------------------------------------------------------- 1 | with raw_json as ( 2 | select 3 | * replace(unnest(freeze_frame) as freeze_frame) 4 | from 5 | read_json( 6 | $filename, 7 | format = 'array', 8 | columns = {event_uuid: varchar, 9 | freeze_frame: 'struct(teammate boolean, actor boolean, keeper boolean, location double[])[]'}, 10 | filename = true 11 | ) 12 | ) 13 | select 14 | cast(split(split(filename, '/') [-1], '.') [1] as integer) as match_id, 15 | event_uuid, 16 | freeze_frame.teammate, 17 | freeze_frame.actor, 18 | freeze_frame.keeper, 19 | freeze_frame.location [1] as x, 20 | freeze_frame.location [2] as y, 21 | from 22 | raw_json; -------------------------------------------------------------------------------- /football/statsbomb_duckdb/360_visible_v1.py: -------------------------------------------------------------------------------- 1 | with raw_json as ( 2 | select 3 | * 4 | from 5 | read_json( 6 | $filename, 7 | format = 'array', 8 | columns = {event_uuid: varchar, 9 | visible_area: 'double[]'}, 10 | filename = true 11 | ) 12 | ) 13 | select 14 | cast(split(split(filename, '/') [-1], '.') [1] as integer) as match_id, 15 | event_uuid, 16 | visible_area 17 | from 18 | raw_json; -------------------------------------------------------------------------------- /football/statsbomb_duckdb/competition_v4.py: -------------------------------------------------------------------------------- 1 | with raw_json as ( 2 | select 3 | * 4 | from 5 | read_json( 6 | $filename, 7 | format = 'array', 8 | columns = {'competition_id': integer, 9 | 'season_id': integer, 10 | 'country_name': varchar, 11 | 'competition_name': varchar, 12 | 'competition_gender': varchar, 13 | 'competition_youth': boolean, 14 | 'competition_international': boolean, 15 | 'season_name': varchar, 16 | 'match_updated': varchar, 17 | 'match_updated_360': varchar, 18 | 'match_available_360': varchar, 19 | 'match_available': varchar} 20 | ) 21 | ), 22 | final as ( 23 | select 24 | * replace(case when match_updated is null then null else cast(left(concat(replace(match_updated, 'T', ' '), ':00'), 19) as timestamp) end as match_updated, 25 | case when match_available is null then null else cast(left(concat(replace(match_available, 'T', ' '), ':00'), 19) as timestamp) end as match_available, 26 | cast(match_available_360 as timestamp) as match_available_360, 27 | cast(match_updated_360 as timestamp) as match_updated_360 28 | ) 29 | from 30 | raw_json) 31 | select 32 | * 33 | from 34 | final; -------------------------------------------------------------------------------- /football/statsbomb_duckdb/events_freeze_v7.py: -------------------------------------------------------------------------------- 1 | with raw_json as ( 2 | select 3 | * 4 | from 5 | read_json( 6 | $filename, 7 | format = 'array', 8 | filename = true, 9 | -- columns from the StatsBomb docs, but excluding tactics, related_events, and shot.freeze_frame as many to one relationships 10 | -- these are instead handled seperately to create their own dataframes. 11 | columns = { 'id': varchar, 12 | type: 'struct(name varchar)', 13 | shot: 'struct(freeze_frame struct(location double[], player struct(id integer, name varchar), position struct(id integer, name varchar), teammate boolean)[])' } 14 | ) 15 | ), 16 | final as ( 17 | select 18 | cast(split(split(filename, '/') [-1], '.') [1] as integer) as match_id, 19 | id as event_uuid, 20 | unnest(shot.freeze_frame).location [1] as x, 21 | unnest(shot.freeze_frame).location [2] as y, 22 | unnest(shot.freeze_frame).player.id as player_id, 23 | unnest(shot.freeze_frame).player.name as player_name, 24 | unnest(shot.freeze_frame).position.id as position_id, 25 | unnest(shot.freeze_frame).position.name as position_name, 26 | unnest(shot.freeze_frame).teammate as teammate 27 | from 28 | raw_json 29 | where 30 | type.name = 'Shot' 31 | ) 32 | select 33 | * 34 | from 35 | final; -------------------------------------------------------------------------------- /football/statsbomb_duckdb/events_related_v7.py: -------------------------------------------------------------------------------- 1 | with raw_json as ( 2 | select 3 | * 4 | from 5 | read_json( 6 | $filename, 7 | format = 'array', 8 | auto_detect = true, 9 | maximum_object_size = 11000000, 10 | filename = false, 11 | columns = { 'id': varchar, 12 | index: integer, 13 | type: 'struct(id ubigint, name varchar)', 14 | related_events: 'VARCHAR[]' } 15 | ) 16 | ), 17 | related as ( 18 | select 19 | id as event_uuid, 20 | index, 21 | replace(type.name, '*', '') as type_name, 22 | unnest(related_events) as event_uuid_related 23 | from 24 | raw_json 25 | ), 26 | events as ( 27 | select 28 | id as event_uuid_related, 29 | index as index_related, 30 | replace(type.name, '*', '') as type_name_related 31 | from 32 | raw_json 33 | ), 34 | final as ( 35 | select 36 | related.*, 37 | events.* exclude event_uuid_related 38 | from 39 | related 40 | join events on related.event_uuid_related = events.event_uuid_related 41 | ) 42 | select 43 | * 44 | from 45 | final; -------------------------------------------------------------------------------- /football/statsbomb_duckdb/events_tactics_v7.py: -------------------------------------------------------------------------------- 1 | with raw_json as ( 2 | select 3 | * 4 | from 5 | read_json( 6 | $filename, 7 | format = 'array', 8 | filename = true, 9 | -- columns from the StatsBomb docs, but excluding tactics, related_events, and shot.freeze_frame as many to one relationships 10 | -- these are instead handled seperately to create their own dataframes. 11 | columns = { 'id': varchar, 12 | index: integer, 13 | period: integer, 14 | timestamp: time, 15 | minute: integer, 16 | second: integer, 17 | type: 'struct(id ubigint, name varchar)', 18 | team: 'struct(id ubigint, name varchar)', 19 | tactics: 'struct(lineup struct(jersey_number integer, player struct(id integer, name varchar), position struct(id integer, name varchar))[])' } 20 | ) 21 | ), 22 | final as ( 23 | select 24 | cast(split(split(filename, '/') [-1], '.') [1] as integer) as match_id, 25 | id as event_uuid, 26 | index, 27 | period, 28 | timestamp, 29 | minute, 30 | second, 31 | type.id as type_id, 32 | type.name as type_name, 33 | team.id as team_id, 34 | team.name as team_name, 35 | unnest(tactics.lineup).jersey_number as jersey_number, 36 | unnest(tactics.lineup).player.id as player_id, 37 | unnest(tactics.lineup).player.name as player_name, 38 | unnest(tactics.lineup).position.id as position_id, 39 | unnest(tactics.lineup).position.name as position_name 40 | from 41 | raw_json 42 | where 43 | type.name in ('Starting XI', 'Tactical Shift') 44 | ) 45 | select 46 | * 47 | from 48 | final; -------------------------------------------------------------------------------- /football/statsbomb_duckdb/events_v7.py: -------------------------------------------------------------------------------- 1 | with raw_json as ( 2 | select 3 | * 4 | from 5 | read_json( 6 | $filename, 7 | format = 'array', 8 | filename = true, 9 | -- columns from the StatsBomb docs, but excluding tactics, related_events, and shot.freeze_frame as many to one relationships 10 | -- these are instead handled seperately to create their own dataframes. 11 | columns = { 'id': varchar, 12 | index: integer, 13 | period: integer, 14 | timestamp: time, 15 | minute: integer, 16 | second: integer, 17 | type: 'struct(id ubigint, name varchar)', 18 | possession: integer, 19 | possession_team: 'struct(id ubigint, name varchar)', 20 | play_pattern: 'struct(id ubigint, name varchar)', 21 | team: 'struct(id ubigint, name varchar)', 22 | player: 'struct(id ubigint, name varchar)', 23 | position: 'struct(id ubigint, name varchar)', 24 | location: 'double[]', 25 | duration: double, 26 | under_pressure: boolean, 27 | off_camera: boolean, 28 | out: boolean, 29 | tactics: 'struct(formation varchar)', 30 | obv_for_after: double, 31 | obv_for_before: double, 32 | obv_for_net: double, 33 | obv_against_after: double, 34 | obv_against_before: double, 35 | obv_against_net: double, 36 | obv_total_net: double, 37 | -- in the docs counterpress is within the event type objects 38 | -- however, counterpress appears to be outside of these events, i.e. not within 50_50 event type ibject 39 | -- included this both inside the event type objects and outside just in case 40 | counterpress: boolean, 41 | '50_50': 'struct( 42 | outcome struct(id ubigint, name varchar), 43 | counterpress boolean 44 | )', 45 | bad_behaviour: 'struct(card struct(id ubigint, name varchar))', 46 | ball_receipt: 'struct(outcome struct(id ubigint, name varchar))', 47 | ball_recovery: 'struct(offensive boolean, recovery_failure boolean)', 48 | block: 'struct( 49 | deflection boolean, 50 | offensive boolean, 51 | save_block boolean, 52 | counterpress boolean 53 | )', 54 | carry: 'struct(end_location double[])', 55 | -- in the open-data there are some boolean columns left_foot, right_foot, head, other 56 | -- these are covered by body_part so ignored (columns are not in the docs) 57 | clearance: 'struct( 58 | aerial_won boolean, 59 | body_part struct(id ubigint, name varchar) 60 | )', 61 | dribble: 'struct( 62 | overrun boolean, 63 | nutmeg boolean, 64 | outcome struct(id ubigint, name varchar), 65 | no_touch boolean 66 | )', 67 | dribbled_past: 'struct(counterpress boolean)', 68 | duel: 'struct( 69 | counterpress boolean, 70 | type struct(id ubigint, name varchar), 71 | outcome struct(id ubigint, name varchar) 72 | )', 73 | foul_committed: 'struct( 74 | counterpress boolean, 75 | offensive boolean, 76 | type struct(id ubigint, name varchar), 77 | advantage boolean, 78 | penalty boolean, 79 | card struct(id ubigint, name varchar) 80 | )', 81 | foul_won: 'struct( 82 | defensive boolean, 83 | advantage boolean, 84 | penalty boolean 85 | )', 86 | -- open data also has the following boolean columns for goalkeeper 87 | -- shot_saved_to_post, shot_saved_off_target, punched_out, lost_out, success_out, lost_in_play, success_in_play, penalty_saved_to_post, saved_to_post 88 | -- ignored as not in the official spec and covered by type and outcome columns 89 | goalkeeper: 'struct( 90 | position struct(id ubigint, name varchar), 91 | technique struct(id ubigint, name varchar), 92 | body_part struct(id ubigint, name varchar), 93 | type struct(id ubigint, name varchar), 94 | outcome struct(id ubigint, name varchar), 95 | end_location double[] -- added but not in docs 96 | )', 97 | half_end: 'struct( 98 | early_video_end boolean, 99 | match_suspended boolean 100 | )', 101 | half_start: 'struct(late_video_start boolean)', 102 | injury_stoppage: 'struct(in_chain boolean)', 103 | interception: 'struct(outcome struct(id ubigint, name varchar))', 104 | miscontrol: 'struct(aerial_won boolean)', 105 | -- open data pass columns also has inswinging, outswinging, through_ball, straight 106 | -- ignored as not in the official spec and covered by technique column 107 | pass: 'struct( 108 | recipient struct(id ubigint, name varchar), 109 | length double, 110 | angle double, 111 | height struct(id ubigint, name varchar), 112 | end_location double[], 113 | assisted_shot_id varchar, 114 | backheel boolean, 115 | deflected boolean, 116 | miscommunication boolean, 117 | "cross" boolean, 118 | cut_back boolean, 119 | switch boolean, 120 | shot_assist boolean, 121 | goal_assist boolean, 122 | body_part struct(id ubigint, name varchar), 123 | type struct(id ubigint, name varchar), 124 | outcome struct(id ubigint, name varchar), 125 | technique struct(id ubigint, name varchar), 126 | aerial_won boolean, -- added but not in docs 127 | no_touch boolean -- added but not in docs 128 | )', 129 | player_off: 'struct(permanent boolean)', 130 | pressure: 'struct(counterpress boolean)', 131 | -- open data shot columns also has saved_off_target, saved_to_post, kick_off 132 | -- ignored as not in the official spec and covered by other columns (type/ outcome) 133 | shot: 'struct( 134 | key_pass_id varchar, 135 | end_location double[], 136 | aerial_won boolean, 137 | follows_dribble boolean, 138 | first_time boolean, 139 | open_goal boolean, 140 | one_on_one boolean, 141 | statsbomb_xg double, 142 | deflected boolean, 143 | technique struct(id ubigint, name varchar), 144 | shot_shot_assist boolean, 145 | shot_goal_assist boolean, 146 | body_part struct(id ubigint, name varchar), 147 | type struct(id ubigint, name varchar), 148 | outcome struct(id ubigint, name varchar), 149 | redirect boolean -- added but not in docs 150 | )', 151 | substitution: 'struct(replacement struct(id ubigint, name varchar), outcome struct(id ubigint, name varchar))' } 152 | ) 153 | ), 154 | final as ( 155 | select 156 | cast( 157 | split(split(filename, '/') [-1], '.') [1] as integer 158 | ) as match_id, 159 | id as event_uuid, 160 | index, 161 | period, 162 | timestamp, 163 | minute, 164 | second, 165 | type.id as type_id, 166 | replace(type.name, '*', '') as type_name, 167 | coalesce(duel.type.id, foul_committed.type.id, goalkeeper.type.id, pass.type.id, shot.type.id) as event_type_id, 168 | coalesce(duel.type.name, foul_committed.type.name, goalkeeper.type.name, pass.type.name, shot.type.name) as event_type_name, 169 | coalesce("50_50".outcome.id, ball_receipt.outcome.id, dribble.outcome.id, duel.outcome.id, goalkeeper.outcome.id, interception.outcome.id, pass.outcome.id, shot.outcome.id, substitution.outcome.id) as outcome_id, 170 | coalesce("50_50".outcome.name, ball_receipt.outcome.name, dribble.outcome.name, duel.outcome.name, goalkeeper.outcome.name, interception.outcome.name, pass.outcome.name, shot.outcome.name, substitution.outcome.name) as outcome_name, 171 | possession, 172 | possession_team.id as possession_team_id, 173 | possession_team.name as possession_team_name, 174 | play_pattern.id as play_pattern_id, 175 | play_pattern.name as play_pattern_name, 176 | team.id as team_id, 177 | team.name as team_name, 178 | player.id as player_id, 179 | player.name as player_name, 180 | position.id as position_id, 181 | position.name as position_name, 182 | location [1] as x, 183 | location [2] as y, 184 | location [3] as z, 185 | coalesce(carry.end_location[1], goalkeeper.end_location[1], pass.end_location[1], shot.end_location[1]) as end_x, 186 | coalesce(carry.end_location[2], goalkeeper.end_location[2], pass.end_location[2], shot.end_location[2]) as end_y, 187 | shot.end_location [3] as end_z, 188 | duration, 189 | under_pressure, 190 | off_camera, 191 | out, 192 | tactics.formation as tactics_formation, 193 | obv_for_after, 194 | obv_for_before, 195 | obv_for_net, 196 | obv_against_after, 197 | obv_against_before, 198 | obv_against_net, 199 | obv_total_net, 200 | coalesce(counterpress, "50_50".counterpress, block.counterpress, dribbled_past.counterpress, duel.counterpress, foul_committed.counterpress, pressure.counterpress) as counterpress, 201 | coalesce(block.offensive, ball_recovery.offensive, foul_committed.offensive) as offensive, 202 | coalesce(clearance.aerial_won, miscontrol.aerial_won, pass.aerial_won, shot.aerial_won) as aerial_won, 203 | coalesce(clearance.body_part.id, goalkeeper.body_part.id, pass.body_part.id, shot.body_part.id) as body_part_id, 204 | coalesce(clearance.body_part.name, goalkeeper.body_part.name, pass.body_part.name, shot.body_part.name) as body_part_name, 205 | coalesce(goalkeeper.technique.id, pass.technique.id, shot.technique.id) as technique_id, 206 | coalesce(goalkeeper.technique.name, pass.technique.name, shot.technique.name) as technique_name, 207 | coalesce(dribble.no_touch, pass.no_touch) as no_touch, 208 | coalesce(pass.deflected, shot.deflected) as deflected, 209 | bad_behaviour.card.id as bad_behaviour_card_id, 210 | bad_behaviour.card.name as bad_behaviour_card_name, 211 | ball_recovery.recovery_failure as ball_recovery_recovery_failure, 212 | block.deflection as block_deflection, 213 | block.save_block as block_save_block, 214 | dribble.overrun as dribble_overrun, 215 | dribble.nutmeg as dribble_nutmeg, 216 | coalesce(foul_committed.advantage, foul_won.advantage) as foul_advantage, 217 | coalesce(foul_committed.penalty, foul_won.penalty) as foul_penalty, 218 | foul_committed.card.id as foul_card_id, 219 | foul_committed.card.name as foul_card_name, 220 | foul_won.defensive as foul_defensive, 221 | goalkeeper.position.id as goalkeeper_position_id, 222 | goalkeeper.position.name as goalkeeper_position_name, 223 | half_end.early_video_end as half_end_early_video_end, 224 | half_end.match_suspended as half_end_match_suspended, 225 | half_start.late_video_start as half_start_late_video_start, 226 | injury_stoppage.in_chain as injury_stoppage_in_chain, 227 | pass.recipient.id as pass_recipient_id, 228 | pass.recipient.name as pass_recipient_name, 229 | pass.length as pass_length, 230 | pass.angle as pass_angle, 231 | pass.height.id as pass_height_id, 232 | pass.height.name as pass_height_name, 233 | pass.assisted_shot_id as pass_assisted_shot_id, 234 | pass.backheel as pass_backheel, 235 | pass.miscommunication as pass_miscommunication, 236 | pass."cross" as pass_cross, 237 | pass.cut_back as pass_cut_back, 238 | pass.switch as pass_switch, 239 | pass.shot_assist as pass_shot_assist, 240 | pass.goal_assist as pass_goal_assist, 241 | player_off.permanent as player_off_permanent, 242 | shot.key_pass_id as shot_key_pass_id, 243 | shot.follows_dribble as shot_follows_dribble, 244 | shot.first_time as shot_first_time, 245 | shot.open_goal as shot_open_goal, 246 | shot.one_on_one as shot_one_on_one, 247 | shot.statsbomb_xg as shot_statsbomb_xg, 248 | shot.shot_shot_assist, 249 | shot.shot_goal_assist, 250 | shot.redirect as shot_redirect, 251 | substitution.replacement.id as substitution_replacement_id, 252 | substitution.replacement.name as substitution_replacement_name 253 | from 254 | raw_json 255 | ) 256 | select 257 | * 258 | from 259 | final; -------------------------------------------------------------------------------- /football/statsbomb_duckdb/lineup_v4.py: -------------------------------------------------------------------------------- 1 | with raw_json as ( 2 | select 3 | * 4 | from 5 | read_json( 6 | $filename, 7 | format = 'array', 8 | filename = true, 9 | columns = {team_id: integer, 10 | team_name: varchar, 11 | lineup: 'STRUCT( 12 | player_id UBIGINT, 13 | player_name VARCHAR, 14 | jersey_number UBIGINT, 15 | country STRUCT(id UBIGINT, "name" VARCHAR), 16 | player_nickname VARCHAR 17 | )[]'} 18 | ) 19 | ), 20 | final as ( 21 | select 22 | cast(split(split(filename, '/') [-1], '.') [1] as integer) as match_id, 23 | team_id, 24 | team_name, 25 | unnest(lineup).player_id as player_id, 26 | unnest(lineup).player_name as player_name, 27 | unnest(lineup).jersey_number as jersey_number, 28 | unnest(lineup).player_nickname as player_nickname, 29 | unnest(lineup).country.id as country_id, 30 | unnest(lineup).country.name as country_name, 31 | from 32 | raw_json 33 | ) 34 | select 35 | * 36 | from 37 | final; -------------------------------------------------------------------------------- /football/statsbomb_duckdb/match_v5.py: -------------------------------------------------------------------------------- 1 | with raw_json as ( 2 | select 3 | * 4 | from 5 | read_json( 6 | $filename, 7 | format = 'array', 8 | columns = {match_id: integer, 9 | match_date: date, 10 | kick_off: time, 11 | competition: 'struct(competition_id integer, country_name varchar, competition_name varchar)', 12 | season: 'struct(season_id integer, season_name varchar)', 13 | home_team: 'struct(home_team_id integer, home_team_name varchar, home_team_gender varchar, home_team_group varchar, country struct(id integer, name varchar), 14 | managers struct(id varchar, name varchar, nickname varchar, dob date, country struct(id integer, name varchar))[])', 15 | away_team: 'struct(away_team_id integer, away_team_name varchar, away_team_gender varchar, away_team_group varchar, country struct(id integer, name varchar), 16 | managers struct(id varchar, name varchar, nickname varchar, dob date, country struct(id integer, name varchar))[])', 17 | home_score: integer, 18 | away_score: integer, 19 | match_status: varchar, 20 | match_status_360: varchar, 21 | last_updated: varchar, 22 | last_updated_360: varchar, 23 | metadata: 'struct(data_version varchar)', 24 | match_week: integer, 25 | competition_stage: 'struct(id integer, name varchar)', 26 | stadium: 'struct(id integer, name varchar, country struct(id integer, name varchar))', 27 | referee: 'struct(id integer, name varchar, country struct(id integer, name varchar))'} 28 | ) 29 | ), 30 | final as ( 31 | select 32 | match_id, 33 | match_date, 34 | kick_off, 35 | competition.competition_id, 36 | competition.country_name as competition_country_name, 37 | competition.competition_name, 38 | season.season_id, 39 | season.season_name, 40 | home_team.home_team_id, 41 | home_team.home_team_name, 42 | home_team.home_team_gender, 43 | home_team.home_team_group, 44 | home_team.country.id as home_team_country_id, 45 | home_team.country.name as home_team_country_name, 46 | home_team.managers [1].id as home_team_manager_id, 47 | home_team.managers [1].name as home_team_manager_name, 48 | home_team.managers [1].nickname as home_team_manager_nickname, 49 | home_team.managers [1].dob as home_team_manager_dob, 50 | home_team.managers [1].country.id as home_team_manager_country_id, 51 | home_team.managers [1].country.name as home_team_manager_country_name, 52 | away_team.away_team_id, 53 | away_team.away_team_name, 54 | away_team.away_team_gender, 55 | away_team.away_team_group, 56 | away_team.country.id as away_team_country_id, 57 | away_team.country.name as away_team_country_name, 58 | away_team.managers [1].id as away_team_manager_id, 59 | away_team.managers [1].name as away_team_manager_name, 60 | away_team.managers [1].nickname as away_team_manager_nickname, 61 | away_team.managers [1].dob as away_team_manager_dob, 62 | away_team.managers [1].country.id as away_team_manager_country_id, 63 | away_team.managers [1].country.name as away_team_manager_country_name, 64 | home_score, 65 | away_score, 66 | match_status, 67 | match_status_360, 68 | case when last_updated is null then null else cast(left(concat(replace(last_updated, 'T', ' '), ':00'), 19) as timestamp) end as last_updated, 69 | case when last_updated_360 is null then null else cast(left(concat(replace(last_updated_360, 'T', ' '), ':00'), 19) as timestamp) end as last_updated_360, 70 | metadata.data_version as metadata_data_version, 71 | match_week, 72 | competition_stage.id as competition_stage_id, 73 | competition_stage.name as competition_stage_name, 74 | stadium.id as stadium_id, 75 | stadium.name as stadium_name, 76 | stadium.country.id as country_id, 77 | stadium.country.name as country_name, 78 | referee.id as referee_id, 79 | referee.name as referee_name, 80 | referee.country.id as referee_country_id, 81 | referee.country.name as referee_country_name 82 | from 83 | raw_json 84 | ) 85 | select 86 | * 87 | from 88 | final; -------------------------------------------------------------------------------- /football/statsbomb_parser.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Work in progress, StatsBomb parser" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "\n", 18 | "import pandas as pd\n", 19 | "import requests" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "class Sbopen:\n", 29 | " def __init__(self, dataframe=True):\n", 30 | " self.dataframe = dataframe\n", 31 | " self.url = 'https://raw.githubusercontent.com/statsbomb/open-data/master/data/'\n", 32 | "\n", 33 | " @staticmethod\n", 34 | " def get_data(url):\n", 35 | " resp = requests.get(url=url)\n", 36 | " resp.raise_for_status()\n", 37 | " return resp.json()\n", 38 | "\n", 39 | " def event(self, match_id):\n", 40 | " url = f'{self.url}events/{match_id}.json'\n", 41 | " data = self.get_data(url)\n", 42 | " return flatten_event(data, match_id, self.dataframe)\n", 43 | "\n", 44 | " def lineup(self, match_id):\n", 45 | " url = f'{self.url}lineups/{match_id}.json'\n", 46 | " data = self.get_data(url)\n", 47 | " return flatten_lineup(data, match_id, self.dataframe)\n", 48 | "\n", 49 | " def match(self, competition, season):\n", 50 | " url = f'{self.url}matches/{competition}/{season}.json'\n", 51 | " data = self.get_data(url)\n", 52 | " return flatten_match(data, self.dataframe)\n", 53 | "\n", 54 | " def competition(self):\n", 55 | " url = f'{self.url}competitions.json'\n", 56 | " data = self.get_data(url)\n", 57 | " if self.dataframe:\n", 58 | " return pd.DataFrame(data)\n", 59 | " return data\n", 60 | "\n", 61 | " def frame(self, match):\n", 62 | " url = f'{self.url}three-sixty/{match}.json'\n", 63 | " data = self.get_data(url)\n", 64 | " return flatten_360(data, match, self.dataframe)\n", 65 | "\n", 66 | "\n", 67 | "class Sbapi:\n", 68 | " def __init__(self, username=None, password=None, dataframe=True):\n", 69 | " if username is None:\n", 70 | " username = os.environ.get(\"SB_USERNAME\")\n", 71 | " if password is None:\n", 72 | " password = os.environ.get(\"SB_PASSWORD\")\n", 73 | " self.auth = requests.auth.HTTPBasicAuth(username, password)\n", 74 | " self.dataframe = dataframe\n", 75 | " self.url = 'https://data.statsbombservices.com/api/v'\n", 76 | "\n", 77 | " def get_data(self, url):\n", 78 | " resp = requests.get(url=url, auth=self.auth)\n", 79 | " resp.raise_for_status()\n", 80 | " return resp.json()\n", 81 | "\n", 82 | " def event(self, match_id, version=6):\n", 83 | " url = f'{self.url}{version}/events/{match_id}'\n", 84 | " data = self.get_data(url)\n", 85 | " return flatten_event(data, match_id, self.dataframe)\n", 86 | "\n", 87 | " def lineup(self, match_id, version=2):\n", 88 | " url = f'{self.url}{version}/lineups/{match_id}'\n", 89 | " data = self.get_data(url)\n", 90 | " return flatten_lineup(data, match_id, self.dataframe)\n", 91 | "\n", 92 | " def match(self, competition, season, version=5):\n", 93 | " url = f'{self.url}{version}/competitions/{competition}/seasons/{season}/matches'\n", 94 | " data = self.get_data(url)\n", 95 | " return flatten_match(data, self.dataframe)\n", 96 | "\n", 97 | " def competition(self, version=4):\n", 98 | " url = f'{self.url}{version}/competitions'\n", 99 | " data = self.get_data(url)\n", 100 | " if self.dataframe:\n", 101 | " return pd.DataFrame(data)\n", 102 | " return data\n", 103 | "\n", 104 | " def frame(self, match_id, version=1):\n", 105 | " url = f'{self.url}{version}/360-frames/{match_id}'\n", 106 | " data = self.get_data(url)\n", 107 | " return flatten_360(data, match_id, self.dataframe)\n", 108 | "\n", 109 | "\n", 110 | "def _flatten_location(row, value, keyword=''):\n", 111 | " \"\"\" Flatten a list of locations into dictionary keys (x, y, z).\"\"\"\n", 112 | " if len(value) == 2:\n", 113 | " row[f'{keyword}x'], row[f'{keyword}y'] = value\n", 114 | " elif len(value) == 3:\n", 115 | " row[f'{keyword}x'], row[f'{keyword}y'], row[f'{keyword}z'] = value\n", 116 | " else:\n", 117 | " msg = 'location length not equal to 2 (x, y) or 3 (x, y, z)'\n", 118 | " raise AssertionError(msg)\n", 119 | "\n", 120 | "\n", 121 | "def _flatten_freeze(data, match_id, event_id):\n", 122 | " \"\"\" Flatten the freeze-frame events.\"\"\"\n", 123 | " for row in data:\n", 124 | " row['match_id'] = match_id\n", 125 | " row['id'] = event_id\n", 126 | " for key in list(row):\n", 127 | " value = row[key]\n", 128 | " if key == 'location':\n", 129 | " _flatten_location(row, value)\n", 130 | " del row['location']\n", 131 | " elif key in ['player', 'position']:\n", 132 | " for nested_key in value:\n", 133 | " row[f'{key}_{nested_key}'] = value[nested_key]\n", 134 | " del row[key]\n", 135 | " return data\n", 136 | "\n", 137 | "\n", 138 | "def _flatten_tactic(data, match_id, event_id):\n", 139 | " \"\"\" Flatten the tactics events.\"\"\"\n", 140 | " for row in data:\n", 141 | " row['match_id'] = match_id\n", 142 | " row['id'] = event_id\n", 143 | " for key in list(row):\n", 144 | " if key in ['player', 'position']:\n", 145 | " value = row[key]\n", 146 | " for nested_key in value:\n", 147 | " row[f'{key}_{nested_key}'] = value[nested_key]\n", 148 | " del row[key]\n", 149 | " return data\n", 150 | "\n", 151 | "\n", 152 | "def _flatten_list_of_lists(list_of_lists, key):\n", 153 | " \"\"\" Flatten a list of lists into a list\"\"\"\n", 154 | " flat_list = []\n", 155 | " for sublist in list_of_lists:\n", 156 | " for idx, item in enumerate(sublist):\n", 157 | " item[key] = idx + 1\n", 158 | " flat_list.append(item)\n", 159 | " return flat_list\n", 160 | "\n", 161 | "\n", 162 | "def _event_dataframe(data):\n", 163 | " \"\"\" Transform the event dictionary into a dataframe.\"\"\"\n", 164 | " df = pd.DataFrame(data)\n", 165 | " df['timestamp'] = pd.to_datetime(df['timestamp']).dt.time\n", 166 | " df.sort_values(['period', 'timestamp', 'index'], inplace=True)\n", 167 | " df.reset_index(drop=True, inplace=True)\n", 168 | " for col in ['counterpress', 'under_pressure', 'off_camera', 'out']:\n", 169 | " if col in df.columns:\n", 170 | " df[col] = df[col].astype(float)\n", 171 | " return df\n", 172 | "\n", 173 | "\n", 174 | "def _related_dataframe(data, df_events):\n", 175 | " \"\"\" Transform the related-events dictionary into a dataframe. For carries, we also\n", 176 | " ensure that both the carry and the related event are related both ways.\n", 177 | " Sometimes another event is not related to the carry event (but it is the other way round)\"\"\"\n", 178 | " df = pd.DataFrame(data)\n", 179 | " cols = ['id', 'index', 'type_name']\n", 180 | " df = df.merge(df_events[cols].rename({'id': 'id_related'}, axis='columns'),\n", 181 | " how='left', on='id_related', validate='m:1',\n", 182 | " suffixes=('', '_related'))\n", 183 | " df_carry = df[df['type_name'] == 'Carry'].copy()\n", 184 | " df_carry.rename({'id': 'id_related',\n", 185 | " 'index': 'index_related',\n", 186 | " 'type_name': 'type_name_related',\n", 187 | " 'id_related': 'id',\n", 188 | " 'index_related': 'index',\n", 189 | " 'type_name_related': 'type_name'},\n", 190 | " axis='columns', inplace=True)\n", 191 | " df = pd.concat([df, df_carry]).drop_duplicates()\n", 192 | " return df\n", 193 | "\n", 194 | "\n", 195 | "def _competition_dataframe(data):\n", 196 | " df = pd.DataFrame(data)\n", 197 | " date_cols = ['match_updated', 'match_updated_360', 'match_available_360', 'match_available']\n", 198 | " for date in date_cols:\n", 199 | " if date in df.columns:\n", 200 | " df[date] = pd.to_datetime(df[date])\n", 201 | " return df\n", 202 | "\n", 203 | "\n", 204 | "def _match_dataframe(data):\n", 205 | " df = pd.DataFrame(data)\n", 206 | " df['kick_off'] = pd.to_datetime(df['match_date'] + ' ' + df['kick_off'])\n", 207 | " date_cols = ['match_date', 'last_updated', 'last_updated_360',\n", 208 | " 'home_team_managers_dob', 'away_team_managers_dob']\n", 209 | " for date in date_cols:\n", 210 | " if date in df.columns:\n", 211 | " df[date] = pd.to_datetime(df[date])\n", 212 | " return df\n", 213 | "\n", 214 | "\n", 215 | "def flatten_event(events, match_id, dataframe=True):\n", 216 | " \"\"\" Flatten the events (list) so each row (dictionary) contains no nested events.\n", 217 | "\n", 218 | " Parameters\n", 219 | " ----------\n", 220 | " events : list of dicts\n", 221 | " The events to flatten.\n", 222 | " match_id : int\n", 223 | " The StatsBomb match identifier.\n", 224 | " dataframe : bool, default True\n", 225 | " Whether to return the results as a dataframe (True)\n", 226 | " or as flattened lists of dictionaries (False)\n", 227 | "\n", 228 | " Returns\n", 229 | " -------\n", 230 | " events, related, freeze, tactics\n", 231 | " If dataframe=True then returns dataframes else if dataframe=False\n", 232 | " each of the returned values is a list of dictionaries.\n", 233 | " \"\"\"\n", 234 | " related = []\n", 235 | " freeze = []\n", 236 | " tactics = []\n", 237 | " for row in events:\n", 238 | " row['match_id'] = match_id\n", 239 | " for key in list(row):\n", 240 | "\n", 241 | " # unpack nested columns\n", 242 | " if isinstance(row[key], dict):\n", 243 | " for nested_key in list(row[key]):\n", 244 | " nested_value = row[key][nested_key]\n", 245 | " if nested_key == 'end_location':\n", 246 | " _flatten_location(row, nested_value, keyword='end_')\n", 247 | " elif nested_key == 'aerial_won':\n", 248 | " row[f'{nested_key}'] = nested_value\n", 249 | " elif nested_key in ['outcome', 'body_part', 'technique', 'aerial_won']:\n", 250 | " for k in nested_value:\n", 251 | " row[f'{nested_key}_{k}'] = nested_value[k]\n", 252 | " elif nested_key == 'freeze_frame':\n", 253 | " freeze.append(_flatten_freeze(nested_value, match_id, row['id']))\n", 254 | " elif nested_key == 'lineup':\n", 255 | " tactics.append(_flatten_tactic(nested_value, match_id, row['id']))\n", 256 | " elif nested_key == 'type':\n", 257 | " for k in nested_value:\n", 258 | " row[f'sub_{nested_key}_{k}'] = nested_value[k]\n", 259 | " elif isinstance(nested_value, dict):\n", 260 | " for k in nested_value:\n", 261 | " row[f'{key}_{nested_key}_{k}'] = nested_value[k]\n", 262 | " else:\n", 263 | " row[f'{key}_{nested_key}'] = nested_value\n", 264 | " del row[key]\n", 265 | "\n", 266 | " # unpack the location column\n", 267 | " if 'location' in row:\n", 268 | " _flatten_location(row, row['location'])\n", 269 | " del row['location']\n", 270 | "\n", 271 | " # replace random star in ball receipts in some rows\n", 272 | " row['type_name'] = row['type_name'].replace('Ball Receipt*', 'Ball Receipt')\n", 273 | "\n", 274 | " # pass through ball is deprecated now, but it was not always added to technique name\n", 275 | " if 'pass_through_ball' in row:\n", 276 | " row['technique_name'] = 'Through Ball'\n", 277 | "\n", 278 | " # drop cols that are covered by other columns\n", 279 | " # (e.g. pass technique covers through, ball, inswinging etc.)\n", 280 | " cols_to_drop = ['pass_through_ball', 'pass_outswinging', 'pass_inswinging',\n", 281 | " 'clearance_head', 'clearance_left_foot', 'clearance_right_foot',\n", 282 | " 'pass_straight', 'clearance_other', 'goalkeeper_punched_out',\n", 283 | " 'goalkeeper_shot_saved_off_target', 'shot_saved_off_target',\n", 284 | " 'goalkeeper_shot_saved_to_post', 'shot_saved_to_post',\n", 285 | " 'goalkeeper_lost_out', 'goalkeeper_lost_in_play',\n", 286 | " 'goalkeeper_success_out', 'goalkeeper_success_in_play',\n", 287 | " 'goalkeeper_saved_to_post', 'shot_kick_off',\n", 288 | " 'goalkeeper_penalty_saved_to_post',\n", 289 | " ]\n", 290 | " for col in cols_to_drop:\n", 291 | " row.pop(col, None)\n", 292 | "\n", 293 | " # remove related_events as storing as separate dictionary\n", 294 | " if 'related_events' in row:\n", 295 | " for related_event in row['related_events']:\n", 296 | " related.append({'match_id': match_id,\n", 297 | " 'id': row['id'],\n", 298 | " 'index': row['index'],\n", 299 | " 'type_name': row['type_name'],\n", 300 | " 'id_related': related_event})\n", 301 | " del row['related_events']\n", 302 | "\n", 303 | " # flatten list of lists (e.g. player in lineup or freeze-frame into separate entry)\n", 304 | " tactics = _flatten_list_of_lists(tactics, key='event_tactics_id')\n", 305 | " freeze = _flatten_list_of_lists(freeze, key='event_freeze_id')\n", 306 | "\n", 307 | " if dataframe:\n", 308 | " events = _event_dataframe(events)\n", 309 | " related = _related_dataframe(related, events)\n", 310 | " freeze = pd.DataFrame(freeze)\n", 311 | " tactics = pd.DataFrame(tactics)\n", 312 | "\n", 313 | " return events, related, freeze, tactics\n", 314 | "\n", 315 | "\n", 316 | "def flatten_lineup(data, match_id, dataframe=True):\n", 317 | " lineup = []\n", 318 | " for row in data:\n", 319 | " for player in row['lineup']:\n", 320 | " player['match_id'] = match_id\n", 321 | " player['team_id'] = row['team_id']\n", 322 | " player['team_name'] = row['team_name']\n", 323 | " if 'country' in player:\n", 324 | " player['country_id'] = player['country']['id']\n", 325 | " player['country_name'] = player['country']['name']\n", 326 | " del player['country']\n", 327 | " if player['player_nickname'] is None:\n", 328 | " player['player_nickname'] = player['player_name']\n", 329 | " player.pop('positions', None) # if flattened would be multiple lines\n", 330 | " player.pop('cards', None) # if flattened would be multiple lines\n", 331 | " lineup.append(player)\n", 332 | " if dataframe:\n", 333 | " lineup = pd.DataFrame(lineup)\n", 334 | " return lineup\n", 335 | "\n", 336 | "\n", 337 | "def flatten_match(match, dataframe=True):\n", 338 | " for row in match:\n", 339 | " for key in list(row):\n", 340 | " value = row[key]\n", 341 | " if isinstance(value, dict):\n", 342 | " for nested_key in list(value):\n", 343 | " nested_value = value[nested_key]\n", 344 | " if isinstance(nested_value, list):\n", 345 | " nested_value = nested_value[0]\n", 346 | " if isinstance(nested_value, dict):\n", 347 | " for k in list(nested_value):\n", 348 | " if k == 'nickname' and not nested_value[k]:\n", 349 | " row[f'{key}_{nested_key}_{k}'] = nested_value['name']\n", 350 | " else:\n", 351 | " if isinstance(nested_value[k], dict):\n", 352 | " for sub_k in nested_value[k]:\n", 353 | " nested_sub_value = nested_value[k][sub_k]\n", 354 | " row[f'{key}_{nested_key}_{k}_{sub_k}'] = nested_sub_value\n", 355 | " else:\n", 356 | " row[f'{key}_{nested_key}_{k}'] = nested_value[k]\n", 357 | " elif key in ['competition_stage', 'stadium', 'referee', 'metadata']:\n", 358 | " row[f'{key}_{nested_key}'] = nested_value\n", 359 | " else:\n", 360 | " row[nested_key] = nested_value\n", 361 | " del row[key]\n", 362 | " if dataframe:\n", 363 | " match = _match_dataframe(match)\n", 364 | " return match\n", 365 | "\n", 366 | "\n", 367 | "def flatten_360(data, match_id, dataframe=True):\n", 368 | " frames = []\n", 369 | " visible = []\n", 370 | " for row in data:\n", 371 | " for idx, frame in enumerate(row['freeze_frame']):\n", 372 | " frame['match_id'] = match_id\n", 373 | " frame['id'] = row['event_uuid']\n", 374 | " _flatten_location(frame, frame['location'])\n", 375 | " del frame['location']\n", 376 | " frames.append(frame)\n", 377 | " frame_visible = {'match_id': match_id,\n", 378 | " 'id': row['event_uuid'],\n", 379 | " 'visible_area': row['visible_area'],\n", 380 | " }\n", 381 | " visible.append(frame_visible)\n", 382 | " if dataframe:\n", 383 | " frames = pd.DataFrame(frames)\n", 384 | " visible = pd.DataFrame(visible)\n", 385 | " return frames, visible" 386 | ] 387 | }, 388 | { 389 | "cell_type": "markdown", 390 | "metadata": {}, 391 | "source": [ 392 | "# Test it works" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": null, 398 | "metadata": {}, 399 | "outputs": [], 400 | "source": [ 401 | "parser = Sbapi(username=None, password=None)\n", 402 | "match_id = 3798898\n", 403 | "competition_id = 11\n", 404 | "season_id = 90\n", 405 | "\n", 406 | "#parser = Sbopen()\n", 407 | "#match_id = 3788741\n", 408 | "#competition_id = 11\n", 409 | "#season_id = 1" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": null, 415 | "metadata": {}, 416 | "outputs": [], 417 | "source": [ 418 | "events, related, freeze, tactics = parser.event(match_id)" 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": null, 424 | "metadata": {}, 425 | "outputs": [], 426 | "source": [ 427 | "lineups = parser.lineup(match_id)" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": null, 433 | "metadata": {}, 434 | "outputs": [], 435 | "source": [ 436 | "frames, visible = parser.frame(match_id)" 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": null, 442 | "metadata": {}, 443 | "outputs": [], 444 | "source": [ 445 | "competition = parser.competition()" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": null, 451 | "metadata": {}, 452 | "outputs": [], 453 | "source": [ 454 | "matches = parser.match(competition=competition_id, season=season_id)" 455 | ] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": null, 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [ 463 | "events.head()" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": null, 469 | "metadata": {}, 470 | "outputs": [], 471 | "source": [ 472 | "related.head()" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": {}, 479 | "outputs": [], 480 | "source": [ 481 | "freeze.head()" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": null, 487 | "metadata": {}, 488 | "outputs": [], 489 | "source": [ 490 | "tactics.head()" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": null, 496 | "metadata": {}, 497 | "outputs": [], 498 | "source": [ 499 | "lineups.head()" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": null, 505 | "metadata": {}, 506 | "outputs": [], 507 | "source": [ 508 | "frames.head()" 509 | ] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "execution_count": null, 514 | "metadata": {}, 515 | "outputs": [], 516 | "source": [ 517 | "visible.head()" 518 | ] 519 | }, 520 | { 521 | "cell_type": "code", 522 | "execution_count": null, 523 | "metadata": {}, 524 | "outputs": [], 525 | "source": [ 526 | "competition.head()" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": null, 532 | "metadata": {}, 533 | "outputs": [], 534 | "source": [ 535 | "matches.head()" 536 | ] 537 | } 538 | ], 539 | "metadata": { 540 | "kernelspec": { 541 | "display_name": "Python 3 (ipykernel)", 542 | "language": "python", 543 | "name": "python3" 544 | }, 545 | "language_info": { 546 | "codemirror_mode": { 547 | "name": "ipython", 548 | "version": 3 549 | }, 550 | "file_extension": ".py", 551 | "mimetype": "text/x-python", 552 | "name": "python", 553 | "nbconvert_exporter": "python", 554 | "pygments_lexer": "ipython3", 555 | "version": "3.10.4" 556 | } 557 | }, 558 | "nbformat": 4, 559 | "nbformat_minor": 4 560 | } 561 | -------------------------------------------------------------------------------- /pysport/01_get_f24_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "2c2b202c-59b9-40f5-ad19-a897c72c7918", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import os\n", 11 | "import pandas as pd\n", 12 | "import glob\n", 13 | "from optasoccer import read_f24" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "id": "fe4ade04-0cf8-4477-a853-1f8fd4ac043a", 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "all_events = []\n", 24 | "all_matches = []\n", 25 | "files = glob.glob(os.path.join('data', 'f24', '*.xml'))\n", 26 | "for file in files:\n", 27 | " events, matches = read_f24(file)\n", 28 | " all_events.append(events)\n", 29 | " all_matches.append(matches)\n", 30 | "all_events = pd.concat(all_events)\n", 31 | "all_events = all_events[list(all_events.columns[:19]) + list(all_events.columns[19:].sort_values())].copy()\n", 32 | "all_matches = pd.concat(all_matches)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "id": "22385103-5b33-4aea-8e7b-43b9fd5ed963", 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "competition_id = all_matches.competition_id.unique()[0]\n", 43 | "season_id = all_matches.season_id.unique()[0]\n", 44 | "fname_suffix = f'{competition_id}_{season_id}'\n", 45 | "all_events = all_events.sort_values(['match_id', 'period_id', 'min', 'sec', 'timestamp']).reset_index(drop=True).copy()\n", 46 | "all_matches.to_parquet(f'opta_matches_{fname_suffix}.parquet')\n", 47 | "all_events.to_parquet(f'opta_events_{fname_suffix}.parquet')" 48 | ] 49 | } 50 | ], 51 | "metadata": { 52 | "kernelspec": { 53 | "display_name": "Python 3 (ipykernel)", 54 | "language": "python", 55 | "name": "python3" 56 | }, 57 | "language_info": { 58 | "codemirror_mode": { 59 | "name": "ipython", 60 | "version": 3 61 | }, 62 | "file_extension": ".py", 63 | "mimetype": "text/x-python", 64 | "name": "python", 65 | "nbconvert_exporter": "python", 66 | "pygments_lexer": "ipython3", 67 | "version": "3.12.2" 68 | } 69 | }, 70 | "nbformat": 4, 71 | "nbformat_minor": 5 72 | } 73 | -------------------------------------------------------------------------------- /pysport/02_get_fbref_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "aff60507-446d-4691-968a-da6411082285", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import requests\n", 11 | "from bs4 import BeautifulSoup, Comment\n", 12 | "import pandas as pd\n", 13 | "import numpy as np\n", 14 | "from lxml import etree, html\n", 15 | "import re\n", 16 | "from io import StringIO\n", 17 | "import os\n", 18 | "import glob\n", 19 | "\n", 20 | "DATA_DIR = os.path.join('data', 'fbref')\n", 21 | "\n", 22 | "def get_soup(url):\n", 23 | " headers = {'User-Agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) '\n", 24 | " 'Chrome/39.0.2171.95 Safari/537.36')}\n", 25 | " r = requests.get(url, headers=headers)\n", 26 | " r.encoding = 'unicode-escape'\n", 27 | " return BeautifulSoup(r.content, 'html.parser')\n", 28 | "\n", 29 | "def get_url(stat):\n", 30 | " prefix = 'https://fbref.com/en/comps/37/2022-2023/'\n", 31 | " suffix = '/2022-2023-Belgian-Pro-League-Stats'\n", 32 | " return f'{prefix}{stat}{suffix}'\n", 33 | "\n", 34 | "def flatten_cols(df):\n", 35 | " col_level1 = list(df.columns.get_level_values(0))\n", 36 | " col_level1 = ['' if c[:7]=='Unnamed' else c.replace(' ', '_').lower() for c in col_level1]\n", 37 | " col_level2 = list(df.columns.get_level_values(1))\n", 38 | " col_level2 = [c.replace(' ', '_').lower() for c in col_level2]\n", 39 | " cols = [f'{c}_{col_level2[i]}' if c != '' else col_level2[i] for i, c in enumerate(col_level1)]\n", 40 | " cols = [re.sub('[^0-9a-zA-Z]+', '_', c.replace('%', '_percent').replace('+/-', '_plus_minus')).rstrip('_') for c in cols]\n", 41 | " df.columns = cols\n", 42 | " return df\n", 43 | "\n", 44 | "def extract_stats(url):\n", 45 | " soup = get_soup(url)\n", 46 | " comments = soup.findAll(string=lambda string:isinstance(string, Comment))\n", 47 | " extracted_comments = [comment.extract() for comment in comments if 'table' in str(comment)] \n", 48 | " df = pd.read_html(StringIO(str(extracted_comments[0])))[0]\n", 49 | " return flatten_cols(df)\n", 50 | "\n", 51 | "def stats_to_parquet(stat, directory):\n", 52 | " url = get_url(stat)\n", 53 | " df = extract_stats(url)\n", 54 | " df = df[df['rk'] != 'Rk'].copy()\n", 55 | " df.drop(['rk', 'matches'], axis='columns', inplace=True)\n", 56 | " file_name = os.path.join(directory, f'{stat}.parquet')\n", 57 | " df.to_parquet(file_name)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "id": "445947f7-f6e0-4c62-ab59-4caec70fe5f6", 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "stats_to_parquet('playingtime', DATA_DIR)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "id": "227b0731-5275-4707-84a7-e03ae40fb2ab", 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "stats_to_parquet('stats', DATA_DIR)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "id": "81ff4ca9-097f-4295-be7d-1e7ede6a1d9c", 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "stats_to_parquet('keepers', DATA_DIR)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "id": "f22b0102-28f7-47c4-9327-8933b2a5d8e6", 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "stats_to_parquet('keepersadv', DATA_DIR)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "id": "8d335176-8c03-4955-8b3a-60eef1bd4de5", 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "stats_to_parquet('shooting', DATA_DIR)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "id": "18f228d7-4aa8-4e3d-ae22-731b634c6412", 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "stats_to_parquet('passing', DATA_DIR)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "id": "ea0776f6-f3a5-4c55-b3c2-384f8af0ecc0", 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "stats_to_parquet('passing_types', DATA_DIR)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "id": "2d8a5718-e385-41fa-9673-60edcd3cbe92", 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "stats_to_parquet('gca', DATA_DIR)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "id": "8df12e5e-f0cf-4baf-ac1c-87a8812f15ac", 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "stats_to_parquet('defense', DATA_DIR)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "id": "58c29c73-6ab0-458d-ab83-1bfff8e3dfe7", 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "stats_to_parquet('possession', DATA_DIR)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "id": "0d9f1448-91c5-429f-870e-f712fb501fd9", 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "stats_to_parquet('misc', DATA_DIR)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "id": "84bdc0bc-17b9-4794-b310-c1843d44b37b", 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "files = glob.glob(os.path.join(DATA_DIR, '*'))\n", 178 | "df = pd.read_parquet([f for f in files if 'playingtime' in f][0])\n", 179 | "files = [f for f in files if 'playingtime' not in f]\n", 180 | "for f in files:\n", 181 | " df_temp = pd.read_parquet(f)\n", 182 | " df = df.merge(df_temp, on=['player', 'squad'], how='left', suffixes=['', '_to_remove'])\n", 183 | "cols_to_remove = [col for col in df.columns if '_to_remove' in col]\n", 184 | "df.drop(cols_to_remove, axis='columns', inplace=True)\n", 185 | "df.to_parquet('player_stats_112_2022.parquet')" 186 | ] 187 | } 188 | ], 189 | "metadata": { 190 | "kernelspec": { 191 | "display_name": "Python 3 (ipykernel)", 192 | "language": "python", 193 | "name": "python3" 194 | }, 195 | "language_info": { 196 | "codemirror_mode": { 197 | "name": "ipython", 198 | "version": 3 199 | }, 200 | "file_extension": ".py", 201 | "mimetype": "text/x-python", 202 | "name": "python", 203 | "nbconvert_exporter": "python", 204 | "pygments_lexer": "ipython3", 205 | "version": "3.12.2" 206 | } 207 | }, 208 | "nbformat": 4, 209 | "nbformat_minor": 5 210 | } 211 | -------------------------------------------------------------------------------- /pysport/03_turn_f24_data_to_actions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "2c2b202c-59b9-40f5-ad19-a897c72c7918", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd\n", 11 | "from mplsoccer import Pitch" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "id": "c81bc44e-35a5-432b-811c-22c58ef9d804", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "df = pd.read_parquet('opta_events_112_2022.parquet')\n", 22 | "p = Pitch(pitch_type='opta')" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "f8574cf9-d45a-4785-83bc-b8b444c17806", 28 | "metadata": {}, 29 | "source": [ 30 | "Add on the x/ coordinates in relation to the camera instead of left to right" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "id": "3f68fddc-d8ae-46f0-8a6f-37485c7b3248", 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "df_direction = df.loc[df.direction_of_play.notnull(), ['match_id', 'team_id', 'period_id', 'direction_of_play']].copy()\n", 41 | "df_direction['camera_right_to_left'] = df_direction['direction_of_play'] == 'Right to Left'\n", 42 | "df.drop('direction_of_play', axis='columns', inplace=True)\n", 43 | "df = df.merge(df_direction, on=['match_id', 'team_id', 'period_id'], how='left', validate='m:1')\n", 44 | "# fill missing\n", 45 | "df.loc[df['shot_goal_mouth_y_coordinate'].notnull(), 'shot_goal_mouth_x_coordinate'] = p.dim.right\n", 46 | "df.loc[df['type_name'] == 'foul_throw_in', 'end_x'] = df.loc[df['type_name'] == 'foul_throw_in', 'x']\n", 47 | "df.loc[df['type_name'] == 'foul_throw_in', 'end_y'] = df.loc[df['type_name'] == 'foul_throw_in', 'y']\n", 48 | "# flip coordinates for camera\n", 49 | "df['camera_x'], df['camera_y'] = p.flip_side(df['x'], df['y'], df['camera_right_to_left'])\n", 50 | "df['camera_pass_end_x'], df['camera_pass_end_y'] = p.flip_side(df['pass_end_x'], df['pass_end_y'], df['camera_right_to_left'])\n", 51 | "df['camera_shot_blocked_x_coordinate'], df['camera_shot_blocked_y_coordinate'] = p.flip_side(df['shot_blocked_x_coordinate'], df['shot_blocked_y_coordinate'], df['camera_right_to_left'])\n", 52 | "df['camera_shot_goal_mouth_x_coordinate'], df['camera_shot_goal_mouth_y_coordinate'] = p.flip_side(df['shot_goal_mouth_x_coordinate'], df['shot_goal_mouth_y_coordinate'], df['camera_right_to_left'])\n", 53 | "# end coordinates\n", 54 | "df['camera_end_x'] = (df['camera_pass_end_x']\n", 55 | " .fillna(df['camera_shot_blocked_x_coordinate'])\n", 56 | " .fillna(df['camera_shot_goal_mouth_x_coordinate'])\n", 57 | " )\n", 58 | "df['camera_end_y'] = (df['camera_pass_end_y']\n", 59 | " .fillna(df['camera_shot_blocked_y_coordinate'])\n", 60 | " .fillna(df['camera_shot_goal_mouth_y_coordinate'])\n", 61 | " )" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "id": "5a8b3368-e93e-435f-92c5-f6a5a2ff328f", 67 | "metadata": {}, 68 | "source": [ 69 | "Add on some boolean columns for pass/shot/goal/ and set pieces" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "id": "83bfc757-842c-46ff-8d58-61f3b7fbd5ec", 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "df['shot'] = df['type_name'].isin(['goal', 'attempt_saved', 'miss', 'post'])\n", 80 | "df['goal'] = df['type_name'] == 'goal'\n", 81 | "df['pass'] = df['type_name'].isin(['pass', 'offside_pass', 'foul_throw_in'])\n", 82 | "df['set_piece_taken'] = (df['free_kick_taken'] |\n", 83 | " df['free_kick'] | # includes tap pass can exclude with df['assisted'].isnull()\n", 84 | " df['corner_taken'] |\n", 85 | " df['shot_corner_direct'] |\n", 86 | " df['goalkeeper_goal_kick'] |\n", 87 | " df['kick_off'] |\n", 88 | " df['throw_in'] |\n", 89 | " (df['type_name'] == 'foul_throw_in')\n", 90 | " )" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "id": "e2056e99-c056-4412-9357-b471e2c30ec9", 96 | "metadata": {}, 97 | "source": [ 98 | "Remove some events that aren't the offensive team or aren't related to play (e.g. formations)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "id": "225e2369-ef21-4888-a226-8bce6c4e7c0d", 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "remove_events = ['attempted_tackle', 'card', 'challenge', 'chance_missed',\n", 109 | " 'coach_setup', 'collection_end', 'contentious_referee_decision',\n", 110 | " 'coverage_interruption', 'cross_not_claimed', 'delayed_start',\n", 111 | " 'deleted_after_review', 'end', 'end_delay',\n", 112 | " 'formation_change', 'good_skill', 'injury_time_announcement',\n", 113 | " 'obstacle', 'offside_provoked', 'penalty_faced',\n", 114 | " 'player_becomes_goalkeeper', 'player_off', 'player_on',\n", 115 | " 'player_retired', 'referee_drop_ball', 'shield_ball_opp', 'start',\n", 116 | " 'start_delay', 'take_on', 'team_set_up']\n", 117 | "df = df[~df['type_name'].isin(remove_events)].copy()\n", 118 | "# remove defensive duels\n", 119 | "df = df[df['duel_events_defensive'].isnull()].copy()" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "id": "6853f008-de31-4fb4-bc69-d57a61324488", 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "multi_outcome_events = ['aerial', '50_50', 'foul',\n", 130 | " 'corner_awarded', 'foul_throw_in', 'out', 'referee_drop_ball']\n", 131 | "mask_multi = (df['type_name'].isin(multi_outcome_events))\n", 132 | "mask_success = df['outcome'] == 1\n", 133 | "df = df[(~mask_multi) | (mask_multi & mask_success)].dropna(how='all', axis='columns').reset_index(drop=True).copy()" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "id": "48324fb7-3cfe-4b47-9f42-0bdd8fe604a4", 139 | "metadata": {}, 140 | "source": [ 141 | "Add 'out' events to the previous events and change the outcome of events where the pass or ball recovery went out/ was offside but it looks succesful" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "id": "2d582f70-e3e2-4591-9bcf-75df690b65ca", 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "df['out'] = df['type_name'].isin(['out', 'corner_awarded'])\n", 152 | "df['previous_out'] = df.groupby(['match_id', 'period_id'])['out'].shift(1) == True\n", 153 | "df['out'] = df.groupby(['match_id', 'period_id'])['out'].shift(-1) == True\n", 154 | "df['next_camera_x'] = df.groupby(['match_id', 'period_id'])['camera_x'].shift(-1)\n", 155 | "df['next_camera_y'] = df.groupby(['match_id', 'period_id'])['camera_y'].shift(-1)\n", 156 | "df = df[~df['type_name'].isin(['out', 'corner_awarded'])].dropna(how='all', axis='columns').reset_index(drop=True).copy()" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "id": "b16a9c1b-2768-4e2a-a772-b4b38dbff4e0", 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "mask_change_outcome = (df['out']) & (df['type_name'] == 'pass') & (df['outcome'] == 1)\n", 167 | "print('Number of outcomes changed:', mask_change_outcome.sum())\n", 168 | "df.loc[mask_change_outcome, 'outcome'] = 0\n", 169 | "# change outcome of offside pass to zero\n", 170 | "mask_change_outcome2 = df['type_name'].isin(['offside_pass', 'foul_throw_in'])\n", 171 | "print('Number of outcomes changed:', mask_change_outcome2.sum())\n", 172 | "df.loc[mask_change_outcome, 'outcome'] = 0\n", 173 | "mask_change_type = (df['type_name'] == 'ball_recovery') & (df['out'] == True)\n", 174 | "print('Number of type_name changed', mask_change_type.sum())\n", 175 | "df.loc[mask_change_type, 'type_name'] = 'ball_touch'" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "id": "af80f6a2-da3b-440a-b3f8-4a9f8ff0f8ff", 181 | "metadata": {}, 182 | "source": [ 183 | "Fix x/y end coordinates that went out but the event doesn't have the final coordinate" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "id": "526ce63b-00ce-4a2e-9364-d11503ca2b8e", 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "mask_missing = df['out'] & (df['camera_end_x'].isnull())\n", 194 | "mask_change = (df['out'] & \n", 195 | " (df['camera_end_x'] > 0) & (df['camera_end_x'] < 100) & (df['camera_end_y'] > 0) & (df['camera_end_y'] < 100) & \n", 196 | " ((df['next_camera_x'] <= 0) | (df['next_camera_x'] >= 0) | (df['next_camera_x'] >= 100) | (df['next_camera_y'] >= 100))\n", 197 | " )\n", 198 | "df.loc[mask_missing | mask_change, 'camera_end_x'] = df.loc[mask_missing | mask_change, 'next_camera_x']\n", 199 | "df.loc[mask_missing | mask_change, 'camera_end_y'] = df.loc[mask_missing | mask_change, 'next_camera_y']" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "id": "ac5ec3cb-d286-419b-aa2a-16c282852d4c", 205 | "metadata": {}, 206 | "source": [ 207 | "Work out if an event was a carry" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "id": "f5f7d008-a7d3-485c-a7f5-828c3c44b446", 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "df['previous_team_name'] = df.groupby(['match_id', 'period_id'])['team_name'].shift(1)\n", 218 | "df['previous_type_name'] = df.groupby(['match_id', 'period_id'])['type_name'].shift(1)\n", 219 | "df['previous_player_id'] = df.groupby(['match_id', 'period_id'])['player_id'].shift(1)\n", 220 | "df['previous_camera_end_x'] = df.groupby(['match_id', 'period_id'])['camera_end_x'].shift(1)\n", 221 | "df['previous_camera_end_y'] = df.groupby(['match_id', 'period_id'])['camera_end_y'].shift(1)\n", 222 | "df['previous_camera_x'] = df.groupby(['match_id', 'period_id'])['camera_x'].shift(1)\n", 223 | "df['previous_camera_y'] = df.groupby(['match_id', 'period_id'])['camera_y'].shift(1)\n", 224 | "df['previous_outcome'] = df.groupby(['match_id', 'period_id'])['outcome'].shift(1)\n", 225 | "df['same_team'] = (df['previous_team_name'] == df['team_name']) | (df['previous_team_name'].isnull())\n", 226 | "df['same_player'] = (df['previous_player_id'] == df['player_id'])\n", 227 | "df['previous_defensive_touch_type_control'] = df.groupby(['match_id', 'period_id'])['defensive_touch_type_control'].shift(1)\n", 228 | "df['previous_timestamp_utc'] = df.groupby(['match_id', 'period_id'])['timestamp_utc'].shift(1)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "id": "08b35c74-c607-4ece-8101-6dfba96668c1", 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "mask1 = df['same_team'] & df['previous_type_name'].isin(['pass', 'ball_recovery', 'keeper_pick_up', 'drop_of_ball', 'miss', 'attempt_saved', 'smother', 'punch', 'post'])\n", 239 | "mask2 = df['previous_type_name'].isin(['50_50', 'tackle']) & (df['same_player'] == 1) & (df.type_name == 'pass') & df['duel_events_offensive'].isnull()\n", 240 | "mask3 = (df['previous_type_name'] == 'claim') & (df['same_player']) & (df['previous_outcome'] == 1)\n", 241 | "mask4 = (df['timestamp_utc'] - df['previous_timestamp_utc']) < pd.Timedelta(1, 'minute')\n", 242 | "mask_carry = ((mask1 | mask2 | mask3)\n", 243 | " & mask4 # rules out around 200 dribbles for being 1+ minutes\n", 244 | " & (df['set_piece_taken'].isnull())\n", 245 | " & (df['shot_first_touch'].isnull())\n", 246 | " & (df['shot_volley'].isnull())\n", 247 | " & (df['body_part_head'].isnull())\n", 248 | " & (df['body_part_other'].isnull())\n", 249 | " )\n", 250 | "df['carry_between'] = mask_carry\n", 251 | "df.index = df.index + df['carry_between'].cumsum()" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "id": "6753234d-ade8-4c18-9f64-aabbe29fd5b0", 257 | "metadata": {}, 258 | "source": [ 259 | "Create carry events and add to the other actions" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "id": "7f6859ad-9448-458e-bbbc-3a506259e2ea", 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [ 269 | "df_carry = df.loc[df['carry_between'], ['match_id', 'period_id', 'team_id', 'team_name', 'player_id', 'type_name', 'previous_camera_x', 'previous_camera_y',\n", 270 | " 'previous_camera_end_x', 'previous_camera_end_y', 'camera_x', 'camera_y']].copy()\n", 271 | "df_carry.rename({'camera_x': 'camera_end_x', 'camera_y': 'camera_end_y'}, axis='columns', inplace=True)\n", 272 | "df_carry['type_name'] = 'carry'\n", 273 | "df_carry['camera_x'], df_carry['camera_y'] = df_carry['previous_camera_end_x'].fillna(df['previous_camera_x']), df_carry['previous_camera_end_y'].fillna(df['previous_camera_y'])\n", 274 | "df_carry = df_carry.drop(['previous_camera_x', 'previous_camera_y', 'previous_camera_end_x', 'previous_camera_end_y'], axis='columns').copy()\n", 275 | "xstart, ystart = p.standardizer.transform(df_carry['camera_x'], df_carry['camera_y'])\n", 276 | "xend, yend = p.standardizer.transform(df_carry['camera_end_x'], df_carry['camera_end_y'])\n", 277 | "df_carry['angle'], df_carry['length'] = p.calculate_angle_and_distance(xstart, ystart, xend, yend, standardized=True)\n", 278 | "df_carry.index = df_carry.index - 1\n", 279 | "df_carry = df_carry[df_carry['length'] >= 2].copy() # rules out 51.1k for being less than 2 meters\n", 280 | "df_carry['pass'] = False\n", 281 | "df_carry['shot'] = False\n", 282 | "df_carry['goal'] = False\n", 283 | "df_carry['outcome'] = 1\n", 284 | "# add to the other actions\n", 285 | "df_actions = pd.concat([df, df_carry]).sort_index().reset_index(drop=True)\n", 286 | "df_actions['carry'] = df_actions['type_name'] == 'carry'" 287 | ] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "id": "5ff5860d-e28e-4e36-b191-e6f47e355526", 292 | "metadata": {}, 293 | "source": [ 294 | "Clean up and add the non-camera coordinates and save the final action filem" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": null, 300 | "id": "5420e2fe-cc27-44cd-9708-a5d7da957910", 301 | "metadata": {}, 302 | "outputs": [], 303 | "source": [ 304 | "df_actions.drop(columns=['camera_right_to_left', 'shot_goal_mouth_x_coordinate', 'camera_pass_end_x', 'camera_pass_end_y', 'end_x', 'end_y',\n", 305 | " 'camera_shot_blocked_x_coordinate', 'camera_shot_blocked_y_coordinate', 'camera_shot_goal_mouth_x_coordinate',\n", 306 | " 'camera_shot_goal_mouth_y_coordinate', 'out', 'previous_out',\n", 307 | " 'next_camera_x', 'next_camera_y', 'previous_team_name', 'previous_type_name', 'previous_player_id', 'previous_camera_end_x',\n", 308 | " 'previous_camera_end_y', 'previous_camera_x', 'previous_camera_y', 'previous_outcome', 'same_team', 'same_player', \n", 309 | " 'previous_defensive_touch_type_control', 'previous_timestamp_utc', 'carry_between'], axis='columns', inplace=True)" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "id": "b8e9fba9-45ba-408b-ac4d-da419d56b2ff", 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [ 319 | "df_actions = df_actions.merge(df_direction, on=['match_id', 'team_id', 'period_id'], how='left', validate='m:1')\n", 320 | "new_x, new_y = p.flip_side(df_actions['camera_x'], df_actions['camera_y'], df_actions['camera_right_to_left'])\n", 321 | "df_actions.loc[df_actions['x'].isnull(), 'x'] = new_x[df_actions['x'].isnull()].round(1)\n", 322 | "df_actions.loc[df_actions['y'].isnull(), 'y'] = new_y[df_actions['y'].isnull()].round(1)\n", 323 | "df_actions['end_x'], df_actions['end_y'] = p.flip_side(df_actions['camera_end_x'], df_actions['camera_end_y'], df_actions['camera_right_to_left'])\n", 324 | "df_actions['end_x'] = df_actions['end_x'].round(1)\n", 325 | "df_actions['end_y'] = df_actions['end_y'].round(1)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "id": "44cbd222-31fe-4ec9-b82e-3b440152a396", 332 | "metadata": {}, 333 | "outputs": [], 334 | "source": [ 335 | "df_actions.to_parquet('opta_actions_112_2022.parquet')" 336 | ] 337 | } 338 | ], 339 | "metadata": { 340 | "kernelspec": { 341 | "display_name": "Python 3 (ipykernel)", 342 | "language": "python", 343 | "name": "python3" 344 | }, 345 | "language_info": { 346 | "codemirror_mode": { 347 | "name": "ipython", 348 | "version": 3 349 | }, 350 | "file_extension": ".py", 351 | "mimetype": "text/x-python", 352 | "name": "python", 353 | "nbconvert_exporter": "python", 354 | "pygments_lexer": "ipython3", 355 | "version": "3.12.2" 356 | } 357 | }, 358 | "nbformat": 4, 359 | "nbformat_minor": 5 360 | } 361 | -------------------------------------------------------------------------------- /pysport/data/f24/README.md: -------------------------------------------------------------------------------- 1 | # Data for analysis 2 | Placeholder for saving the Opta f24 data. 3 | -------------------------------------------------------------------------------- /pysport/data/fbref/README.md: -------------------------------------------------------------------------------- 1 | # Data for analysis 2 | Placeholder for saving the fbref data. 3 | -------------------------------------------------------------------------------- /pysport/old_trafford_google_earth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewRowlinson/data-science/91bc237e20e27dcb62cb1525655790b0ab8c5e85/pysport/old_trafford_google_earth.png -------------------------------------------------------------------------------- /pysport/pysport_presentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewRowlinson/data-science/91bc237e20e27dcb62cb1525655790b0ab8c5e85/pysport/pysport_presentation.pdf -------------------------------------------------------------------------------- /simulation/simulate_composition_method.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "The composition method can be used to simulate complicated distributions, such as the double exponential. First, a random number is generated to identify, which distribution to sample from. Then a random variable is selected from the chosen distribution." 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "#import seaborn\n", 21 | "import seaborn as sns\n", 22 | "# set to plot automatically\n", 23 | "%matplotlib inline\n", 24 | "# set font size of charts\n", 25 | "sns.set(font_scale=2)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "# Example: double exponential distribution" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "Set parameters" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": { 46 | "collapsed": true 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "# set samples\n", 51 | "samples = 1000\n", 52 | "# set lambda\n", 53 | "lam = 3" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "Generate random uniform number U~U(0,1) and V~U(0,1)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 3, 66 | "metadata": { 67 | "collapsed": true 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "U = np.random.uniform(0,1,(samples,2))" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "step a) If $U \\leq \\frac{1}{2}$ then generate $X$ from $\\frac{1}{\\lambda} \\log(V)$" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 4, 84 | "metadata": { 85 | "collapsed": true 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "# step a) filter to return V where U<=1/2\n", 90 | "a=U[:,1][np.where(U[:,0]<=0.5)]\n", 91 | "# calculate X1\n", 92 | "X1=(1/lam)*np.log(a)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "step b) If $U > \\frac{1}{2}$ then generate $X$ from $-\\frac{1}{\\lambda} \\log(1-V)$" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 5, 105 | "metadata": { 106 | "collapsed": true 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "# step b) filter to return V where U>1/2\n", 111 | "b=U[:,1][np.where(U[:,0]>0.5)]\n", 112 | "# calculate X2\n", 113 | "X2=(-1/lam)*np.log(1-b)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 6, 119 | "metadata": { 120 | "collapsed": true 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "# combine X1 and X2 to form a single distribution\n", 125 | "X=np.concatenate([X1,X2])" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 7, 131 | "metadata": {}, 132 | "outputs": [ 133 | { 134 | "data": { 135 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfUAAAFTCAYAAAAgDZXiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X9Y1fX9//EHHAVMQPxBXoYgCkuv5owfjVq/NszVdHaJ\nmVJKijo7XgvxUlZqXrmyLrWuPjb8QSBmCOoSV+H6savNTMvWZVhJWxstDTKPP8YUElR+HA/fP/rC\nYhzwR284nBf323+93i/OeZ4Xxx7n9eL1Pi+fxsbGRgEAAK/n6+kCAACANQh1AAAMQagDAGAIQh0A\nAEMQ6gAAGIJQBwDAED08XcD3VVFR7ekSOlTfvlepsvKcp8swBuNpLcbTWoyntUwdz9DQoDavMVPv\n4nr0sHm6BKMwntZiPK3FeFqrO44noQ4AgCEIdQAADEGoAwBgCEIdAABDEOoAABiCUAcAwBCEOgAA\nhiDUAQAwBKEOAIAhCHUAAAxBqAMAYAhCHQAAQ3j9KW0APGPWqt1tXtu0eHQnVgKgCTN1AAAMQagD\nAGAIQh0AAEMQ6gAAGIJQBwDAEIQ6AACGINQBADAEoQ4AgCEIdQAADEGoAwBgCEIdAABDEOoAABiC\nUAcAwBCEOgAAhiDUAQAwxGWfp37y5EmNGzdO8+bNU2pqaqvrRUVFysvLU3l5uYKDgzV27Filp6er\nd+/erfru2bNHzz//vP71r38pICBAiYmJysjIUP/+/a/oxQCwVntnpgPoei5rpn727FnNmzdPNTU1\nbq/n5ORo0aJFcrlcSklJ0YgRI5SXl6fZs2ervr6+Rd/XX39ddrtdp06d0v3336+bbrpJr776qu67\n7z6dOXPmyl8RAADd1CXP1B0Oh+bNm6fPPvuszetr1qxRbGysCgoK1LNnT0lSZmamsrKyVFhYqJSU\nFEnffjhYvny5wsPDVVRUpMDAQEnSLbfcoqVLl+r555/XokWLvu9rAwCgW7mkmXpeXp7uvvtulZaW\n6qabbnLbp7CwUE6nU3a7vTnQJWnu3LkKDAzUjh07mtveeOMNffPNN0pNTW0OdEm69957NXToUL3y\nyiu6cOHClb4mAAC6pUsK9fz8fIWFhWnLli2aMGGC2z7FxcWSpISEhBbt/v7+iomJUWlpqaqrq1v0\nvfHGG1s9TkJCgqqqqvTFF19c+qsAAACXFupPPPGEioqKFBcX12afI0eOaMCAAW43xIWFhUmSysrK\nJElff/21JCk8PLxV38GDB7foCwAALs0lhfptt90mm83Wbp+qqioFBQW5vdbU3rTBrrKyUn5+fgoI\nCGjVt2k5vq3NeAAAwD3L7lN3Op3y8/Nze62pva6u7rL7AgCAS3PZ96m3JSAgQA0NDW6vNd3O1qtX\nr8vuezF9+16lHj3aX0XwdqGh7ldAcGUYz47HGF85xs5a3W08LQv14ODg5o1w/6upvWkZPjg4WHV1\ndaqvr281Y29adm9rKf9/VVaeu9KSvUJoaJAqKtyPKy4f49k5GOMrw/vTWqaOZ3sfVCxbfo+MjNSp\nU6dUW1vb6prD4ZCvr6+GDBnS3FeSjh492qpvU9vQoUOtKg0AgG7BslCPj4+Xy+XSgQMHWrTX1dXp\n4MGDio6Obt4EFx8fL+m/t7Z91/79+xUUFKSoqCirSgMAoFuwLNTHjx8vm82mdevWtfhK2OzsbNXU\n1Cg5Obm5bcyYMerdu7c2btyoqqqq5vY//OEPKi8v1+TJk+Xry1kzAABcDsv+ph4VFaVZs2YpNzdX\nSUlJSkxM1KFDh7Rnzx7FxcVpypQpzX1DQkL08MMP6/HHH1dSUpLGjh2rkydP6k9/+pMiIyNlt9ut\nKgvARXBoC2AOy0JdkjIyMjRo0CBt27ZN+fn5Cg0NVWpqqtLS0lptiLv//vvVp08fbdy4UVu3blWf\nPn2UlJSkBQsWKCQkxMqyAADoFnwaGxsbPV3E92HizsbvMnX3pqcwnq11xEx90+LRlj9md8D701qm\njmen7H4HAACeRagDAGAIQh0AAEMQ6gAAGIJQBwDAEIQ6AACGINQBADAEoQ4AgCEIdQAADEGoAwBg\nCEIdAABDEOoAABiCUAcAwBCEOgAAhiDUAQAwBKEOAIAhCHUAAAxBqAMAYAhCHQAAQxDqAAAYooen\nCwBgnlmrdrtt37R4dCdXAnQvzNQBADAEoQ4AgCEIdQAADEGoAwBgCEIdAABDEOoAABiCUAcAwBCE\nOgAAhiDUAQAwBKEOAIAhCHUAAAxBqAMAYAhCHQAAQxDqAAAYglAHAMAQhDoAAIbo0REPWllZqd/9\n7nfavXu3KisrdfXVV+sXv/iF5s2bp169erXoW1RUpLy8PJWXlys4OFhjx45Venq6evfu3RGlAQBg\nLMtn6mfPntXUqVP10ksvaejQoXrggQd09dVX64UXXtDMmTPldDqb++bk5GjRokVyuVxKSUnRiBEj\nlJeXp9mzZ6u+vt7q0gAAMJrlM/Xt27fryy+/1PTp07V06VJJUmNjox5++GG99tpreu211zRx4kQ5\nHA6tWbNGsbGxKigoUM+ePSVJmZmZysrKUmFhoVJSUqwuDwAAY1k+U//b3/4mSZo0aVJzm4+PjyZP\nnixJOnjwoCSpsLBQTqdTdru9OdAlae7cuQoMDNSOHTusLg0AAKNZHuohISGSpGPHjrVoP3nypCSp\nX79+kqTi4mJJUkJCQot+/v7+iomJUWlpqaqrq60uDwAAY1ke6pMmTVLPnj21cuVKffTRRzp//rz2\n79+vZ599VkFBQc0z+CNHjmjAgAFuN8SFhYVJksrKyqwuDwAAY1ke6iNHjtSLL76o2tpaTZ06VTEx\nMZo+fbpsNpt+//vfa/DgwZKkqqoqBQUFuX2MpvaamhqrywMAwFiWb5Q7deqUVq9erYqKCiUmJmro\n0KH6+9//rg8//FDLli1TTk6OgoOD5XQ65efn5/Yxmtrr6uou+nx9+16lHj1slr6GriY01P2HH1wZ\nxtNzGPuLY4ys1d3G0/JQz8jI0Mcff6znnntO48aNa27Py8vTypUr9dhjjykzM1MBAQFqaGhw+xhN\nt7P97z3t7lRWnrOm8C4qNDRIFRXsLbAK4+lZjH37eH9ay9TxbO+DiqXL7ydOnNAHH3ygH//4xy0C\nXZJSU1MVHR2tP//5z6qpqVFwcHCbG+Ga2ttangcAAK1ZGurHjx+XJA0bNszt9aioKLlcLp08eVKR\nkZE6deqUamtrW/VzOBzy9fXVkCFDrCwPAACjWRrqAwYMkCSVl5e7vf7VV1/Jx8dH/fv3V3x8vFwu\nlw4cONCiT11dnQ4ePKjo6GgFBgZaWR4AAEaz9G/q4eHh+uEPf6gPP/xQu3bt0pgxY5qv7dixQ6Wl\npbrtttsUEhKi8ePHKycnR+vWrVNCQkLz5rjs7GzV1NQoOTnZytIAdAGzVu1u89qmxaM7sRLATJZv\nlFuxYoUeeOABzZs3r3n3++eff6733ntPoaGh+u1vfyvp26X4WbNmKTc3V0lJSUpMTNShQ4e0Z88e\nxcXFacqUKVaXBgCA0SwP9REjRujll1/W+vXr9f7772vv3r3q37+/kpOTlZaWpquvvrq5b0ZGhgYN\nGqRt27YpPz9foaGhSk1NVVpaWpu3uwEAAPc65OjViIgIPf300xft5+Pjo2nTpmnatGkdUQYAAN2K\n5d8oBwAAPINQBwDAEIQ6AACGINQBADAEoQ4AgCEIdQAADEGoAwBgCEIdAABDEOoAABiCUAcAwBCE\nOgAAhiDUAQAwBKEOAIAhCHUAAAxBqAMAYAhCHQAAQxDqAAAYglAHAMAQhDoAAIYg1AEAMAShDgCA\nIQh1AAAMQagDAGAIQh0AAEMQ6gAAGIJQBwDAEIQ6AACGINQBADAEoQ4AgCEIdQAADEGoAwBgCEId\nAABDEOoAABiih6cLANDxZq3a7ekSAHQCZuoAABiCmTqALqG91YRNi0d3YiWA9+qwmfof//hH3Xvv\nvbr++ut16623Kj09XWVlZa36FRUVKSkpSTExMbr99tu1cuVKnT17tqPKAgDAWB0S6s8995wefvhh\nVVdXa+rUqUpISNCuXbuUnJyso0ePNvfLycnRokWL5HK5lJKSohEjRigvL0+zZ89WfX19R5QGAICx\nLF9+//TTT5WTk6OEhATl5uYqICBAknTnnXdq/vz5Wr9+vVauXCmHw6E1a9YoNjZWBQUF6tmzpyQp\nMzNTWVlZKiwsVEpKitXlAQBgLMtn6lu3bpUkLV++vDnQJemuu+5ScnKyIiIiJEmFhYVyOp2y2+3N\ngS5Jc+fOVWBgoHbs2GF1aQAAGM3ymfq7776ra6+9VkOHDm3R7uPjo+XLlzf/d3FxsSQpISGhRT9/\nf3/FxMRo3759qq6uVlBQkNUlAgBgJEtn6qdOndLp06f1gx/8QIcPH1ZaWppuuOEGxcfHKz09XV9/\n/XVz3yNHjmjAgAHq3bt3q8cJCwuTJLcb6wAAgHuWhvq///1vSdLJkyc1efJkORwOTZo0SXFxcXrr\nrbeUnJwsh8MhSaqqqmpzFt7UXlNTY2V5AAAYzdLl93Pnzkn6dmk9KSlJK1askM1mkyQVFBToqaee\n0ooVK7R+/Xo5nU75+fm5fZym9rq6uos+Z9++V6lHD5tFr6BrCg3lTxBWYjy9T3f6nXWn19oZutt4\nWhrqvr7fTvxtNpuWLFnSHOiSNG3aNG3evFl79+7V+fPnFRAQoIaGBreP03Q7W69evS76nJWV5yyo\nvOsKDQ1SRUW1p8swBuPpnbrL74z3p7VMHc/2PqhYuvzetGweFhamkJCQlk/k66vhw4eroaFBx44d\nU3BwsKqr3Q92Uzub5AAAuHSWztTDw8Nls9nanIE7nU5J387AIyMjVVxcrNra2ha3vkmSw+GQr6+v\nhgwZYmV5gPE4uAXo3iydqfv7+2vkyJE6fvy4vvrqqxbXnE6nSktLFRISooEDByo+Pl4ul0sHDhxo\n0a+urk4HDx5UdHS0AgMDrSwPAACjWf7lM1OmTJEkPfXUUy1m7Js2bdKJEyeUlJQkm82m8ePHy2az\nad26dS2+EjY7O1s1NTVKTk62ujQAAIxm+ZfPTJo0Se+884527dqlpKQk3X777Tp8+LD27t2ryMhI\npaWlSZKioqI0a9Ys5ebmKikpSYmJiTp06JD27NmjuLi45g8HAADg0lg+U/fx8VFmZqaWLFkiSdqy\nZYv++c9/aurUqXrppZdabH7LyMjQsmXL5OPjo/z8fH3xxRdKTU3Vhg0b2rzdDQAAuOfT2NjY6Oki\nvg8Tb1f4LlNvyfAU08fT1I1y3eU8ddPfn53N1PHstFvaAACA5xDqAAAYglAHAMAQhDoAAIYg1AEA\nMAShDgCAIQh1AAAMQagDAGAIQh0AAEMQ6gAAGIJQBwDAEIQ6AACGINQBADAEoQ4AgCEIdQAADEGo\nAwBgCEIdAABDEOoAABiCUAcAwBCEOgAAhiDUAQAwBKEOAIAhCHUAAAxBqAMAYAhCHQAAQxDqAAAY\nglAHAMAQhDoAAIYg1AEAMAShDgCAIQh1AAAMQagDAGAIQh0AAEMQ6gAAGIJQBwDAEIQ6AACG6JRQ\nf/rppzV8+HDt37+/1bWioiIlJSUpJiZGt99+u1auXKmzZ892RlkAABilw0P9008/1ebNm91ey8nJ\n0aJFi+RyuZSSkqIRI0YoLy9Ps2fPVn19fUeXBgCAUXp05IPX19fr0Ucf1YULF1pdczgcWrNmjWJj\nY1VQUKCePXtKkjIzM5WVlaXCwkKlpKR0ZHkAABilQ2fq2dnZKi8v180339zqWmFhoZxOp+x2e3Og\nS9LcuXMVGBioHTt2dGRpAAAYp8NCvbS0VBs2bJDdbld0dHSr68XFxZKkhISEFu3+/v6KiYlRaWmp\nqqurO6o8AACM0yGhfuHCBS1dulRDhgyR3W532+fIkSMaMGCAevfu3epaWFiYJKmsrKwjygMAwEgd\n8jf1F154Qf/4xz+0bds2+fn5ue1TVVWlwYMHu70WFBQkSaqpqemI8gAAMJLloV5WVqZ169Zp6tSp\nio2NbbOf0+lsM/Cb2uvq6i76fH37XqUePWxXVqyXCA0N8nQJRmE8vU93+p11p9faGbrbeFoa6o2N\njVq6dKn69++vhQsXtts3ICBADQ0Nbq813c7Wq1eviz5nZeW5yy/Ui4SGBqmigr0FVmE8vVN3+Z3x\n/rSWqePZ3gcVS0N969at+uijj7Rhwwa3fyv/ruDg4DY3wjW1Ny3DAwCAi7M01N966y1J0oMPPuj2\n+vTp0yVJb7/9tiIjI1VcXKza2loFBAS06OdwOOTr66shQ4ZYWR4AAEazNNQnTpzY6hY1SXrvvfdU\nUlKiiRMnKiwsTMHBwYqPj9f+/ft14MAB3Xrrrc196+rqdPDgQUVHRyswMNDK8gAAMJqloX7PPfe4\nbT9z5kxzqN94442SpPHjxysnJ0fr1q1TQkJC8+a47Oxs1dTUKDk52crSAAAwXod+TWx7oqKiNGvW\nLOXm5iopKUmJiYk6dOiQ9uzZo7i4OE2ZMsVTpQEA4JU8FuqSlJGRoUGDBmnbtm3Kz89XaGioUlNT\nlZaW1ubtbgAAwD2fxsbGRk8X8X2YeLvCd5l6S4anmD6es1bt9nQJHWLT4tGeLqFTmP7+7Gymjmd7\nt7R1ynnqAACg4xHqAAAYglAHAMAQhDoAAIbw6O53AO61t+Gtu2waA3D5mKkDAGAIQh0AAEMQ6gAA\nGIJQBwDAEIQ6AACGYPc74GVM/SpYAN8fM3UAAAxBqAMAYAhCHQAAQxDqAAAYglAHAMAQhDoAAIYg\n1AEAMAShDgCAIQh1AAAMwTfKATASZ9KjO2KmDgCAIQh1AAAMwfI7AK/GATfAfzFTBwDAEIQ6AACG\nINQBADAEoQ4AgCHYKAegy2MzHHBpmKkDAGAIQh0AAEMQ6gAAGIJQBwDAEIQ6AACGINQBADAEoQ4A\ngCE65D71iooKrV27Vnv37tWpU6fUp08f/eQnP9H8+fMVHh7eom9RUZHy8vJUXl6u4OBgjR07Vunp\n6erdu3dHlAYAgLEsn6lXVFRo8uTJ2r59u6KiovTAAw/oRz/6kV5//XXde++9Ki8vb+6bk5OjRYsW\nyeVyKSUlRSNGjFBeXp5mz56t+vp6q0sDAMBols/U165dq+PHj2vx4sWaOXNmc/vOnTv1yCOPaNWq\nVcrOzpbD4dCaNWsUGxurgoIC9ezZU5KUmZmprKwsFRYWKiUlxeryAAAwluUz9V27dqlfv36aMWNG\ni/YJEyYoIiJC+/btk8vlUmFhoZxOp+x2e3OgS9LcuXMVGBioHTt2WF0aAABGs3SmfuHCBdntdvXo\n0UO+vq0/L/j5+amhoUFOp1PFxcWSpISEhBZ9/P39FRMTo3379qm6ulpBQUFWlggAgLEsDXWbzdZq\nht7k8OHD+vLLLxURESE/Pz8dOXJEAwYMcLshLiwsTJJUVlamUaNGWVkiALR7QMymxaM7sRLAWp1y\nS5vL5dKTTz4pl8ulKVOmSJKqqqranIU3tdfU1HRGeQAAGKHDQ72xsVHLli3TBx98oJEjRzbP5J1O\np/z8/Nz+TFN7XV1dR5cHAIAxOvQ8dafTqccee0yvvPKKwsPDlZWV1RzYAQEBamhocPtzTbez9erV\n66LP0bfvVerRw2Zd0V1QaCj7CqzEeKI97b0/7s7Y6bb9tf+b0CnPj8vX3cazw0L9/Pnzmj9/vvbu\n3avIyEi9+OKLGjhwYPP14OBgVVdXu/3ZpvZL2SRXWXnOmoK7qNDQIFVUuB8nXD7GExdzJe8Pq95T\nvD+tZep4tvdBpUNC/ZtvvtGcOXNUUlKi6667Ths3blT//v1b9ImMjFRxcbFqa2sVEBDQ4prD4ZCv\nr6+GDBnSEeUBnaqtTVlsyAJgNcv/pl5XVye73a6SkhIlJCSooKCgVaBLUnx8vFwulw4cONDq5w8e\nPKjo6GgFBgZaXR4AAMayPNRXr16tTz75RLGxscrNzW0zmMePHy+bzaZ169a1+ErY7Oxs1dTUKDk5\n2erSAAAwmqXL7xUVFdq6daskadiwYcrNzXXb78EHH1RUVJRmzZql3NxcJSUlKTExUYcOHdKePXsU\nFxfXfOsbYKr27pUGgCthaaiXlJQ072h/+eWX2+w3Y8YM+fv7KyMjQ4MGDdK2bduUn5+v0NBQpaam\nKi0trc3b3QAAgHuWhvqYMWP0+eefX3J/Hx8fTZs2TdOmTbOyDAAAuqVO+UY5AADQ8Qh1AAAMQagD\nAGAIQh0AAEMQ6gAAGIJQBwDAEIQ6AACGINQBADAEoQ4AgCEIdQAADNEh56kDJmrvABbORgfQFTBT\nBwDAEIQ6AACGINQBADAEoQ4AgCEIdQAADEGoAwBgCEIdAABDEOoAABiCUAcAwBB8oxy6pba+HY5v\nhgPgzZipAwBgCEIdAABDsPwOAN/R3sE9Vj8ef+6B1ZipAwBgCEIdAABDsPwOWMDqJVt4F37/6CqY\nqQMAYAhm6vBqVm9CYsaFzsQmOliNmToAAIYg1AEAMATL7+hUnbncyFI6gO6GmToAAIYg1AEAMASh\nDgCAIQh1AAAMwUY5XBHurwU8g397aI/HZ+pOp1N5eXkaN26cRo0apTvuuEPr169XQ0ODp0sDAMCr\neDzUly9frpUrVyokJETTp0/XwIEDtWbNGmVkZHi6NAAAvIpHl98//vhjbd++XXfddZcyMzPl4+Oj\nxsZGLV68WEVFRXrnnXeUmJjoyRLRibivHPivK/n30BFL8209Jkv9XZNHZ+pbt26VJKWlpcnHx0eS\n5OPjo4ULF8rHx0c7duzwZHkAAHgVj87UDxw4oL59++raa69t0T5w4EBFRkaquLi402vqKt94ZvVh\nJO09HhtvgO7N6lWBK9XW/28687muVFf5/6jHZur19fU6ceKEIiIi3F4PCwvTmTNndPr06U6uDAAA\n7+SxUK+qqpIkBQUFub3e1F5dXd1pNQEA4M18GhsbGz3xxMeOHVNiYqJGjx6t559/vtX1Rx55RDt3\n7tRrr73WankeAAC05rGZekBAgCS1eT96fX29JKlXr16dVhMAAN7MY6EeGBgoX19f1dTUuL3etOze\n1vI8AABoyWOh7ufnp2uuuUZHjx51e/3o0aPq16+fQkJCOrkyAAC8k0fvU4+Pj1dFRYXKyspatJ88\neVLl5eW6/vrrPVQZAADex6OhnpSUJEl67rnn5HK5JEmNjY1avXq1JCk5OdljtQEA4G08tvu9yYIF\nC/Tmm29q1KhRuvHGG/XJJ5/owIEDLb46FgAAXJzHD3R55plnlJ6ersrKSm3evFn/+c9/lJ6ermef\nfZZA/46//vWvmjlzpm644QaNHDlS48aN04YNG+R0Oj1dmtfbsmWLhg8frjNnzni6FK/B6Yod4+TJ\nk4qPj1deXp6nS/FaFRUVWrZsmX76059q5MiRuuWWW/Sb3/xGX3/9tadL6xQen6nj4nbu3KlFixap\nd+/euvPOOxUYGKj3339fhw8f1ujRo5WVlcUHoCtUXFysX/3qV6qtrVVxcbGCg4M9XZJXWLZsmbZv\n3674+HjFxcXp448/1kcffaS77rpLa9as8XR5Xuns2bOaOXOmSkpKtGTJEqWmpnq6JK9TUVGhyZMn\n6/jx47rllls0fPhwlZWVac+ePerTp4+2b9+uyMhIT5fZoTz63e+4uNraWq1YsUKBgYF69dVXFR4e\nLunb+/t//etfa/fu3frLX/6iO++808OVep833nhDS5cuVW1tradL8Sqcrmg9h8OhefPm6bPPPvN0\nKV5t7dq1On78uBYvXqyZM2c2t+/cuVOPPPKIVq1apezsbA9W2PE8vvyO9u3fv19VVVWaPHlyc6BL\nUs+ePWW32yVJ7777rqfK80qnT5/WQw89pIULF6pfv34aMmSIp0vyKpyuaK28vDzdfffdKi0t1U03\n3eTpcrzarl271K9fP82YMaNF+4QJExQREaF9+/Y1b8o2FaHexQ0ePFgLFy7Uz3/+81bX/Pz8JEnn\nzp3r7LK82hdffKG3335b99xzj4qKijRw4EBPl+RVuuLpit4sPz9fYWFh2rJliyZMmODpcrzWhQsX\nZLfblZaWJl/f1tHm5+enhoYG4/chsfzexUVFRSkqKsrttV27dkmSoqOjO7MkrxcREaGdO3dq+PDh\nni7F6zSdrtjWd0iEhYWprKxMp0+fVr9+/Tq5Ou/0xBNP6Oabb5bNZlN5ebmny/FaNput1Qy9yeHD\nh/Xll18qIiKieTJkKmbqXurw4cPKz8+Xn5+fJk6c6OlyvMqgQYMI9CvE6YrWu+2222Sz2TxdhrFc\nLpeefPJJuVwuTZkyxdPldDhm6h4yevRoORyOdvtMmzZNy5Yta9V+4sQJzZkzR+fPn9eSJUs0aNCg\njirTa3yf8cSla1q6bGu209ReV1fXaTUBbWlsbNSyZcv0wQcfaOTIkW3O5E1CqHvImDFjdPr06Xb7\njBo1qlXbV199pZkzZ8rhcOi+++7jtpf/70rHE5eH0xXhLZxOpx577DG98sorCg8PV1ZWlvFL7xKh\n7jGPPvroZf/Mp59+KrvdrtOnT+u+++7T448/bn1hXupKxhOXj9MV4Q3Onz+v+fPna+/evYqMjNSL\nL77YbTbEEupe4v3331daWprOnTunuXPnasGCBZ4uCd0Qpyuiq/vmm280Z84clZSU6LrrrtPGjRvV\nv39/T5fVadgo5wUOHjyohx56SOfPn9ejjz5KoMOjOF0RXVVdXZ3sdrtKSkqUkJCggoKCbhXoEqHe\n5Z09e1YLFizQ+fPntXjx4m6x0QNdG6croqtavXq1PvnkE8XGxio3N1eBgYGeLqnTsfzexRUWFurY\nsWMKCQlRdXW11q5d26rPsGHD9Mtf/tID1aE7uvnmmzVu3Di9+eabSk5ObnW64s9+9jNPl4huqKKi\novnbDocNG6bc3Fy3/R588EH5+/t3ZmmdilDv4pq+nauqqkrr1q1z2+eOO+4g1NGpnnnmGUVHR+vV\nV1/V5s2bdc011yg9PV1z5szhcCF4RElJSfNdGS+//HKb/WbMmGF0qHNKGwAAhuBv6gAAGIJQBwDA\nEIQ6AADX8TGDAAAALUlEQVSGINQBADAEoQ4AgCEIdQAADEGoAwBgCEIdAABDEOoAABiCUAcAwBD/\nD4SbU4b4/S2pAAAAAElFTkSuQmCC\n", 136 | "text/plain": [ 137 | "" 138 | ] 139 | }, 140 | "metadata": {}, 141 | "output_type": "display_data" 142 | } 143 | ], 144 | "source": [ 145 | "# plot\n", 146 | "plt.hist(X,bins=60);" 147 | ] 148 | } 149 | ], 150 | "metadata": { 151 | "kernelspec": { 152 | "display_name": "Python [conda root]", 153 | "language": "python", 154 | "name": "conda-root-py" 155 | }, 156 | "language_info": { 157 | "codemirror_mode": { 158 | "name": "ipython", 159 | "version": 3 160 | }, 161 | "file_extension": ".py", 162 | "mimetype": "text/x-python", 163 | "name": "python", 164 | "nbconvert_exporter": "python", 165 | "pygments_lexer": "ipython3", 166 | "version": "3.5.3" 167 | } 168 | }, 169 | "nbformat": 4, 170 | "nbformat_minor": 2 171 | } 172 | -------------------------------------------------------------------------------- /simulation/simulate_exponential_random_variable_from_uniform.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Generate exponential random variables from the uniform distribution using the inverse transform method. However, note that the built-in numpy functions run faster." 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "# import libraries\n", 19 | "import numpy as np\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "import seaborn as sns\n", 22 | "from scipy.stats import expon\n", 23 | "# set to plot automatically\n", 24 | "%matplotlib inline" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "# Simulate values" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": { 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "# set lambda for exponential distribution\n", 43 | "lam=0.01\n", 44 | "# set random state so repeatable results\n", 45 | "np.random.seed(42)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 3, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "# exponential simulated from the inverse transform method: -1/lambda*ln(1-X) where X~U(0,1)\n", 57 | "exponential_sim=(-1/lam)*np.log(1-np.random.uniform(0,1,1000))" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "# Plot simulated values versus actual pdf" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 4, 70 | "metadata": { 71 | "collapsed": true 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "# create x and y values for the actual pdf so we can plot them\n", 76 | "x = np.linspace(expon.ppf(0.0001,scale=1/lam),expon.ppf(0.9999,scale=1/lam))\n", 77 | "y = expon.pdf(x,scale=1/lam)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 5, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEFCAYAAAASWssjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmYXHWV//F3bb2lF7qTzgpIgHBAVEDACcomiqIzuA4u\nqLgBw4z7MjOOo4zj6Oj4G2R0lHFEgxvKqIgaQVHZZIuAAoIkBwIkECChk3TSnd5r+f3xvd1d3emu\nqoRUV3fV5/U89XTVvXWrT13CPf3dzo3lcjlEREQKiVc6ABERmf2ULEREpCglCxERKUrJQkREilKy\nEBGRopKVDqAcurp6n9EUr/b2Jrq7+/dVOHOWzkOg8xDoPATVfB46O1ti0+1Ty2IKyWSi0iHMCjoP\ngc5DoPMQ1Op5ULIQEZGilCxERKQoJQsRESlKyUJERIpSshARkaKULEREpKiyrbMwszhwCXAUMASc\n6+7r8/afCVwIpIFV7n5p3r6/AP7D3U+NXh8KfAvIAfcD73H3bLliFxGRicrZsngN0ODuJwAfAy4a\n3WFmKeBi4GXAKcD5ZrYo2vcPwDeAhrzP+iLwCXc/CYgBry5HwFu2xPjOd1LceGM5Pl1EZO4qZ7I4\nEfgVgLuvAY7L23cEsN7du919GLgFODna9zDwukmfdSxwU/T8l8BLyxHwr3+d5N5741xxRUgcIiIS\nlLPcRyuwM+91xsyS7p6eYl8v0Abg7lea2UGTPivm7rnJ751Oe3vTXq2ynDcPGhshl4Oenmae85w9\n/oiq09nZUukQZgWdh0DnIajF81DOZNED5J/ReJQoptrXAuwo8Fn54xPF3rvXdVuamxMMDCRpbKxj\n3boBVqxIFz+oinV2ttDV1VvpMCpO5yHQeQiq+TwUSoLl7Ia6FXglgJmtBO7L27cWWGFmHWZWR+iC\nur3AZ91tZqdGz18B3Lzvw4XFi8frDz71lLqhRERGlbNlcRVwupndRhiUfqeZnQ00u/vXzezDwLWE\nhLXK3Z8o8FkfAS6NEsta4MflCHhistCsYhGRUWVLFtHU1gsmbV6Xt381sHqaYzcAK/NeP0iYNVVW\nCxbkSKXC854e6OsL4xgiIrVOfz7niccnti42b1ZXlIgIKFnsZvHi8bF0dUWJiAS6Gk6iQW4Rkd0p\nWUyyZIkGuUVEJtPVcJIlS8a7obZsiZF7RnfzFhGpDkoWk7S0jM+AGhyE7u7KxiMiMhsoWUwSi8Gy\nZeOv1RUlIqJkMaX8ZKHpsyIiShZTUstCRGQiXQmnMDFZqGUhIqJkMYWlS8efP/10jHRtF58VEVGy\nmEpDA3R0hDmz2Sx0dal1ISK1TcliGqoRJSIyTsliGlrJLSIyTlfBaeSv5NYgt4jUOiWLaeS3LNQN\nJSK1TsliGgsW5EgkwvPt22MMDlY2HhGRSlKymEYyCZ2dKlcuIgJKFgXld0Vt2aJTJSK1S1fAAjTI\nLSISKFkUoLvmiYgEShYFTGxZxHUjJBGpWUoWBbS3h9IfAP390NNT2XhERColWekAZqMzP/Kzsefr\n1x5K/85w67xzP/MILfN7pzxm1cdOm5HYREQqQS2LIhqaxxdYDO5qqGAkIiKVo2RRhJKFiIiSRVEN\nLQNjzwd3NVYwEhGRylGyKGJiy6JeM6JEpCYpWRSRTGVI1o8AkMvGGe6vr3BEIiIzT8miBBq3EJFa\np2RRgsbm8XGLgV6NW4hI7VGyKEFj63iy6O9pqmAkIiKVoWRRgqa2vrHnAzubNMgtIjWnbCu4zSwO\nXAIcBQwB57r7+rz9ZwIXAmlglbtfOt0xZnY08LXovQ9G27PMkFTDCMm6NOnhJJl0guH+eurnDc3U\nrxcRqbhytixeAzS4+wnAx4CLRneYWQq4GHgZcApwvpktKnDMvwCfdvcTgXrgL8sY925iMWhq6x97\n3b9TXVEiUlvKmSxOBH4F4O5rgOPy9h0BrHf3bncfBm4BTi5wzN1Ah5nFgBZgpIxxT6mxNS9ZaNxC\nRGpMOQsJtgI7815nzCzp7ukp9vUCbdMdAzwEfBX4RLT/xkK/uL29iWQy8Yy/QL6mvGQxMEWy6Oxs\n2ae/b7ao1u+1p3QeAp2HoBbPQzmTRQ+hFTAqHiWKqfa1ADumO8bMvgSc5O5/NrP3ELqn3jPdL+7u\n7p9u115rzOuGGuhtJJuNEY+Pj3R3dU1djXYu6+xsqcrvtad0HgKdh6Caz0OhJFjObqhbgVcCmNlK\n4L68fWuBFWbWYWZ1hC6o2wscs52QSACeBNrLGPeUkqkM9U1hUDuXjTHYq8V5IlI7ytmyuAo43cxu\nA2LAO83sbKDZ3b9uZh8GriUkrFXu/oSZ7XZM9FnnAleYWRoYBs4rY9zTamztZygq99G/cx5NbQNF\njhARqQ4lJQszey6wAsgSBqbvL3ZMNLX1gkmb1+XtXw2sLuEY3P0W4EWlxFpOTW397NgcGjVTjVuI\niFSraZNFNPPoAuCDhAHoxwizkJabWSvwJeB/Z3K9Q6VNmBGl6bMiUkMKtSx+DPwGWOnu3fk7zKwN\neDuhq+nV5QtvdmlsGSAWy5HLxRjqryczkiCRylQ6LBGRsiuULM5x976pdrj7TuDLZvbN8oQ1O8UT\nORpaBhnoCcUE+3saaZm/q8JRiYiU37SzoUYThZl9PlrrQPR6sZmtzn9PLWksst5CRKQalTJ1tgO4\nw8yebWZvBe4AbihvWLNXflFBjVuISK0oOhvK3c83szcB9wJbgRe6+6Nlj2yWasovVx5VoI3FKhiQ\niMgMKNqyMLN3Av8P+GdC3aYfRVVga1L9vEHiyTConR5OMTKUqnBEIiLlV8o6iwuA0919HYCZ/SXw\nU+CgMsY1a8VioXWxa3szEMYt6hp2FjlKRGRum7ZlYWaj9SxOGE0UAO5+NeF+E/nvqSlabyEitaZQ\nN9TlZnYeMG+KfdmooN8PyhPW7KZ7W4hIrSnUDXUW8LfAnWa2A9hEuFPdQcB8wgrus8od4Gw0efqs\nbrMqItWuULI40d2/CnzVzI5ivDbUw+5+74xEN0ul6kdI1o+QHkqRzcQZ6qvJ3jgRqSGFksVXgeea\n2R3u/gLC1FlhdJC7n56uNiCs5BYRqWaFksWTZrYJ6DSzR/K2x4Ccux9c3tBmt6a2vGSxc6phHRGR\n6lEoWbwC2J9QRvxVMxPO3KGyHyJSS6ZNFlHp8ceIpsnKRPkruQd7GxgZgZTW54lIlSrnbVWrWiL/\nNqu5GE88oZofIlK9lCyegfz1Fo89plMpItWrlNpQXzWz42cimLkmf9xi40YlCxGpXqXUhvo98Hkz\nWwh8B/iuu28ub1hzw7z28RsfrV8fVwVaEalaRf8cdvfvuPtLgFcSps3eZma/MLPXlD26Wa6heZBE\nKg3Arl2wZYsyhYhUp5L6TsxsOfCO6LGecO/tN5jZd8oW2RwQi0Fz+/jNkNavV1eUiFSnUsYsbgV+\nE708w91f5u7fBM4BXl7O4OaCyV1RIiLVqJQxi4vc/Sf5G8zsWe6+EVhUnrDmjuaO3rHnDz+scQsR\nqU7TJgszO4AwRvFpM7szej56zDXA4eUPb/arnzdEsm4EgP5+ePLJGMuWqQytiFSXQi2LfwVeDCwF\nfpe3PQ38opxBzSWxGMybNG6xbFmmghGJiOx7hcp9vAvAzP7R3f9j5kKae5o7Jo5bnHKKkoWIVJdC\n3VDnu/vXgQYzu3Dyfnf/dFkjm0OaO3ohalw88kicbBbiGusWkSpS6JIWm/R88kMidY3DtLWFcYrB\nQdi0SadHRKrLtMnC3f83+vmvwOein5cDdwH/NjPhzQ2xGBx6aHbstabQiki1KWWdxSeBb5jZgYSB\n7g8CXyt3YHNNfrJ4+GElCxGpLqVc1V4NnAecDXzP3U8Hnl/WqOag/GTx6KNxMhrjFpEqUkqySLj7\nEPBXwDVmFgd0H9FJOjqgoyOMWwwNwWOPadxCRKpHKcniOjO7H6gjdEPdRLjVqkxyyCHqihKR6lS0\n3Ie7f9TMvgxscvesmb3P3e8pdlzUArmEcFvWIeBcd1+ft/9M4ELCIr9V7n7pdMdE5dEvBdqBBHCO\nuz+8p1+23FasyHLnnQkgDHK/9KXqixKR6lDKAPezgPcBl5rZKuD90c9iXgM0uPsJwMeAi/I+MwVc\nDLwMOAU438wWFTjmC8Dl7n4y8AlmaamR/HGLDRvipNMVDEZEZB8qpa/kh4R1FTcTuqBGH8WcCPwK\nwN3XAMfl7TsCWO/u3e4+DNwCnFzgmBcB+5vZb4G3ADeW8PtnXFsbdHaGcYuREdi4UeMWIlIdSqk6\nm3L3j+7FZ7cCO/NeZ8ws6e7pKfb1Am3THQMcBHS7+0uj1eT/SOjCmlJ7exPJZGIvQt57nZ0tADz/\n+fC7qJJWV1cdK1fOaBj73Oj3qnU6D4HOQ1CL56GUZHFLNL5wbdQKKFUPkH9G41GimGpfC7BjumPM\nbBvw82jbauCzhX5xd3d/od1l0dUVSpUvXBhnYCAFwB/+kGXlypEZj2Vf6exsGftetUznIdB5CKr5\nPBRKgqV0Q/018DNg0MwyZpY1s1JGbm8l3IoVM1sJ3Je3by2wwsw6zKyO0AV1e4FjbhndHr33zyX8\n/orInxG1cWOc4T1JryIis1Qps6GW7uVnXwWcbma3EcY83mlmZwPN7v51M/swcC0hYa1y9yfMbLdj\nos/6CGEV+d8SuqnO3suYyq6lBRYtyrFlS4xMJgx0H3ZYtviBIiKzWNFkEf3l/1HACLOiPgh8vliX\nlLtngQsmbV6Xt381k9ZrTHMM0V35Ti8W62yxYkWWLVvGp9AqWYjIXFdKN9RXgWbgWMKaiEOBb5Yz\nqLkufwrtQw9pcZ6IzH2lXMmOdfePAyPu3g+8HTimvGHNbQcfnB27n8Xjj8forc6xMBGpIaUki1zU\nFTV6Y+kFec9lCvPmwfLloXWRy8EDD8zsNF4RkX2tlGTxX8BvgcVm9l+E+1lcXNaoqsCRR453Rd1/\nv7qiRGRuK3oVc/fvEgadPws8Apzp7qWU+6hp+cnioYfiDA1VMBgRkWeo0D24z5m0abTn/WgzO9rd\nv1O+sOa+BQtyLF6cY/PmGCMj8OCDcZ77XM2KEpG5qdDU2RdHPw8hzIC6GsgAZxAWxSlZFHHkkVk2\nbw7jFX/+s5KFiMxd0yYLd38ngJndADzP3bdGr9uBn85MeHPbkUdmuO66kCzWrk2QzabHZkmJiMwl\npVy6lgLb8173AUvKE051OfDAHK2t4fmuXbBhg6rQisjcVEohwauB35jZTwjJ5Szg/8oa1Rz0rs9f\nP+X2TQ/vz/Yn5gPwwGeeZslhT03Yv+pjp5U9NhGRZ6qU2VAfJty97nBgBfCf7v7JcgdWLVoXjldc\n7+lqq2AkIiJ7r5SWBe5+JXBlmWOpSs3tu4gnsmQzcYb66xnsq6dhnubRisjcouHWMosncrTMH6/3\n0dPVWsFoRET2jpLFDGjtzOuKelpdUSIy95RSovwa4DLgp+4+d2/7VkEtnT0Qy0EuRn9PEyNDSVL1\n6eIHiojMEqW0LD5PWIj3kJl91cyOL3NMVSeZyjBvv77wIhejd6u6okRkbillNtTv3P3dwBHAGuBK\nM7vfzD5oZvVlj7BKtHb2jD3XrCgRmWtKGrMws1OBrwD/DvwK+ACwGPh52SKrMvnjFr3bmsmkNVwk\nInNHKWMWGwnVZi8D3uvuA9H2G4E7yxpdFalvGqaheZDBXQ3ksnH6upsntDZERGazUtZZ/KW735+/\nwcxWuvsa4PnlCas6tXbuZHBXAwA7n25TshCROaNQifIXAQngG2b2bmC0sFEK+B/gsPKHV11aO3t4\n+tFFAPRubSWn+w2KyBxRqGVxOnAKoWjgp/O2p4H/LWdQ1aqxtZ9k/QjpoRTp4SS921oqHZKISEkK\nlSj/FICZvS26W548Q7EY7Le4m60bFwLQ/WRHhSMSESlNoW6oT0UJ4zQze/Hk/e7+rnIGVq3al4wn\ni56uNvr7oampwkGJiBRRqBvqD9HPG2cgjprR2DJIY8sAA72N5LIx/vSnBCtXZiodlohIQYWSxb1m\ndiBww0wFUyval25nwJcBcOedShYiMvsVShY3ATnGZ0HlywEHlyWiGrDf4h089eBScrkYGzbE6OqK\n0dmpqVEiMnsVGuBePpOB1JJkXZqWBT1jZT/uuivBK16hwoIiMnsVHeA2s1VT7dcA9zPTvrR7LFn8\n4Q9xzjgjzJYSEZmNShngvmkmAqk1LQt6SKRCa6K7O8bDD8c49FB1RYnI7DRtNTt3Xx39/DbwS2A7\nsAVYHW2TZyAez7Hf4h1jr++6K1HBaERECita+tTMzgLuAd4OnA/cY2ZnlDuwWtC+dPvY8z/9KcGQ\nbs0tIrNUKXWyPwEc6+5/7e6vA04C/qO8YdWGxpYBFi0KXU9DQ3DffSpbLiKzUylVZ0eAzaMv3H2j\nmRWdumNmceAS4ChgCDjX3dfn7T8TuJBQa2qVu19awjFnA+9z9xNK+XKzXSwGxx2X4eqrw3+Gu+5K\ncNxx2QpHJSKyu0Kzoc6Jnj4KrDazbxMu7G8G7i3hs18DNLj7CWa2ErgIeHX02SngYuB4oA+41cx+\nDryowDHHAPnVb6vCscdmuOaaJLkcPPRQnO3boUMlo0RklinU7/Hi6LEL6AJeCbyKcHEv5YJ9IuGu\nekT3vjgub98RwHp373b3YeAW4OTpjjGz+YS79H2w1C82V7S1gdl4a+KPf9RAt4jMPoUW5b1zun1m\n1ljCZ7cCO/NeZ8ws6e7pKfb1Am3THFMPfBP4MDBQwu+lvb2JZHJuXHQ7O1s4/XTYuDG8fuCBOt70\nptmz5qKzU2XUQedhlM5DUIvnoZTbqr6eMLbQTGhRJIBGYGGRQ3uA/DMajxLFVPtagB1THUMYv1hB\nuOFSA/BsM/svd5+2ldHd3V8ktNmjq6uXZcsgl6tncDAkjTvuGObggyu/5qKzs4Wurt5Kh1FxOg+B\nzkNQzeehUBIsZfrNFwjdP2uBtxDuxf3DEo67ldB1RTT+cF/evrXACjPrMLM6QhfU7VMd4+53uPuR\n7n4q8CbggUKJYi5KpeCYY8aLCd54YynzDkREZk4pyaLb3W8A1gBt0T0uSpmNdBUwaGa3EQazP2Rm\nZ5vZ+e4+QuhWupaQJFa5+xNTHbPH32iOOumk8WTxwANxurpmST+UiAilTZ0dMLPDCK2BU83sesL4\nQkHungUumLR5Xd7+1cDqEo7J378BWFlCzHPOokU5jjgiy9q1cXI5uPnmBK97nYoLisjsUOqivM8A\nvwBeQij5cVU5g6pVp5wy3rq4444EfX0VDEZEJE/RloW738R4McHjzazd3bvLG1bteNfnrx97nsvB\nQw8fxmBvmGx234eeYtHBT+92zKqPnTZj8YmIQGm1ofY3s5+Y2XYz2wz8t5l1zkBsNScWg84Du8Ze\nb3t8Admsxi5EpPJK6YZaBfwGeBZwGKF0+WXlDKqWtS3eQap+BID0cIodm/ercEQiIqUNcHe6+//k\nvb7YzN5eroBqXTyeY/4BW9m8fgkAWzcupH1J96xZpCcitamUlsUdZvam0Rdm9lfAXeULSebvv414\nIpQAGdzVwK7ttbdaVERml0KFBLNAjrBq+zwz+yaQIazk7gbOnZEIa1AilaF96Xa2Pb4AgK6NnbTM\nr84VoyIyNxSqDaWbK1TQggO72LZpPuRi7NrWwkBvA40tg5UOS0RqVCm1oZqAfyGssUgC1wOfdHet\nAiij+qZh2jp72Pl0WP+49bFODjjy8QpHJSK1qpTWw1eAecC7CLdWrQO+Vs6gJFjwrPE1Fjs2tzMy\npJpRIlIZpVx9jnX3o/Jev9fMHihXQDKuqa2fprZ++nc2kcvG6Nq4kKWHPVnpsESkBpXSsoib2dhk\n/+i5ihbNgFgMOg8ab11se3wBwwOpCkYkIrWqlJbFFwnTZ0eL/r0K+Fz5QpJ8rZ07J7Qutjy8pNIh\niUgNKqVlsRp4HfAIsAF4nbuvKmdQMi4Wg8Urxrueujfvx5NPaoWeiMysUloWN7v7EcD95Q5Gptbc\n3kdrZw89Xa2Qi3H11UnOO2+k0mGJSA0pJVnca2ZvA+4g7x7Y7v5Y2aKS3Sw+9Cl6trZALsa6dXEe\neijOihXZSoclIjWilGTxF9EjXw44eN+HI9NpaB6kfUk33U92AHD11Uk+8IFh1YwSkRlRyv0sls9E\nIFLcokM2s2NzOwCPPx7j3nvjHH20WhciUn6FakMtJSzIWwHcAvyTu++YqcBkd3UNIyw4oAtYBsAv\nf5nkuc8dJpGobFwiUv0KzYa6jHDP7L8HGoCLZyQiKWjh8qdpagrPt26NsWaNMoWIlF+hZLHM3T/u\n7r8CzgdeMEMxSQGJVIbTThtfE/nrXycZVH1BESmzQsliePSJu4/kv5bKOvHEDPvtlwNg1y644QbV\njBKR8tqTMuS5skUheySVgpe/fLx1cf31CZ56StOiRKR8Cv1JeqSZPZL3eln0Ogbk3F1TZyvo+OOz\n/P73OTZsiJHNwv/9X4r3v3+YuO5CIiJlUChZHDZjUcgei8XgDW8Y4YtfrCOdDlNpb7opwYtfnKl0\naCJShQrdKW/jTAYie27Rohwve1maa64J/xmvvTbJkUdmWbhQPYYism+p02KOO/XUDMuWheQwMgI/\n+lGSnHKFiOxjShZzXCIBb3zjyNhYxSOPxLntNq29EJF9S8miCixbluO008bHKq6+Osn27RUMSESq\njpJFlTj99DSLFoX+p6EhuPLKlLqjRGSfUbKoEslkmB01WoV23bo4d96p/7wism/oalJFDjoox0kn\njXdH/fSnKbZs0WI9EXnmlCyqzBlnpOnsHO+OuuyylGpHicgzpmRRZerr4ZxzRkilwuuurhhXXKHx\nCxF5ZspWgc7M4sAlwFHAEHCuu6/P238mcCGQBla5+6XTHWNmRwP/DWSi7ee4+5ZyxT7XLV2a4w1v\nGOHyy0PGuO++ODfckJgwY0pEZE+Us1zpa4AGdz/BzFYCFwGvBjCzFOH+GMcDfcCtZvZz4EXTHPMl\n4H3ufo+Z/Q3wj8CHyxj7rPauz19f0vue3L6UrY91AnDTvTm+v/9CDjtMd9YTkT1XzmRxIvArAHdf\nY2bH5e07Aljv7t0AZnYLcDJwwjTHvMndn8qLuWAvfHt7E8mkFqYtWfEkA72N9HU3Qy7GlVfO45//\nGebPL/0zOjtbyhfgHKLzEOg8BLV4HsqZLFqBnXmvM2aWdPf0FPt6gbYCxzwFYGYvBN5LSCzT6u7u\n3wfhz32xOBz43I2s//1hjAyl2Lp1mIsuyvHe9w6PjWkU0tnZQldXb/kDneV0HgKdh6Caz0OhJFjO\nAe4eIP83x6NEMdW+FmBHoWPM7I3A14C/dPeuskVdZVL1aQ583gZi8TDCvWlTjCuvVP0oEdkz5UwW\ntwKvBIjGH+7L27cWWGFmHWZWR2gp3D7dMWb2VkKL4lR3z7/HhpRg3n79LDnsibHXd96Z4Be/UMIQ\nkdKVM1lcBQya2W2EwewPmdnZZnZ+dJvWDwPXEpLEKnd/YppjEsCXCS2On5jZjWb2r2WMuyrN338b\nL3jB+GyoG29M8JvfaFxHREpTtjELd88CF0zavC5v/2pgdQnHAHTs8wBrTCwGZ52Vpr8/xv33h78R\nrr02SX09nHKKptSKSGFalFdD4nF429tGMBufPvvznydZs0YtDBEpTMmixiST8I53jLB8+XjC+PGP\nk/zxj/qnICLT0xWiBtXVwbvfPcIBB4QR7lwOfvCDFPfdp38OIjI1XR1qVGMjnHfeMIsXh4SRzcJ3\nv5tSC0NEpqQrQw2bNw8uuGB4rEptJgOXX57iuusSmlYrIhOUcwW3zDLT1ZQaHkzx6EMHM9TXAMBN\n90DHt7ex7IhNXPZPp81kiCIyS6llIdQ1jHDo8euZ175rbNv2J+az4e6DdS8MEQGULCSSSGVY/vxH\n2G9J99i23m0tXHJJHTt3FjhQRGqCkoWMicdzHHDkYyxcPn6rkCeeiPHlL9excaNuzypSy5QsZIJY\nDBYfupn9n/04sVgY5d6xI8ZXvlLHddclyOp2GCI1SQPcMqWOZdtJNYzQULeMwcEwtfaaa5I8+GCc\ns88eoa2t0hGKyExSspBptczvZePAdTz26IH075wHhJlS3/pZmgOOfJzWzp5pj131Mc2iEqkm6oaS\nguoahznk+PVhHCPqlsqMJNlwz3KeWLeMbEZjGSK1QMlCihodxzj42IdJNYyMbd/2+AIevP1werpa\ntYhPpMopWUjJmtv7WLHSaVs4Ppd2eKCODfcsZ8M9yxnqr6tgdCJSTkoWskeSqQwHPm8D+z/7cRKp\n9Nj23q2tPHj74Wxev1hdUyJVSMlC9lgsFmZL2YvW0bH/trGxjFw2xtOPLsJvO5w//jGuabYiVUTJ\nQvZaMpVh/yM2cegLHqKprX9s+8hgHZdfnuILX6jjD39Q0hCpBkoW8ow1tQ5wyPEPsf+zHydZN941\n1dUV4/vfD0njrruUNETmMq2zkH1itGuqbeFOuh7rpKFh6VgRwq6uGD/4QYrf/jbHqadmOOaYDPX1\nlY1XRPaMkoXsU4lUhsWHbGbTyK/Z+vQCtj7WSSY9fo/vH/8GEskM+y3pZv7+W2loHhrbp4V8IrOX\nkoWURSKVYdEhW1hw4Fa2Ph4ljZGQNDLpBNseX8C2xxcwr30X8/ffRutClbYVmc2ULKSsEqkMiw7e\nwoIDu9j+xHy2b5rPUP94H1RfdzN93c0k69L8+MdJjjkmw/LlOeIaTROZVZQsZEYkklk6n9XFggO7\n2LW9me2bFkQrv8OajPRwkttvT3D77QlaW+GoozIcdVSGgw7KEdOyDZGKU7KQGRWLQcv8XbTM38XI\nYDK0Np6Yz8hQauw9PT1w880Jbr45wX775Tj88CyHH55lxYosDQ0VDF6khilZSMWkGtIsOmQLCw/e\nQt+OebzwBYv4058S7Bq/uys7dsRYsybBmjUJ4nE46KAsRxyRxSzL0qVqdYjMFCULqbhYLNSdev3r\n07z2tWnWr49z991x7r8/Qf/4Wj+yWXjkkTiPPBLn6quhsTEkj+XLw+OAA3KkUtP/HhHZe0oWMmu8\n6/PXT3gkCylDAAALKUlEQVSda4S+4Xns2tZC77ZWBnoadz/o9vGnsXiOxpZ+GlsHaGztp7FlgIZ5\ng8SmGCzXNF2RPaNkIbNWLB5aHM3tfSw+dDMjQ8kocbSwa3sz6eGJzYhcNkb/znljN2oKn5GjoXkg\nJI7mQRqaB6lvGiSXQ11YIntAyULmjFR9mval3bQv7SaXC+XR+3fMoy96DPXtPvqdy8YY6GlioKdp\nwvZPfrKehQtzLFyYZeHCHPPn5+joCD+bmnb7GJGap2Qhc1IsBvVNw9Q3DdO+tBuA9HCC/p3zGOht\nZKC3kcHeRoYHpr7HxsAAbNwYY+PGxG77mpqgoyNHe3t4POtZAHHa2nLRAxK7HyZS1ZQspGok6zK0\ndvZMuDd4eiTBYG8jAz2NDPY1MNTXwGBf4cJU/f3Q3x9j06bQT9XYCAMDocvrpnuehFiOZCpDsi5N\nsm4k/KyPnkfbE8kMibo0yVSGb33iZC0ylDlPyUKqWjKVobljF80d4/Nxczm48O8W8vTTcbZsibFt\nW4ytW2Ns3x6ej4wU+ECAXIz0cJL0cBIovvDjH3rqaWiAxsbc2M/GxvGfdXVQXx/21dfnqK8neuSo\nq4O6utGfkEpprEUqo2zJwsziwCXAUcAQcK67r8/bfyZwIZAGVrn7pdMdY2aHAt8CcsD9wHvcXQWv\nZa/EYtDWBm1tWVasmLgvl4Ndu2Dr1hg7doRHLlfHY49l2bkzRnLtSEgSudKv2Llc6PYaGBg9Zu+v\n9rEYJJMhaaRSIYmMvk4mc9HP0W05Eonx1+F52Db6SCYhHodEIjf2nnh8dNv4vlgM+vqguzs2tn/8\nEcqzxGJM+DnVtlhMyW6uKmfL4jVAg7ufYGYrgYuAVwOYWQq4GDge6ANuNbOfAy+a5pgvAp9w9xvN\n7GvRtqvKGLtUucnTdEv17JPDxX+0ZZEeSpEeTjIynCQ9nCIznCQ9kiAzkiQzkiA9kgxdV3vhlKOX\n7rYtl4OREaLWz+SrbnmvwqE7bt/cZ300aUyVSPKTyej28Dw34dj8904+bqrXk7cX+zn52FHNzdDX\nVzfpPbkp31vK5+3t9uksXZrjJS9J7/OJGuVMFicCvwJw9zVmdlzeviOA9e7eDWBmtwAnAydMc8yx\nwE3R818CL0PJQiokFgszs1L1aWgZLPr+XC5U2s2mE2RGEmTyf6YTZDNxMuk42eh5NpMgk45zxyPr\no9fjj1x29gx+TJXMSpXLhcee3RBrZpok+zK5V8KDD0Jra45TTsns088tZ7JoBfLrTmfMLOnu6Sn2\n9QJt0x0DxNw9N+m90+rsbHlG/6pWX/TqZ3K4iMxpB1U6gH1g37QA85Xzz5QeoCX/d0WJYqp9LcCO\nAsdkp3iviIjMkHImi1uBVwJE4w/35e1bC6wwsw4zqyN0Qd1e4Ji7zezU6PkrgJvLGLeIiEwSy+Vy\nxd+1F/JmNj2P0Nn4TuD5QLO7fz1vNlScMBvqq1Md4+7rzOww4FJC22otcJ6779sOORERmVbZkoWI\niFSP2TO1QkREZi0lCxERKUrJQkREilJtqEix8iTVKFpJv4owsbwe+AzwAFOUVjGz84C/IZRn+Yy7\n/6ISMZeTmS0E/gCcTvie36LGzoOZ/RPwKsJkkksIi2G/RQ2dh+j/i28T/r/IAOdRo/8e8qllMW6s\nPAnwMUKpkWr3VmCbu58EnAF8hfHSKicRZqS92swWA+8nlGN5OfA5MytcunWOiS4Q/wsMRJtq7jxE\n09NfSPh+pwAHUIPngTB9P+nuLwQ+DXyW2jwPEyhZjJtQngQ4rvDbq8KPgE9Gz2OEv44ml1Z5KfAC\n4FZ3H3L3ncB6wvTmavKfwNeA0VoPtXgeXk5Y23QVsBr4BbV5Hh4EklFvQyswQm2ehwmULMZNV2qk\narn7LnfvNbMW4MfAJ5i6tMp05Vmqgpm9A+hy92vzNtfceQAWEP5IOgu4ALicUEWh1s7DLkIX1DrC\n+q4vU5v/HiZQshhXqDxJ1TKzA4AbgO+6+/eZurTKdOVZqsW7gNPN7EbgaOA7wMK8/bVyHrYB17r7\nsLs7MMjEi1+tnIcPEc7DYYQxzG8zsdhSrZyHCZQsxhUqT1KVzGwR8GvgH919VbR5qtIqdwAnmVmD\nmbURqgbfP9Pxlou7n+zup7j7qcA9wDnAL2vtPAC3AGeYWczMlgLzgOtq8Dx0M95i2A6kqMH/LybT\nCu7IdKVGKhtVeZnZl4A3Eprboz5AaHZPKK0Szfo4n/AHxr+7+5UzHe9MiFoXFxBaWLuVmKn282Bm\nXwBeTPh+HwcepcbOg5k1E2YJLiF87y8Bd1Fj52EyJQsRESlK3VAiIlKUkoWIiBSlZCEiIkUpWYiI\nSFFKFiIiUlRVr1CW6mVmBxHKMjwQbRotzfBtd/+XffD57wBOdfd3PNPP2heidQ/fcPdXFnjPpwDc\n/VOTtp8KfCpaR1LK71pFWJT6juh1PWEd0qfd/ed7Hr1UAyULmcuedPejR19EF9SHzOwKd19bwbj2\nOXd/kmjR6Az4AHCPmb3W3a8iFNW8VYmitilZSDVZQlhQ2RvV9fof4DnAIsCB10XPryKstD0G2AKc\n5e7bzexthPpYPcBGQo2g0RX9XwIagK3A37j7+mgB392EonKNwPsIVUiPBC5294tHA4vieQw4xt23\nmFlHFMOzCCWu30ZYMZ0F3ujua81sA/B7QgmStwE/dPeDzOw5wH8DzYSyJBe5+5ejX/UCM/t9tO/r\n7v6l/BNkZodG52U+0A+8z93vzn9PVC/sbcBPzKwdOCF6SA3TmIXMZUvN7B4zW2dmWwn343itu28i\nlNoejkrOH0q4mI/+ZX4U8EV3fw6hls9bolbJF4CTCRfGFgAzqwOuAN7r7kcRKtP+ID8Id38u8F3C\nBfz1wEnAhZPekyZU+T0r2vR64KdRXK8hdHk9J9r2d3mH/tLdDXg6b9u5hHsnHE9Ybf3ZvH1LgNOi\n7/BeMzuaib4N/IO7P5+w8viK3c5qiPc2wv0bvg682d2Hp3qf1A4lC5nLRruhnk24WNcB1wO4+++A\nS8zsPYRWwQrCX9sAT+f9NX0/0EFILre5+5bowv69aP9hQLe73xl97o+AQ6NaQBDKVUNoiaxx9353\n3wjsN0W83wXeFD1/M/A9d+8BzgbeZGafA87MixNCy2KyjwAN0Y2KPjvp/Ve4e1/0uasJ96UAxspY\nHA9cZmb3AN8Hms1s/uRfYGaJ6JxsBV42RQxSY5QsZM5z9yzw94Qupo8CmNmrCCW2+4HLgN8Ruqgg\nVFMdlYu255j4/8NoxeGp/h+JAYnoef5f3AWrFLv7XUCHmR0P7O/ut0VVf28nJJdfEv6aj+UdNrDb\nB8EPgdcSBvc/Pmlffgwxwr0YRiWAQXc/evQB/AWhWN5kFxIK6r0E+LSZHVHou0n1U7KQqhC1Bj4K\nfDy6g9lLCX38lwGbCd1LiQIfcQuw0syWRUUl3zj60cD86AKPmb0B2OjuU11gS3E54Y58o90/xwPr\no/GN3xMqmhaKE8JtXy90958RtRyilgDAX5tZfTTWcCah/Hz4IuEGPQ+Z2VujY04nJNEJzOwU4N3A\nu939z4S7xV0edclJjVKykKrh7r8C1hDGLi4F3mxmdwM/ibYvL3DsFsIA9W8Jpad7ou1DhMTxFTO7\nH3gv44lkb3yPMGA92s31ayBuZg9EMW4oFGfkU8AtZvZHwt3t8o/ZSJjmeguhCurkWWFvAc41sz8B\nnyMMpo9VE40G3r9LSBRbo81fIrQ+/m1PvqhUF1WdFRGRotSyEBGRopQsRESkKCULEREpSslCRESK\nUrIQEZGilCxERKQoJQsRESnq/wMMegB84K8KtQAAAABJRU5ErkJggg==\n", 88 | "text/plain": [ 89 | "" 90 | ] 91 | }, 92 | "metadata": {}, 93 | "output_type": "display_data" 94 | } 95 | ], 96 | "source": [ 97 | "#plt.hist(exponential_sim,bins=20)\n", 98 | "plt.hist(exponential_sim,bins=20,normed=True);\n", 99 | "plt.plot(x, y,'b-', lw=3, alpha=0.6, label='exponential pdf');\n", 100 | "plt.xlabel('Random variable X');\n", 101 | "plt.ylabel('Probability density f(x)');" 102 | ] 103 | } 104 | ], 105 | "metadata": { 106 | "kernelspec": { 107 | "display_name": "Python [conda root]", 108 | "language": "python", 109 | "name": "conda-root-py" 110 | }, 111 | "language_info": { 112 | "codemirror_mode": { 113 | "name": "ipython", 114 | "version": 3 115 | }, 116 | "file_extension": ".py", 117 | "mimetype": "text/x-python", 118 | "name": "python", 119 | "nbconvert_exporter": "python", 120 | "pygments_lexer": "ipython3", 121 | "version": "3.5.3" 122 | } 123 | }, 124 | "nbformat": 4, 125 | "nbformat_minor": 2 126 | } 127 | -------------------------------------------------------------------------------- /simulation/simulate_linearly_related_random_variables.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Simulated linearly related random variables" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Delivery times $X_1$ and $X_2$ for two products are related through a linear transformation:\n", 15 | "$\\begin{bmatrix} X_1 \\\\ X_2 \\end{bmatrix} = \\begin{bmatrix} 2 & 2 \\\\ 1 & 4 \\end{bmatrix} \\begin{bmatrix} Z_1 \\\\ Z_2 \\end{bmatrix} + \\begin{bmatrix} 15 \\\\ 20 \\end{bmatrix}$
\n", 16 | "where $Z_1$ and $Z_2$ both follow standardized normal distribution N(0,1), and are independent of each other. $Z_1$ and $Z_2$ indicate conditions related to certain raw materials. This notebook simulates the values $X_1$ and $X_2$." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "# import libraries\n", 28 | "import numpy as np\n", 29 | "import matplotlib.pyplot as plt\n", 30 | "# set to plot automatically\n", 31 | "%matplotlib inline" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": { 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "# set random state so repeatable results\n", 43 | "np.random.seed(42)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 3, 49 | "metadata": { 50 | "collapsed": true 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "# Draw conditions for materials\n", 55 | "samples=1000\n", 56 | "Z = np.random.randn(2,1000)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 4, 62 | "metadata": { 63 | "collapsed": true 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "# calculate X through dot product and addition (broadcasting)\n", 68 | "X = np.dot(np.array([[2,2],[1,4]]),Z)+np.array([[15],[20]])" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 5, 74 | "metadata": { 75 | "collapsed": true 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "# store values in variables for X1 and X2\n", 80 | "X1 = X[0]\n", 81 | "X2 = X[1]" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "# Detect outliers using Mahalanobis distance" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "The Mahalanobis distance can be defined as
\n", 96 | "$\\sqrt {({\\vec {x}}-{\\vec {\\mu }})^{T}S^{-1}({\\vec {x}}-{\\vec {\\mu }})}$
\n", 97 | "This can be used to detect outliers as only 5% of **squared** Mahalanobis distances are expected to be greater than 5.99 see 'use of Mahalanobis distance for detecting outliers and clusters in markedly non-normal data' available at http://www.dtic.mil/dtic/tr/fulltext/u2/a545834.pdf" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 6, 103 | "metadata": { 104 | "collapsed": true 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "# calculate covariance matrix: cov_matrix\n", 109 | "cov_matrix=np.cov(X1,X2)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 7, 115 | "metadata": { 116 | "collapsed": true 117 | }, 118 | "outputs": [], 119 | "source": [ 120 | "# calculate inverse covariance matrix: inv_cov\n", 121 | "inv_cov_matrix=np.linalg.inv(cov_matrix)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 8, 127 | "metadata": { 128 | "collapsed": true 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "# calculate mean of random variable X: mean_X\n", 133 | "mean_X=X.mean(axis=1)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 9, 139 | "metadata": { 140 | "collapsed": true 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "# calculate X - mean: part1\n", 145 | "part1 = np.transpose(X)-mean_X" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 10, 151 | "metadata": { 152 | "collapsed": true 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "# dot product of part 1 with the inv_cov_matrix: part2\n", 157 | "part2 = np.dot(part1,inv_cov_matrix)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 11, 163 | "metadata": { 164 | "collapsed": true 165 | }, 166 | "outputs": [], 167 | "source": [ 168 | "# calculate squared mahalanobis distance: mahalanobis_distance_sq \n", 169 | "mahalanobis_distance_sq = np.sum(np.multiply(part2,part1),axis=1)" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 12, 175 | "metadata": { 176 | "collapsed": true 177 | }, 178 | "outputs": [], 179 | "source": [ 180 | "# only 5% of squared mahalanobis distances are expected to be greater than 5.99\n", 181 | "# create mask for outliers\n", 182 | "mask = mahalanobis_distance_sq>5.99\n", 183 | "# filter outliers\n", 184 | "X1_outliers = X1[mask]\n", 185 | "X2_outliers = X2[mask]\n", 186 | "# remove outliers\n", 187 | "X1_new = X1[~mask]\n", 188 | "X2_new = X2[~mask]" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 13, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "data": { 198 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAFpCAYAAABnHGgVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xt4VPW1P/73mmEwA9UEEBUSEOzDgSrkgiFiAbVQhArK\npYpaPAdbLW2//dlq20BoBaP1W1BQrLanPWi9VK2HFDHihaICfhUeRYMJQURqVRQC5Z4oZDCTZP3+\n2DPDTLLnft95v54nT5I9e2Y+GcKaT9Zn7fURVQUREVmLLd0DICKixGNwJyKyIAZ3IiILYnAnIrIg\nBnciIgticCcisqCIg7uI2EWkVkRe9HzfW0ReFZGPPJ97JW+YREQUjWhm7j8HsNPv+woA61V1CID1\nnu+JiCgDRBTcRaQAwBQAj/gdngbgCc/XTwCYntihERFRrCKduT8AYB6Adr9jZ6vqfs/X/wZwdiIH\nRkREsesW7gQRmQrgoKpuFZHLzM5RVRUR0z4GIjIXwFwA6Nmz54XDhg2LY7hERF3P1q1bD6tq32ju\nEza4AxgD4CoRuQJADoAzROQpAAdEpJ+q7heRfgAOmt1ZVVcAWAEApaWlWlNTE834iIi6PBH5LNr7\nhE3LqOoCVS1Q1UEArgOwQVVvALAGwBzPaXMAPB/tkxMRUXLEU+e+BMBEEfkIwLc93xMRUQaIJC3j\no6qvA3jd8/URABMSPyQiIopXVMGdKFXcbjf27t2LkydPpnsoRCmTk5ODgoICOByOuB+LwZ0y0t69\ne3H66adj0KBBEJF0D4co6VQVR44cwd69ezF48OC4H4+9ZSgjnTx5En369GFgpy5DRNCnT5+E/bXK\n4E4Zi4GduppE/s4zuBPF6IorrkBjYyMaGxvx3//9377jr7/+OqZOnWp6n5tvvhkffPBBxM9x7Ngx\nzJgxA4WFhSgrK8P777/vu23QoEEYMWIEiouLUVpa6js+f/58FBYW4r/+6798x5566ik88MAD0fx4\nMbn++utRWFiI5cuXY9GiRXjttdc6nRPq9Ummffv24eqrrw573u9+97sUjAa47LLLkNTrflQ1ZR8X\nXnihEkXigw8+SPcQIvbpp5/qBRdc4Pt+48aNOmXKlIQ89q9+9SutrKxUVdWdO3fq+PHjfbede+65\neujQoYDzGxsb9dvf/raqqt50001aX1+vzc3NOn78eG1paUnImILZv3+/fv3rXw97XiJfn2To2bNn\n1PdpbW2N+j6XXnqpvvvuu52Om/3uA6jRKOMtZ+5kCdW1DRizZAMGV7yEMUs2oLq2Ia7HW7p0KR58\n8EEAwG233Ybx48cDADZs2IDZs2cDMGbOhw8fRkVFBT7++GMUFxejvLwcAHD8+HFcffXVGDZsGGbP\nng3j/2fgbO1rX/safvOb36CoqAijR4/GgQMHOo3jgw8+8D33sGHDsHv3btPzvGw2G9xuN1QVzc3N\ncDgcWLZsGW655ZaQFRj33HMPRowYgaKiIlRUGA1e6+rqMHr0aBQWFmLGjBk4duyY72eYP38+ysrK\n8B//8R948803AQCXX345GhoaUFxcjDfffBM33ngjVq1aBQD4xz/+gWHDhmHkyJFYvXq173lPnDiB\nH/zgBygrK0NJSQmef964FvLxxx/HzJkzMXnyZAwZMgTz5s3z3ecf//gHRo4ciaKiIkyYMCHk4/jb\nvXs3hg8fHvLxKyoq4HK5UFxc7Pt3fuqpp1BWVobi4mL86Ec/Qltbm+/f75e//CWKioqwePFiXHPN\nNb7n8v/r5Cc/+QlKS0txwQUX4I477gj6b5Bw0b4bxPPBmTtFKpqZ+3Pv7dVht6/Vc+e/6PsYdvta\nfe69vTE//1tvvaVXX321qqqOHTtWR40apS0tLVpZWal//vOfVfXUzNls5n7GGWfonj17tK2tTUeP\nHq1vvvmmqgbO1gDomjVrVFW1vLxcf/vb33Yax4IFC/TWW29VVdUtW7ao3W7XmpoaVVUdNGiQFhUV\n6ciRI/V//ud/fPe55557tKioSH/xi1/ovn37ws6SX375Zb344ov1xIkTqqp65MgRVVUdMWKEvv76\n66qqunDhQv35z3/u+xl+8YtfqKrqSy+9pBMmTFDVzn/BzJkzR//+97+ry+XSgoIC/ec//6nt7e16\nzTXX+Ma0YMECffLJJ1VV9dixYzpkyBA9fvy4PvbYYzp48GBtbGxUl8ulAwcO1M8//1wPHjyoBQUF\n+sknnwSMNdjj+PMfX7DHVw2cuX/wwQc6depU3189P/nJT/SJJ57w/futXLlSVVXdbrcOGDDA95w/\n/vGPfePxjrG1tVUvvfRS3bZtm+915MydKISl63bB5W4LOOZyt2Hpul0xP+aFF16IrVu34osvvsBp\np52Giy++GDU1NXjzzTcxbty4sPcvKytDQUEBbDYbiouLsXv37k7ndO/e3Te7u/DCC03PqaioQGNj\nI4qLi/HQQw+hpKQEdrsdALBp0ybU1dVh7dq1+OMf/4g33ngDADBv3jzU1dXhvvvuw8KFC3HXXXfh\nkUcewaxZs3D33Xd3eo7XXnsN3//+99GjRw8AQO/evdHU1ITGxkZceumlAIA5c+b4Hh8AZs6cGXLc\n/j788EMMHjwYQ4YMgYjghhtu8N32yiuvYMmSJSguLsZll12GkydP4vPPPwcATJgwAbm5ucjJycH5\n55+Pzz77DG+//TYuueQSX6lg7969wz5OMGaP39H69euxdetWjBo1CsXFxVi/fj0++eQTAIDdbsd3\nv/tdAEC3bt0wefJkvPDCC2htbcVLL72EadOmAQCqqqowcuRIlJSUYMeOHVGtucSDde6U9fY1uqI6\nHgmHw4HBgwfj8ccfxze/+U0UFhZi48aN+Ne//oVvfOMbYe9/2mmn+b622+1obW01fQ5vdUSwc844\n4ww89thjAIy/sgcPHozzzjsPAJCfnw8AOOusszBjxgy88847uOSSS3z3ra2thapi6NChWLBgAdat\nW4fvf//7+OijjzBkyJAoXo3gP1+wcUdKVfHss89i6NChAce3bNkS0WsY7nFCieTxVRVz5szB4sWL\nO92Wk5Pje6MFgOuuuw5/+MMf0Lt3b5SWluL000/Hp59+imXLluHdd99Fr169cOONN6bswjzO3Cnr\n9c9zRnU8UuPGjcOyZctwySWXYNy4cfjzn/+MkpKSTuVqp59+Or788su4niuYxsZGtLS0AAAeeeQR\nXHLJJTjjjDNw4sQJ33OeOHECr7zyii+f7LVw4UL89re/hdvt9uWJbTYbmpubA86bOHEiHnvsMd/x\no0ePIjc3F7169fLl05988knfLD5a3rWCjz/+GADwzDPP+G6bNGkSHnroId+aRG1tbcjHGj16NN54\n4w18+umnvrHG8jihOBwOuN1uAMbsftWqVTh48KDv+cxm+ABw6aWX4r333sPDDz+M6667DgDwxRdf\noGfPnsjNzcWBAwewdu3amMcVLQZ3ynrlk4bC6bAHHHM67CifFPkszsy4ceOwf/9+XHzxxTj77LOR\nk5NjmpLp06cPxowZg+HDh/sWVBNl586dGD58OIYOHYq1a9fi97//PQDgwIEDGDt2LIqKilBWVoYp\nU6Zg8uTJvvtVV1ejtLQU/fv3R15eHoqLizFixAicPHkSRUVFAc8xefJkXHXVVSgtLUVxcTGWLVsG\nAHjiiSdQXl6OwsJC1NXVYdGiRTH9DDk5OVixYgWmTJmCkSNH4qyzzvLdtnDhQrjdbhQWFuKCCy7A\nwoULQz5W3759sWLFCsycORNFRUW49tprY3qcUObOnYvCwkLMnj0b559/Pu6++25cfvnlKCwsxMSJ\nE7F//37T+9ntdkydOhVr1671pduKiopQUlKCYcOG4Xvf+x7GjBkT87iiJd53ulRgP3eK1M6dOyNK\nf3hV1zZg6bpd2NfoQv88J8onDcX0kvwkjpAoOcx+90Vkq6qWBrmLKebcyRKml+QzmBP5YVqGiMiC\nGNyJiCyIwZ2IyIIY3ImILIjBnYjIghjciRLg8ccfx759+3zf+zcI87YGJkolBneiBOgY3P29/PLL\nyMvLi/ixvFeTEsWDwZ2sob4KWD4cqMwzPtdXxf2Q999/P4YPH47hw4fjgQceCGgZCwDLli1DZWUl\nVq1ahZqaGsyePRvFxcVwuQJ72nhbAwORtY996623UFFRgfPPPx+FhYX41a9+FffPQl0Pgztlv/oq\n4IWfAU17AKjx+YWfxRXgt27disceewxbtmzB22+/jYcfftjXz7yjq6++GqWlpXj66adRV1cHp9O8\np83OnTuxcuVKbN68GXV1dbDb7Xj66acBGP1hLrroImzbtg3f+MY38Nxzz2HHjh2or6/H7bffHvPP\nQV0Xr1Cl7Lf+LsDdoQOk22UcL5wV00Nu2rQJM2bMQM+ePQEYLW69TbRiHqZf+1gAcLlcvj4r/u1j\nvW1ob7rpJkydOjUtW9JR9mNwp+zXtDe64zFqbGxEe3u77/toW7dG2j62W7dueOedd7B+/XqsWrUK\nf/jDH7Bhw4b4Bk9dDtMylP1yC6I7HoFx48ahuroazc3NOHHiBJ577jl85zvfwcGDB3HkyBF89dVX\nePHFF33nR9L2N9L2scePH0dTUxOuuOIKLF++HNu2bYv556CuizN3yn4TFhk5dv/UjMNpHI/RyJEj\nceONN6KsrAwAcPPNN2PUqFFYtGgRysrKkJ+fj2HDhvnOv/HGG/HjH/8YTqcTb731lulj+rePbW9v\nh8PhwB//+Eece+65Aed9+eWXmDZtGk6ePAlVxf333x/zz0FdF1v+UkaKtuUv6quMHHvTXmPGPmFR\nzPl2onRiy18if4WzGMyJ/DDnTkRkQQzuREQWxOBOGSuV60FEmSCRv/Nhg7uI5IjIOyKyTUR2iMid\nnuOVItIgInWejysSNirq8nJycnDkyBEGeOoyVBVHjhxBTk5OQh4vkgXVrwCMV9XjIuIAsElE1npu\nW66qyxIyEiI/BQUF2Lt3Lw4dOpTuoRClTE5ODgoKYr8+w1/Y4K7G1Om451uH54PTKUoqh8OBwYMH\np3sYRFkropy7iNhFpA7AQQCvquoWz023iEi9iDwqIr2C3HeuiNSISA1nYUREqRFRcFfVNlUtBlAA\noExEhgP4E4DzABQD2A/gviD3XaGqpapa2rdv3wQNm4iIQomqWkZVGwFsBDBZVQ94gn47gIcBlCVj\ngEREFL1IqmX6ikie52sngIkAPhSRfn6nzQDwfnKGSERE0YqkWqYfgCdExA7jzaBKVV8UkSdFpBjG\n4upuAD9K3jCJiCgakVTL1AMoMTn+n0kZERERxY1XqBIRWRCDOxGRBTG4ExFZEIM7EZEFMbgTEVkQ\ngzsRkQUxuBMRWRCDOxGRBTG4EyVKfRWwfDhQmWd8rq9K94ioC4uk/QARdVRfBay/C2jaC+QWAEMu\nB7b9DXC7jNub9gAv/Mz4unBW+sZJXRZn7kTRqq8yAnfTHgBqfK559FRg93K7jDcAojRgcCeK1vq7\nOgfyYJuTNe1N+nCIzDC4E0UrmoCdm5j9MImixeBOFK2gAVsCv3U4gQmLkj4cIjMM7kTRmrDICNz+\nHE6g9AdA7gAAYny+8kEuplLasFqGKFregO1fLTNhEQM5ZRQGd6JYFM5iMKeMxrQMEZEFMbgTpQKv\nXqUUY1qGKNm8Fz3x6lVKIc7ciZLN7KInXr1KScbgTpRswS564tWrlEQM7kTJFuyiJ169SknE4E6U\nbMEueuLVq5REXFAlioRZi9+PXonsIiZe9ERpIKpButklQWlpqdbU1KTs+YgSor4KeP6nQFtL8HMc\nzsxoN9DxTYhvIpYgIltVtTSa+zAtQxTO2vmhAzuQGdUvZn3mX/gZa+q7KAZ3onBcRyM7L93VLyy5\nJD8M7kSJku7qF5Zckh8Gd6JwnL3Dn5MJ1S8suSQ/DO5E4XznHsDm6HDQ5gn6GdS7nSWX5IelkEQm\nqmsbsHTdLuxrdKF/3pl4oPj/YtTHD2V2FQpLLslP2FJIEckB8AaA02C8GaxS1TtEpDeAlQAGAdgN\nYJaqHgv1WCyFpGxQXduABau3w+Vu8x1zOuxYPHMEppfkp3Fk1FUlqxTyKwDjVbUIQDGAySIyGkAF\ngPWqOgTAes/3RFlv6bpdAYEdAFzuNvyyahsGV7yEMUs2oLq2wbiBrXwpQ4VNy6gxtT/u+dbh+VAA\n0wBc5jn+BIDXAcxP+AiJUmxfo8v0eJvnr9yGRhcWrN6O/D0vYtT2O9jKlzJSRAuqImIXkToABwG8\nqqpbAJytqvs9p/wbwNlB7jtXRGpEpObQoUMJGTRRRGKcVffPc4Y9x+Vuw4D3lrKunDJWRAuqqtoG\noFhE8gA8JyLDO9yuImKavFfVFQBWAEbOPc7xEkXGbIOM1XOBz98Gpt4fsGCa63RABGhsdiOvhwMn\nO6RkgjlLDwFicgPryikDRFUto6qNIrIRwGQAB0Skn6ruF5F+MGb1RJnB7GpNKFDzKN5tH4oF757r\ny6s3uty+M441uxGpfXomCuRw5xvEZvy1wGoVSqOwaRkR6euZsUNEnAAmAvgQwBoAczynzQHwfLIG\nSRS1oLNnxYD3lnZaMI3Fva2z0KzdTZ6iDeztQukWSc69H4CNIlIP4F0YOfcXASwBMFFEPgLwbc/3\nRJkhxFWZZ6nJbDsGa9rHosJ9M/a2n4l2FUDsnU9iDp7SJJJqmXoAJSbHjwCYkIxBEcVtwiIjx47O\nyzwH5cyYH9Yu4quaAYwAj1bg193/jnM0SMEAc/CUBmw/QNZUOAso/QE6rXg6nNgzshxOh8ksOwyn\nw47rLxoQcN+rbJtwj+MRnIMQlWDs7UJpwOBO1jX1fmDmCqP3i18PmFFX/QiLZ44IeVcBkOd0oFcP\nBwRAfp4Ti2eOwN3TR2DxzBHIz3NCYMzYnRJmEw/2dqE04E5M1CV4Sx8bGl2+1ErHFItXfp4TmyvG\nR/bAlXkwS/0AMN5MWC1DCRBL+wE2DiPL69grxhvQzQK7APjWsL4dGoc5UT5pqHlfmdwCz85HHY8P\nAG57P5E/BlFUmJYhyzPrFROMAlj5zh6Ur9qGhkYXFKfaDfj6yfhjm13KUAzuZHnBesUE425XuNsC\nZ/UudxuWrtvV+eTCWUYv9w55faZiKN0Y3MnyIukVEwnTN4n6qsj6p7N7JKUYc+5kKWa58vJJQzv1\nZ49FpzeJYP1rVv8wcDHV7Dx2j6QkY7UMWUaoTTYA+KplYiUCqBrVNOWThmL665PMF1O9HE4jRbP+\nLi66UlxiqZZhcKes51/maMYugvtmFfmqXTrO7mMJ+ALg49O+B5tZV0h/uQM8V6ia/T8ToLIx6uem\nroelkNTlmM3WO2pTxYLV2wEA00vyfR9eY5ZsiDrAK0J0hfTnzcWbztx55SolDxdUKatFWuYYtNoF\nQPmkoaZt2cMJ2hXSn3eRleWSlGKcuVNWi6bMseO5/umZWJKTa9rHAm5gXrcq5MthiAgC0i/eAO5d\nNI2kqoYoQZhzp6wWTUqlVw8HenTvhoZGF2wCtCfwVz8/z4nNVxxmAKekYM6dsptJzXh12xj8enU9\nmt3tAIyKldkXDcTd040KmPJJQ3HbyrqIZt5NzW7fTkuJDOwOm6B80lCgcDyDOWUM5twpM3hrwZv2\nwLuLUevzt+D1VX/wBXbAKEV86u3PcXv1qQXSSON0e/hTTEmYhPzXcrqZ950hSiMGd8oMJnuedms7\niV/Zza/kfGbLqeqT/DBXoMbSu91fuMxlYxT7rhKlCoM7ZQQNsltRfzliety/o2P5pKGdArh3sm0X\nifvKVHuYqXui2hsQJRKDO2WE/ehjenyfmh/3D7jTS/IDNtDIz3Ni9uiBcDrspm19o+GwS6fdl/w5\nHXYj306UYRjcKe2qaxuwpKVzzXizdse9reYLlHYbUHLXKxhc8RLGLNkAANhcMR7Lry0GYOTl452x\nA0DP7t0Cdl8CTr2xeHdnYr6dMhGrZSh5IuyYuHTdLjT41Yz3lyPYp31wb+sso5bcREubosWT6/b2\nW6/57Cie3dqQkKDu1eQynqPjVa1EmY7BnZIjik6I3ouL1rSPxZoW82Aejsvdhme27Ik7DdNRRPn0\nSNv+EqUQ0zKUHCbVL3C7jOMdJGpBMtGBPaJ8ukkJJ174Gfu1U9oxuFNyBKl+MTtuVu0Sb/livPKc\nDuQ4bLhtZR3GLNlgvsUeENWbGFEqMbhTcgTreBjkeI7j1K9intOBxTNHIM/pSMbIQvLW4DS5jKtZ\nw+6hGsWbGFEqMbhTcph0Qmy156DyxHd9FS7VtQ2+lr3H/C4E+qrVuJZ0alG/lAzVvye7dvjsFbSr\nZJRvYkSpwgVVSo4OnRCbnedg0YnvYlVLGYBTs+Ech61TdUuo9ryJZrcJbADaI8jXm3agnLAocOEY\nYDtfyggM7pQ8hbN8QX7ikg1oaAkMji53W9CyxXi2w4vG6ad1Q6MrsvYBpgu/bOdLGYrBnVIimr7r\nXh26oyecABEHdl/nRzN+b2JEmYI5d0q66toG2IL0Z+nhsAXdBSnZOw1E8/js/EjZhjN3Sprq2gZU\nrtkRcnbs3843mDynw/cY3WyC1kQ2Y48QOz9StuHMnZLCWwUTadojlKlF/bB7yRTcMHpgWgI7wM6P\nlH3CztxFZACAvwI4G8ZfsitU9fciUgnghwAOeU79taq+nKyBUnaJdOPqSDz99ucAjGZg6cDOj5SN\nIknLtAL4paq+JyKnA9gqIq96bluuqsuSNzzKVrEsoAajOBXgE0Uk/CYcgNH5sXzSUObbKeuEDe6q\nuh/Afs/XX4rITgD8TaeQ+uc5E1rOmOhkjGjox7xh9Kl9WomyUVQ5dxEZBKAEwBbPoVtEpF5EHhWR\nXkHuM1dEakSk5tChQ2anULarrwKWDwcq84zP9VWm/WIySahl3Dyng4Gdsl7EwV1EvgbgWQC3quoX\nAP4E4DwAxTBm9veZ3U9VV6hqqaqW9u3bNwFDpowSpCvidPvmgA0usoXTYUflVRecOmDyxkWUDSIK\n7iLigBHYn1bV1QCgqgdUtU1V2wE8DKAsecOkjBWiK+L0knxsrhiPG0YPTM/YYjByYO6p/Drb+VIW\nCxvcRUQA/AXATlW93++4f1enGQDeT/zwKONF0BXx7ukjMObrvVM0oPhs/vgozl+41ugAyXa+lMUi\nqZYZA+A/AWwXkTrPsV8DuF5EimGsS+0G8KOkjJAyW26BZ2ZrctyjurYB733elMJBxafZ3Y4Fq7dj\nmn2v+dWzbOdLWSCSaplNgOnvOGvaKaKuiImseU8Vl7sNB+xn4hyYFAGwnS9lAV6hSvEpnAVc+SCQ\nOwCAGJ+vfDCgkVYia95TaV1rETrNa9jOl7IEe8tQXKprG7B03ZnY13gP+uc5UX7ZUEwvzA+4Pent\nHZPgKtsmXGN/A4EDF6Doe+wASVmBM3eKmbd/TEOjy7cd3W0r63B79faA2xO8b3VKzOtWhR7S0uGo\nAh+9kpbxEEWLM3eKmVku3dsqoPTc3rjzhR1Zl2v36i+HzW/gYiplCc7cKWbBcukK4DfPBe6Lmm32\n40zzG7iYSlmCwZ1iFqoN7omW7Jqx222CPKcDAqNZ2L4L53Xa4JuLqZRNmJahmH1rWN+0teFNhPw8\nJ/Y1uoyF4E6dH8cDg3pxb1TKWgzuFJPq2gY8u7Uh3cOIWX6eE5srxoc+iXujUhZjWoZiko0XJvn7\n1jCTJnZsEkYWwpk7xSRbL0zyevrtz/HU25+f2ozDvjnwSltvkzCAs3fKSpy5U0yyfU9Rb+l9Q6ML\nC1ZvR/PaRWwSRpbCmTtFzLgadRf2NbqQ18MBh03gTtOG1ZGwCRDJ8FzuNuS4/m1+I+vaKUtx5k4R\n6Xg16rFmN1ozOLADkQV2r33tfcxvYF07ZSkGd4pIsKtRreLe1llw4bTAg6xrpyzGtAxFJNsXUMNZ\n0z4W0gL8vu8LrGsnS2Bwp4j0z3OiwSIBvofDhmZ35y2ya86YCNy2OA0jIko8pmUoIuWThsLpsKd7\nGHHr1cOB380s7PSzOB12lE8amqZRESUeZ+4UEe+l+UvX7cqKGbzDLoAioJrH6bDjjisvCPhZgrcf\nIMpuDO5kSe42Ra8eDqgCTS73qQBu3wwsvwvTm/Ziem4B8L1FQOGUdA+XKOEY3Cki3lLIbGo5cKzZ\nDafDjuXXFhuz8voqXoVKXYZoCrfJKS0t1ZqampQ9H0XH/yKljqmKMUs2ZEU6xkzP7nbk9eiOlc0/\nRIHNZBOO3AHAbe+nfmBEERKRrapaGs19OHPvSuqrTrWwdfYyjrmOAbkFePfrt2DBu+f6Zubey/IB\nI9+ezaWQJ1racKLFhf6ncXcl6jpYLdNVeFMSTXsAKOA6anxAgaY9GP7e7ZjY9v8C7uJyt2Hpul0A\ngLwejtSPOcH2KXdXoq6Dwb2rWH9X58ZYfpxowbxunVvcemfs2bjJdUf3ts5Cs3YPPMirUMmimJbp\nKiJIPfSXI52P5Tlxe/V2NLqydz9UrzXtYwE3MK9bFfrbjsDGq1DJwhjcu4rcAk9KJrh92rl51qA+\nzqzeSq+jNe1j8apeisXTRrCunSyNaRmr8+4u1LQHgAQ9rVm7497WwBlsrx4OvP3JsSQPMLny85y4\nYfRA5Oc5fZtfL57JwE7Wx5m7lXWs64bCCPAKOHsDANqbj2Gf9sG9rbOMtIWfO668ALeurEvpkBNF\ngFP17URdEIO7lZkuompAXfe4IPXrvXo4ML0kH7+s2oa2LFtNFQCzRw9kYKcujWkZKwu2iOp3PFhD\nsGPNbgyqeCnrArtdBLNHD8Td00ekeyhEacXgbmXB6rf9jk8vycfimSPQs3v2d3wEgDZVPLu1AdW1\nDekeClFahQ3uIjJARDaKyAciskNEfu453ltEXhWRjzyfeyV/uBSVCYuMOm5/JnXdNZ8dxYmW7OkZ\nE47/xVdEXVUkM/dWAL9U1fMBjAbwUxE5H0AFgPWqOgTAes/3lEkKZwFXPmjk2CHG5ysf7FTX/bct\n1il19MrmdglEiRB2QVVV9wPY7/n6SxHZCSAfwDQAl3lOewLA6wDmJ2WUFLvCWWEv0snwfa5j0j/P\nGf4kIgs/1cyvAAAWK0lEQVSLKucuIoMAlADYAuBsT+AHgH8DODuhIyOKQJ7TAbstsH7fYRfuqkRd\nXsTBXUS+BuBZALeq6hf+t6nRN9h0/icic0WkRkRqDh06FNdgKTkcWbys3uhyo63jnx4W/EuEKFoR\n/bcWEQeMwP60qq72HD4gIv08t/cDcNDsvqq6QlVLVbW0b9++iRgzJUB1bQPGLNmAwRUvodViwdDd\nruYLqt6rdSvzjM/1nRulEVlF2Jy7iAiAvwDYqar3+920BsAcAEs8n59PyggpoaprG1C5ZkdgIzCL\nBXfAZEGVuzBRFxPJzH0MgP8EMF5E6jwfV8AI6hNF5CMA3/Z8TxnMu1WeFTo8htNpQdXsal23yzhO\nZEGRVMtsQvCOUxMSOxxKpqXrdmXVHqixcjrsnRdUI7hal8hK2FumCwlX+22T7CyLnGbbhPndq3CO\nHsZB6Ys9I8sxqmRy4EnBWh5zFyayqCyuk+hCErQQGK72O6db9v06XGXbhMWOR9Afh2ET4Bwcwqjt\nd3R+jSK8WpfIKrLvf3NX03HvU+9CYAwBPliTMK9md3scA02Ped2q0ENaAg+a5dIjvFqXyCpEU9j1\nr7S0VGtqalL2fJbg22ijA7+2vcFU1zZg6bpd2NfoQv88py8P7T1mE8m6ro8dfXLa92AzXRESoLIx\n1cMhSgoR2aqqpdHchzP3TBfjQqC3Mqah0QUF0NDowoLV2wEAmyvG49MlU9CeRYE92Ir+Pj3T/Abm\n0qmLY3DPdBG07TVjVhnTsVtirtMR9/BS5Ztf7418kzWDe1tnoVm7Bx5kLp2IwT3jxbgQaLa7EmBU\nzFTXNqD4zleyqt599xEXNleMx+4lU3DD6IGwizGXf0nH4cVzK3zbBgIAurFpGBFLITOdd8Fv/V1G\nKia3wAjsIRYCq2sbvDuldpLXw4EFq7dnXb27fxnn3dNHBO60VH8ceMHvzcx1lFefUpfH4J4NImjb\n62/pul1BOwo0utzIolS7T8gUUqirTxncqYtiWsaCQl2slI2BHQBOtLQG3zqPV58SdcLgbkFW3KjC\n3Rak0yMQ86IzkZUxLWMBHevZB/VxBl1QzTZX2TZhXrcq9JfD2Nd8JlC/uHOqZcKiwI6PACtmqMtj\ncM9y3np27wJpQ6PLMvuHXmXbhCWOR3xXoBbIYWD1XODzt4Gpft2nY1h0JrI6BvcsV7lmR6fKlyxN\nq3di2loACtQ8CgwcHRi8o1x0JrI65tyzWHVtQ1bVqkervxwOcouyDztRGAzuWSzoAqNF7EeQ1gIA\nK2GIwmBwz2JWya37s4tAAOTnObHvwnkI2lWGlTBEITHnnsX651mnKsarXRWfLpni+W48YNtl5Nj9\nVxJYCUMUFmfuWax80lA47MH6JWanTjX6U+8HZq5gH3aiKHHmnsWml+QDAO58YQeONYdfWO3Z3Y4T\nLZnbU8Z071OAlTBEMeDMPVNFsbVej+7dIAje8xww9kdtac3snZa+e2G+7w2LiOLDmXsm8m6t573i\n0ru1HuCbwVbXNqByzY6ISyHbFRm/OcfGDw+lewhElsGZeyYK1eUQp65KtVqNuxWrf4jShcE9E4Xp\ncmi2y5IVWLHhGVG6MLhnojBdDsPNcCULC2iCLqYSUUwY3DNRmK31Qs1wnQ47Zl80EA5b9kT4/Dwn\nFs8cwcVUogRicM9EhbOMWu4gtd3lk4bC6bB3uluvHg4snjkCGz88BHd7Zi+eAsYb0QPXFmNzxXgG\ndqIEY7VMpgpR2+0NhP493MsnDfUdv21lXcqGGav8DmMmosQSTWF5XGlpqdbU1KTs+bqKjpt1NLe0\nRnRRUzI4bIA7gnL63b4WA0QUjohsVdXSaO7DtEyW85ZFNjS6oDA26zh+sjVtbQn8A3uwEQgQfD9U\nIkoIpmUyVX1Vp52FqtvGBMzQvzWsL57ZsgdtHf76crcrejhsaG8H2lRhF+l0TioEe0aFkVJiSoYo\neRjcM5HJFaqtz9+CTe6b0dDyTQDGDP2ptz8P+hDNflPoNlUI0rtDU8BeqHomln4xC8D4NI6IyNrC\npmVE5FEROSgi7/sdqxSRBhGp83xckdxhdhHefjKrf9jpCtVubSdxK/435odWhO49kyx2Ed9eqAW2\nw7AJUGA7jCXd/xKyXw4RxSeSnPvjACabHF+uqsWej5cTO6wuyDtbb9oT9JT+ciSup1AYwTZVnA47\nrr9oAOY7Ou+F6sRX3CqPKInCBndVfQPA0RSMpWsz6yfTwT7tE9dT5Oc5cf1FA+J6jEjlOR3466jP\ncPen1wffC5Vb5RElTTzVMreISL0nbdMr2EkiMldEakSk5tAhdv0LKkyga7Xn4AFcF/PD28RoWxAq\nTx+tUH8DTLdvxqjtdwBNe4Kfx63yiJIm1uD+JwDnASgGsB/AfcFOVNUVqlqqqqV9+/aN8em6gFCB\nLncAuk17CGNn/B/kx9hcq10Tv6Aa6vFubnkq9F8i3CqPKKliqpZR1QPer0XkYQAvJmxEXdWERYEV\nMoARAP3aDkz3HL41zBWo6a6MAYD+thDrA7kDjJ+XuysRJU1MwV1E+qnqfs+3MwC8H+p8ioA30HWo\nbe8YAJeu2xXyYSK9QjSZHHbBSec56OHa3/nG3AHAbfx1IUq2sMFdRJ4BcBmAM0VkL4A7AFwmIsUw\nJoi7AfwoiWPsOkL0k/G2GGgI0+433YEdAKDAjm/cZuTcO6ZmWk4YlUGctRMlFXvLZAFvi4F0btDh\ndNijev78PCc2X3EYWDsfcHUotuqQbiKi0NhbxqKi3XnJrB1wvBbPHBHVYu6+RpcRvLv37Hyj35aB\nRJQcDO5ZIJq9RZ0OG07rduqftYfDhh6O+P+Zp5fkm/aRD1bm6NtQJMyWgUSUHAzuWSCavUW/am0P\n2Djb3ab43cxC7F4yBTeMHhhTCwKnw4YxSzbgtpV1yHHYkOd0QGCkXmaPHtgp4AdsmRdmy0AiSg4G\n9ywQbOeljgRGPbs/d7uics0OVNc24NmtDTGVSLa2q6+l8LFmN75qbcdyzw5Kd08f4UvZeAN+wJZ5\nYbYMJKLk4IJqlqiubcCdL+wIuglHuAXPPKcjYEYfKZt0fsPwing3JZP2xVxMJYpcLAuqDO5Zxr8k\n0tun3Rtkw13cFK1IKmScDjs3tyZKMlbLdAHehc38PCfa/QL79JJ89OrhiPvxe3a3B6RXwlXIuNxt\nYS+sIqLU42YdGarjvqjfGtYXGz88hIZGV0B7gYZGFxas3g4AuOPKC+KavffsbkdzS5tvw20AaG5p\nDXu/aKp5iCg1mJbJQLFctJSf58TmivEYXPFS1IumPRw2uNsV7rZT93TYBBAEHAv33ESUHLGkZThz\nzyCRthgws6/RheraBogA0b5fN5v0LHAHWUXt2JQsoOyRiDIGg3u6eSpJtGkvRmkfXOiehQaMjfph\n+uc5sXTdrqCVLYmiMGbq3nRRRNUyRJRyDO7p5LcRtgDIl8NY4ngEcANr2iMP8A67oHzSUNyW4GoZ\nM0zBEGUHVsukk8nWej2kBfO6RblxtGe2Hs2VrOE4bAKHPfB6VqZgiLIHg3s6BemvEu1G2O52xS+r\ntkWUqxcAD1xb3OmKV4dN0KvHqbYCS68pwtKri4JfeUpEGY1pmXTwXrEZpK6l40bY3kXM/Dxn0ADe\nFuEqqk2M2fjimSMCSi2D5c4ZzImyE4N7qvnl2c00a3fc2xp4ab43sG+uGI/iO1+JqY2AV5sqFqze\njsUzRzB3TmRhTMukmkme3Sd3ABa4bzZdTPWWOp6I4KKicHhVKZH1MbinWtA+5gLc9j5qzphoeqtN\nBJVrdkR0UVEkeFUpkbUxuKeas1fI498a1tf05jbViNMxDrv4eq4Hk8jKGiLKPAzuGWbjh4eivk+e\n0xFQ1bL06iLU3XE5vvn13qbn24BTJY31VcDy4UBlnvG5PsoyTCLKSFxQTTXXsZDHo02XOB12VF51\nAQD4ql+WrtuFms+OYvPHR83vJJ4qmI6Lu017jO8B9lsnynKcuadamG3nIkmX2EUCas8BYMHq7b7d\nkhoaXXj67c+D3t/XosBscZebVxNZAoN7qk1YBNi7Bx6zd/dtOxfJlnrtqvh0yRRsrhiP6SX5WLpu\nV6cOkqGWXe2eWnduXk1kXQzu6dDxgiO/76eX5IfdJKPj7D7aVM7iITuN/HqwtwBuXk2U9RjcU239\nXUB7h6qXdndAKmR6ST42V4w3bRNg1t8lWCrHrFqm/Jw6zNq/1Mivm+Hm1USWwOCeakFTIXs6Vaz4\nz+JD9XcxS+U4HXbMHj0w4L4PXFuMn+ozIS+iwpUPcjGVyAJYLZNquQXBZ81Q04qVE1+1+hZK73xh\nB4DAni/eryPpFYPnQ19ERUTWwOCeahMWhewtA8BXsVLdNgblf98WsCvSsWY3yldtA9A5wEfU5CvY\nmwvz7ESWwrRMqhXOMlIfuQNgnhX3aNqLpet2mW53527T2HvDTFhk5NX9Mc9OZDkM7ulQOMtIgVQ2\neoK8idyCkFUwMfeG6fjmwjw7kSUxLZNuZmkaz0y6/8vB+7fH1RumcBaDOZHFceaebiFm0uWThsJh\n65y68e6ZSkQUTNiZu4g8CmAqgIOqOtxzrDeAlQAGAdgNYJaqBmmaQmEFmUl7F0gr1+zwdYTs1cOB\nO668gDskEVFIomG2ZxORSwAcB/BXv+B+L4CjqrpERCoA9FLV+eGerLS0VGtqahIwbCKirkNEtqpq\naTT3CZuWUdU3AHRsLzgNwBOer58AMD2aJyUiouSKNed+tqru93z9bwBnBztRROaKSI2I1Bw6FH2v\nciIiil7cC6pq5HWC5nZUdYWqlqpqad++5rsMERFRYsUa3A+ISD8A8Hw+mLghERFRvGIN7msAzPF8\nPQfA84kZThfHLe+IKEEiKYV8BsBlAM4Ukb0A7gCwBECViNwE4DMAvCImXtzyjogSKGxwV9Xrg9w0\nIcFj6Rrqq4ze7U17jWZdExYZwTvUlncM7kQUJbYfSKVQs3NueUdECcT2A6kUanYeZuNsIqJoMLin\nSn1V8E06mvayFS8RJRSDeyp40zHB5BawFS8RJRRz7qlglo7x8p+dewO5d8HVu2k2AzwRRYnBPdHM\nqmFCLYr6z85ZDklECcK0TCJ5g3PTHgRsdu3oYX6+s3dg0A614EpEFAXO3BMpWHCWCN9DWQ5JRAnC\n4J5IwYKwtpsfdx0F7uwNaJuxgOrsZRzriOWQRBQlpmUSKVgQFnvw+2ib8blpD9ByHLA5Am9nOSQR\nxYDBPZGC1apfeGPn42baWoDTTmc5JBHFjWmZROpYyujfOwYAtj5+aqYejOsYMP/TpA6TiKyPwT3R\nzDa7rq8Ctv0tfGAHmF8nooRgWiYVQl3EFECYXyeihGBwT4WIShkFKP0B8+tElBBMy6RCboF50zCx\nG2WSHXPzRERxYnBPhQmLAtsKAEb1DCthiChJmJZJBXZ8JKIU48w9VcyqaIiIkoQzdyIiC2JwJyKy\nIAZ3IiILYnAnIrIgBnciIgticCcisiAGdyIiC2JwJyKyIAZ3IiILYnAnIrIgBnciIgticCcisiAG\ndyIiC4qrK6SI7AbwJYA2AK2qWpqIQRERUXwS0fL3W6p6OAGPQ0RECcK0DBGRBcUb3BXAayKyVUTm\nmp0gInNFpEZEag4dOhTn0xERUSTiDe5jVbUYwHcA/FRELul4gqquUNVSVS3t27dvnE9HRESRiCu4\nq2qD5/NBAM8BKEvEoIiIKD4xB3cR6Skip3u/BnA5gPcTNTAiIopdPNUyZwN4TkS8j/M3Vf1HQkZF\nRERxiTm4q+onAIoSOBYiIkoQlkISEVkQgzsRkQUxuBMRWRCDOxGRBTG4ExFZEIM7EZEFMbgTEVkQ\ngzsRkQUxuBMRWRCDOxGRBTG4ExFZEIM7EZEFMbgTEVkQgzsRkQUxuBMRWRCDOxGRBTG4ExFZEIM7\nEZEFMbgTEVkQgzsRkQUxuBMRWRCDOxGRBTG4ExFZEIM7EZEFMbgTEVkQgzsRkQUxuBMRWRCDOxGR\nBTG4ExFZEIM7EZEFMbgTEVlQXMFdRCaLyC4R+ZeIVCRqUEREFJ+Yg7uI2AH8EcB3AJwP4HoROT9R\nAyMiotjFM3MvA/AvVf1EVVsA/C+AaYkZFhERxSOe4J4PYI/f93s9x4iIKM26JfsJRGQugLmeb78S\nkfeT/ZwJcCaAw+keRAQ4zsTJhjECHGeiZcs4h0Z7h3iCewOAAX7fF3iOBVDVFQBWAICI1KhqaRzP\nmRIcZ2JlwzizYYwAx5lo2TTOaO8TT1rmXQBDRGSwiHQHcB2ANXE8HhERJUjMM3dVbRWR/w/AOgB2\nAI+q6o6EjYyIiGIWV85dVV8G8HIUd1kRz/OlEMeZWNkwzmwYI8BxJpplxymqmoyBEBFRGrH9ABGR\nBaUkuGdLmwIR2S0i20WkLpbV6WQRkUdF5KB/GamI9BaRV0XkI8/nXukco2dMZuOsFJEGz2taJyJX\npHOMnjENEJGNIvKBiOwQkZ97jmfUaxpinBnzmopIjoi8IyLbPGO803M8017LYOPMmNfSn4jYRaRW\nRF70fB/165n0tIynTcE/AUyEcaHTuwCuV9UPkvrEMRCR3QBKVTWj6l5F5BIAxwH8VVWHe47dC+Co\nqi7xvGH2UtX5GTjOSgDHVXVZOsfmT0T6Aeinqu+JyOkAtgKYDuBGZNBrGmKcs5Ahr6mICICeqnpc\nRBwANgH4OYCZyKzXMtg4JyNDXkt/IvILAKUAzlDVqbH8f0/FzJ1tCuKkqm8AONrh8DQAT3i+fgLG\nf/q0CjLOjKOq+1X1Pc/XXwLYCePq6ox6TUOMM2Oo4bjnW4fnQ5F5r2WwcWYcESkAMAXAI36Ho349\nUxHcs6lNgQJ4TUS2eq6szWRnq+p+z9f/BnB2OgcTxi0iUu9J26Q9feRPRAYBKAGwBRn8mnYYJ5BB\nr6knhVAH4CCAV1U1I1/LIOMEMui19HgAwDwA7X7Hon49uaAaaKyqFsPodPlTT5oh46mRW8vIWQiA\nPwE4D0AxgP0A7kvvcE4Rka8BeBbArar6hf9tmfSamowzo15TVW3z/L8pAFAmIsM73J4Rr2WQcWbU\naykiUwEcVNWtwc6J9PVMRXCPqE1BJlDVBs/ngwCeg5FSylQHPDlZb272YJrHY0pVD3j+U7UDeBgZ\n8pp68q7PAnhaVVd7Dmfca2o2zkx9TVW1EcBGGHnsjHstvfzHmYGv5RgAV3nW//4XwHgReQoxvJ6p\nCO5Z0aZARHp6Fq0gIj0BXA4gk5ucrQEwx/P1HADPp3EsQXl/IT1mIANeU8/i2l8A7FTV+/1uyqjX\nNNg4M+k1FZG+IpLn+doJo3DiQ2Tea2k6zkx6LQFAVReoaoGqDoIRKzeo6g2I5fVU1aR/ALgCRsXM\nxwB+k4rnjGGM5wHY5vnYkUnjBPAMjD8Z3TDWLG4C0AfAegAfAXgNQO8MHeeTALYDqPf8gvbLgHGO\nhfFnbT2AOs/HFZn2moYYZ8a8pgAKAdR6xvI+gEWe45n2WgYbZ8a8liZjvgzAi7G+nrxClYjIgrig\nSkRkQQzuREQWxOBORGRBDO5ERBbE4E5EZEEM7kREFsTgTkRkQQzuREQW9P8DDCQA+SmX00wAAAAA\nSUVORK5CYII=\n", 199 | "text/plain": [ 200 | "" 201 | ] 202 | }, 203 | "metadata": {}, 204 | "output_type": "display_data" 205 | } 206 | ], 207 | "source": [ 208 | "fig = plt.figure(figsize=(6,6));\n", 209 | "plt.scatter(X1_new,X2_new,label='within 95% confidence interval');\n", 210 | "plt.scatter(X1_outliers,X2_outliers, label='outliers');\n", 211 | "plt.legend()\n", 212 | "plt.xlim(0,40);\n", 213 | "plt.ylim(0,40);" 214 | ] 215 | } 216 | ], 217 | "metadata": { 218 | "kernelspec": { 219 | "display_name": "Python [conda root]", 220 | "language": "python", 221 | "name": "conda-root-py" 222 | }, 223 | "language_info": { 224 | "codemirror_mode": { 225 | "name": "ipython", 226 | "version": 3 227 | }, 228 | "file_extension": ".py", 229 | "mimetype": "text/x-python", 230 | "name": "python", 231 | "nbconvert_exporter": "python", 232 | "pygments_lexer": "ipython3", 233 | "version": "3.5.3" 234 | } 235 | }, 236 | "nbformat": 4, 237 | "nbformat_minor": 2 238 | } 239 | --------------------------------------------------------------------------------