├── .gitignore ├── README ├── data_visualization ├── hex_map_of_states.ipynb ├── matplotlib_football_marker.ipynb └── overlapping_shapes_alpha.ipynb ├── data_wrangling └── get_api_data.ipynb ├── football ├── afcon_example.ipynb ├── another_positional_heatmap.ipynb ├── circle_arc.ipynb ├── comet_lines.ipynb ├── complicated_heatmap_example.ipynb ├── complicated_placement.ipynb ├── corner_heatmap.ipynb ├── goal_heatmap.ipynb ├── grid_no_pad.ipynb ├── heatmaps_complicated.ipynb ├── kloppy_wyscout.ipynb ├── multiple_pizza_plots.ipynb ├── offset_lines.ipynb ├── outliers_movers.ipynb ├── pass_sonar.ipynb ├── pitch_of_radars.ipynb ├── positional_binning.ipynb ├── read_f24.ipynb ├── reduce_points_ramer_douglas_peucker.ipynb ├── rotate_and_count.ipynb ├── scrape_fbref.ipynb ├── sonofacorner_congested_zones.ipynb ├── statsbomb_duckdb │ ├── 360_v1.py │ ├── 360_visible_v1.py │ ├── competition_v4.py │ ├── events_freeze_v7.py │ ├── events_related_v7.py │ ├── events_tactics_v7.py │ ├── events_v7.py │ ├── lineup_v4.py │ └── match_v5.py └── statsbomb_parser.ipynb ├── modelling └── simulate_test_data.ipynb ├── modelling_from_scratch ├── Nadaraya-Watson_estimator.ipynb ├── k-nearest_neighbours.ipynb ├── linear_regression.ipynb ├── logistic_regression.ipynb └── ridge_regression.ipynb ├── neural_networks └── hard-mish.ipynb ├── pysport ├── 01_get_f24_data.ipynb ├── 02_get_fbref_data.ipynb ├── 03_turn_f24_data_to_actions.ipynb ├── 04_what_is_mplsoccer_slide.ipynb ├── 05_penalty_analysis.ipynb ├── 06_good_features_of_mplsoccer_slide.ipynb ├── data │ ├── f24 │ │ └── README.md │ └── fbref │ │ └── README.md ├── old_trafford_google_earth.png └── pysport_presentation.pdf └── simulation ├── simulate_car_wash.ipynb ├── simulate_composition_method.ipynb ├── simulate_correlated_random_variables.ipynb ├── simulate_estimate_of_pi.ipynb ├── simulate_exponential_random_variable_from_uniform.ipynb ├── simulate_linearly_related_random_variables.ipynb ├── 
simulate_normal_random_variable_from_uniform.ipynb ├── simulate_poisson_random_variable_from_uniform.ipynb └── simulate_random_variables.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # ignore binaries for programs and plugins 2 | *.exe 3 | *.dll 4 | *.so 5 | *.dylib 6 | 7 | # ignore desktop.ini 8 | desktop.ini 9 | 10 | # ignore jupyter notebook checkpoints 11 | .ipynb_checkpoints 12 | */.ipynb_checkpoints/* 13 | 14 | # ignore plans 15 | *PLAN.txt -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | # Data science notes 2 | Notes from my data science escapades in Python. 3 | -------------------------------------------------------------------------------- /data_visualization/overlapping_shapes_alpha.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "overlapping_shapes_alpha.ipynb", 7 | "provenance": [], 8 | "authorship_tag": "ABX9TyPRT2m2a1XQw1KIsIwt3HL8", 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "language_info": { 16 | "name": "python" 17 | } 18 | }, 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "id": "view-in-github", 24 | "colab_type": "text" 25 | }, 26 | "source": [ 27 | "\"Open" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 1, 33 | "metadata": { 34 | "colab": { 35 | "base_uri": "https://localhost:8080/", 36 | "height": 287 37 | }, 38 | "id": "o2UuURO81XqE", 39 | "outputId": "cb300235-aa51-4798-9329-ddc621a203f8" 40 | }, 41 | "outputs": [ 42 | { 43 | "output_type": "execute_result", 44 | "data": { 45 | "text/plain": [ 46 | "" 47 | ] 48 | }, 49 | "metadata": {}, 50 | "execution_count": 1 51 | }, 52 | { 53 | 
"output_type": "display_data", 54 | "data": { 55 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAQoAAAD8CAYAAACPd+p5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAY9ElEQVR4nO3da2yc133n8e+fw7lwyBleh6RM8SJZimTLcRJbiWPEWGR7WThGEaPdZtdZoNv0Ai8WMbbdLbCbbIHuYl9ld4G+KBK0azRBGiDIBW3T9SLuummbwinWF8mBbVmSZVGUKImiRGp4G84MhzPDsy84cmiJ1EPxeTg3/j4AoZnh8XPO0OQPz3meM+dvzjlERO6mpdYDEJH6p6AQEU8KChHxpKAQEU8KChHxpKAQEU++g8LMhs3sx2Z2xsxOm9nvbNLGzOyPzGzczN42s0f89isi1dMawDFKwO85535qZgngDTP7kXPuzIY2nwEOV74eA/648q+INADfZxTOuWnn3E8rjzPAWWDotmZPA99y614Fusxsn9++RaQ6gjijeJ+ZjQEfA1677VtDwJUNz69WXpve5BjPAs8CtLe3P3r06NEghygiG7zxxhs3nXMpr3aBBYWZdQB/Afyuc25pp8dxzj0PPA9w/Phxd/LkyYBGKCK3M7PJ7bQL5K6HmYVZD4lvO+f+cpMmU8Dwhuf7K6+JSAMI4q6HAV8Hzjrn/nCLZi8A/7py9+OTwKJz7o5ph4jUpyCmHp8Cfg04ZWZvVl77z8AIgHPuT4AXgaeAcSAH/EYA/YpIlfgOCufcPwLm0cYBX/Tbl4jUhlZmiognBYWIeFJQiIgnBYWIeFJQiIgnBYWIeFJQiIgnBYWIeFJQiIgnBYWIeFJQiIgnBYWIeFJQiIgnBYWIeFJQiIgnBYWIeFJQiIgnBYWIeApqF+5vmNmMmb2zxfc/bWaLZvZm5esPguhXRKojqLoe3wS+CnzrLm1+4pz7pYD6E5EqCuSMwjn3MjAXxLFEpP5U8xrF42b2lpn9tZkdq2K/IuJToLVH7+KnwKhzbtnMngL+ivXK5nfYWHt0ZGSkSsMTkbupyhmFc27JObdcefwiEDazvi3aPu+cO+6cO55KedZOFZEqqEpQmNlgpfQgZvaJSr/pavQtIv4FMvUws+8Anwb6zOwq8F+AMLxfUvBXgX9rZiUgDzxTqR4mIg0gkKBwzn3e4/tfZf32qYg0IK3MFBFPCgoR8aSgEBFPCgoR8aSgEBFPCgoR8aSgEBFPCgoR8aSgEBFPCgoR8aSgEBFPCgoR8aSgEBFPCgoR8aSgEBFPCgoR8aSgEBFPCgoR8aSgEBFP1ao9amb2R2Y2bmZvm9kjQfQrItUR1BnFN4En7/L9z7Be8Ocw68V9/jigfkWkCoLahftlMxu7S5OngW9Vtuh/1cy6zGyfc246iP6leZXLZXK5HLlcjvn5HIVCiXJ5jbU1R0tLC6GQ0dYWpqsrTjy+/tXSohl10KpVUnAIuLLh+dXKa3cEhUoKSiaT4fr1NLOzWTKZVdbW2oA4kUiS1tZWzFowM5xzOLfGzZtFJiZywBxmeRKJCKlUO/v29dHR0VHrt9MUqhUU2+acex54HuD48eMqErRHlMtl0uk0ExOzzM9Da2uKeHyA3t4YlSJz2+Kco1DIc+lShvHxS/T2tnDgQIre3l6dafhQraCYAoY3PN9feU32uHK5zOXL17hwIU2xmKS9fYT+/sSOj2dmxGJxYrE4MMDy8hInT84SiUxx6FAfw8P3KTB2oFpB8QLwnJl9F3gMWNT1CVlcXOTUqcssL3fS3X2M1tZw4H10dCTp6EhSLK5y5sw1pqbO8OEPj5JI7DyM9qJq1R59EXgKGAdywG8E0a80pnK5zMTEFc6fz9DRMUYqtft/tOFwhP7+MZa
XF/nJTy5y9GgXY2P7dXaxTdWqPeqALwbRlzS2bDbLm29OkMl00tf3IKFQqKr9d3R00tZ2jHPnrnD9+hk++tGDxOPxqo6hESlOpWqWlpZ45ZVxSqURUqmRqofELaFQiP7+MQqFIV577TzLy8s1GUcjUVBIVSwuLvLqqxeJRu+no6Oz1sMBIJnspqXlAK++eoFMJlPr4dQ1BYXsuuXlZV5//RLx+CHi8fpa19DRkSQSOciJExPkcrlaD6duKShkVxUKBU6cuEAkcoC2tvZaD2dT7e0JWlpGOXlynGKxWOvh1CUFheyqd9+9RKk0SEdHstZDuatEoouVlT7On5+s9VDqkoJCds2NGzNMTUF3d3+th7ItPT37uHhxlXQ6Xeuh1B0FheyKQqHAqVPTdHWN3tMS7FoyM7q6xnj77auagtxGQSG74ty5SZwbJBqN1Xoo9yQWi1MspjQFuY2CQgKXyWS4erXYMFOO2/X07GNyMk82m631UOqGgkICd/XqLJFIf8NMOW5nZoRCKa5dm631UOqGgkICVSwWuXx5iWSyp9ZD8aWzs49LlxYolUq1HkpdUFBIoG7cmMW5npotzw5Ka2srxWInN2/qDggoKCRAzjkmJm6SSKRqPZRAJJP9TExo+gEKCglQPp8nmw0Ri7XVeiiBaGtrZ3FxjUKhUOuh1JyCQgKz/lmJ5vrItlm7PgOCgkICtLSUo7W1Pj/PsVNmcZaXFRQKCgnM3Fyusldl84jF4qTTWk+hoJBAOOeYn883ZVDMzemMIqiSgk+a2blKycAvbfL9L5jZrJm9Wfn67SD6lfpRLBYpl1ubbg/K1tYwq6vs+fUUvvfMNLMQ8DXgF1kv7HPCzF5wzp25ren3nHPP+e1P6tPa2hrNe4LaUnl/e1cQm+t+Ahh3zk0AVLbkfxq4PSikiRSLRbLZLPl8nmKxTCaTJZtdoVy+QktLC62tYdra2olG25rgLKOF9f2h964ggmKzcoGPbdLun5vZPwHeA/69c+7KJm1UUrBOrayskE7PMTeXY24uRz7vcC6OWRyzVorFGPn8MslklLW1NQqFPIuLaQqFFSKRKLFYnHg8UdmnsrGCo0E/shKoahUA+j/Ad5xzBTP7N8CfAT+3WUOVFKwfzjkWFxeZnJzh2rU8Zr3EYn3EYnE6OiIfaFsorJDLLdPT03/HMQqFPPl8lkxmnpmZqySTPXR3pxrmI+jOrTVcuAUtiKDwLBfonNu4YP5Pgf8RQL+yS9bW1rh+/Qbj47Nks1Gi0RSpVPddPw3a0tLC2lr5jtc3lvjr7k5RLK4yPz/L5OQ5otE2envrf5s8KCsoAjjGCeCwmR1gPSCeAf7VxgZmtm9DCcHPAmcD6Fd2wfLyMqdOXWJhIU4yeZj+/u0txw6HI6ytrVEqlWht3frXar1i1xCp1H0sLc1z/fok7e1J+vv31+UHyVZXC7S1hepybNXkOyiccyUzew54CQgB33DOnTaz/wacdM69APw7M/ssUALmgC/47VeCtba2xuXLU5w9O09b2wj9/V33fIxYLM7KSm5bZwhmRmdnDx0dnczOTnHx4hkGB0fqpubHLSsrOVKp5lobshNBlRR8kfX6ohtf+4MNj78MfDmIviR4uVyOt96aYH6+nd7eB+96RnA39xIUt4RCIQYHR8hmM0xPX6K9PcnAwHDdnOqvrubo6WmuZek7UR//N6RmMpkMr7xynpWVIQYGDuw4JOBWUOxsuXN7e4IDBx6kXC5z5co45fKd1ztqwbkc7e06o1BQ7GHrZf4mCIcPkkx2+z7erTOKnQqFQgwNHSASiXLlyvmah8X62omsihijoNizMpkMr79+iba2Q7S3JwI5ZjQao6UlRDa78zqeZsa+faPEYnGuXBmv6YrI5eVFUqkY4XC4ZmOoFwqKPSifz3PixATR6MHAy/x1d6eYn/e/K9Tg4AjhcISpqYkARrUz+fwsY2PNsVuXXwqKPcY5x+nTl3BuKLAziY2SyR6y2SVKJf8
FdO67b4xyuRRI8Nyr1dUCsViO7m7/U7JmoKDYY65du87MTCtdXX27cvxQKERnZw8LCzd9H2t9GjLG7Ow1isXVAEa3fUtLsxw61Fc3d19qTT+FPSSfz3P69Aw9PaO72k9X1/r0I4jrC9FojJ6eAa5du+R/YNu0/pHyNP39uxOmjUhBsUfcmnKEQkOEwxHv/8CHWKyN9vYkMzNXAzleb+8Azq1VbQqSTl/hyJEeotFoVfprBAqKPWJubo4bN1p2bcpxu4GBYZaXF33dAbnFzBgcHGV29tqu3wXJZBbo7s4yMjK0q/00GgXFHnHp0izt7QNV6+/Wisvp6clA/rhjsTZisThLS/MBjG5zpVKJfP4yDz00qmsTt9FPYw/I5XLMzBSr/jmKjo5O4vGOwKYg3d0pFhZ2b/qRTl/hgQe6SSSCvxvU6BQUe8C1a7OEQqmaFA1en4IsMTc34/tYHR2dlEpFX6s/t5JOT9Pfn9eUYwsKiiZXLpe5eHGezs7emvQfCoUYHf0Qc3M3WFjwV8fTzN6/oxKk+fkZEok0H/nIYU05tqCfSpNbWlqiVOqgtbV2y5DD4QjDw4eZnZ3y/Ufe2dlDJrMQ0Mhgbu4GbW03eOSRw1qqfRcKiia3vJyjpaX2H5OORmOMjh4hnb7BzZvTO96sNhyOYGa+F2A555idvUpn500+/vEjuhXqQUHR5OqpelckEmVs7AjZbIbJyXMUCis7Oo7fT6murOSZmXmX4eEVHnnkCJHI7q4raQYKiiaXTmfrJihgvaDO6OiH6OzsZXLyHOn0jXs+u4jF4uTz977vhXOOdHqaQuE9Hnusn2PHDvnaf2Mv0U+piRUKBQqFFjo762/u3d2dor09yfT0JJnMPH199217Z6xYLH5P1zqcc2SzS2Sz1xgdDXP48AM6i7hH1SopGDWz71W+/5qZjQXRr9zdysoKZtvbHLcWIpEoo6MfoqsrxezsFOPj75BO3/As3xeNtrG66j1tKZVKpNPXmZ19h46OaT75yQGOHTukkNiBapUU/C1g3jl3yMyeAf478C/99i13t74isv53j+7q6qWrq5d8Psv8/CwXLrxDItFFPJ4gFosTjcY+sAakpSW06WrPWzVEVlZyrK5maG1d5ODBbu67737tUuVTtUoKPg3818rjPwe+ambm9nqdtl22/sfUOGWu2traaWtrp1QqsbQ0Rza7RDp9nWJx9f0l3K2tYZxzlMulyvWNNZwrAjnM8iSTEfbvj9PT00FPz8ie32Y/KNUqKfh+m8r2/otAL3DHpgUqKRi0xgmKW1pbWz9QcaxcLrOykqNQyFMulyiXS/T2wtGjRUKhFsLhGPF4D/F4XAumdkndXcxUScHgrP/RNH4V7lAoRHt74v0duUqlEtnsHCMj+2s8sr0jiPj1LCm4sY2ZtQKdgL/1vOJpfV7f+EFxO+fWCIV05lBNQfy03y8paGYR1ksKvnBbmxeAX688/lXg73V9YvfFYjFgZ4ua6lmhsEIy2RgFjpuF76BwzpWAWyUFzwLfv1VSsFJGEODrQK+ZjQP/AbjjFqoELxqN0tparHl9jKCtrOTo7dVdjGqqVknBFeBzQfQl22dm9PTEWV7O7cqO27XiXI6OjnuvjSo7p4lek+vp2dly53rmnKp3VZuCosklEnHW1oLf6KVWSqUS0Wi5cv1FqkVB0eSSySQtLUtNc50ik5lncLB5plGNQkHR5MLhMMPDCZaW5mo9lEAUi7MMD/d7N5RAKSj2gOHhforF6pflC1out0xXl9PmtzWgoNgDEokEXV2OXG651kPxZXl5lvvvV9HgWlBQ7BH3359iedn/Tti1UioVCYcX6e2tzSbBe52CYo/o6+sjmcyyvLxY66HsSDo9ydGj/fo0aI0oKPaIlpYWHn54jFzucsPdAVlYSJNKrTI0tK/WQ9mzFBR7SCKR4MiRTm7evOLduE6USkVKpascOzZWkwJGsk5BsceMjg7R2ZlpmClIOj3JsWMprcSsMQX
FHhMKhXj44TFWViZ3vF1+taTT0wwMFDXlqAMKij0okUhw/PgQi4vnfRfS2S3z8zMkk2kefviQphx1QEGxR/X29vLoo4PMzZ1jdbVQ6+F8wM/K/H1IZf7qhIJiD+vvT/Hxj+9jYeHcrlQI34l0+hrJ5HqZP22rXz8UFHtcKtXH44+PUCicJ53eeU1Qv1ZXC1y/fo6BgQyPPqqQqDd1t7muVF9XVxdPPBHnvfcmuXz5XTo7x4jFqlc4aH5+BuemefTRQQYG+nVNog4pKASASCTCQw8dZt++NG+99R7ZbIquroFdXQm5spJjYeEKQ0PwwANHVVG8jvkKCjPrAb4HjAGXgH/hnJvfpF0ZOFV5etk599nb20h96O3t5YknEly8OMXk5CnK5W6SyVRghY7X1tZYWppndXWWRKLI8eMD9PendBZR5/yeUXwJ+Dvn3FcqNUe/BPynTdrlnXMf9dmXVEkkEuHIkQMcPFjk5s00Fy5cYGYmTCTSR1tbB5FI9J7+sG8V8MnlFoE0Q0NxhocH6ezsVEA0CL9B8TTw6crjPwP+gc2DQhpQOBxm375BBgcHWFxcZHp6jrm569y8WQTiQJxwOE4oFMKsBTOrlPhzrK4WKlvw5QiFVunubuPAgQT9/ZpiNCK/QTHgnJuuPL4ODGzRLmZmJ4ES8BXn3F9tdUCVFKw/ZkZXVxddXes7X5fLZXK5HNlsloWFJYrFMuWyY23NEQoZra0ttLWF6exMEo8PEovFdObQ4DyDwsz+Fhjc5Fu/v/GJc86Z2Vb31kadc1NmdhD4ezM75Zy7sFlDlRSsf6FQiEQiQSKRYHCz3wxpOp5B4Zz7ha2+Z2Y3zGyfc27azPYBm+6M4pybqvw7YWb/AHwM2DQoRKT++F1wtbFU4K8D//v2BmbWbWbRyuM+4FPAGZ/9ikgV+Q2KrwC/aGbngV+oPMfMjpvZn1baPACcNLO3gB+zfo1CQSHSQHxdzHTOpYGf3+T1k8BvVx7/P+DDfvoRkdrSZz1ExJOCQkQ8KShExJOCQkQ8KShExJOCQkQ8KShExJOCQkQ8KShExJOCQkQ8KShExJOCQkQ8KShExJOCQkQ8KShExJOCQkQ8KShExJOCQkQ8+QoKM/ucmZ02szUzO36Xdk+a2TkzG69UFBORBuL3jOId4FeAl7dqYGYh4GvAZ4AHgc+b2YM++xWRKvK7ue5ZwKsK1CeAcefcRKXtd1kvRaiduEUaRDWuUQwBVzY8v1p5bVNm9qyZnTSzk7Ozs7s+OBHx5qukoHPujoI/fqmkoEj98VVScJumgOENz/dXXhORBlGNqccJ4LCZHTCzCPAM66UIRaRB+L09+stmdhV4HPihmb1Uef0+M3sRwDlXAp4DXgLOAt93zp32N2wRqSa/dz1+APxgk9evAU9teP4i8KKfvkSkdrQyU0Q8KShExJOCQkQ8KShExJOCQkQ8KShExJOCQkQ8KShExJOCQkQ8KShExJOCQkQ8KShExJOCQkQ8KShExJOCQkQ8KShExJOCQkQ8KShExFO1SgpeMrNTZvammZ3006eIVJ+vPTP5WUnB/7WNtv/UOXfTZ38iUgPVKCkoIg2uWtcoHPA3ZvaGmT1bpT5FJCDVKin4hHNuysz6gR+Z2bvOuU0roFeC5FmAkZGRbR5eRHZTNUoK4pybqvw7Y2Y/YL3C+aZBodqjIvVn16ceZtZuZolbj4F/xvpFUBFpELteUhAYAP7RzN4CXgd+6Jz7v376FZHq2vWSgs65CeAjfvoRkdrSykwR8aSgEBFPCgoR8aSgEBFPCgoR8aSgEBFPCgoR8aSgEBFPCgoR8aSgEBFPCgoR8aSgEBFPCgoR8aSgEBFPCgoR8aSgEBFPCgoR8aSgEBFPCgoR8eR3c93/aWbvmtnbZvYDM+vaot2TZnbOzMbN7Et++hSR6vN7RvEj4CHn3MPAe8CXb29gZiHga8BngAeBz5vZgz77FZEq8hUUzrm/cc6VKk9fBfZv0uwTwLh
zbsI5twp8F3jaT78iUl1+q5lv9JvA9zZ5fQi4suH5VeCxrQ6ysaQgUDCzZiwW1Ac0Y2X3Zn1f0Lzv7ch2GgVSe9TMfh8oAd++lxFuZmNJQTM76Zw77veY9Ubvq/E063szs5Pbaee79qiZfQH4JeDnnXOb1QqdAoY3PN9feU1EGoTfux5PAv8R+KxzLrdFsxPAYTM7YGYR4BngBT/9ikh1+b3r8VUgAfzIzN40sz+BD9YerVzsfA54CTgLfN85d3qbx3/e5/jqld5X42nW97at92WbzxZERH5GKzNFxJOCQkQ81XVQbHeJeCMys8+Z2WkzWzOzhr/t1qzL9M3sG2Y202zrecxs2Mx+bGZnKr+Hv3O39nUdFGxjiXgDewf4FeDlWg/EryZfpv9N4MlaD2IXlIDfc849CHwS+OLd/p/VdVBsc4l4Q3LOnXXOnav1OALStMv0nXMvA3O1HkfQnHPTzrmfVh5nWL8jObRV+7oOitv8JvDXtR6EbGqzZfpb/tJJfTGzMeBjwGtbtQnysx47Uu0l4tW0nfcmUktm1gH8BfC7zrmlrdrVPCgCWCJet7zeWxPRMv0GZGZh1kPi2865v7xb27qeemxzibjUnpbpNxgzM+DrwFnn3B96ta/roGCLJeLNwMx+2cyuAo8DPzSzl2o9pp3yuUy/rpnZd4BXgCNmdtXMfqvWYwrIp4BfA36u8rf1ppk9tVVjLeEWEU/1fkYhInVAQSEinhQUIuJJQSEinhQUIuJJQSEinhQUIuLp/wMu8HgMiPVYqAAAAABJRU5ErkJggg==\n", 56 | "text/plain": [ 57 | "
" 58 | ] 59 | }, 60 | "metadata": { 61 | "needs_background": "light" 62 | } 63 | } 64 | ], 65 | "source": [ 66 | "import numpy as np\n", 67 | "import matplotlib.pyplot as plt\n", 68 | "from matplotlib.patches import Circle, PathPatch\n", 69 | "from matplotlib.path import Path\n", 70 | "\n", 71 | "def get_path(patch):\n", 72 | " \"\"\" get the real path from a patch\"\"\"\n", 73 | " path = patch.get_path()\n", 74 | " transform = patch.get_patch_transform()\n", 75 | " return transform.transform_path(path)\n", 76 | "\n", 77 | "# get the paths\n", 78 | "path1 = get_path(Circle(xy=(0, 0), radius=0.5))\n", 79 | "path2 = get_path(Circle((0.5, 0.5), 0.5, alpha=0.5))\n", 80 | "\n", 81 | "# create a patch from the combination of the two paths\n", 82 | "path = Path(np.concatenate([path1.vertices, path2.vertices]),\n", 83 | " np.concatenate([path1.codes, path2.codes]))\n", 84 | "pathpatch = PathPatch(path, facecolor='blue', alpha=0.2)\n", 85 | "\n", 86 | "# plot the combined shape\n", 87 | "fig,ax = plt.subplots()\n", 88 | "ax.set_xlim(-2, 2)\n", 89 | "ax.set_ylim(-2, 2)\n", 90 | "ax.set_aspect(1)\n", 91 | "ax.add_patch(pathpatch)" 92 | ] 93 | } 94 | ] 95 | } -------------------------------------------------------------------------------- /football/complicated_placement.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "complicated_placement.ipynb", 7 | "provenance": [], 8 | "authorship_tag": "ABX9TyNFhPIFb7DmFn6aCUlDViGU", 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "language_info": { 16 | "name": "python" 17 | } 18 | }, 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "id": "view-in-github", 24 | "colab_type": "text" 25 | }, 26 | "source": [ 27 | "\"Open" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 1, 33 | 
"metadata": { 34 | "colab": { 35 | "base_uri": "https://localhost:8080/" 36 | }, 37 | "id": "-4KGrxkfqM7q", 38 | "outputId": "25d14543-0899-48f6-c0e3-609287363878" 39 | }, 40 | "outputs": [ 41 | { 42 | "output_type": "stream", 43 | "name": "stdout", 44 | "text": [ 45 | "Requirement already satisfied: mplsoccer in /usr/local/lib/python3.7/dist-packages (1.0.5)\n", 46 | "Requirement already satisfied: seaborn in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (0.11.2)\n", 47 | "Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (1.4.1)\n", 48 | "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (4.6.3)\n", 49 | "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (3.2.2)\n", 50 | "Requirement already satisfied: pillow in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (7.1.2)\n", 51 | "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (1.1.5)\n", 52 | "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (1.19.5)\n", 53 | "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplsoccer) (1.3.2)\n", 54 | "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplsoccer) (3.0.6)\n", 55 | "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplsoccer) (0.11.0)\n", 56 | "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplsoccer) (2.8.2)\n", 57 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib->mplsoccer) (1.15.0)\n", 58 | "Requirement already satisfied: pytz>=2017.2 in 
/usr/local/lib/python3.7/dist-packages (from pandas->mplsoccer) (2018.9)\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "!pip install mplsoccer" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "source": [ 69 | "import matplotlib.pyplot as plt\n", 70 | "from mplsoccer import Pitch" 71 | ], 72 | "metadata": { 73 | "id": "gmSuXwanqbOX" 74 | }, 75 | "execution_count": 2, 76 | "outputs": [] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "source": [ 81 | "p = Pitch()" 82 | ], 83 | "metadata": { 84 | "id": "zdblfJN9rOTC" 85 | }, 86 | "execution_count": 3, 87 | "outputs": [] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "source": [ 92 | "figwidth, figheight = 16, 9\n", 93 | "fig = plt.figure(figsize=(figwidth, figheight))\n", 94 | "blank_axes = fig.add_axes((0, 0, 1, 1))\n", 95 | "#blank_axes.axis('off')\n", 96 | "pitch_height = 0.2\n", 97 | "pitch_width = pitch_height * p.ax_aspect * figheight / figwidth\n", 98 | "ax = fig.add_axes((0.1, 0.1, pitch_width, pitch_height))\n", 99 | "p.draw(ax=ax)\n", 100 | "ax2 = fig.add_axes((0.6, 0.6, pitch_width, pitch_height))\n", 101 | "p.draw(ax=ax2)\n" 102 | ], 103 | "metadata": { 104 | "colab": { 105 | "base_uri": "https://localhost:8080/", 106 | "height": 700 107 | }, 108 | "id": "9NrqO9jyqO2V", 109 | "outputId": "bff820ae-5efc-423a-f6a1-b2e155d92475" 110 | }, 111 | "execution_count": 4, 112 | "outputs": [ 113 | { 114 | "output_type": "display_data", 115 | "data": { 116 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABK0AAAKrCAYAAAAzsH71AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdX2yd953n98+PpGhRfymJMuVIsv5ZO2PFsSVHcRjnYp3sBsjMRXhRoJgAi6LFYnPTKRbYRYEpWkyL6VVboBcF0j+5KBYt0B2kvagMNEUuNl4s4pSG5axlW7alKPpjyRJFSTElmSYpkXx6IZGreGyLog55fiRfL8CAzjkPz+8rngMBfuP3PE9pmiYAAAAAUJOOdg8AAAAAAJ8nWgEAAABQHdEKAAAAgOqIVgAAAABUR7QCAAAAoDqiFQAAAADVeWi0KqX8r6WUkVLKe1/yeiml/A+llDOllHdKKS+2fkwAAAAAVpP57LT6F0l++BWv/1mSg/f/+0mS/+nxxwIAAABgNXtotGqa5t8k+cNXHDKY5H9r7hlK0ltKeapVAwIAAACw+rTimlY7k1x84PGl+88BAAAAwIJ0LeVipZSf5N4phFm/fv03//RP/3QplwcAAABgCb311lvXm6bZvpCfbUW0+jjJ7gce77r/3N/RNM3PkvwsSY4ePdocP368BcsDAAAAUKNSyoWF/mwrTg98Ncl/cP8uggNJbjZNc6UF7wsAAADAKvXQnVallH+Z5JUkfaWUS0n+yyRrkqRpmv85yS+S/HmSM0k+S/IfLdawAAAAAKwOD41WTdP8+CGvN0n+45ZNBAAAAMCq14rTAwEAAACgpUQrAAAAAKojWgEAAABQHdEKAAAAgOqIVgAAAABUR7QCAAAAoDqiFQAAAADVEa0AAAAAqI5oBQAAAEB1RCsAAAAAqiNaAQAAAFAd0QoAAACA6ohWAAAAAFRHtAIAAACgOqIVAAAAANURrQAAAACojmgFAAAAQHVEKwAAAACqI1oBAAAAUB3RCgAAAIDqiFYAAAAAVEe0AgAAAKA6ohUAAAAA1RGtAAAAAKiOaAUAAABAdUQrAAAAAKojWgEAAABQHdEKAAAAgOqIVgAAAABUR7QCAAAAoDqiFQAAAADVEa0AAAAAqI5oBQAAAEB1RCsAAAAAqiNaAQAAAFAd0QoAAACA6ohWAAAAAFRHtAIAAACgOqIVAAAAANURrQAAAACojmgFAAAAQHVEKwAAAACqI1oBAAAAUB3RCgAAAIDqiFYAAAAAVEe0AgAAAKA6ohUAAAAA1RGtAAAAAKiOaAUAAABAdUQrAAAAAKojWgEAAABQHdEKAAAAgOqIVgAAAABUR7QCAAAAoDqiFQAAAADVEa0AAAAAqI5oBQAAAEB1RCsAAAAAqiNaAQAAAFAd0QoAAACA6ohWAAAAAFRHtAIAAACgOqIVAAAAANURrQAAAACojmgFAAAAQHVEKwAAAACqI1oBAAAAUB3RCgAAAIDqiFYAAAAAVEe0AgAAAKA6ohUAAAAA1RGtAAAAAKiOaAUAAABAdUQrAAAAAKojWgEAAABQHdEKAAAAgOqIVgAAAABUR7QCAAAAoDqiFQAAAADVEa0AAAAAqI5oBQAAAEB1RCsAAAAAqiNaAQAAAFAd0QoAAACA6ohWAAAAAFRHtAIAAACgOqIVAAAAANURrQAAAACojmgFAAAAQHVEKwAAAACqI1oBAAAAUB3RCgAAAIDqiFYAAAAAVEe0AgAAAKA6ohUAAAAA1RGtAAAAAKiOaAUAAABAdUQrAAAAAKojWgEAAABQHdEKAAAAgOqIVgAAAABUR7QCAAAAoDqiFQAAAADVEa0AAAAAqI5oBQAAAEB1RCsAAAAAqiNaAQAAAFAd0QoAAACA6ohWAAAAAFRHtAIAAACgOqIVAAAAANURrQAAAACojmgFAAAAQHVEKwAAAACqI1oBAAAAUB3RCgAAAIDqiFYAAAA
AVEe0AgAAAKA6ohUAAAAA1RGtAAAAAKiOaAUAAABAdeYVrUopPyylnCqlnCml/NUXvP50KeW1Usq/LaW8U0r589aPCgAAAMBq8dBoVUrpTPLTJH+W5FCSH5dSDn3usP8iyc+bpjmS5C+S/I+tHhQAAACA1WM+O61eSnKmaZqzTdPcSfK3SQY/d0yTZNP9P29Ocrl1IwIAAACw2swnWu1McvGBx5fuP/eg/yrJPyqlXEryiyT/yRe9USnlJ6WU46WU49euXVvAuAAAAACsBq26EPuPk/yLpml2JfnzJP97KeXvvHfTND9rmuZo0zRHt2/f3qKlAQAAAFhp5hOtPk6y+4HHu+4/96B/nOTnSdI0zf+XZG2SvlYMCAAAAMDqM59o9WaSg6WUfaWU7ty70PqrnzvmoyT/IElKKc/mXrRy/h8AAAAAC/LQaNU0zVSSv0zyyyQf5N5dAk+WUv6mlPKj+4f98yT/pJRyIsm/TPIfNk3TLNbQAAAAAKxsXfM5qGmaX+TeBdYffO6vH/jz+0m+29rRAABg+RgaGsrVq1fbPQY8lv7+/gwMDLR7DIAkrbsQOwAArGqCFSuB7zFQk3nttAIAAOZncHCw3SOsWMeOHUvid7xYZn+/ALWw0woAAACA6ohWAAAAAFRHtAIAAACgOqIVAAAAANURrQAAAACojmgFAAAAQHVEKwAAAACqI1oBAAAAUB3RCgAAAIDqiFYAAAAAVEe0AgAAAKA6ohUAAAAA1RGtAAAAAKiOaAUAAABAdUQrAAAAAKojWgEAAABQHdEKAAAAgOqIVgAAAABUR7QCAAAAoDqiFQAAAADVEa0AAAAAqI5oBQAAAEB1RCsAAAAAqiNaAQAAAFCdrnYPAAAAK8mxY8eWZJ3+/v4MDAwsyVq0z9DQUK5evdruMQDawk4rAABYhoSM1cHnDKxmdloBAEALDQ4OLvoaS7Wbi3r4XgGrkZ1WAAAAAFRHtAIAAACgOqIVAAAAANURrQAAAACojmgFAAAAQHVEKwAAAACqI1oBAAAAUB3RCgAAAIDqiFYAAAAAVEe0AgAAAKA6ohUAAAAA1RGtAAAAAKiOaAUAAABAdUQrAAAAAKrT1e4BAABgJTl27NhXvt7f35+BgYElmmZ5m5mZye3btzM6Oprx8fG558+fP5/e3t5s3LgxnZ2dbZxw+RgaGsrVq1fbPQbAIxGtAABgCQkHX61pmly7di3nzp3LyMhIZmZm/s4xJ06cSJKUUrJ9+/bs27cv/f39KaUs9bjLhu8dsByJVgAA0EKDg4Nf+trDdmGtdsPDw3nvvfcyNjY299z69evT29ub9evX5/Tp00mSXbt2ZXR0NJ9++mlGRkYyMjKSnp6efP3rX8/OnTvbNf6y4PsJLCeiFQAA0FZ37tzJe++9l4sXLyZJenp6snfv3jz99NNZu3bt3HGz0eqb3/zm3M999NFHOX/+fMbGxnL8+PFcvnw5zz//fJ544oml/4sA0FKiFQAALFMXLlzInj172j3GY/nss8/ym9/8JmNjY+no6MihQ4eyb9++dHQ8/J5R3d3deeaZZ3LgwIGcP38+J0+ezOXLl/PJJ5/kO9/5TjZu3LgEf4PF8/HHH7d7BIC2cvdAAABYpt5+++1cvny53WMs2Pj4eH79619nbGwsmzdvzve+970cOHBgXsHqQaWU7Nu3L9///vezZcuWjI+P5/XXX8+nn366SJMvvpGRkbz11lvtHgOgrUQrAABYxn7729/m5s2b7R7jkU1PT2doaCjj4+PZsmVLvvvd72bDhg2P9Z7r1q3Lyy+/nL6+vkxOTmZoaChTU1Mtmnjp3L59O2+++Waapmn3KABtJVoBAMAytXv37kxPT+eNN97InTt32j3OIzl16lRu3bqV9evXZ2BgIGvWrGnJ+3Z1deXb3/52Nm3alLGxsbz//vsted+lMjU1lTfeeCNTU1N56qmn2j0OQFuJVgAA8JgevOvaUt6B7YUXXkhvb2/Gx8fzu9/
9bsnWfVyjo6M5c+ZMkuTIkSPp7u5u6ft3dXXlxRdfTCkl586dy/Xr11v6/ovp97//fcbGxrJx48a8+OKLS7Zuu77DAF9FtAIAgGWqs7MzL7zwQpLk7NmzmZiYaPNE83P69Ok0TZP9+/dn27Zti7LG5s2bc/Dgwbn1loO7d+/Oxbznn38+XV3umwWsbqIVAAAsY729vdmxY0dmZmZy7ty5do/zUOPj4xkeHk4pJc8888yirnXgwIF0dnbm2rVry+Ki7BcuXMjU1FT6+vrS19fX7nEA2k60AgCAZW7//v1JkuHh4TZP8nAfffRRmqbJU089lZ6enkVdq7u7Ozt37kxyLwjV7sqVK0mSffv2tXkSgDqIVgAAsMxt3bo1HR0duXXrVvUXZL9x40aSzMWkxTa7zuy6tZqens7o6GiS2GUFcJ9oBQAAy1xnZ2d6e3uTJH/4wx/aPM2Xa5omN2/eTJK5eRfb7Do3b97MzMzMkqy5EKOjo5mZmcmmTZtafmF6gOVKtAIAgBVg69atSTIXhWo0MTGRO3fupLu7e9FPDZzV3d2ddevWZWZmpurrWs3ustqyZUubJwGoh2gFAAAVOXny5Ny1jR7F7O6cqampVo/UMrOnLj7xxBMppTzSz05OTi543bVr1ya5d3e+Ws1+bgvZZTUyMpJ33303TdO0eiyAthKtAACgEk3T5NKlS7l69eoj/+xsBKo5XMzO9qjBKknGxsYWvO5K/91cu3Ytly5dqvr0R4CF6Gr3AAAAwD2llPzgBz9YULiYnp5Ocu/6VrWanW0hu8FmT39ciNn1lsPvZvZzfBSHDh3Ks88+m44OexKAlcW/agAAUJGOjo4FRatbt24lSdatW9fqkVpm3bp1KaXks88+W7JT9WZmZnL79u0kyYYNG5ZkzYWY/dxmP8dHUUoRrIAVyb9sAACwzDVNkxs3biR5vB1Ji62zszObNm1KsrA4sxC3b9/OzMxM1q9fnzVr1izJmguxbdu2JPfu/ug0P4B7RCsAAFjmxsbGMjk5me7u7qp3EyVJb29vknsXD18K165d+6N1a7V27dqsX78+09PTVd8BEmApiVYAALDMXbx4MUnS19e3oFMLl9KuXbuSJBcuXFj0HUVN0+T8+fNJkp07dy7qWq3Q19eXJLl06VKbJwGog2gFAADL2N27d3Pu3Lkkyf79+9s8zcNt27YtGzZsyOTkZC5fvryoa42MjGRsbCw9PT3ZsWPHoq7VCvv27UtyL+hNTk62eRqA9hOtAABgGTt9+nTu3r2bbdu2VX09q1mllBw4cCBJcvLkyUW7IPvU1FTefffdJPdiUO070JJk8+bN6e/vz/T0dD788MN2jwPQdqIVAAA8psHBwS/882K7cuVKzpw5kyQ5dOjQsggzSbJnz55s2bIlExMTc2Gp1T744IOMjY1l06ZNc5FsOZj9HM+fPz932udSaNd3GOCriFYAALBMvfXWW0mSZ599dlnssppVSsmRI0fS0dGRixcvzoW3Vrlw4ULOnj37R+ssF5s2bcpzzz2XJHn77bfbPA1Aey2ff70BAIA/Mj09nV27duXgwYPtHuWRbdy4MYcPH05y7zTB06dPp2max3rPpmly9uzZudjz3HPPVX/XwC+yf//+7N27d9EvVA9Qu652DwAAACzMtm3bcvjw4WVzWuDn7d69O9PT0zlx4kQ++OCD3LhxI4cPH05PT88jv9fExETeeeedXLlyJcm90+yWw4Xpv8w3vvGNjI+P5+rVq+0eBaBt7LQCAIBl6uWXX05nZ2e7x3gse/fuzbe+9a2sWbMmIyMj+dWvfpVTp05lYmJiXj8/OTmZ06dP57XXXsuVK1fS1dWVI0eOLMvdZw/q6OjISy+91O4xANrKTisAAFimltO1mr7K1772tWzdujUnTpzI8PBwPvzww5w6dSr9/f3ZunVrent7s2HDhrnjr1+/ntHR0XzyyScZHh6eO41u+/btOXz4cNatW9euv0pLrZTPF2ChRCs
AAGihY8eOtXuEZWnt2rV56aWXcv369Zw7dy7Dw8Nz/33e66+//kePd+zYkb179+bJJ59ctqdKLhXfT2A5Ea0AAGAJ9ff3t3uEapVSsn379mzfvj3j4+MZGRnJ6OhoRkdHMzExMXfK4ObNm9Pb25ve3t48+eSTK2Zn1WLq7+93fSxg2RGtAACghQYHB9s9worQ09OTPXv2ZM+ePXPPze4SeuWVV9o01fI1MDDw0GPswgJq4yRpAAAAAKojWgEAAABQHdEKAAAAgOqIVgAAAABUR7QCAAAAoDqiFQAAAADVEa0AAAAAqI5oBQAAAEB1RCsAAAAAqiNaAQAAAFAd0QoAAACA6ohWAAAAAFRHtAIAAACgOqIVAAAAANURrQAAAACoTle7BwAAgJXk2LFj7R6BFcj3CliN7LQCAIBlqL+/v90jsAR8zsBqZqcVAAC00ODgYLtHYAUZGBhYsrXs5gJqY6cVAAAAANURrQAAAACojmgFAAAAQHVEKwAAAACqI1oBAAAAUB3RCgAAAIDqiFYAAAAAVEe0AgAAAKA6ohUAAAAA1RGtAAAAAKiOaAUAAABAdUQrAAAAAKojWgEAAABQHdEKAAAAgOqIVgAAAABUR7QCAAAAoDqiFQAAAADVEa0AAAAAqI5oBQAAAEB1RCsAAAAAqiNaAQAAAFAd0QoAAACA6ohWAAAAAFSnq90DAADASnLs2LF2j7Di+R0DrA52WgEAQAv09/e3ewR4bL7HQE3stAIAgBYYGBho9wgAsKLMa6dVKeWHpZRTpZQzpZS/+pJj/v1SyvullJOllP+jtWMCAAAAsJo8dKdVKaUzyU+T/CDJpSRvllJebZrm/QeOOZjkP0vy3aZpPimlPLlYAwMAAACw8s1np9VLSc40TXO2aZo7Sf42yeDnjvknSX7aNM0nSdI0zUhrxwQAAABgNZlPtNqZ5OIDjy/df+5Bfy/J3yulvF5KGSql/LBVAwIAAACw+rTqQuxdSQ4meSXJriT/ppTyjaZpRh88qJTykyQ/SZKnn366RUsDAAAAsNLMZ6fVx0l2P/B41/3nHnQpyatN09xtmuZcktO5F7H+SNM0P2ua5mjTNEe3b9++0JkBAAAAWOHmE63eTHKwlLKvlNKd5C+SvPq5Y/7v3NtllVJKX+6dLni2hXMCAAAAsIo8NFo1TTOV5C+T/DLJB0l+3jTNyVLK35RSfnT/sF8muVFKeT/Ja0n+06ZpbizW0AAAAACsbKVpmrYsfPTo0eb48eNtWRsAAACAxVdKeatpmqML+dn5nB4IAAAAAEtKtAIAAACgOqIVAAAAANURrQAAAACojmgFAAAAQHVEKwAAAACqI1oBAAAAUB3RCgAAAIDqiFYAAAAAVEe0AgAAAKA6ohUAAAAA1RGtAAAAAKiOaAUAAABAdUQrAAAAAKojWgEAAABQHdEKAAAAgOqIVgAAAABUR7QCAAAAoDqiFQAAAADVEa0AAAAAqI5oBQAAAEB1RCsAAAAAqiNaAQAAAFAd0QoAAACA6ohWAAAAAFRHtAIAAACgOqIVAAAAANURrQAAAACojmgFAAAAQHVEKwAAAACqI1oBAAAAUB3RCgAAAIDqiFYAAAAAVEe0AgAAAKA6ohUAAAAA1RGtAAAAAKiOaAUAAABAdUQrAAAAAKojWgEAAABQHdEKAAAAgOqIVgAAAABUR7QCAAAAoDqiFQAAAADVEa0AAAAAqI5oBQAAAEB1RCsAAAAAqiNaAQAAAFAd0QoAAACA6ohWAAAAAFRHtAIAAACgOqIVAAAAANURrQAAAACojmgFAAAAQHVEKwAAAACqI1oBAAAAUB3RCgAAAIDqiFYAAAAAVEe0AgAAAKA6ohUAAAAA1RGtAAAAAKiOaAUAAABAdUQrAAAAAKojWgEAAABQHdEKAAAAgOqIVgAAAABUR7QCAAAAoDqiFQAAAADVEa0AAAAAqI5oBQAAAEB1RCsAAAAAqiNaAQAAAFAd0QoAAACA6ohWAAA
AAFRHtAIAAACgOqIVAAAAANURrQAAAACojmgFAAAAQHVEKwAAAACqI1oBAAAAUB3RCgAAAIDqiFYAAAAAVEe0AgAAAKA6ohUAAAAA1RGtAAAAAKiOaAUAAABAdUQrAAAAAKojWgEAAABQHdEKAAAAgOqIVgAAAABUR7QCAAAAoDqiFQAAAADVEa0AAAAAqI5oBQAAAEB1RCsAAAAAqiNaAQAAAFAd0QoAAACA6ohWAAAAAFRHtAIAAACgOqIVAAAAANURrQAAAACojmgFAAAAQHVEKwAAAACqI1oBAAAAUB3RCgAAAIDqiFYAAAAAVEe0AgAAAKA6ohUAAAAA1RGtAAAAAKiOaAUAAABAdUQrAAAAAKojWgEAAABQHdEKAAAAgOqIVgAAAABUR7QCAAAAoDqiFQAAAADVEa0AAAAAqI5oBQAAAEB1RCsAAAAAqiNaAQAAAFAd0QoAAACA6ohWAAAAAFRnXtGqlPLDUsqpUsqZUspffcVx/14ppSmlHG3diAAAAACsNg+NVqWUziQ/TfJnSQ4l+XEp5dAXHLcxyT9N8karhwQAAABgdZnPTquXkpxpmuZs0zR3kvxtksEvOO6/TvLfJJlo4XwAAAAArELziVY7k1x84PGl+8/NKaW8mGR30zT/TwtnAwAAAGCVeuwLsZdSOpL890n++TyO/Ukp5Xgp5fi1a9ced2kAAAAAVqj5RKuPk+x+4PGu+8/N2pjkuST/upRyPslAkle/6GLsTdP8rGmao03THN2+ffvCpwYAAABgRZtPtHozycFSyr5SSneSv0jy6uyLTdPcbJqmr2mavU3T7E0ylORHTdMcX5SJAQAAAFjxHhqtmqaZSvKXSX6Z5IMkP2+a5mQp5W9KKT9a7AEBAAAAWH265nNQ0zS/SPKLzz33119y7CuPPxYAAAAAq9ljX4gdAAAAAFpNtAIAAACgOqIVAAAAANURrQAAAACojmgFAAAAQHVEKwAAAACqI1oBAAAAUB3RCgAAAIDqiFYAAAAAVEe0AgAAAKA6ohUAAAAA1RGtAAAAAKiOaAUAAABAdUQrAAAAAKojWgEAAABQHdEKAAAAgOqIVgAAAABUR7QCAAAAoDqiFQAAAADVEa0AAAAAqI5oBQAAAEB1RCsAAAAAqiNaAQAAAFAd0QoAAACA6ohWAAAAAFRHtAIAAACgOqIVAAAAANURrQAAAACojmgFAAAAQHVEKwAAAACqI1oBAAAAUB3RCgAAAIDqiFYAAAAAVEe0AgAAAKA6ohUAAAAA1RGtAAAAAKiOaAUAAABAdUQrAAAAAKojWgEAAABQHdEKAAAAgOqIVgAAAABUR7QCAAAAoDqiFQAAAADVEa0AAAAAqI5oBQAAAEB1RCsAAAAAqiNaAQAAAFAd0QoAAACA6ohWAAAAAFRHtAIAAACgOqIVAAAAANURrQAAAACojmgFAAAAQHVEKwAAAACqI1oBAAAAUB3RCgAAAIDqiFYAAAAAVEe0AgAAAKA6ohUAAAAA1RGtAAAAAKiOaAUAAABAdUQrAAAAAKojWgEAAABQHdEKAAAAgOp0tXsAVqahoaFcvXq13WPAY+nv78/AwEC7xwAAAFiV7LRiUQhWrAS+xwAAAO1jpxWLanBwsN0jrFjHjh1L4ne8WGZ/vwAAALSHnVYAAAAAVEe0AgAAAKA6ohUAAAAA1RGtAAAAAKiOaAUAAABAdUQrAAAAAKojWgEAAABQHdEKAAAAgOqIVgAAAABUR7QCAAAAoDqiFQAAAADVEa0AAAAAqI5oBQAAAEB1RCsAAAAAqiNaAQAAAFAd0QoAAACA6ohWAAAAAFRHtAIAAACgOqIVAAAAANURrQAAAACojmgFAAAAQHVEKwAAAACqI1oBAAAAUB3RCgAAAIDqdLV7AFa2Y8eOLck6/f39GRgYWJK1aJ+hoaFcvXq13WMAAACwBOy0YkUQMlYHnzMAAMDqYacVi2pwcHDR11iq3VzUw/cKAABg5bPTCgAAAID
qiFYAAAAAVEe0AgAAAKA6ohUAAAAA1RGtAAAAAKiOaAUAAABAdUQrAAAAAKojWgEAAABQHdEKAAAAgOqIVgAAAABUR7QCAAAAoDqiFQAAAADVEa0AAAAAqI5oBQAAAEB1uto9ACvbsWPHvvL1/v7+DAwMLNE0y9vMzExu376d0dHRjI+Pzz1//vz59Pb2ZuPGjens7GzjhMvH0NBQrl692u4xAAAA+AqiFW0lHHy1pmly7dq1nDt3LiMjI5mZmfk7x5w4cSJJUkrJ9u3bs2/fvvT396eUstTjLhu+dwAAAPUTrVhUg4ODX/raw3ZhrXbDw8N57733MjY2Nvfc+vXr09vbm/Xr1+f06dNJkl27dmV0dDSffvppRkZGMjIykp6ennz961/Pzp072zX+suD7CQAAUC/RCipz586dvPfee7l48WKSpKenJ3v37s3TTz+dtWvXzh03G62++c1vzv3cRx99lPPnz2dsbCzHjx/P5cuX8/zzz+eJJ55Y+r8IAAAAPAbRihXjwoUL2bNnT7vHeCyfffZZfvOb32RsbCwdHR05dOhQ9u3bl46Oh98zobu7O88880wOHDiQ8+fP5+TJk7l8+XI++eSTfOc738nGjRuX4G+weD7++ON2jwAAAMAScvdAVoy33347ly9fbvcYCzY+Pp5f//rXGRsby+bNm/O9730vBw4cmFewelApJfv27cv3v//9bNmyJePj43n99dfz6aefLtLki29kZCRvvfVWu8cAAABgCYlWrCi//e1vc/PmzXaP8cimp6czNDSU8fHxbNmyJd/97nezYcOGx3rPdevW5eWXX05fX18mJyczNDSUqampFk28dG7fvp0333wzTdO0exQAAACWkGjFirF79+5MT0/njTfeyJ07d9o9ziM5depUbt26lfXr12dgYCBr1qxpyft2dXXl29/+djZt2pSxsbG8//77LXnfpTI1NZU33ngjU1NTeeqpp9o9DgAAAEtItKLlHrzr2lLege2FF15Ib29vxsfH87vf/W7J1n1co6OjOXPmTJLkyJEj6e7ubun7d3V15cUXX0wpJefOncv169db+v6L6fe//33GxsaycePGvPjii0u2bru+wwAAAPw7omnMXHQAABDfSURBVBUrRmdnZ1544YUkydmzZzMxMdHmiebn9OnTaZom+/fvz7Zt2xZljc2bN+fgwYNz6y0Hd+/enYt5zz//fLq63DcCAABgNRGtWFF6e3uzY8eOzMzM5Ny5c+0e56HGx8czPDycUkqeeeaZRV3rwIED6ezszLVr15bFRdkvXLiQqamp9PX1pa+vr93jAAAAsMREK1ac/fv3J0mGh4fbPMnDffTRR2maJk899VR6enoWda3u7u7s3Lkzyb0gVLsrV64kSfbt29fmSQAAAGgH0YoVZ+vWreno6MitW7eqvyD7jRs3kmQuJi222XVm163V9PR0RkdHk8QuKwAAgFVqXtGqlPLDUsqpUsqZUspffcHr/6yU8n4p5Z1Syr8qpexp/agwP52dnent7U2S/OEPf2jzNF+uaZrcvHkzSebmXWyz69y8eTMzMzNLsuZCjI6OZmZmJps2bWr5hekBAABYHh4arUopnUl+muTPkhxK8uNSyqHPHfZvkxxtmub5JP9Xkv+21YPCo9i6dWuSzEWhGk1MTOTOnTvp7u5e9FMDZ3V3d2fdunWZmZmp+rpWs7ustmzZ0uZJAAAAaJf57LR6KcmZpmnONk1zJ8nfJhl88ICmaV5rmuaz+w+Hkuxq7ZisVidPnpy7ttGjmN2dMzU11eqRWmb21MUnnngipZRH+tnJyckFr7t27dok9+7OV6vZz20hu6xGRkby7rvvpmmaVo8FAADAEppPtNqZ5OIDjy/df+7L/OMk/+8XvVBK+Ukp5Xgp5fi1a9fmPyWrUtM0uXTpUq5evfrIPzsbgWoOF7OzPWqwSpKxsbEFr7vSfzfXrl3LpUuXqj79EQAAgIfrauWblVL+UZKjSf7+F73
eNM3PkvwsSY4ePVrv/zFThVJKfvCDHywoXExPTye5d32rWs3OtpDdYLOnPy7E7HrL4Xcz+zk+ikOHDuXZZ59NR4f7TAAAACxn84lWHyfZ/cDjXfef+yOllH+Y5D9P8vebpln4uUvwgIWGh1u3biVJ1q1b18pxWmrdunUppeSzzz7L3bt3s2bNmkVfc2ZmJrdv306SbNiwYdHXW6jZz232c3wUpZQFhU4AAADqMp8i8GaSg6WUfaWU7iR/keTVBw8opRxJ8r8k+VHTNCOtHxPmr2ma3LhxI8nj7UhabJ2dndm0aVOShcWZhbh9+3ZmZmayfv36JYlkC7Vt27Yk9+7+6DQ/AACA1emh0appmqkkf5nkl0k+SPLzpmlOllL+ppTyo/uH/XdJNiT5P0spb5dSXv2St4NFNzY2lsnJyXR3d1e9myhJent7k9y7ePhSmL2W3Oy6tVq7dm3Wr1+f6enpqu8ACQAAwOKZ1zWtmqb5RZJffO65v37gz/+wxXPBgl28eO++AX19fdWfJrZr165cuHAhFy5cyJ/8yZ8s6nWYmqbJ+fPnkyQ7d37VvRTq0NfXl7GxsVy6dClbtmxp9zgAAAAsMVcqZkW5e/duzp07lyTZv39/m6d5uG3btmXDhg2ZnJzM5cuXF3WtkZGRjI2NpaenJzt27FjUtVph3759SZILFy5kctJl8gAAAFYb0YoV5fTp07l79262bdtW9fWsZpVScuDAgSTJyZMnc/fu3UVZZ2pqKu+++26SezGo9h1oSbJ58+b09/dneno6H374YbvHAQAAYImJVrTc4ODgF/55sV25ciVnzpxJkhw6dGhZhJkk2bNnT7Zs2ZKJiYm5sNRqH3zwQcbGxrJp06a5SLYczH6O58+fnzvtcym06zsMAADAvyNasWK89dZbSZJnn312WeyymlVKyZEjR9LR0ZGLFy/OhbdWuXDhQs6ePftH6ywXmzZtynPPPZckefvtt9s8DQAAAEtp+fzfKzzE9PR0du3alYMHD7Z7lEe2cePGHD58OMm90wRPnz6dpmke6z2bpsnZs2fnYs9zzz1X/V0Dv8j+/fuzd+/ezMzMtHsUAAAAltC87h4Iy8G2bdty+PDhZXNa4Oft3r0709PTOXHiRD744IPcuHEjhw8fTk9PzyO/18TERN55551cuXIlyb3T7JbDhem/zDe+8Y2Mj4/n6tWr7R4FAACAJWKnFSvGyy+/nM7OznaP8Vj27t2bb33rW1mzZk1GRkbyq1/9KqdOncrExMS8fn5ycjKnT5/Oa6+9litXrqSrqytHjhxZlrvPHtTR0ZGXXnqp3WMAAACwhOy0YsVYTtdq+ipf+9rXsnXr1pw4cSLDw8P58MMPc+rUqfT392fr1q3p7e3Nhg0b5o6/fv16RkdH88knn2R4eHjuNLrt27fn8OHDWbduXbv+Ki21Uj5fAAAA5ke0YlEdO3as3SMsS2vXrs1LL72U69ev59y5cxkeHp777/Nef/31P3q8Y8eO7N27N08++eSyPVVyqfh+AgAA1Eu0oq36+/vbPUK1SinZvn17tm/fnvHx8YyMjGR0dDSjo6OZmJiYO2Vw8+bN6e3tTW9vb5588skVs7NqMfX397s+FgAAQOVEKxbV4OBgu0dYEXp6erJnz57s2bNn7rnZXUKvvPJKm6ZavgYGBh56jF1YAAAA7eUiMQAAAABUR7QCAAAAoDqiFQAAAADVEa0AAAAAqI5oBQAAAEB1RCsAAAAAqiNaAQAAAFAd0QoAAACA6ohWAAAAAFRHtAIAAACgOqIVAAAAANURrQAAAACojmgFAAAAQHVEKwAAAACqI1oBAAAAUJ2udg/Aynbs2LF2j8AK5HsFAACw8tlpxYrQ39/f7hFYAj5nAACA1cNOKxbV4OBgu0dgBRkYGFiytezmAgAAaC87rQAAAACojmgFAAAAQHVEKwAAAACqI1oBAAAAUB3RCgAAAIDqiFYAAAAAVEe0AgAAAKA6ohUAAAAA1RG
tAAAAAKiOaAUAAABAdUQrAAAAAKojWgEAAABQHdEKAAAAgOqIVgAAAABUR7QCAAAAoDqiFQAAAADVEa0AAAAAqI5oBQAAAEB1RCsAAAAAqiNaAQAAAFAd0QoAAACA6ohWAAAAAFRHtAIAAACgOl3tHoCV7f9v7+5CLS3LMADfTzNpB5mBAxJqjeAYmQWGyEQHBUaoB85BFgqSxpBHRn8ERlFhRxYlBPZjKJpQah7khgwP0hAixQFBVDAGCx0LLLU5EbWpp4O1DqbdOPubcdbar67rgg3fz7u+9bDZD2vte73fu+6+++7NLuFNz+8YAACANyMzrViIk08+ebNLgNfN3zEAAMDmMdOKhdi5c+dmlwAAAAC8gZlpBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwhFYAAAAADEdoBQAAAMBwJoVWVXVBVT1ZVXur6ppDnD++qu6Yn3+oqrYf60IBAAAAWB0bhlZVtSXJDUkuTHJWksuq6qx1w3YnebG7z0hyfZLrjnWhAAAAAKyOKTOtzkuyt7uf6u5Xk9yeZNe6MbuS3DrfvivJ+VVVx65MAAAAAFbJlNDqlCTPHLS/b37skGO6+0CS/UlOOhYFAgAAALB6ti7zyarqqiRXzXdfqarHlvn8wIa2JfnHZhcB/B+9CePRlzAmvQnjee/RPnBKaPVsktMO2j91fuxQY/ZV1dYkJyZ5fv2FuvvGJDcmSVXt6e5zj6ZoYDH0JYxJb8J49CWMSW/CeKpqz9E+dsrtgQ8n2VFVp1fVcUkuTbK2bsxakivm25ckua+7+2iLAgAAAGC1bTjTqrsPVNXVSe5NsiXJzd39eFVdm2RPd68luSnJbVW1N8kLmQVbAAAAAHBUJq1p1d33JLln3bFvHrT9cpJPHeFz33iE44HF05cwJr0J49GXMCa9CeM56r4sd/EBAAAAMJopa1oBAAAAwFItPLSqqguq6smq2ltV1xzi/PFVdcf8/ENVtX3RNcGqm9CXX66qJ6rq0ar6XVW9ZzPqhFWzUW8eNO6TVdVV5duRYMGm9GVVfXr+uvl4Vf1i2TXCqpnwXvbdVXV/VT0yfz970WbUCaukqm6uqueq6rHXOF9V9cN53z5aVR+act2FhlZVtSXJDUkuTHJWksuq6qx1w3YnebG7z0hyfZLrFlkTrLqJfflIknO7+4NJ7kry3eVWCatnYm+mqk5I8oUkDy23Qlg9U/qyqnYk+VqSj3T3+5N8cemFwgqZ+Hr5jSR3dvc5mX1J2I+WWyWspFuSXHCY8xcm2TH/uSrJj6dcdNEzrc5Lsre7n+ruV5PcnmTXujG7ktw6374ryflVVQuuC1bZhn3Z3fd390vz3QeTnLrkGmEVTXnNTJLvZPYBz8vLLA5W1JS+/FySG7r7xSTp7ueWXCO
smil92UneMd8+Mclfl1gfrKTufiDJC4cZsivJz3vmwSTvrKp3bXTdRYdWpyR55qD9ffNjhxzT3QeS7E9y0oLrglU2pS8PtjvJbxdaEZBM6M35NOrTuvs3yywMVtiU18wzk5xZVX+oqger6nCfMgOv35S+/HaSy6tqX5J7knx+OaUBh3Gk/4cmSbYurBzgDa+qLk9ybpKPbnYtsOqq6i1JfpDkyk0uBfhfWzO71eFjmc1MfqCqPtDd/9zUqmC1XZbklu7+flV9OMltVXV2d/9nswsDjsyiZ1o9m+S0g/ZPnR875Jiq2prZ9M3nF1wXrLIpfZmq+niSrye5uLtfWVJtsMo26s0Tkpyd5PdV9ZckO5OsWYwdFmrKa+a+JGvd/a/u/nOSP2UWYgGLMaUvdye5M0m6+49J3pZk21KqA17LpP9D11t0aPVwkh1VdXpVHZfZInhr68asJblivn1Jkvu6uxdcF6yyDfuyqs5J8tPMAitrc8ByHLY3u3t/d2/r7u3dvT2z9eYu7u49m1MurIQp72V/ndksq1TVtsxuF3xqmUXCipnSl08nOT9Jqup9mYVWf19qlcB6a0k+M/8WwZ1J9nf33zZ60EJvD+zuA1V1dZJ7k2xJcnN3P15V1ybZ091rSW7KbLrm3swW7bp0kTXBqpvYl99L8vYkv5p/L8LT3X3xphUNK2BibwJLNLEv703yiap6Ism/k3y1u901AAsysS+/kuRnVfWlzBZlv9LECFisqvplZh/ibJuvJ/etJG9Nku7+SWbry12UZG+Sl5J8dtJ19S4AAAAAo1n07YEAAAAAcMSEVgAAAAAMR2gFAAAAwHCEVgAAAAAMR2gFAAAAwHCEVgAAAAAMR2gFAAAAwHCEVgAAAAAM57/A6xh50aQ65gAAAABJRU5ErkJggg==\n", 117 | "text/plain": [ 118 | "
" 119 | ] 120 | }, 121 | "metadata": { 122 | "needs_background": "light" 123 | } 124 | } 125 | ] 126 | } 127 | ] 128 | } -------------------------------------------------------------------------------- /football/grid_no_pad.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "grid_no_pad.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "authorship_tag": "ABX9TyPkXol+Hqx2YA3pxGVxqEe2", 10 | "include_colab_link": true 11 | }, 12 | "kernelspec": { 13 | "name": "python3", 14 | "display_name": "Python 3" 15 | }, 16 | "language_info": { 17 | "name": "python" 18 | } 19 | }, 20 | "cells": [ 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "id": "view-in-github", 25 | "colab_type": "text" 26 | }, 27 | "source": [ 28 | "\"Open" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "colab": { 36 | "base_uri": "https://localhost:8080/" 37 | }, 38 | "id": "TPp-57en1irq", 39 | "outputId": "5132e4b3-5298-4103-fae7-e5c778de04e7" 40 | }, 41 | "outputs": [ 42 | { 43 | "output_type": "stream", 44 | "name": "stdout", 45 | "text": [ 46 | "Collecting mplsoccer\n", 47 | " Downloading mplsoccer-1.0.5.tar.gz (57 kB)\n", 48 | "\u001b[?25l\r\u001b[K |█████▊ | 10 kB 19.4 MB/s eta 0:00:01\r\u001b[K |███████████▍ | 20 kB 23.5 MB/s eta 0:00:01\r\u001b[K |█████████████████ | 30 kB 23.2 MB/s eta 0:00:01\r\u001b[K |██████████████████████▊ | 40 kB 25.5 MB/s eta 0:00:01\r\u001b[K |████████████████████████████▍ | 51 kB 26.5 MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 57 kB 3.2 MB/s \n", 49 | "\u001b[?25hRequirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (3.2.2)\n", 50 | "Requirement already satisfied: seaborn in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (0.11.2)\n", 51 | "Requirement already satisfied: scipy in 
/usr/local/lib/python3.7/dist-packages (from mplsoccer) (1.4.1)\n", 52 | "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (1.1.5)\n", 53 | "Requirement already satisfied: pillow in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (7.1.2)\n", 54 | "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (1.19.5)\n", 55 | "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.7/dist-packages (from mplsoccer) (4.6.3)\n", 56 | "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplsoccer) (2.8.2)\n", 57 | "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplsoccer) (0.11.0)\n", 58 | "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplsoccer) (1.3.2)\n", 59 | "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplsoccer) (3.0.6)\n", 60 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib->mplsoccer) (1.15.0)\n", 61 | "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->mplsoccer) (2018.9)\n", 62 | "Building wheels for collected packages: mplsoccer\n", 63 | " Building wheel for mplsoccer (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", 64 | " Created wheel for mplsoccer: filename=mplsoccer-1.0.5-py3-none-any.whl size=62946 sha256=176c25a24eda48ae8f8ddb1de842d3bfb9dd4dbcb78b97e7f3fb5b8983724609\n", 65 | " Stored in directory: /root/.cache/pip/wheels/35/71/46/5f3df8e696517b395ed75971c4d09f5854310efc09d364a9ca\n", 66 | "Successfully built mplsoccer\n", 67 | "Installing collected packages: mplsoccer\n", 68 | "Successfully installed mplsoccer-1.0.5\n" 69 | ] 70 | } 71 | ], 72 | "source": [ 73 | "!pip install mplsoccer" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "source": [ 79 | "from mplsoccer import VerticalPitch" 80 | ], 81 | "metadata": { 82 | "id": "ECPz3va41rMK" 83 | }, 84 | "execution_count": null, 85 | "outputs": [] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "source": [ 90 | "p = VerticalPitch(pitch_type='opta', pad_top=-120, pad_left=-15, pad_right=-15)" 91 | ], 92 | "metadata": { 93 | "id": "qDk64FET1w1a" 94 | }, 95 | "execution_count": null, 96 | "outputs": [] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "source": [ 101 | "grid_width, grid_height = p.calculate_grid_dimensions(nrows=3, ncols=3, figwidth=16, figheight=9, max_grid=0.9, space=0)" 102 | ], 103 | "metadata": { 104 | "id": "pKuC93iK3uAA" 105 | }, 106 | "execution_count": null, 107 | "outputs": [] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "source": [ 112 | "fig, ax = p.grid(nrows=3, ncols=3, space=0, endnote_height=0, title_height=0, grid_width=grid_width, grid_height=grid_height, figheight=9)" 113 | ], 114 | "metadata": { 115 | "colab": { 116 | "base_uri": "https://localhost:8080/", 117 | "height": 180 118 | }, 119 | "id": "C91vD3Hg2AUl", 120 | "outputId": "6f103d4d-ee3f-4e55-bfbd-22a9164b144c" 121 | }, 122 | "execution_count": null, 123 | "outputs": [ 124 | { 125 | "output_type": "display_data", 126 | "data": { 127 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABDUAAAJVCAYAAADOVM6BAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdeVNUZ7vw7bOhQRBldAJUVJwjGqPmxuzcVbueL+Bn9SM8u3apiWKMGKMoKkFAnJhnutfzhy/9ilPuKLB6wXFUWUlE44mlF6t/67pW55IkCQAAAICsqUh7AAAAAICvIWoAAAAAmSRqAAAAAJkkagAAAACZJGoAAAAAmSRqAAAAAJmU/5uPe79XAAAAIE25z33ATg0AAAAgk0QNAAAAIJNEDQAAACCTRA0AAAAgk0QNAAAAIJNEDQAAACCTRA0AAAAgk0QNAAAAIJNEDQAAACCTRA0AAAAgk0QNAAAAIJNEDQAAACCTRA0AAAAgk0QNAAAAIJNEDQAAACCTRA0AAAAgk/JpDwD/1NLSUszNzcXc3FzMz89/9M9CoRDFYjGSJIkkSSKXy0VFRUXkcrmorq6OmpqaqK2t/eiftbW1UVGh8wH8nWKxGLOzsx+twSv/vrS0FEmSlNbiXC5XWovz+fyq9ff9f9++fXvk8y5NAP5OkiQfrb3v/3NxcbG0BheLxYiIqKioKH37cP19/5/btm1L+bODfyaXJMmXPv7FD8J6S5IkJicnY2xsLMbGxuLt27cxPT29Lr9WZWVlNDQ0RHNzczQ1NUVTU1PU1tauy68FkBVJksTc3FxpDR4bG4uJiYnSRfJaq6+vL63BTU1NsXPnzsjlcuvyawFkxeLiYul6eOXb0tLSuvxatbW10dTUVLombmhoiMrKynX5teAf+OzFgKhBWUmSJKampmJ4eDjevHkTY2NjUSgUVv2YXC4XdXV1n9xpUVNTE/l8vrQzY0WxWIxisRiLi4uf3N2xcsfxQzU1NdHc3By7d++O1tZW5RrYEubn52N4eDhevXoVY2NjsbCw8NGPWVl3P3WXr7q6etUuufd3bSwtLX1yHZ6bm4vZ2dn48Lokn89HU1NTtLS0RFtbW+zcuXOjfhsAUrO8vByjo6MxOjoaY2Njn7ypV11dHdu3b//kNXF1dXVUVlaWdspFRGktLhQKMT8//8m1eGZm5pPX3is3/lpbW6OlpUVsJg2iBuVrJWQMDQ3F8PDwR4v29u3bV921W69avLCwEOPj46U7kWNjY7G8vFz6eC6Xi127dkVbW5vAAWw6KyFjJSq/r6qqatVdu8bGxqiurl7zGQqFQoyPj6+6Ezk3N7fqx9TX10dbW5vAAWw6KyFjeHg4RkdHV8WFioqKaGxsLF0PNzc3R01NzZrHhZXr8vd3501NTa36Mdu2bSutwwIHG0jUoPxMT0/H4ODgRyGjuro6WltbY+/evdHU1BQ1NTWpzJckSUxPT8ebN29iZGQkXr16VbqD+H7gaG9vj6qqqlRmBPgWi4uL8fz5849CRkVFRezZsydaW1ujubk56urqUrtoXTn68uLFi3jx4sWq7dYrgePAgQOxffv2VOYD+BbFYjFevHgRQ0NDH4WMpqamUjhoaGhI7dlvS0tLMT4+Hi9fvozh4eGYnZ0tfWwlcOzfvz+ampoEDtaTqEF5SJIkXr16FU+ePInR0dHS96+EjLa2tti1a1dZPrBzcXExRkZGSluyV/7u5PP5OHjwYBw5ciTq6upSnhLg701NTcWTJ09icHCwdAG9EjLa2tpi3759ZRlri8VivHr1KoaGhmJkZGTVbrrW1tbo7OyM5uZmF9VA2VtcXIxnz57F06dPVx2BXgkZbW1tZRlrkySJiYmJ0g7r9wNHY2NjHDlyJNrb28vyWp7MEzVIV7FYjOHh4ejr6yttYauoqIj9+/dHe3t72YaMz1kJHIODg6vubra2tsbx48ejsbExxekAPu3t27fR19e3Kir
v3r07Dhw4ULYh43NWAsfz589jaGioFJobGhri+PHj0draKm4AZWd2djYeP34cAwMDpQcu79ixIzo6Oso2ZHxOkiQxPj4ew8PD8ddff8Xi4mJEvHsmXWdnZxw6dMg7WrGWRA3SUSwWY3BwMB49ehQzMzMR8W6hO3z4cHR0dGyK51KMj4/HkydPYmhoqPTFac+ePXHixIlobm5OeTqAiNevX8fDhw/j9evXEfHu3Z4OHDgQR44c2RTPpZifn4+nT5/Gs2fPShfVO3fujOPHj0d7e7u4AaRuZmYmHj58GM+fPy9F2D179kRnZ2fs3r078+tUoVCI58+fR39/f+kGZnV1dRw5ciSOHDmSqWhO2RI12Hhv3ryJu3fvxuTkZERE1NXVxbFjx+LAgQOZ2pXxn5qbm4v+/v549uxZaTv3gQMH4vTp06k9FwTY2ubm5uLevXsxPDwcEe+Oy61cYG6GqPyhQqEQAwMD8fjx49IDRpuamuLs2bN20AGpWF5ejkePHsXjx49LN7/2798fx44di/r6+pSnW3tJksTo6Gj09fXF2NhYRLy7ofndd9+JzHwrUYONMz8/H3/88Uc8f/48It697d/p06ejra1tU8aMDy0sLER/f3/09/dHsViMfD4fJ0+ejMOHD2+Jzx9IX6FQiP7+/ujr64tCoRCVlZVx7NixLXO3bGWX4J9//ll6O9pDhw7FqVOn1uVdWwA+lCRJjIyMxL1790qRdf/+/XHixInYsWNHytOtvyRJ4vXr13H//v0YHx+PiIhdu3ZFV1fXpow5bAhRg/VXLBbj6dOn8eDBg1heXo6Kioo4duxYHD16dEuep5ueno579+6Vzq7X19dHV1dX7Nq1K+XJgM1sdHQ0ent7S0f+2tra4syZM1FbW5vyZBtvaWkpHj58GE+ePIkkSaKqqipOnz4dHR0d7hYC62Z6ejp6e3vj5cuXEfHuGvDs2bPR0tKS8mQbL0mS+Ouvv+L+/fuxuLgYuVwuDh8+HCdPntwSkZ01JWqwviYmJqKnp6d0hm7fvn1x5swZ7wYSES9evIje3t7S06EPHDgQXV1dFnJgTS0uLsadO3diZGQkIt49eO7s2bOxe/fulCdL3+TkZPT29paeKdLY2Bg//PDDpnieCFA+kiSJhw8fRl9fXymknjp1Kg4dOrTlQ+ri4mI8ePAgnj59GhHv3gr23Llz0dramvJkZIiowfpIkiQGBgait7c3isVi1NXVxZkzZ2Lfvn1pj1ZWCoVCPH78OPr6+kq/T5cuXYqGhoa0RwM2gbdv38atW7dibm4uKisr4+TJk3HkyBFH3t6TJEkMDw/HvXv3Yn5+PiorK+P777+P/fv3pz0asAnMz89HT09PKZ52dHTEqVOnNuXzi77FxMRE3L17N96+fRsREZ2dnXH69Glfr/hPiBqsvaWlpfj9999jaGgoIt6dVz5z5kxUVlamPFn5mpqailu3bsXk5GRUVFREV1eXbdDAV0uSJPr7++P+/fuRJEk0NTXFxYsXM/WWgBvtw69dHR0d0dXV5WsX8NVevXoVPT09sbCwENu2bYsLFy7YJfcFvnbxlUQN1tbExETcvHkzZmZm3O36hwqFQvT29sbAwEBERLS3t8e5c+ccRwH+kcXFxbh9+3bpuT3udv3nPtxlWF9fHxcvXnQcBfhHVo6bPHz4MCLePQjzwoUL3vXuP/T+LsOqqqr44Ycf7PbmS0QN1s7AwEDcvXvXheA3GhwcjN9//z0KhULU1dXFjz/+6GnQwH9kbGwsbt68WboQPH/+vHPJX+HDQH/+/Plob29PeywgAxYWFuLWrVul4yYnTpyIEydO2H37D30Y6I8ePRqnT5/2+8iniBp8uyRJoq+vLx48eBARtuyuhfePo1RVVUV3d3c0NzenPRZQxl6+fBm//vprFAqFaGxsjEuXLtmy+w0+PI7S1dUVR44cSXkqoJzNzc3FtWvXYnp62nGTNfDhcZT29vb44Ycf7DzkQ6IG3yZJkvj
jjz+iv78/IiLOnTsXhw4dSneoTWJ5eTl6enrixYsXUVlZGT/++GPs2bMn7bGAMjQ8PBw9PT1RLBZj//79cf78eRd9ayBJknj8+HHcv38/IiJOnjwZx48fd6cQ+Mj09HRcu3Yt5ubmor6+Prq7u7fkW2avh9evX8cvv/wSy8vLsWfPnrh06VLk8/m0x6J8iBp8vSRJ4s6dO/HXX39FLpeLixcvRltbW9pjbSrFYjHu3LkTg4ODUVFRERcuXPB7DKzy119/xW+//RYREYcPH46uri4vutfYwMBA3LlzJyLePaPku+++83sMlExMTMT169djYWEhmpqaoru7O6qrq9Mea1MZHx+P69evx+LiYjQ3N0d3d7fnzrFC1ODrFAqF6OnpiZGREbsI1lmSJNHb21t6/+7z58/HwYMHU54KKAf9/f1x7969iHBue70NDQ1FT09PJEkSBw8ejO+//97vNRBv3ryJGzduxPLycuzevTt+/PFHuwjWydTUVFy7di3m5+ejoaEhLl++7K1xiRA1+BqFQiF+/fXXePnyZeTz+eju7o6Wlpa0x9rUPnyKtrPdwMOHD0vPMjpz5kx0dnamPNHm9/5zS9ra2uLChQuO+cAW9urVq/jll1+iUChEa2trXLhwwTPl1tns7Gxcu3YtZmZmoq6uLv7rv/7LMR8+GzV8heaTkiSJ27dvx8uXL2Pbtm3x888/CxobIJfLxcmTJ+PMmTMREdHb2xvPnz9PeSogLU+fPi0FjfPnzwsaG2TPnj1x+fLlyOfzMTw8HHfv3o2/uQkEbFLj4+OloHHw4MG4ePGioLEBtm/fHj///HPU19fHzMxM3LhxI5aWltIeizIlavCRJEni3r17MTw8HPl8Pi5fvhwNDQ1pj7WldHZ2xunTpyMi4vbt2/Hq1auUJwI22sjISNy9ezciIr7//nvH0TZYS0tLXL58OSoqKmJgYKC0gw7YOlZeTBcKhWhvb4/vv//erq0NVFNTEz/99FPU1dXF5ORkaQcdfKgsj5/cuHGj9F7FAOVg79690d3dnfYYG8Y6DJSbrbYOR1iLgfKS8jqcreMnFm+g3Gy1dWmrfb5A+duK69JW/JyB8lWua1JZP7L3ypUraY8AEFevXk17hNRYh4FysJXX4QhrMZC+cl6Hy3KnBgAAAMDfETUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI12LSWlpZieno6kiRJexSALalYLMb09HQsLy+nPQrAlpQkSczOzsbCwkLao8C6yac9AKyHsbGxuHbtWiRJEnV1dfHvf/878nl/3AE2ytLSUvzf//t/Y35+PioqKuLnn3+O+vr6tMcC2DKSJImenp4YGRmJiIhz587FwYMHU54K1p6dGmxKf/75ZywvL0ehUIiZmZkYHR1NeySALeX58+cxNzcXhUIhlpaWoq+vL+2RALaUmZmZGBkZiWKxGMViMf7444+0R4J1IWqwKVVVVa36b7s0ADZWPp+PXC4XERG5XC6qq6tTnghga6msrFz1366H2axEDTalrq6uaGhoiMrKyjhw4EDs2bMn7ZEAtpT9+/dHa2trVFZWRmNjY5w8eTLtkQC2lNra2jhz5kzk8/mora2Nixcvpj0SrAu5jk2ppqYm/vu//zvtMQC2rFwuFxcuXEh7DIAt7fDhw3H48OG0x4B1ZacGAAAAkEmiBgA
AAJBJogYAAACQSaIGAAAAkEmiBgAAAJBJogYAAACQSaIGAAAAkEmiBgAAAJBJogYAAACQSaIGAAAAkEmiBgAAAJBJogYAAACQSaIGAAAAkEmiBgAAAJBJogYAAACQSaIGAAAAkEmiBgAAAJBJogYAAACQSaIGAAAAkEmiBgAAAJBJogYAAACQSaIGAAAAkEmiBgAAAJBJogYAAACQSaIGAAAAkEmiBgAAAJBJogYAAACQSaIGAAAAkEmiBgAAAJBJogYAAACQSaIGAAAAkEmiBgAAAJBJogYAAACQSaIGAAAAkEmiBgAAAJBJogYAAACQSaIGAAAAkEmiBgAAAJBJogYAAACQSaIGAAAAkEn5tAdg7d24cSNGR0fTHoMys3fv3uju7k57DNgSrMN8inUYNo51mM+xFm8+dmpsQhZwPsWfC9g4/r7xKf5cwMbx943P8Wdj87FTYxO7cuVK2iNQJq5evZr2CLAlWYdZYR2GdFiHeZ+1eHOyUwMAAADIJFEDAAAAyCRRAwAAAMgkUQMAAADIJFEDAAAAyCRRAwAAAMgkUQMAAADIJFEDAAAAyCRRAwAAAMgkUQMAAADIJFEDAAAAyCRRAwAAAMgkUQMAAADIJFEDAAAAyCRRAwAAAMgkUQMAAADIJFEDAAAAyCRRAwAAAMgkUQMAAADIJFEDAAAAyCRRAwAAAMgkUQMAAADIJFEDAAAAyCRRAwAAAMgkUQMAAADIJFEDAAAAyCRRAwAAAMgkUQMAAADIJFEDAAAAyCRRAwAAAMgkUQMAAADIJFEDAAAAyCRRAwAAAMgkUQMAAADIJFEDAAAAyCRRAwAAAMgkUQMAAADIJFEDAAAAyCRRAwAAAMgkUQMAAADIJFEDAAAAyCRRAwAAAMikfNoDsHVcvXo17RE2zJUrV9IeAeAj1mGA9G2Vtdg6zEbJJUny2Q9evXr18x8EAAAAWGdXrlzJfe5jjp8AAAAAmfTFnRoRYacGa2arbLWLsN0OKE/WYYD0bZW12DrMGvvsTg1RAwAAAChnjp8AAAAAm4uoAQAAAGSSqAEAAABkkqgBAAAAZJKoAQAAAGSSqAEAAABkkqgBAAAAZJKoAQAAAGSSqAEAAABkkqgBAAAAZJKoAQAAAGSSqAEAAABkkqgBAAAAZJKoAQAAAGSSqAEAAABkkqgBAAAAZJKoAQAAAGSSqAEAAABkkqgBAAAAZJKoAQAAAGSSqAEAAABkkqgBAAAAZJKoAQAAAGSSqAEAAABkkqgBAAAAZJKoAQAAAGSSqAEAAABkkqgBAAAAZJKoAQAAAGSSqAEAAABkkqgBAAAAZJKoAQAAAGSSqAEAAABkkqgBAAAAZFI+7QHgWyRJEgsLCzE3Nxfz8/MxPz8fy8vLkSRJ6Vsul4tcLhcVFRVRVVUVtbW1UVNTE7W1tZHP5yOXy6X9aQBkVpIksbS0FPPz86W1eGlpKYrF4kfrcC6Xi3w+X1qDa2tro7q62joM8I2Wl5dXrcMLCwuRJElpLY6IqKioKF0Tr6zDNTU1UVNTExUV7nWTXaIGmVAoFGJiYiLGxsZifHw8ZmdnS4v2ykL9NSorK0sL+s6dO6OpqSmampqirq7ORTbAe5Ikienp6RgbG4uxsbGYnp4urcOFQuGr/7+5XK60DtfV1UVjY2M0NzdHfX29i2yAD8zPz8fbt29jbGwsJicnSyFjaWnpm/6/27ZtK8XmxsbGaGpqisbGxqiqqlqjyWH95P7mBeHXv1qEbzA3Nxdv3rwpXTxPTExEsVj85I+trq5eVZvz+XypROdyuVWVemFhYVXF/tyFeHV1dSlwNDU1RXNzc+TzGiCwdSwtLZUunN++fRvj4+OfvWheCcQr6/DK7ouVtfj9dXh5ebm0Bn/pQryioqJ0Yd3c3BzNzc1RU1Oznp8yQFkpFosxPj5eWovHxsZibm7ukz92ZffFyjXxtm3bVu3MeH8X88qujpV1eGFh4bMz1NfXr7oe3rFjhxt/pOWzf/B
EDcrG7OxsDA8Px/DwcIyNjX308fd3UuzYsaN08VxZWflVv977F9dzc3OlnSBjY2MfLe6VlZWxd+/eaGtri7179wocwKa0tLQUL168iOHh4Xj58uVHMbmmpqa0Djc0NJQunr/lTt77F9dTU1OliDIzM/PRj21paYm2trZoa2sTOIBNqVgsxuvXr2N4eDhGRkZicXFx1cfz+XxpHW5sbFyTo3zFYrF0nHtmZmbVTcUPXyvu2LEj2traor29PXbu3ClwsJFEDcrT50JGZWVltLS0RHNzc2nh3qjtb0mSxNzcXKmKv3nzJiYmJlbNJnAAm8WXQsb7uySampqitrZ2w+ZaXFwsXVi/ffs23rx5s2o2gQPYLL4UMurq6mLXrl2l6+GNDAkrx79Xrolfv369ajaBgw0malA+kiSJFy9exJMnT+L169el7y/nWPCl+LJ///44cuRI1NfXpzghwD8zPj4eT548iaGhoU/GgtbW1g2NGH/nS/Flz5490dnZGbt373ZRDWTG3NxcPHv2LAYGBlbtEq6rq4v29vZoa2uL+vr6slnXvhRf6uvr4/Dhw3HgwIGv3kUNf0PUIH3Ly8sxMDAQT548idnZ2Ygo75DxOZ8LHLt3746jR4+6qAbK1kpU7u/vjzdv3pS+v1xDxud8LnDs2LEjOjs7XVQDZW1iYiIeP34cQ0NDpeMd5RoyPudzgaO6ujo6Ojqis7Mztm3blvKUbDKiBulZWlqKp0+fRn9/f2nB2759exw5ciQOHjyY6acqT01NxZMnT2JwcLD00NHGxsY4fvx47Nu3r+y/IAFbQ5IkMTQ0FH19fTE1NRUR785lHzx4MI4cORJ1dXUpT/j1FhYWYmBgIJ4+fRrz8/MR8e7ZH0ePHo2Ojo5MxHJga3j79m309fXF6Oho6ftaW1ujs7MzmpubM3vdWCwWY2hoKJ48eRLj4+MR8e7GZUdHRxw9ejQTsZxMEDXYeMViMZ49exYPHjwoPd2+sbExjh07Fq2trZlduD9lcXExnj17tircNDQ0xNmzZ6O5uTnl6YCt7OXLl9Hb2xvT09MR8f+/4M96VP5QsViM4eHhePToUUxOTkbEu7coPHXqVBw8eHBTfc0BsmVmZiZ6e3tLMWPlBX9nZ2ds37495enWTpIk8fbt23j8+HG8ePEiIt69bXdnZ2ccP358U33NIRWiBhvrzZs3cffu3dKFZUtLSxw/fnzTH81YXl6Ov/76Kx49elS6Y3jgwIE4ffq0B9kBG2p2djbu3bsXIyMjEfFuh9yxY8c2/dGMJElidHQ0Hj58WLpj2NTUFF1dXdHU1JTydMBWsry8HI8ePYrHjx9HsViMfD4fhw8f3hJHMyYmJuLRo0cxNDQUEe+C+nfffRft7e2b+rUA60rUYGPMz8/HH3/8Ec+fP4+IdxfRZ86c2XJHMT71RezkyZNx+PDhqKioSHs8YBMrFArx+PHjePToURQKhaisrIwTJ07EkSNHNnXM+NDKkZt79+6VHsDX0dERp0+fjurq6pSnAzazJEliZGQk7t27F3NzcxGxdW9yjY2NRW9vb+k5dC0tLXH27FkP2OdriBqsryRJor+/Px4+fBjLy8tRUVERx44di2PHjm2pi+gPfbjdcOfOnXH27NnYtWtXypMBm9Ho6Gj09vbGzMxMRES0tbXFmTNntvR55qWlpejr64v+/v5IkiSqqqri9OnT0dHRsaViO7Axpqen4+7du/Hq1auIcBw54t3rhL/++ivu378fi4uLkcvl4vDhw3Hy5ElHUvgnRA3Wz/z8fPT09JTennXfvn1x5syZTD94bq29ePEient7S+/6cuLEiThx4oQLamBNFIvFuH//fvT390fEu4Da1dUVu3fvTnmy8jE1NRV3794tfa3au3dv/PDDD3ZtAGtmcHAwfv/99ygUClFVVRWnTp2KQ4cOud77/ywuLsaDBw/i6dOnEfHuHV8uXboUDQ0NKU9GRogarI+XL1/G7du3Y2FhIbZt2xb
ff/997Nu3L+2xylKhUIi+vr7o6+uLiIhdu3bFhQsXttw2RGBtzc7Oxs2bN2N8fDxyuVycPHkyjh496qjbJyRJEsPDw/H777/H0tJS1NbWxsWLF7f0HVTg2xUKhejt7Y2BgYGIiGhvb4+urq5N/9yMrzUxMRG3b9+OycnJqKioiK6uLrvn+E+IGqytJEni4cOH8fDhw4jwAv2fePXqVfT09JRC0IULF9xNBb7KyMhI/Pbbb16g/0Ozs7Nx69atGBsbi1wuF6dPn47Ozk4X1MA/NjU1Fbdu3fIC/R/6VAg6d+6c4yh8iajB2vnwuImjFP+c30PgW3x43GTfvn1x/vx5Ryn+Ab+HwLd6/7iJoxRfx+8h/4CowdqYnJyM69evx/z8vF0G3+jD3S779u2LixcvbukHqwJ/b2lpKX799dd4/fq1XQZr4P3dLnV1dXH58mXPhAK+KEmS+PPPP+PRo0cRYZfBt/pwt8ulS5ccZ+dTRA2+3djYWNy4cSMWFxejubk5Ll265LjJGnj58mXcunUrlpaWYteuXfHjjz/6ogh80uLiYly/fj3Gx8dj27Zt8eOPPzpusgZmZ2fj119/jYmJiaipqYnLly97u0Hgk5Ikibt378azZ88il8tFV1eXh4GugUKhEL///nsMDg5GLpeLH374Ifbv35/2WJQXUYNv8/r167hx40YUCoXYu3dvXLp0yY6CNTQ5ORnXrl2LhYWFaGxsjMuXL9sCDawyNzcX169fj6mpqdi+fXv89NNPdhSsoaWlpfjll1/izZs3UV1dHd3d3dHU1JT2WEAZKRaLcfv27RgaGoqKioq4ePFitLa2pj3WppEkSdy/fz8eP34cERHnzp2LQ4cOpTsU5UTU4Ou9ePEibt68GcViMfbv3x/nz5/3VP11MDMzE9euXYvZ2dnYuXNnXL58OWpra9MeCygD1oeNUSgU4ubNmzE6OhqVlZXR3d0du3btSnssoAx8uD7861//cgR7nfT19cWff/4ZERGnTp2K48ePpzwRZULU4OsMDg7Gb7/9FkmSxKFDh+Ls2bO2160jd2KBD9nJtbGKxWL89ttv8fz5c3digYhYvZOrqqoqLl++bCfXOnv69GncvXs3IiE6LcMAABu4SURBVCKOHj0ap0+f9hoEUYN/bnh4OG7evBkREceOHYtTp05ZTDbA+2fma2tr49///rc7srBFzczMxP/8z//E4uKiZ+5soA/PzHd3d8eePXvSHgtIQaFQiOvXr8ebN288c2eDPX/+PG7fvh1JksTx48fj1KlTaY9Euj77QtQZAj7pzZs30dPTExERJ0+eVEc3UHV1dfz000/R1NQUc3NzcePGjVhaWkp7LGCDLSwsxPXr12NxcTF2794d3d3dgsYGyeVycfbs2Thy5EgkSRK//vprjI+Ppz0WsMGSJInbt2+XgsbPP/8saGyg/fv3x6VLlyLi3ZGUp0+fpjwR5aosd2rcuHEjRkdH0/ilAT5p79690d3dnfYYG8Y6DJSbrbYOR1iLgfKS8jqcrZ0aFm+g3Gy1dWmrfb5A+duK69JW/JyB8lWua1I+7QG+5MqVK2mPABBXr15Ne4TUWIeBcrCV1+EIazGQvnJeh8typwYAAADA3xE1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMyqc9AKyH5eXl6O3tjYmJiTh
06FAcOnQo7ZEAtpzHjx/H4OBgtLS0xJkzZ6Kiwr0UgI30+vXruH//flRXV8e5c+eitrY27ZFgzbm6YFP6448/4vnz5zExMRH37t2Lt2/fpj0SwJYyOjoaDx48iMnJyRgYGIiHDx+mPRLAlrK4uBg3btyIsbGxGB0djV9++SXtkWBdiBpsSpOTk1EsFkv/PTMzk+I0AFvP9PR0aR0uFosxNTWV8kQAW8v8/Pyq/56dnU1pElhfogabUmdnZ1RWVpa+7dmzJ+2RALaU1tbWyOfzkc/no7KyMg4fPpz2SABbys6dO2Pnzp2l62HHsdmsPFODTamtrS127NgR09PTsWvXrqiurk57JIAtZfv27fF//s//ibdv30Z9fX3s2LEj7ZEAtpRcLhc///xzvHr1KqqqqqKlpSXtkWBdiBpsWvX19VFfX5/2GABbVk1NTbS1taU9BsCWVVlZGfv27Ut7DFhXjp8AAAAAmSRqAAAAAJkkagAAAACZJGoAAAAAmSRqAAAAAJkkagAAAACZJGoAAAAAmSRqAAAAAJkkagAAAACZJGoAAAAAmSRqAAAAAJkkagAAAACZJGoAAAAAmSRqAAAAAJkkagAAAACZJGoAAAAAmSRqAAAAAJkkagAAAACZJGoAAAAAmSRqAAAAAJkkagAAAACZJGoAAAAAmSRqAAAAAJkkagAAAACZJGoAAAAAmSRqAAAAAJkkagAAAACZJGoAAAAAmSRqAAAAAJkkagAAAACZJGoAAAAAmSRqAAAAAJkkagAAAACZJGoAAAAAmSRqAAAAAJkkagAAAACZJGoAAAAAmSRqAAAAAJkkagAAAACZJGoAAAAAmZRPewDW3o0bN2J0dDTtMSgze/fuje7u7rTHgC3BOsynWIdh41iH+Rxr8eZjp8YmZAHnU/y5gI3j7xuf4s8FbBx/3/gcfzY2Hzs1NrErV66kPQJl4urVq2mPAFuSdZgV1mFIh3WY91mLNyc7NQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATBI1AAAAgEwSNQAAAIBMEjUAAACATMqnPQBbx9WrV9MeYcNcuXIl7REAPmIdBkjfVlmLrcNslFySJJ/94NWrVz//QQAAAIB1duXKldznPub4CQAAAJBJX9ypERF2arBmtspWuwjb7YDyZB0GSN9WWYutw6yxz+7UEDUAAACAcub4CQAAALC5iBoAAABAJokaAAAAQCaJGgAAAEAmiRoAAABAJokaAAAAQCaJGgAAAEAmiRoAAABAJokaAAAAQCaJGgAAAEAmiRoAAABAJokaAAAAQCaJGgAAAEAmiRoAAABAJokaAAAAQCaJGgAAAEAmiRoAAABAJokaAAAAQCaJGgAAAEAmiRoAAABAJokaAAAAQCaJGgAAAEAmiRoAAABAJokaAAAAQCaJGgAAAEAmiRoAAABAJokaAAAAQCaJGgAAAEAmiRoAAABAJokaAAAAQCaJGgAAAEAmiRoAAABAJokaAAAAQCaJGgAAAEAm5dMeANZSsViMQqEQSZJEsViMJEmioqIicrlc5HK5yOfzkcvl0h4TYNNKkiSWl5dXrcO5XK60FldWVkZFhXsqAOslSZIoFAqlNbhYLJauhSsqKqKioiIqKyvTHhPWjKhBpiwvL8f4+HjMzs7G3NxczM/Px/z8fOnfFxYWvvjzc7lc1NT
URG1t7Uf/3LFjR9TX14seAF9QLBZjcnIypqenV62/76/JSZJ88f+xbdu2T67D27dvj4aGhsjnXZ4AfE6SJDE7OxuTk5MxNzf3yWviQqHwxf9HVVXVJ6+JV9bhbdu2bdBnA98u9zcXHl++KoF1lCRJTE9Px9u3b2NsbCzGxsZicnLyb3/eym6MlTuBSZKUKvXfLfCVlZXR1NS06ltNTc2afD4AWTQ3N1dag8fGxmJ8fPw/Wkvf3yUXEaU7hsvLy1/8ublcLurr66OpqSmam5ujqakp6urqBGdgy1paWorx8fFV18SLi4tf/DkrOzJW1uKIWHU9/Hfxua6ubtX1cENDg112pO2zFwKiBmVlcXExXrx4EcPDw/HmzZuPLn5XLnZ37tz5ybpcU1PzxQvfQqHw0Z3FlW+Tk5MxOzv70c/Zvn177N69O9ra2mLXrl0WdGBTKxaL8erVqxgaGorXr1/H3NzcRz+mrq4u6uvro7a29pPr8Je2NReLxVhYWPhoh8fc3FxMTU19Ml5XVVVFS0tLtLW1xb59+6KqqmpNP2eAcpIkSUxMTMTQ0FCMjo7G1NTURz+muro6GhsbY/v27Z+8Jv7SOpkkSSwuLn7ymnh2dvaT8bqioiKampqitbU12traora2ds0/b/gbogblayVkDA0NxatXr1aV45qamtKduqampmhsbFzXM4Dz8/Or7kiOjY2tWtSrq6ujtbU12tvbo6WlReAANoX3Q8aLFy9iaWmp9LF8Pl9ag5ubm6OxsXFdtyWvHDMcGxsr3ZV8/2hhRUVF7NmzJ9rb22Pv3r0CB7ApvB8yhoeHV91oy+Vy0dDQsOqaePv27eu2g61YLMbU1NSqnSHT09OrfkxTU1O0t7cLHGwkUYPyUiwWY2RkJAYHB+Ply5erQsauXbtKF6tpL5IrX2BGRkZiaGgoZmZmSh+rrq6Otra26OjoiMbGxhSnBPjnkiSJsbGxGBgYiJGRkVUho76+Ptra2qK1tTV27tyZ6tGPJElibm5u1S6+FSuB4+DBg7Fv3z5HVIDMmZ2djWfPnsXQ0NCqkLFt27Zoa2uLtra2aGpqSv3BnouLi/Hq1asYHh6O0dHRVTf9mpqaYv/+/XHgwAGhmfUkalAeFhcX49mzZ/H06dOYn58vff9KyGhtbS3bBxMlSRKTk5MxPDz8UeBoaWmJI0eOxL59++zeAMpasViM4eHh6O/vj/Hx8dL3r4SMtra22LlzZ4oTftnc3FyMjIx8FDi2b98eR44ciYMHD7qoBspakiTx9u3b6O/vj5GRkdL3vx8yWlpayjbULi8vx+jo6EeBI5/PR0dHRxw+fDjq6upSnpJNSNQgXXNzc9Hf3x/Pnj0rLXw7duyIw4cPR3t7e9mGjM9ZCRyDg4MxMDBQevZHXV1dHDt2LA4cOCBuAGWlUCjEwMBAPH78uPScjOrq6ujo6IgDBw6Udcj4nLm5uRgaGoqnT5+W7nDm8/k4fPhwdHZ2Zu5rC7C5JUkSo6Oj0dfXF2NjYxHx7mhJe3t7dHR0lHXI+Jzl5eV48eJFPHv2bFVo3r9/fxw7dizq6+tTnI5NRtQgHQsLC/HgwYP466+/olgsRkTEnj17orOzM3bv3p25hftTlpaWYnBwMJ48eVLavVFTUxPHjx+Pjo4OcQNIVbFYjCdPnsTjx49Lz6bYsWNHHD16NPbv35/6lua1kCRJvHjxIvr7+0sX1ZWVlXHo0KE4fvx4VFdXpzwhsJWtxIw///yz9DDk6urqOHToUBw6dCj149ZrZXx8PJ48eRLPnz8vHS3ft29fnD59OpPhnLIjarCxisViPH36NB48eFDaxdDW1hbHjh3btM+fWNnS3dfXV3pKdX19fZw9ezZaWlpSng7Yil6+fBm9vb2lB7w1NDTE8ePHo7W1dVNE5U95+/Zt9PX1xejoaES8e+Fw+vTpOHjw4Kb9nIHyNT09Hffu3SutSdu2bYujR4/GoUOHIp/Ppzzd+pidnY3Hjx/HwMB
AFIvFyOVyceTIkThx4oTjgXwLUYON8/r16+jt7S2V6D179sR33323ZbafJUkSIyMjce/evdIW7/3798d3330XNTU1KU8HbAWzs7Nx79690lnturq6OHPmTOzdu3fLvLAfHx+Pe/fulXZuNDY2xtmzZ6OpqSnlyYCtYHl5OR49ehSPHz+OYrEY+Xw+Tp48GYcOHdoUO+T+E/Pz8/Hw4cN49uxZRLwLOt99913s379/y3wtYk2JGqy/ubm5uH//fjx//jwi3j207cyZM1v2ifTLy8vx+PHjePTo0aovZocPH3YkBVgXhUKhtO4UCoWorKyMEydORGdn55Zcd5IkiaGhofjjjz9KD6fu6OiIU6dOed4GsC4+dXPrwIEDcfr06S17c2tsbCx6e3tLzxFpaWmJrq6uaGhoSHkyMkbUYH0NDg7G77//HoVCISoqKuLYsWNx7NixLVOiv2RmZibu3bsXL168iIh3R1IuXrzobCGwpsbHx+PWrVulZ/u0t7fHd999t2nOan+LpaWl6Ovri/7+/kiSJKqqquL8+fPR2tqa9mjAJrK4uBi//fZb6ZqvoaEhzp49G83NzSlPlr4kSeKvv/6K+/fvx+LiYkREnDhxIk6cOLElb37yVUQN1kehUIje3t4YGBiIiHcPAzpz5oy3cfqEFy9eRG9vb8zOzkZlZWV8//33sX///rTHAjIuSZJ49uxZ3Lt3L4rFYuzYsSPOnj0bu3fvTnu0sjM1NRV3796N169fR0REZ2dnnD59ekvuYgHW1tu3b+PWrVsxNzcXVVVVcerUqTh06JAX7B9YXFyMBw8exNOnTyMiYteuXXHhwoUtu4uFf0TUYO1NTU3FrVu3YnJyMioqKqKrqys6Ojos3l+wtLQUv//+ewwNDUXEu23QXV1ddrQAX8Wa8s8lSRL9/f1x//79SJIkmpqa4uLFi7F9+/a0RwMyyJrydV69ehU9PT2xsLAQ27ZtiwsXLojx/B1Rg7X1/PnzuHPnThQKhairq4tLly45F/cfSpIkBgYGore3N4rFouMowFeZmJiImzdvxszMjN1fX+HDu6o//PBD7Nu3L+2xgAxZXFyM27dvl97ZxO6vf2Z+fj56enpKu+ccR+FviBqsjWKxGL29vaWnGLe3t8e5c+e8PdNX+PAFyYULF5zvBv4jg4ODcefOHWH0G334guTYsWNx6tQpF9TA35qYmIhffvmlFEY9p+frJEkSDx8+jIcPH0bEu+MoP/74o9cWfIqowbcrFArR09MTIyMjjpuskQ+3jp8/fz4OHjyY8lRAOevv74979+5FhOMmayFJknj8+HH8+eefkSRJHDx4MM6dO+dOK/BZb968iRs3bsTy8nI0NjbGpUuXHDf5Ri9fvozbt2/HwsJCNDQ0xOXLl71LFR8SNfg2y8vL8euvv8arV68in89Hd3d3tLS0pD3WpvBhoT5z5kx0dnamPBVQbqwV6+vly5fx66+/RqFQiNbW1rhw4YJYBHxkdHQ0bt68aa1YB7Ozs3Ht2rWYmZmJurq6+Omnn8Qi3idq8PUWFxfjxo0bMTY2Ftu2bYvLly97fsY6eP/uqzOFwPuSJIne3t7S0+Lt6lof79993b17d/z444+Rz+fTHgsoE0NDQ9HT02NX1zqan5+P69evx+TkZNTW1sbly5cdr2SFqMHX+XBh+emnn2LHjh1pj7VpDQwMxJ07dyIi4vDhw9HV1SVswBZXLBbjzp07MTg4GBUVFXHhwoVoa2tLe6xNa2JiIq5fvx4LCwvR1NQU3d3dUV1dnfZYQMrev0br7OyM7777zjXaOllaWoobN27E27dvo7q6Oi5fvhyNjY1pj0X6RA3+ufn5+fjf//3fmJmZiR07dsRPP/0UtbW1aY+16Q0PD0dPT08Ui8Xo6OiIc+fO+aIJW1SSJHHr1q0YHh6OysrK+PHHH2PPnj1pj7XpTU9Px7Vr12Jubi7q6+vj559/9tA62MKePn0ad+/ejYiIkydPxvHjx12
brbPl5eW4efNmvHz5MvL5fPz000/R1NSU9lik67N/6eyX4pNWCunMzEw0NDTEzz//LGhskLa2tvjXv/4VlZWVMTAwEA8ePEh7JCAFSZLE3bt3Y3h4uHRBJ2hsjB07dsS///3vqKuri8nJyfjll1+iUCikPRaQguHh4VLQ6Orqcjx4g+Tz+fjXv/4VbW1tsby8HL/88kvMzMykPRZlStTgI8ViMW7evBkTExNRV1fn6cMp2LNnT1y8eDFyuVz09fWVztEDW8ejR4/i2bNnUVFREd3d3dHc3Jz2SFvKypHLmpqaePPmTdy+fTv+ZncrsMm8fv06enp6IiLi1KlTceTIkZQn2lpWjlzu3r07FhYWSkcD4UNlefzkxo0bpfeMBygHe/fuje7u7rTH2DDWYaDcbLV1OMJaDJSXlNfhbB0/sXgD5WarrUtb7fMFyt9WXJe24ucMlK9yXZPK+n3Krly5kvYIAHH16tW0R0iNdRgoB1t5HY6wFgPpK+d1uCx3agAAAAD8HVEDAAAAyCRRAwAAAMgkUQMAAADIJFEDAAAAyCRRAwAAAMgkUQMAAADIJFEDAAAAyCRRAwAAAMgkUQMAAADIJFEDAAAAyCRRAwAAAMgkUQMAAADIJFEDAAAAyCRRAwDg/7VvxyqNpmEYht+JIoKKoDC2ItioaCUoWM0J5FhzBoOVnaXdgBBFsVExiEXMVusuu7rDgvr55L+uNs1TxBe5+QIARBI1AAAAgEiiBgAAABBJ1AAAAAAiiRoAAABAJFEDAAAAiCRqAAAAAJFEDQAAACCSqAEAAABEEjUAAACASKIGAAAAEEnUAAAAACKJGgAAAEAkUQMAAACIJGoAAAAAkUQNAAAAIJKoAQAAAEQSNQAAAIBIogYAAAAQSdQAAAAAIokaAAAAQCRRAwAAAIgkagAAAACRRA0AAAAgkqjB1Lq9va3hcFhPT0+tpwB00uPjYw2Hw7q/v289BaCTxuNxXV5e1s3NTU0mk9Zz4EPMth4AH+Hi4qJOT0+rqqrX69WPHz9qfn6+8SqA7hiNRvXz58+Xf6L39/drbW2t8SqA7nh+fq7j4+MajUZVVbW+vl47OzuNV8H781KDqfTr168aj8c1Ho/r+fm5bm5uWk8C6JSrq6uXOzwej+v8/Lz1JIBOeXh4qNFo5A4z9UQNptLy8nL1en99vRcWFhquAeiepaWllzvc6/VqeXm58SKAbvnnK+XFxcVGS+Bj+fkJU2l7e7smk0nd3d3V+vp6raystJ4E0Cnfv3+vra2tGg6HtbKyUpubm60nAXTK3NxcHR4e1tnZWc3NzdXu7m7rSfAhRA2m0szMTO3t7bWeAdBpGxsbtbGx0XoGQGetrq7W0dFR6xnwofz8BAAAAIgkagAAAACRRA0AAAAgkqgBAAAARBI1AAAAgEiiBgAAABBJ1AAAAAAiiRoAAABAJFEDAAAAiCRqAAAAAJFEDQAAACCSqAEAAABEEjUAAACASKIGAAAAEEnUAAAAACKJGgAAAEAkUQMAAACIJGoAAAAAkUQNAAAAIJKoAQAAAEQSNQAAAIBIogYAAAAQSdQAAAAAIokaAAAAQCRRAwAAAIgkagAAAACRRA0AAAAgkqgBAAAARBI1AAAAgEiiBgAAABBJ1AAAAAAiiRoAAABAJFEDAAAAiCRqAAAAAJFEDQAAACCSqAEAAABEEjUAAACASKIGAAAAEEnUAAAAACKJGgAAAECk2dYDeH8nJyd1fX3degZfzNraWh0cHLSeAZ3gDvMadxg+jzvMW9zi6eOlxhRywHmN7wV8Hn9vvMb3Aj6Pvzfe4rsxfbzUmGL9fr/1BL6IwWDQegJ0kjvMn9xhaMMd5u/c4unkpQYAAAAQSdQAAAAAIokaAAAAQCRRAwAAAIgkagAAAACRRA0AAAAgkqgBAAAARBI1AAAAgEiiBgAAABBJ1AAAAAAiiRoAAABAJFEDAAAAiCRqAAAAAJFEDQAAACCSqAEAAABEEjUAAAC
ASKIGAAAAEEnUAAAAACKJGgAAAEAkUQMAAACIJGoAAAAAkUQNAAAAIJKoAQAAAEQSNQAAAIBIogYAAAAQSdQAAAAAIokaAAAAQCRRAwAAAIgkagAAAACRRA0AAAAgkqgBAAAARBI1AAAAgEiiBgAAABBJ1AAAAAAiiRoAAABAJFEDAAAAiCRqAAAAAJFEDQAAACCSqAEAAABEEjUAAACASKIGAAAAEEnUAAAAACLNth5AdwwGg9YTPk2/3289AeBf3GGA9rpyi91hPsu3yWTy5oeDweDtDwEAAAA+WL/f//bWZ35+AgAAAET6z5caVeWlBu+mK0/tqjy3A74mdxigva7cYneYd/bmSw1RAwAAAPjK/PwEAAAAmC6iBgAAABBJ1AAAAAAiiRoAAABAJFEDAAAAiCRqAAAAAJFEDQAAACCSqAEAAABEEjUAAACASKIGAAAAEEnUAAAAACKJGgAAAEAkUQMAAACIJGoAAAAAkUQNAAAAIJKoAQAAAEQSNQAAAIBIs7/5/NunrAAAAAD4n7zUAAAAACKJGgAAAEAkUQMAAACIJGoAAAAAkUQNAAAAIJKoAQAAAET6A8JdA1LEtvOIAAAAAElFTkSuQmCC\n", 128 | "text/plain": [ 129 | "
" 130 | ] 131 | }, 132 | "metadata": {} 133 | } 134 | ] 135 | } 136 | ] 137 | } -------------------------------------------------------------------------------- /football/kloppy_wyscout.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from kloppy import WyscoutSerializer\n", 10 | "import requests\n", 11 | "import zipfile\n", 12 | "import os" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "# kloppy serializer\n", 22 | "serializer = WyscoutSerializer()" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "def download_url(url, save_path, chunk_size=128, json=False):\n", 32 | " '''Souce: https://stackoverflow.com/questions/9419162/download-returned-zip-file-from-url '''\n", 33 | " r = requests.get(url, stream=True)\n", 34 | " if json:\n", 35 | " r.encoding = 'unicode-escape'\n", 36 | " with open(save_path, 'wb') as fd:\n", 37 | " for chunk in r.iter_content(chunk_size=chunk_size):\n", 38 | " fd.write(chunk)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# saves in the current directory the Wyscout events.zip file\n", 48 | "save_path = os.path.join('events.json')\n", 49 | "download_url('https://ndownloader.figshare.com/files/14464685', save_path)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "# extract the zip files in the current directory (e.g. 
events_World_Cup.json)\n", 59 | "extract_path = os.path.join('')\n", 60 | "with zipfile.ZipFile(save_path, 'r') as zip_ref:\n", 61 | " zip_ref.extractall(extract_path)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "# currently not working\n", 71 | "with open('events_World_Cup.json', \"rb\") as event_data:\n", 72 | " dataset = serializer.deserialize(inputs={'event_data': event_data})" 73 | ] 74 | } 75 | ], 76 | "metadata": { 77 | "kernelspec": { 78 | "display_name": "Python 3", 79 | "language": "python", 80 | "name": "python3" 81 | }, 82 | "language_info": { 83 | "codemirror_mode": { 84 | "name": "ipython", 85 | "version": 3 86 | }, 87 | "file_extension": ".py", 88 | "mimetype": "text/x-python", 89 | "name": "python", 90 | "nbconvert_exporter": "python", 91 | "pygments_lexer": "ipython3", 92 | "version": "3.7.9" 93 | } 94 | }, 95 | "nbformat": 4, 96 | "nbformat_minor": 4 97 | } 98 | -------------------------------------------------------------------------------- /football/scrape_fbref.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "scrape_fbref.ipynb", 7 | "provenance": [], 8 | "authorship_tag": "ABX9TyM5/QSze4o6cPLxZnYWR98H", 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "language_info": { 16 | "name": "python" 17 | } 18 | }, 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "id": "view-in-github", 24 | "colab_type": "text" 25 | }, 26 | "source": [ 27 | "\"Open" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 1, 33 | "metadata": { 34 | "id": "xUucUiEAmqqm" 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "import requests\n", 39 | "from bs4 import BeautifulSoup\n", 40 | "import pandas as pd\n", 41 | 
"import numpy as np\n", 42 | "\n", 43 | "def get_soup(url):\n", 44 | " headers = {'User-Agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) '\n", 45 | " 'Chrome/39.0.2171.95 Safari/537.36')}\n", 46 | " r = requests.get(url, headers=headers)\n", 47 | " r.encoding = 'unicode-escape'\n", 48 | " return BeautifulSoup(r.content, 'html.parser')\n", 49 | "\n", 50 | "def get_data_from_table(table, data_type, skip_rows):\n", 51 | " \"\"\"Helper method to get the data from a table. \"\"\"\n", 52 | " # https://stackoverflow.com/questions/42285417/how-to-preserve-links-when-scraping-a-table-with-beautiful-soup-and-pandas\n", 53 | " if data_type == 'title':\n", 54 | " data = [[td.a.get('title') if td.find('a') else ''.join(td.stripped_strings) for td in row.find_all('td')]\n", 55 | " for row in table.find_all('tr')]\n", 56 | " if data_type == 'link':\n", 57 | " data = [[td.a['href'] if td.find('a') else ''.join(td.stripped_strings) for td in row.find_all('td')]\n", 58 | " for row in table.find_all('tr')]\n", 59 | " else:\n", 60 | " data = [[td.a.string if td.find('a') else ''.join(td.stripped_strings) for td in row.find_all('td')]\n", 61 | " for row in table.find_all('tr')] \n", 62 | " \n", 63 | " data = [d for d in data if len(d)!=0][0::skip_rows]\n", 64 | " \n", 65 | " return data\n", 66 | "\n", 67 | "def get_fbref_big5(url):\n", 68 | " soup = get_soup(url)\n", 69 | " df = pd.read_html(str(soup))[0]\n", 70 | " \n", 71 | " # column names - collapse the multiindex\n", 72 | " col1 = list(df.columns.get_level_values(0))\n", 73 | " col1 = ['' if c[:7]=='Unnamed' else c.replace(' ', '_').lower() for c in col1]\n", 74 | " col2 = list(df.columns.get_level_values(1))\n", 75 | " col2 = [c.replace(' ', '_').lower() for c in col2]\n", 76 | " cols = [f'{c}_{col2[i]}' if c != '' else col2[i] for i, c in enumerate(col1)]\n", 77 | " df.columns = cols\n", 78 | " \n", 79 | " # remove lines that are the header row repeated\n", 80 | " df = df[df.rk != 
'Rk'].copy()\n", 81 | " \n", 82 | " # add the url for the player profile and match logs\n", 83 | " # https://stackoverflow.com/questions/42285417/how-to-preserve-links-when-scraping-a-table-with-beautiful-soup-and-pandas\n", 84 | " parsed_table = soup.find_all('table')[0]\n", 85 | " data = [[td.a['href'] if td.find('a') else ''.join(td.stripped_strings) for td in row.find_all('td')]\n", 86 | " for row in parsed_table.find_all('tr')]\n", 87 | " data = [d for d in data if len(d)!=0]\n", 88 | " match_log = [d[-1] for d in data]\n", 89 | " player_profile = [d[0] for d in data]\n", 90 | " df['match_link'] = match_log\n", 91 | " df['player_link'] = player_profile\n", 92 | " \n", 93 | " # remove players who haven't played a minute from the playing time table\n", 94 | " if 'playing_time_mp' in df.columns:\n", 95 | " df = df[df.playing_time_mp != '0'].copy()\n", 96 | " df.reset_index(drop=True, inplace=True)\n", 97 | " df['rk'] = df.index + 1\n", 98 | " \n", 99 | " # drop the matches column\n", 100 | " df.drop('matches', axis='columns', inplace=True)\n", 101 | "\n", 102 | " # columns to numeric columns\n", 103 | " df[df.columns[6:-2]] = df[df.columns[6:-2]].apply(pd.to_numeric, errors='coerce', axis='columns')\n", 104 | " return df" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "source": [ 110 | "url = 'https://fbref.com/en/comps/Big5/keepersadv/players/Big-5-European-Leagues-Stats'" 111 | ], 112 | "metadata": { 113 | "id": "Xet6H4pZnDXE" 114 | }, 115 | "execution_count": 2, 116 | "outputs": [] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "source": [ 121 | "df_gk = get_fbref_big5(url)" 122 | ], 123 | "metadata": { 124 | "id": "FdOe3aqZnci5" 125 | }, 126 | "execution_count": 3, 127 | "outputs": [] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "source": [ 132 | "df_gk.head()" 133 | ], 134 | "metadata": { 135 | "colab": { 136 | "base_uri": "https://localhost:8080/", 137 | "height": 477 138 | }, 139 | "id": "hyIRd04BnfpC", 140 | "outputId": 
"1acbe9b6-0532-40f1-e35a-2af9be1b5316" 141 | }, 142 | "execution_count": 4, 143 | "outputs": [ 144 | { 145 | "output_type": "execute_result", 146 | "data": { 147 | "text/plain": [ 148 | " rk player nation pos squad comp age \\\n", 149 | "0 1 Julen Agirrezabala es ESP GK Athletic Club es La Liga NaN \n", 150 | "1 2 Doğan Alemdar tr TUR GK Rennes fr Ligue 1 NaN \n", 151 | "2 3 Alisson br BRA GK Liverpool eng Premier League NaN \n", 152 | "3 4 Alphonse Areola fr FRA GK West Ham eng Premier League NaN \n", 153 | "4 5 Kepa Arrizabalaga es ESP GK Chelsea eng Premier League NaN \n", 154 | "\n", 155 | " born 90s goals_ga ... goal_kicks_launch% goal_kicks_avglen \\\n", 156 | "0 2000.0 4.0 5.0 ... 38.5 38.3 \n", 157 | "1 2002.0 5.0 4.0 ... 86.2 57.7 \n", 158 | "2 1992.0 26.0 18.0 ... 43.5 39.1 \n", 159 | "3 1993.0 1.0 1.0 ... 71.4 53.2 \n", 160 | "4 1994.0 4.0 2.0 ... 28.6 30.2 \n", 161 | "\n", 162 | " crosses_opp crosses_stp crosses_stp% sweeper_#opa sweeper_#opa/90 \\\n", 163 | "0 35.0 2.0 5.7 5.0 1.25 \n", 164 | "1 46.0 1.0 2.2 7.0 1.40 \n", 165 | "2 166.0 17.0 10.2 38.0 1.46 \n", 166 | "3 13.0 1.0 7.7 0.0 0.00 \n", 167 | "4 31.0 1.0 3.2 5.0 1.25 \n", 168 | "\n", 169 | " sweeper_avgdist match_link \\\n", 170 | "0 17.3 /en/players/a2c1a8d3/matchlogs/2021-2022/keepe... \n", 171 | "1 14.3 /en/players/9e17ccff/matchlogs/2021-2022/keepe... \n", 172 | "2 17.6 /en/players/7a2e46a8/matchlogs/2021-2022/keepe... \n", 173 | "3 7.0 /en/players/2f965a72/matchlogs/2021-2022/keepe... \n", 174 | "4 16.2 /en/players/28d596a0/matchlogs/2021-2022/keepe... \n", 175 | "\n", 176 | " player_link \n", 177 | "0 /en/players/a2c1a8d3/Julen-Agirrezabala \n", 178 | "1 /en/players/9e17ccff/Dogan-Alemdar \n", 179 | "2 /en/players/7a2e46a8/Alisson \n", 180 | "3 /en/players/2f965a72/Alphonse-Areola \n", 181 | "4 /en/players/28d596a0/Kepa-Arrizabalaga \n", 182 | "\n", 183 | "[5 rows x 36 columns]" 184 | ], 185 | "text/html": [ 186 | "\n", 187 | "
\n", 188 | "
\n", 189 | "
\n", 190 | "\n", 203 | "\n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | "
rkplayernationpossquadcompageborn90sgoals_ga...goal_kicks_launch%goal_kicks_avglencrosses_oppcrosses_stpcrosses_stp%sweeper_#opasweeper_#opa/90sweeper_avgdistmatch_linkplayer_link
01Julen Agirrezabalaes ESPGKAthletic Clubes La LigaNaN2000.04.05.0...38.538.335.02.05.75.01.2517.3/en/players/a2c1a8d3/matchlogs/2021-2022/keepe.../en/players/a2c1a8d3/Julen-Agirrezabala
12Doğan Alemdartr TURGKRennesfr Ligue 1NaN2002.05.04.0...86.257.746.01.02.27.01.4014.3/en/players/9e17ccff/matchlogs/2021-2022/keepe.../en/players/9e17ccff/Dogan-Alemdar
23Alissonbr BRAGKLiverpooleng Premier LeagueNaN1992.026.018.0...43.539.1166.017.010.238.01.4617.6/en/players/7a2e46a8/matchlogs/2021-2022/keepe.../en/players/7a2e46a8/Alisson
34Alphonse Areolafr FRAGKWest Hameng Premier LeagueNaN1993.01.01.0...71.453.213.01.07.70.00.007.0/en/players/2f965a72/matchlogs/2021-2022/keepe.../en/players/2f965a72/Alphonse-Areola
45Kepa Arrizabalagaes ESPGKChelseaeng Premier LeagueNaN1994.04.02.0...28.630.231.01.03.25.01.2516.2/en/players/28d596a0/matchlogs/2021-2022/keepe.../en/players/28d596a0/Kepa-Arrizabalaga
\n", 353 | "

5 rows × 36 columns

\n", 354 | "
\n", 355 | " \n", 365 | " \n", 366 | " \n", 403 | "\n", 404 | " \n", 428 | "
\n", 429 | "
\n", 430 | " " 431 | ] 432 | }, 433 | "metadata": {}, 434 | "execution_count": 4 435 | } 436 | ] 437 | } 438 | ] 439 | } -------------------------------------------------------------------------------- /football/statsbomb_duckdb/360_v1.py: -------------------------------------------------------------------------------- 1 | with raw_json as ( 2 | select 3 | * replace(unnest(freeze_frame) as freeze_frame) 4 | from 5 | read_json( 6 | $filename, 7 | format = 'array', 8 | columns = {event_uuid: varchar, 9 | freeze_frame: 'struct(teammate boolean, actor boolean, keeper boolean, location double[])[]'}, 10 | filename = true 11 | ) 12 | ) 13 | select 14 | cast(split(split(filename, '/') [-1], '.') [1] as integer) as match_id, 15 | event_uuid, 16 | freeze_frame.teammate, 17 | freeze_frame.actor, 18 | freeze_frame.keeper, 19 | freeze_frame.location [1] as x, 20 | freeze_frame.location [2] as y, 21 | from 22 | raw_json; -------------------------------------------------------------------------------- /football/statsbomb_duckdb/360_visible_v1.py: -------------------------------------------------------------------------------- 1 | with raw_json as ( 2 | select 3 | * 4 | from 5 | read_json( 6 | $filename, 7 | format = 'array', 8 | columns = {event_uuid: varchar, 9 | visible_area: 'double[]'}, 10 | filename = true 11 | ) 12 | ) 13 | select 14 | cast(split(split(filename, '/') [-1], '.') [1] as integer) as match_id, 15 | event_uuid, 16 | visible_area 17 | from 18 | raw_json; -------------------------------------------------------------------------------- /football/statsbomb_duckdb/competition_v4.py: -------------------------------------------------------------------------------- 1 | with raw_json as ( 2 | select 3 | * 4 | from 5 | read_json( 6 | $filename, 7 | format = 'array', 8 | columns = {'competition_id': integer, 9 | 'season_id': integer, 10 | 'country_name': varchar, 11 | 'competition_name': varchar, 12 | 'competition_gender': varchar, 13 | 'competition_youth': boolean, 14 | 
'competition_international': boolean, 15 | 'season_name': varchar, 16 | 'match_updated': varchar, 17 | 'match_updated_360': varchar, 18 | 'match_available_360': varchar, 19 | 'match_available': varchar} 20 | ) 21 | ), 22 | final as ( 23 | select 24 | * replace(case when match_updated is null then null else cast(left(concat(replace(match_updated, 'T', ' '), ':00'), 19) as timestamp) end as match_updated, 25 | case when match_available is null then null else cast(left(concat(replace(match_available, 'T', ' '), ':00'), 19) as timestamp) end as match_available, 26 | cast(match_available_360 as timestamp) as match_available_360, 27 | cast(match_updated_360 as timestamp) as match_updated_360 28 | ) 29 | from 30 | raw_json) 31 | select 32 | * 33 | from 34 | final; -------------------------------------------------------------------------------- /football/statsbomb_duckdb/events_freeze_v7.py: -------------------------------------------------------------------------------- 1 | with raw_json as ( 2 | select 3 | * 4 | from 5 | read_json( 6 | $filename, 7 | format = 'array', 8 | filename = true, 9 | -- columns from the StatsBomb docs, but excluding tactics, related_events, and shot.freeze_frame as many to one relationships 10 | -- these are instead handled separately to create their own dataframes. 
11 | columns = { 'id': varchar, 12 | type: 'struct(name varchar)', 13 | shot: 'struct(freeze_frame struct(location double[], player struct(id integer, name varchar), position struct(id integer, name varchar), teammate boolean)[])' } 14 | ) 15 | ), 16 | final as ( 17 | select 18 | cast(split(split(filename, '/') [-1], '.') [1] as integer) as match_id, 19 | id as event_uuid, 20 | unnest(shot.freeze_frame).location [1] as x, 21 | unnest(shot.freeze_frame).location [2] as y, 22 | unnest(shot.freeze_frame).player.id as player_id, 23 | unnest(shot.freeze_frame).player.name as player_name, 24 | unnest(shot.freeze_frame).position.id as position_id, 25 | unnest(shot.freeze_frame).position.name as position_name, 26 | unnest(shot.freeze_frame).teammate as teammate 27 | from 28 | raw_json 29 | where 30 | type.name = 'Shot' 31 | ) 32 | select 33 | * 34 | from 35 | final; -------------------------------------------------------------------------------- /football/statsbomb_duckdb/events_related_v7.py: -------------------------------------------------------------------------------- 1 | with raw_json as ( 2 | select 3 | * 4 | from 5 | read_json( 6 | $filename, 7 | format = 'array', 8 | auto_detect = true, 9 | maximum_object_size = 11000000, 10 | filename = false, 11 | columns = { 'id': varchar, 12 | index: integer, 13 | type: 'struct(id ubigint, name varchar)', 14 | related_events: 'VARCHAR[]' } 15 | ) 16 | ), 17 | related as ( 18 | select 19 | id as event_uuid, 20 | index, 21 | replace(type.name, '*', '') as type_name, 22 | unnest(related_events) as event_uuid_related 23 | from 24 | raw_json 25 | ), 26 | events as ( 27 | select 28 | id as event_uuid_related, 29 | index as index_related, 30 | replace(type.name, '*', '') as type_name_related 31 | from 32 | raw_json 33 | ), 34 | final as ( 35 | select 36 | related.*, 37 | events.* exclude event_uuid_related 38 | from 39 | related 40 | join events on related.event_uuid_related = events.event_uuid_related 41 | ) 42 | select 43 | * 44 | 
from 45 | final; -------------------------------------------------------------------------------- /football/statsbomb_duckdb/events_tactics_v7.py: -------------------------------------------------------------------------------- 1 | with raw_json as ( 2 | select 3 | * 4 | from 5 | read_json( 6 | $filename, 7 | format = 'array', 8 | filename = true, 9 | -- columns from the StatsBomb docs, but excluding tactics, related_events, and shot.freeze_frame as many to one relationships 10 | -- these are instead handled separately to create their own dataframes. 11 | columns = { 'id': varchar, 12 | index: integer, 13 | period: integer, 14 | timestamp: time, 15 | minute: integer, 16 | second: integer, 17 | type: 'struct(id ubigint, name varchar)', 18 | team: 'struct(id ubigint, name varchar)', 19 | tactics: 'struct(lineup struct(jersey_number integer, player struct(id integer, name varchar), position struct(id integer, name varchar))[])' } 20 | ) 21 | ), 22 | final as ( 23 | select 24 | cast(split(split(filename, '/') [-1], '.') [1] as integer) as match_id, 25 | id as event_uuid, 26 | index, 27 | period, 28 | timestamp, 29 | minute, 30 | second, 31 | type.id as type_id, 32 | type.name as type_name, 33 | team.id as team_id, 34 | team.name as team_name, 35 | unnest(tactics.lineup).jersey_number as jersey_number, 36 | unnest(tactics.lineup).player.id as player_id, 37 | unnest(tactics.lineup).player.name as player_name, 38 | unnest(tactics.lineup).position.id as position_id, 39 | unnest(tactics.lineup).position.name as position_name 40 | from 41 | raw_json 42 | where 43 | type.name in ('Starting XI', 'Tactical Shift') 44 | ) 45 | select 46 | * 47 | from 48 | final; -------------------------------------------------------------------------------- /football/statsbomb_duckdb/events_v7.py: -------------------------------------------------------------------------------- 1 | with raw_json as ( 2 | select 3 | * 4 | from 5 | read_json( 6 | $filename, 7 | format = 'array', 8 | filename = 
true, 9 | -- columns from the StatsBomb docs, but excluding tactics, related_events, and shot.freeze_frame as many to one relationships 10 | -- these are instead handled separately to create their own dataframes. 11 | columns = { 'id': varchar, 12 | index: integer, 13 | period: integer, 14 | timestamp: time, 15 | minute: integer, 16 | second: integer, 17 | type: 'struct(id ubigint, name varchar)', 18 | possession: integer, 19 | possession_team: 'struct(id ubigint, name varchar)', 20 | play_pattern: 'struct(id ubigint, name varchar)', 21 | team: 'struct(id ubigint, name varchar)', 22 | player: 'struct(id ubigint, name varchar)', 23 | position: 'struct(id ubigint, name varchar)', 24 | location: 'double[]', 25 | duration: double, 26 | under_pressure: boolean, 27 | off_camera: boolean, 28 | out: boolean, 29 | tactics: 'struct(formation varchar)', 30 | obv_for_after: double, 31 | obv_for_before: double, 32 | obv_for_net: double, 33 | obv_against_after: double, 34 | obv_against_before: double, 35 | obv_against_net: double, 36 | obv_total_net: double, 37 | -- in the docs counterpress is within the event type objects 38 | -- however, counterpress appears to be outside of these events, i.e. 
not within 50_50 event type object 39 | -- included this both inside the event type objects and outside just in case 40 | counterpress: boolean, 41 | '50_50': 'struct( 42 | outcome struct(id ubigint, name varchar), 43 | counterpress boolean 44 | )', 45 | bad_behaviour: 'struct(card struct(id ubigint, name varchar))', 46 | ball_receipt: 'struct(outcome struct(id ubigint, name varchar))', 47 | ball_recovery: 'struct(offensive boolean, recovery_failure boolean)', 48 | block: 'struct( 49 | deflection boolean, 50 | offensive boolean, 51 | save_block boolean, 52 | counterpress boolean 53 | )', 54 | carry: 'struct(end_location double[])', 55 | -- in the open-data there are some boolean columns left_foot, right_foot, head, other 56 | -- these are covered by body_part so ignored (columns are not in the docs) 57 | clearance: 'struct( 58 | aerial_won boolean, 59 | body_part struct(id ubigint, name varchar) 60 | )', 61 | dribble: 'struct( 62 | overrun boolean, 63 | nutmeg boolean, 64 | outcome struct(id ubigint, name varchar), 65 | no_touch boolean 66 | )', 67 | dribbled_past: 'struct(counterpress boolean)', 68 | duel: 'struct( 69 | counterpress boolean, 70 | type struct(id ubigint, name varchar), 71 | outcome struct(id ubigint, name varchar) 72 | )', 73 | foul_committed: 'struct( 74 | counterpress boolean, 75 | offensive boolean, 76 | type struct(id ubigint, name varchar), 77 | advantage boolean, 78 | penalty boolean, 79 | card struct(id ubigint, name varchar) 80 | )', 81 | foul_won: 'struct( 82 | defensive boolean, 83 | advantage boolean, 84 | penalty boolean 85 | )', 86 | -- open data also has the following boolean columns for goalkeeper 87 | -- shot_saved_to_post, shot_saved_off_target, punched_out, lost_out, success_out, lost_in_play, success_in_play, penalty_saved_to_post, saved_to_post 88 | -- ignored as not in the official spec and covered by type and outcome columns 89 | goalkeeper: 'struct( 90 | position struct(id ubigint, name varchar), 91 | technique struct(id 
ubigint, name varchar), 92 | body_part struct(id ubigint, name varchar), 93 | type struct(id ubigint, name varchar), 94 | outcome struct(id ubigint, name varchar), 95 | end_location double[] -- added but not in docs 96 | )', 97 | half_end: 'struct( 98 | early_video_end boolean, 99 | match_suspended boolean 100 | )', 101 | half_start: 'struct(late_video_start boolean)', 102 | injury_stoppage: 'struct(in_chain boolean)', 103 | interception: 'struct(outcome struct(id ubigint, name varchar))', 104 | miscontrol: 'struct(aerial_won boolean)', 105 | -- open data pass columns also has inswinging, outswinging, through_ball, straight 106 | -- ignored as not in the official spec and covered by technique column 107 | pass: 'struct( 108 | recipient struct(id ubigint, name varchar), 109 | length double, 110 | angle double, 111 | height struct(id ubigint, name varchar), 112 | end_location double[], 113 | assisted_shot_id varchar, 114 | backheel boolean, 115 | deflected boolean, 116 | miscommunication boolean, 117 | "cross" boolean, 118 | cut_back boolean, 119 | switch boolean, 120 | shot_assist boolean, 121 | goal_assist boolean, 122 | body_part struct(id ubigint, name varchar), 123 | type struct(id ubigint, name varchar), 124 | outcome struct(id ubigint, name varchar), 125 | technique struct(id ubigint, name varchar), 126 | aerial_won boolean, -- added but not in docs 127 | no_touch boolean -- added but not in docs 128 | )', 129 | player_off: 'struct(permanent boolean)', 130 | pressure: 'struct(counterpress boolean)', 131 | -- open data shot columns also has saved_off_target, saved_to_post, kick_off 132 | -- ignored as not in the official spec and covered by other columns (type/ outcome) 133 | shot: 'struct( 134 | key_pass_id varchar, 135 | end_location double[], 136 | aerial_won boolean, 137 | follows_dribble boolean, 138 | first_time boolean, 139 | open_goal boolean, 140 | one_on_one boolean, 141 | statsbomb_xg double, 142 | deflected boolean, 143 | technique struct(id 
ubigint, name varchar), 144 | shot_shot_assist boolean, 145 | shot_goal_assist boolean, 146 | body_part struct(id ubigint, name varchar), 147 | type struct(id ubigint, name varchar), 148 | outcome struct(id ubigint, name varchar), 149 | redirect boolean -- added but not in docs 150 | )', 151 | substitution: 'struct(replacement struct(id ubigint, name varchar), outcome struct(id ubigint, name varchar))' } 152 | ) 153 | ), 154 | final as ( 155 | select 156 | cast( 157 | split(split(filename, '/') [-1], '.') [1] as integer 158 | ) as match_id, 159 | id as event_uuid, 160 | index, 161 | period, 162 | timestamp, 163 | minute, 164 | second, 165 | type.id as type_id, 166 | replace(type.name, '*', '') as type_name, 167 | coalesce(duel.type.id, foul_committed.type.id, goalkeeper.type.id, pass.type.id, shot.type.id) as event_type_id, 168 | coalesce(duel.type.name, foul_committed.type.name, goalkeeper.type.name, pass.type.name, shot.type.name) as event_type_name, 169 | coalesce("50_50".outcome.id, ball_receipt.outcome.id, dribble.outcome.id, duel.outcome.id, goalkeeper.outcome.id, interception.outcome.id, pass.outcome.id, shot.outcome.id, substitution.outcome.id) as outcome_id, 170 | coalesce("50_50".outcome.name, ball_receipt.outcome.name, dribble.outcome.name, duel.outcome.name, goalkeeper.outcome.name, interception.outcome.name, pass.outcome.name, shot.outcome.name, substitution.outcome.name) as outcome_name, 171 | possession, 172 | possession_team.id as possession_team_id, 173 | possession_team.name as possession_team_name, 174 | play_pattern.id as play_pattern_id, 175 | play_pattern.name as play_pattern_name, 176 | team.id as team_id, 177 | team.name as team_name, 178 | player.id as player_id, 179 | player.name as player_name, 180 | position.id as position_id, 181 | position.name as position_name, 182 | location [1] as x, 183 | location [2] as y, 184 | location [3] as z, 185 | coalesce(carry.end_location[1], goalkeeper.end_location[1], pass.end_location[1], 
shot.end_location[1]) as end_x, 186 | coalesce(carry.end_location[2], goalkeeper.end_location[2], pass.end_location[2], shot.end_location[2]) as end_y, 187 | shot.end_location [3] as end_z, 188 | duration, 189 | under_pressure, 190 | off_camera, 191 | out, 192 | tactics.formation as tactics_formation, 193 | obv_for_after, 194 | obv_for_before, 195 | obv_for_net, 196 | obv_against_after, 197 | obv_against_before, 198 | obv_against_net, 199 | obv_total_net, 200 | coalesce(counterpress, "50_50".counterpress, block.counterpress, dribbled_past.counterpress, duel.counterpress, foul_committed.counterpress, pressure.counterpress) as counterpress, 201 | coalesce(block.offensive, ball_recovery.offensive, foul_committed.offensive) as offensive, 202 | coalesce(clearance.aerial_won, miscontrol.aerial_won, pass.aerial_won, shot.aerial_won) as aerial_won, 203 | coalesce(clearance.body_part.id, goalkeeper.body_part.id, pass.body_part.id, shot.body_part.id) as body_part_id, 204 | coalesce(clearance.body_part.name, goalkeeper.body_part.name, pass.body_part.name, shot.body_part.name) as body_part_name, 205 | coalesce(goalkeeper.technique.id, pass.technique.id, shot.technique.id) as technique_id, 206 | coalesce(goalkeeper.technique.name, pass.technique.name, shot.technique.name) as technique_name, 207 | coalesce(dribble.no_touch, pass.no_touch) as no_touch, 208 | coalesce(pass.deflected, shot.deflected) as deflected, 209 | bad_behaviour.card.id as bad_behaviour_card_id, 210 | bad_behaviour.card.name as bad_behaviour_card_name, 211 | ball_recovery.recovery_failure as ball_recovery_recovery_failure, 212 | block.deflection as block_deflection, 213 | block.save_block as block_save_block, 214 | dribble.overrun as dribble_overrun, 215 | dribble.nutmeg as dribble_nutmeg, 216 | coalesce(foul_committed.advantage, foul_won.advantage) as foul_advantage, 217 | coalesce(foul_committed.penalty, foul_won.penalty) as foul_penalty, 218 | foul_committed.card.id as foul_card_id, 219 | 
foul_committed.card.name as foul_card_name, 220 | foul_won.defensive as foul_defensive, 221 | goalkeeper.position.id as goalkeeper_position_id, 222 | goalkeeper.position.name as goalkeeper_position_name, 223 | half_end.early_video_end as half_end_early_video_end, 224 | half_end.match_suspended as half_end_match_suspended, 225 | half_start.late_video_start as half_start_late_video_start, 226 | injury_stoppage.in_chain as injury_stoppage_in_chain, 227 | pass.recipient.id as pass_recipient_id, 228 | pass.recipient.name as pass_recipient_name, 229 | pass.length as pass_length, 230 | pass.angle as pass_angle, 231 | pass.height.id as pass_height_id, 232 | pass.height.name as pass_height_name, 233 | pass.assisted_shot_id as pass_assisted_shot_id, 234 | pass.backheel as pass_backheel, 235 | pass.miscommunication as pass_miscommunication, 236 | pass."cross" as pass_cross, 237 | pass.cut_back as pass_cut_back, 238 | pass.switch as pass_switch, 239 | pass.shot_assist as pass_shot_assist, 240 | pass.goal_assist as pass_goal_assist, 241 | player_off.permanent as player_off_permanent, 242 | shot.key_pass_id as shot_key_pass_id, 243 | shot.follows_dribble as shot_follows_dribble, 244 | shot.first_time as shot_first_time, 245 | shot.open_goal as shot_open_goal, 246 | shot.one_on_one as shot_one_on_one, 247 | shot.statsbomb_xg as shot_statsbomb_xg, 248 | shot.shot_shot_assist, 249 | shot.shot_goal_assist, 250 | shot.redirect as shot_redirect, 251 | substitution.replacement.id as substitution_replacement_id, 252 | substitution.replacement.name as substitution_replacement_name 253 | from 254 | raw_json 255 | ) 256 | select 257 | * 258 | from 259 | final; -------------------------------------------------------------------------------- /football/statsbomb_duckdb/lineup_v4.py: -------------------------------------------------------------------------------- 1 | with raw_json as ( 2 | select 3 | * 4 | from 5 | read_json( 6 | $filename, 7 | format = 'array', 8 | filename = true, 9 | 
columns = {team_id: integer, 10 | team_name: varchar, 11 | lineup: 'STRUCT( 12 | player_id UBIGINT, 13 | player_name VARCHAR, 14 | jersey_number UBIGINT, 15 | country STRUCT(id UBIGINT, "name" VARCHAR), 16 | player_nickname VARCHAR 17 | )[]'} 18 | ) 19 | ), 20 | final as ( 21 | select 22 | cast(split(split(filename, '/') [-1], '.') [1] as integer) as match_id, 23 | team_id, 24 | team_name, 25 | unnest(lineup).player_id as player_id, 26 | unnest(lineup).player_name as player_name, 27 | unnest(lineup).jersey_number as jersey_number, 28 | unnest(lineup).player_nickname as player_nickname, 29 | unnest(lineup).country.id as country_id, 30 | unnest(lineup).country.name as country_name, 31 | from 32 | raw_json 33 | ) 34 | select 35 | * 36 | from 37 | final; -------------------------------------------------------------------------------- /football/statsbomb_duckdb/match_v5.py: -------------------------------------------------------------------------------- 1 | with raw_json as ( 2 | select 3 | * 4 | from 5 | read_json( 6 | $filename, 7 | format = 'array', 8 | columns = {match_id: integer, 9 | match_date: date, 10 | kick_off: time, 11 | competition: 'struct(competition_id integer, country_name varchar, competition_name varchar)', 12 | season: 'struct(season_id integer, season_name varchar)', 13 | home_team: 'struct(home_team_id integer, home_team_name varchar, home_team_gender varchar, home_team_group varchar, country struct(id integer, name varchar), 14 | managers struct(id varchar, name varchar, nickname varchar, dob date, country struct(id integer, name varchar))[])', 15 | away_team: 'struct(away_team_id integer, away_team_name varchar, away_team_gender varchar, away_team_group varchar, country struct(id integer, name varchar), 16 | managers struct(id varchar, name varchar, nickname varchar, dob date, country struct(id integer, name varchar))[])', 17 | home_score: integer, 18 | away_score: integer, 19 | match_status: varchar, 20 | match_status_360: varchar, 21 | 
last_updated: varchar, 22 | last_updated_360: varchar, 23 | metadata: 'struct(data_version varchar)', 24 | match_week: integer, 25 | competition_stage: 'struct(id integer, name varchar)', 26 | stadium: 'struct(id integer, name varchar, country struct(id integer, name varchar))', 27 | referee: 'struct(id integer, name varchar, country struct(id integer, name varchar))'} 28 | ) 29 | ), 30 | final as ( 31 | select 32 | match_id, 33 | match_date, 34 | kick_off, 35 | competition.competition_id, 36 | competition.country_name as competition_country_name, 37 | competition.competition_name, 38 | season.season_id, 39 | season.season_name, 40 | home_team.home_team_id, 41 | home_team.home_team_name, 42 | home_team.home_team_gender, 43 | home_team.home_team_group, 44 | home_team.country.id as home_team_country_id, 45 | home_team.country.name as home_team_country_name, 46 | home_team.managers [1].id as home_team_manager_id, 47 | home_team.managers [1].name as home_team_manager_name, 48 | home_team.managers [1].nickname as home_team_manager_nickname, 49 | home_team.managers [1].dob as home_team_manager_dob, 50 | home_team.managers [1].country.id as home_team_manager_country_id, 51 | home_team.managers [1].country.name as home_team_manager_country_name, 52 | away_team.away_team_id, 53 | away_team.away_team_name, 54 | away_team.away_team_gender, 55 | away_team.away_team_group, 56 | away_team.country.id as away_team_country_id, 57 | away_team.country.name as away_team_country_name, 58 | away_team.managers [1].id as away_team_manager_id, 59 | away_team.managers [1].name as away_team_manager_name, 60 | away_team.managers [1].nickname as away_team_manager_nickname, 61 | away_team.managers [1].dob as away_team_manager_dob, 62 | away_team.managers [1].country.id as away_team_manager_country_id, 63 | away_team.managers [1].country.name as away_team_manager_country_name, 64 | home_score, 65 | away_score, 66 | match_status, 67 | match_status_360, 68 | case when last_updated is null then 
null else cast(left(concat(replace(last_updated, 'T', ' '), ':00'), 19) as timestamp) end as last_updated, 69 | case when last_updated_360 is null then null else cast(left(concat(replace(last_updated_360, 'T', ' '), ':00'), 19) as timestamp) end as last_updated_360, 70 | metadata.data_version as metadata_data_version, 71 | match_week, 72 | competition_stage.id as competition_stage_id, 73 | competition_stage.name as competition_stage_name, 74 | stadium.id as stadium_id, 75 | stadium.name as stadium_name, 76 | stadium.country.id as country_id, 77 | stadium.country.name as country_name, 78 | referee.id as referee_id, 79 | referee.name as referee_name, 80 | referee.country.id as referee_country_id, 81 | referee.country.name as referee_country_name 82 | from 83 | raw_json 84 | ) 85 | select 86 | * 87 | from 88 | final; -------------------------------------------------------------------------------- /football/statsbomb_parser.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Work in progress, StatsBomb parser" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "\n", 18 | "import pandas as pd\n", 19 | "import requests" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "class Sbopen:\n", 29 | " def __init__(self, dataframe=True):\n", 30 | " self.dataframe = dataframe\n", 31 | " self.url = 'https://raw.githubusercontent.com/statsbomb/open-data/master/data/'\n", 32 | "\n", 33 | " @staticmethod\n", 34 | " def get_data(url):\n", 35 | " resp = requests.get(url=url)\n", 36 | " resp.raise_for_status()\n", 37 | " return resp.json()\n", 38 | "\n", 39 | " def event(self, match_id):\n", 40 | " url = f'{self.url}events/{match_id}.json'\n", 41 | " data = 
self.get_data(url)\n", 42 | " return flatten_event(data, match_id, self.dataframe)\n", 43 | "\n", 44 | " def lineup(self, match_id):\n", 45 | " url = f'{self.url}lineups/{match_id}.json'\n", 46 | " data = self.get_data(url)\n", 47 | " return flatten_lineup(data, match_id, self.dataframe)\n", 48 | "\n", 49 | " def match(self, competition, season):\n", 50 | " url = f'{self.url}matches/{competition}/{season}.json'\n", 51 | " data = self.get_data(url)\n", 52 | " return flatten_match(data, self.dataframe)\n", 53 | "\n", 54 | " def competition(self):\n", 55 | " url = f'{self.url}competitions.json'\n", 56 | " data = self.get_data(url)\n", 57 | " if self.dataframe:\n", 58 | " return pd.DataFrame(data)\n", 59 | " return data\n", 60 | "\n", 61 | " def frame(self, match):\n", 62 | " url = f'{self.url}three-sixty/{match}.json'\n", 63 | " data = self.get_data(url)\n", 64 | " return flatten_360(data, match, self.dataframe)\n", 65 | "\n", 66 | "\n", 67 | "class Sbapi:\n", 68 | " def __init__(self, username=None, password=None, dataframe=True):\n", 69 | " if username is None:\n", 70 | " username = os.environ.get(\"SB_USERNAME\")\n", 71 | " if password is None:\n", 72 | " password = os.environ.get(\"SB_PASSWORD\")\n", 73 | " self.auth = requests.auth.HTTPBasicAuth(username, password)\n", 74 | " self.dataframe = dataframe\n", 75 | " self.url = 'https://data.statsbombservices.com/api/v'\n", 76 | "\n", 77 | " def get_data(self, url):\n", 78 | " resp = requests.get(url=url, auth=self.auth)\n", 79 | " resp.raise_for_status()\n", 80 | " return resp.json()\n", 81 | "\n", 82 | " def event(self, match_id, version=6):\n", 83 | " url = f'{self.url}{version}/events/{match_id}'\n", 84 | " data = self.get_data(url)\n", 85 | " return flatten_event(data, match_id, self.dataframe)\n", 86 | "\n", 87 | " def lineup(self, match_id, version=2):\n", 88 | " url = f'{self.url}{version}/lineups/{match_id}'\n", 89 | " data = self.get_data(url)\n", 90 | " return flatten_lineup(data, match_id, 
self.dataframe)\n", 91 | "\n", 92 | " def match(self, competition, season, version=5):\n", 93 | " url = f'{self.url}{version}/competitions/{competition}/seasons/{season}/matches'\n", 94 | " data = self.get_data(url)\n", 95 | " return flatten_match(data, self.dataframe)\n", 96 | "\n", 97 | " def competition(self, version=4):\n", 98 | " url = f'{self.url}{version}/competitions'\n", 99 | " data = self.get_data(url)\n", 100 | " if self.dataframe:\n", 101 | " return pd.DataFrame(data)\n", 102 | " return data\n", 103 | "\n", 104 | " def frame(self, match_id, version=1):\n", 105 | " url = f'{self.url}{version}/360-frames/{match_id}'\n", 106 | " data = self.get_data(url)\n", 107 | " return flatten_360(data, match_id, self.dataframe)\n", 108 | "\n", 109 | "\n", 110 | "def _flatten_location(row, value, keyword=''):\n", 111 | " \"\"\" Flatten a list of locations into dictionary keys (x, y, z).\"\"\"\n", 112 | " if len(value) == 2:\n", 113 | " row[f'{keyword}x'], row[f'{keyword}y'] = value\n", 114 | " elif len(value) == 3:\n", 115 | " row[f'{keyword}x'], row[f'{keyword}y'], row[f'{keyword}z'] = value\n", 116 | " else:\n", 117 | " msg = 'location length not equal to 2 (x, y) or 3 (x, y, z)'\n", 118 | " raise AssertionError(msg)\n", 119 | "\n", 120 | "\n", 121 | "def _flatten_freeze(data, match_id, event_id):\n", 122 | " \"\"\" Flatten the freeze-frame events.\"\"\"\n", 123 | " for row in data:\n", 124 | " row['match_id'] = match_id\n", 125 | " row['id'] = event_id\n", 126 | " for key in list(row):\n", 127 | " value = row[key]\n", 128 | " if key == 'location':\n", 129 | " _flatten_location(row, value)\n", 130 | " del row['location']\n", 131 | " elif key in ['player', 'position']:\n", 132 | " for nested_key in value:\n", 133 | " row[f'{key}_{nested_key}'] = value[nested_key]\n", 134 | " del row[key]\n", 135 | " return data\n", 136 | "\n", 137 | "\n", 138 | "def _flatten_tactic(data, match_id, event_id):\n", 139 | " \"\"\" Flatten the tactics events.\"\"\"\n", 140 | " for row in 
data:\n", 141 | " row['match_id'] = match_id\n", 142 | " row['id'] = event_id\n", 143 | " for key in list(row):\n", 144 | " if key in ['player', 'position']:\n", 145 | " value = row[key]\n", 146 | " for nested_key in value:\n", 147 | " row[f'{key}_{nested_key}'] = value[nested_key]\n", 148 | " del row[key]\n", 149 | " return data\n", 150 | "\n", 151 | "\n", 152 | "def _flatten_list_of_lists(list_of_lists, key):\n", 153 | " \"\"\" Flatten a list of lists into a list\"\"\"\n", 154 | " flat_list = []\n", 155 | " for sublist in list_of_lists:\n", 156 | " for idx, item in enumerate(sublist):\n", 157 | " item[key] = idx + 1\n", 158 | " flat_list.append(item)\n", 159 | " return flat_list\n", 160 | "\n", 161 | "\n", 162 | "def _event_dataframe(data):\n", 163 | " \"\"\" Transform the event dictionary into a dataframe.\"\"\"\n", 164 | " df = pd.DataFrame(data)\n", 165 | " df['timestamp'] = pd.to_datetime(df['timestamp']).dt.time\n", 166 | " df.sort_values(['period', 'timestamp', 'index'], inplace=True)\n", 167 | " df.reset_index(drop=True, inplace=True)\n", 168 | " for col in ['counterpress', 'under_pressure', 'off_camera', 'out']:\n", 169 | " if col in df.columns:\n", 170 | " df[col] = df[col].astype(float)\n", 171 | " return df\n", 172 | "\n", 173 | "\n", 174 | "def _related_dataframe(data, df_events):\n", 175 | " \"\"\" Transform the related-events dictionary into a dataframe. 
For carries, we also\n", 176 | " ensure that both the carry and the related event are related both ways.\n", 177 | " Sometimes another event is not related to the carry event (but it is the other way round)\"\"\"\n", 178 | " df = pd.DataFrame(data)\n", 179 | " cols = ['id', 'index', 'type_name']\n", 180 | " df = df.merge(df_events[cols].rename({'id': 'id_related'}, axis='columns'),\n", 181 | " how='left', on='id_related', validate='m:1',\n", 182 | " suffixes=('', '_related'))\n", 183 | " df_carry = df[df['type_name'] == 'Carry'].copy()\n", 184 | " df_carry.rename({'id': 'id_related',\n", 185 | " 'index': 'index_related',\n", 186 | " 'type_name': 'type_name_related',\n", 187 | " 'id_related': 'id',\n", 188 | " 'index_related': 'index',\n", 189 | " 'type_name_related': 'type_name'},\n", 190 | " axis='columns', inplace=True)\n", 191 | " df = pd.concat([df, df_carry]).drop_duplicates()\n", 192 | " return df\n", 193 | "\n", 194 | "\n", 195 | "def _competition_dataframe(data):\n", 196 | " df = pd.DataFrame(data)\n", 197 | " date_cols = ['match_updated', 'match_updated_360', 'match_available_360', 'match_available']\n", 198 | " for date in date_cols:\n", 199 | " if date in df.columns:\n", 200 | " df[date] = pd.to_datetime(df[date])\n", 201 | " return df\n", 202 | "\n", 203 | "\n", 204 | "def _match_dataframe(data):\n", 205 | " df = pd.DataFrame(data)\n", 206 | " df['kick_off'] = pd.to_datetime(df['match_date'] + ' ' + df['kick_off'])\n", 207 | " date_cols = ['match_date', 'last_updated', 'last_updated_360',\n", 208 | " 'home_team_managers_dob', 'away_team_managers_dob']\n", 209 | " for date in date_cols:\n", 210 | " if date in df.columns:\n", 211 | " df[date] = pd.to_datetime(df[date])\n", 212 | " return df\n", 213 | "\n", 214 | "\n", 215 | "def flatten_event(events, match_id, dataframe=True):\n", 216 | " \"\"\" Flatten the events (list) so each row (dictionary) contains no nested events.\n", 217 | "\n", 218 | " Parameters\n", 219 | " ----------\n", 220 | " events : list 
of dicts\n", 221 | " The events to flatten.\n", 222 | " match_id : int\n", 223 | " The StatsBomb match identifier.\n", 224 | " dataframe : bool, default True\n", 225 | " Whether to return the results as a dataframe (True)\n", 226 | " or as flattened lists of dictionaries (False)\n", 227 | "\n", 228 | " Returns\n", 229 | " -------\n", 230 | " events, related, freeze, tactics\n", 231 | " If dataframe=True then returns dataframes else if dataframe=False\n", 232 | " each of the returned values is a list of dictionaries.\n", 233 | " \"\"\"\n", 234 | " related = []\n", 235 | " freeze = []\n", 236 | " tactics = []\n", 237 | " for row in events:\n", 238 | " row['match_id'] = match_id\n", 239 | " for key in list(row):\n", 240 | "\n", 241 | " # unpack nested columns\n", 242 | " if isinstance(row[key], dict):\n", 243 | " for nested_key in list(row[key]):\n", 244 | " nested_value = row[key][nested_key]\n", 245 | " if nested_key == 'end_location':\n", 246 | " _flatten_location(row, nested_value, keyword='end_')\n", 247 | " elif nested_key == 'aerial_won':\n", 248 | " row[f'{nested_key}'] = nested_value\n", 249 | " elif nested_key in ['outcome', 'body_part', 'technique', 'aerial_won']:\n", 250 | " for k in nested_value:\n", 251 | " row[f'{nested_key}_{k}'] = nested_value[k]\n", 252 | " elif nested_key == 'freeze_frame':\n", 253 | " freeze.append(_flatten_freeze(nested_value, match_id, row['id']))\n", 254 | " elif nested_key == 'lineup':\n", 255 | " tactics.append(_flatten_tactic(nested_value, match_id, row['id']))\n", 256 | " elif nested_key == 'type':\n", 257 | " for k in nested_value:\n", 258 | " row[f'sub_{nested_key}_{k}'] = nested_value[k]\n", 259 | " elif isinstance(nested_value, dict):\n", 260 | " for k in nested_value:\n", 261 | " row[f'{key}_{nested_key}_{k}'] = nested_value[k]\n", 262 | " else:\n", 263 | " row[f'{key}_{nested_key}'] = nested_value\n", 264 | " del row[key]\n", 265 | "\n", 266 | " # unpack the location column\n", 267 | " if 'location' in row:\n", 268 | " 
_flatten_location(row, row['location'])\n", 269 | " del row['location']\n", 270 | "\n", 271 | " # replace random star in ball receipts in some rows\n", 272 | " row['type_name'] = row['type_name'].replace('Ball Receipt*', 'Ball Receipt')\n", 273 | "\n", 274 | " # pass through ball is deprecated now, but it was not always added to technique name\n", 275 | " if 'pass_through_ball' in row:\n", 276 | " row['technique_name'] = 'Through Ball'\n", 277 | "\n", 278 | " # drop cols that are covered by other columns\n", 279 | " # (e.g. pass technique covers through, ball, inswinging etc.)\n", 280 | " cols_to_drop = ['pass_through_ball', 'pass_outswinging', 'pass_inswinging',\n", 281 | " 'clearance_head', 'clearance_left_foot', 'clearance_right_foot',\n", 282 | " 'pass_straight', 'clearance_other', 'goalkeeper_punched_out',\n", 283 | " 'goalkeeper_shot_saved_off_target', 'shot_saved_off_target',\n", 284 | " 'goalkeeper_shot_saved_to_post', 'shot_saved_to_post',\n", 285 | " 'goalkeeper_lost_out', 'goalkeeper_lost_in_play',\n", 286 | " 'goalkeeper_success_out', 'goalkeeper_success_in_play',\n", 287 | " 'goalkeeper_saved_to_post', 'shot_kick_off',\n", 288 | " 'goalkeeper_penalty_saved_to_post',\n", 289 | " ]\n", 290 | " for col in cols_to_drop:\n", 291 | " row.pop(col, None)\n", 292 | "\n", 293 | " # remove related_events as storing as separate dictionary\n", 294 | " if 'related_events' in row:\n", 295 | " for related_event in row['related_events']:\n", 296 | " related.append({'match_id': match_id,\n", 297 | " 'id': row['id'],\n", 298 | " 'index': row['index'],\n", 299 | " 'type_name': row['type_name'],\n", 300 | " 'id_related': related_event})\n", 301 | " del row['related_events']\n", 302 | "\n", 303 | " # flatten list of lists (e.g. 
player in lineup or freeze-frame into separate entry)\n", 304 | " tactics = _flatten_list_of_lists(tactics, key='event_tactics_id')\n", 305 | " freeze = _flatten_list_of_lists(freeze, key='event_freeze_id')\n", 306 | "\n", 307 | " if dataframe:\n", 308 | " events = _event_dataframe(events)\n", 309 | " related = _related_dataframe(related, events)\n", 310 | " freeze = pd.DataFrame(freeze)\n", 311 | " tactics = pd.DataFrame(tactics)\n", 312 | "\n", 313 | " return events, related, freeze, tactics\n", 314 | "\n", 315 | "\n", 316 | "def flatten_lineup(data, match_id, dataframe=True):\n", 317 | " lineup = []\n", 318 | " for row in data:\n", 319 | " for player in row['lineup']:\n", 320 | " player['match_id'] = match_id\n", 321 | " player['team_id'] = row['team_id']\n", 322 | " player['team_name'] = row['team_name']\n", 323 | " if 'country' in player:\n", 324 | " player['country_id'] = player['country']['id']\n", 325 | " player['country_name'] = player['country']['name']\n", 326 | " del player['country']\n", 327 | " if player['player_nickname'] is None:\n", 328 | " player['player_nickname'] = player['player_name']\n", 329 | " player.pop('positions', None) # if flattened would be multiple lines\n", 330 | " player.pop('cards', None) # if flattened would be multiple lines\n", 331 | " lineup.append(player)\n", 332 | " if dataframe:\n", 333 | " lineup = pd.DataFrame(lineup)\n", 334 | " return lineup\n", 335 | "\n", 336 | "\n", 337 | "def flatten_match(match, dataframe=True):\n", 338 | " for row in match:\n", 339 | " for key in list(row):\n", 340 | " value = row[key]\n", 341 | " if isinstance(value, dict):\n", 342 | " for nested_key in list(value):\n", 343 | " nested_value = value[nested_key]\n", 344 | " if isinstance(nested_value, list):\n", 345 | " nested_value = nested_value[0]\n", 346 | " if isinstance(nested_value, dict):\n", 347 | " for k in list(nested_value):\n", 348 | " if k == 'nickname' and not nested_value[k]:\n", 349 | " row[f'{key}_{nested_key}_{k}'] = 
nested_value['name']\n", 350 | " else:\n", 351 | " if isinstance(nested_value[k], dict):\n", 352 | " for sub_k in nested_value[k]:\n", 353 | " nested_sub_value = nested_value[k][sub_k]\n", 354 | " row[f'{key}_{nested_key}_{k}_{sub_k}'] = nested_sub_value\n", 355 | " else:\n", 356 | " row[f'{key}_{nested_key}_{k}'] = nested_value[k]\n", 357 | " elif key in ['competition_stage', 'stadium', 'referee', 'metadata']:\n", 358 | " row[f'{key}_{nested_key}'] = nested_value\n", 359 | " else:\n", 360 | " row[nested_key] = nested_value\n", 361 | " del row[key]\n", 362 | " if dataframe:\n", 363 | " match = _match_dataframe(match)\n", 364 | " return match\n", 365 | "\n", 366 | "\n", 367 | "def flatten_360(data, match_id, dataframe=True):\n", 368 | " frames = []\n", 369 | " visible = []\n", 370 | " for row in data:\n", 371 | " for idx, frame in enumerate(row['freeze_frame']):\n", 372 | " frame['match_id'] = match_id\n", 373 | " frame['id'] = row['event_uuid']\n", 374 | " _flatten_location(frame, frame['location'])\n", 375 | " del frame['location']\n", 376 | " frames.append(frame)\n", 377 | " frame_visible = {'match_id': match_id,\n", 378 | " 'id': row['event_uuid'],\n", 379 | " 'visible_area': row['visible_area'],\n", 380 | " }\n", 381 | " visible.append(frame_visible)\n", 382 | " if dataframe:\n", 383 | " frames = pd.DataFrame(frames)\n", 384 | " visible = pd.DataFrame(visible)\n", 385 | " return frames, visible" 386 | ] 387 | }, 388 | { 389 | "cell_type": "markdown", 390 | "metadata": {}, 391 | "source": [ 392 | "# Test it works" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": null, 398 | "metadata": {}, 399 | "outputs": [], 400 | "source": [ 401 | "parser = Sbapi(username=None, password=None)\n", 402 | "match_id = 3798898\n", 403 | "competition_id = 11\n", 404 | "season_id = 90\n", 405 | "\n", 406 | "#parser = Sbopen()\n", 407 | "#match_id = 3788741\n", 408 | "#competition_id = 11\n", 409 | "#season_id = 1" 410 | ] 411 | }, 412 | { 413 | 
"cell_type": "code", 414 | "execution_count": null, 415 | "metadata": {}, 416 | "outputs": [], 417 | "source": [ 418 | "events, related, freeze, tactics = parser.event(match_id)" 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": null, 424 | "metadata": {}, 425 | "outputs": [], 426 | "source": [ 427 | "lineups = parser.lineup(match_id)" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": null, 433 | "metadata": {}, 434 | "outputs": [], 435 | "source": [ 436 | "frames, visible = parser.frame(match_id)" 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": null, 442 | "metadata": {}, 443 | "outputs": [], 444 | "source": [ 445 | "competition = parser.competition()" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": null, 451 | "metadata": {}, 452 | "outputs": [], 453 | "source": [ 454 | "matches = parser.match(competition=competition_id, season=season_id)" 455 | ] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": null, 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [ 463 | "events.head()" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": null, 469 | "metadata": {}, 470 | "outputs": [], 471 | "source": [ 472 | "related.head()" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": {}, 479 | "outputs": [], 480 | "source": [ 481 | "freeze.head()" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": null, 487 | "metadata": {}, 488 | "outputs": [], 489 | "source": [ 490 | "tactics.head()" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": null, 496 | "metadata": {}, 497 | "outputs": [], 498 | "source": [ 499 | "lineups.head()" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": null, 505 | "metadata": {}, 506 | "outputs": [], 507 | "source": [ 508 | "frames.head()" 509 | ] 510 | }, 511 | { 512 | 
"cell_type": "code", 513 | "execution_count": null, 514 | "metadata": {}, 515 | "outputs": [], 516 | "source": [ 517 | "visible.head()" 518 | ] 519 | }, 520 | { 521 | "cell_type": "code", 522 | "execution_count": null, 523 | "metadata": {}, 524 | "outputs": [], 525 | "source": [ 526 | "competition.head()" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": null, 532 | "metadata": {}, 533 | "outputs": [], 534 | "source": [ 535 | "matches.head()" 536 | ] 537 | } 538 | ], 539 | "metadata": { 540 | "kernelspec": { 541 | "display_name": "Python 3 (ipykernel)", 542 | "language": "python", 543 | "name": "python3" 544 | }, 545 | "language_info": { 546 | "codemirror_mode": { 547 | "name": "ipython", 548 | "version": 3 549 | }, 550 | "file_extension": ".py", 551 | "mimetype": "text/x-python", 552 | "name": "python", 553 | "nbconvert_exporter": "python", 554 | "pygments_lexer": "ipython3", 555 | "version": "3.10.4" 556 | } 557 | }, 558 | "nbformat": 4, 559 | "nbformat_minor": 4 560 | } 561 | -------------------------------------------------------------------------------- /pysport/01_get_f24_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "2c2b202c-59b9-40f5-ad19-a897c72c7918", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import os\n", 11 | "import pandas as pd\n", 12 | "import glob\n", 13 | "from optasoccer import read_f24" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "id": "fe4ade04-0cf8-4477-a853-1f8fd4ac043a", 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "all_events = []\n", 24 | "all_matches = []\n", 25 | "files = glob.glob(os.path.join('data', 'f24', '*.xml'))\n", 26 | "for file in files:\n", 27 | " events, matches = read_f24(file)\n", 28 | " all_events.append(events)\n", 29 | " all_matches.append(matches)\n", 30 | "all_events = 
pd.concat(all_events)\n", 31 | "all_events = all_events[list(all_events.columns[:19]) + list(all_events.columns[19:].sort_values())].copy()\n", 32 | "all_matches = pd.concat(all_matches)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "id": "22385103-5b33-4aea-8e7b-43b9fd5ed963", 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "competition_id = all_matches.competition_id.unique()[0]\n", 43 | "season_id = all_matches.season_id.unique()[0]\n", 44 | "fname_suffix = f'{competition_id}_{season_id}'\n", 45 | "all_events = all_events.sort_values(['match_id', 'period_id', 'min', 'sec', 'timestamp']).reset_index(drop=True).copy()\n", 46 | "all_matches.to_parquet(f'opta_matches_{fname_suffix}.parquet')\n", 47 | "all_events.to_parquet(f'opta_events_{fname_suffix}.parquet')" 48 | ] 49 | } 50 | ], 51 | "metadata": { 52 | "kernelspec": { 53 | "display_name": "Python 3 (ipykernel)", 54 | "language": "python", 55 | "name": "python3" 56 | }, 57 | "language_info": { 58 | "codemirror_mode": { 59 | "name": "ipython", 60 | "version": 3 61 | }, 62 | "file_extension": ".py", 63 | "mimetype": "text/x-python", 64 | "name": "python", 65 | "nbconvert_exporter": "python", 66 | "pygments_lexer": "ipython3", 67 | "version": "3.12.2" 68 | } 69 | }, 70 | "nbformat": 4, 71 | "nbformat_minor": 5 72 | } 73 | -------------------------------------------------------------------------------- /pysport/02_get_fbref_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "aff60507-446d-4691-968a-da6411082285", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import requests\n", 11 | "from bs4 import BeautifulSoup, Comment\n", 12 | "import pandas as pd\n", 13 | "import numpy as np\n", 14 | "from lxml import etree, html\n", 15 | "import re\n", 16 | "from io import StringIO\n", 17 | "import os\n", 18 | "import glob\n", 19 
| "\n", 20 | "DATA_DIR = os.path.join('data', 'fbref')\n", 21 | "\n", 22 | "def get_soup(url):\n", 23 | " headers = {'User-Agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) '\n", 24 | " 'Chrome/39.0.2171.95 Safari/537.36')}\n", 25 | " r = requests.get(url, headers=headers)\n", 26 | " r.encoding = 'unicode-escape'\n", 27 | " return BeautifulSoup(r.content, 'html.parser')\n", 28 | "\n", 29 | "def get_url(stat):\n", 30 | " prefix = 'https://fbref.com/en/comps/37/2022-2023/'\n", 31 | " suffix = '/2022-2023-Belgian-Pro-League-Stats'\n", 32 | " return f'{prefix}{stat}{suffix}'\n", 33 | "\n", 34 | "def flatten_cols(df):\n", 35 | " col_level1 = list(df.columns.get_level_values(0))\n", 36 | " col_level1 = ['' if c[:7]=='Unnamed' else c.replace(' ', '_').lower() for c in col_level1]\n", 37 | " col_level2 = list(df.columns.get_level_values(1))\n", 38 | " col_level2 = [c.replace(' ', '_').lower() for c in col_level2]\n", 39 | " cols = [f'{c}_{col_level2[i]}' if c != '' else col_level2[i] for i, c in enumerate(col_level1)]\n", 40 | " cols = [re.sub('[^0-9a-zA-Z]+', '_', c.replace('%', '_percent').replace('+/-', '_plus_minus')).rstrip('_') for c in cols]\n", 41 | " df.columns = cols\n", 42 | " return df\n", 43 | "\n", 44 | "def extract_stats(url):\n", 45 | " soup = get_soup(url)\n", 46 | " comments = soup.findAll(string=lambda string:isinstance(string, Comment))\n", 47 | " extracted_comments = [comment.extract() for comment in comments if 'table' in str(comment)] \n", 48 | " df = pd.read_html(StringIO(str(extracted_comments[0])))[0]\n", 49 | " return flatten_cols(df)\n", 50 | "\n", 51 | "def stats_to_parquet(stat, directory):\n", 52 | " url = get_url(stat)\n", 53 | " df = extract_stats(url)\n", 54 | " df = df[df['rk'] != 'Rk'].copy()\n", 55 | " df.drop(['rk', 'matches'], axis='columns', inplace=True)\n", 56 | " file_name = os.path.join(directory, f'{stat}.parquet')\n", 57 | " df.to_parquet(file_name)" 58 | ] 59 | }, 60 | { 61 | 
"cell_type": "code", 62 | "execution_count": null, 63 | "id": "445947f7-f6e0-4c62-ab59-4caec70fe5f6", 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "stats_to_parquet('playingtime', DATA_DIR)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "id": "227b0731-5275-4707-84a7-e03ae40fb2ab", 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "stats_to_parquet('stats', DATA_DIR)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "id": "81ff4ca9-097f-4295-be7d-1e7ede6a1d9c", 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "stats_to_parquet('keepers', DATA_DIR)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "id": "f22b0102-28f7-47c4-9327-8933b2a5d8e6", 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "stats_to_parquet('keepersadv', DATA_DIR)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "id": "8d335176-8c03-4955-8b3a-60eef1bd4de5", 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "stats_to_parquet('shooting', DATA_DIR)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "id": "18f228d7-4aa8-4e3d-ae22-731b634c6412", 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "stats_to_parquet('passing', DATA_DIR)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "id": "ea0776f6-f3a5-4c55-b3c2-384f8af0ecc0", 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "stats_to_parquet('passing_types', DATA_DIR)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "id": "2d8a5718-e385-41fa-9673-60edcd3cbe92", 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "stats_to_parquet('gca', DATA_DIR)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "id": 
"8df12e5e-f0cf-4baf-ac1c-87a8812f15ac", 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "stats_to_parquet('defense', DATA_DIR)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "id": "58c29c73-6ab0-458d-ab83-1bfff8e3dfe7", 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "stats_to_parquet('possession', DATA_DIR)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "id": "0d9f1448-91c5-429f-870e-f712fb501fd9", 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "stats_to_parquet('misc', DATA_DIR)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "id": "84bdc0bc-17b9-4794-b310-c1843d44b37b", 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "files = glob.glob(os.path.join(DATA_DIR, '*.parquet'))\n", 178 | "df = pd.read_parquet([f for f in files if 'playingtime' in f][0])\n", 179 | "files = [f for f in files if 'playingtime' not in f]\n", 180 | "for f in files:\n", 181 | " df_temp = pd.read_parquet(f)\n", 182 | " df = df.merge(df_temp, on=['player', 'squad'], how='left', suffixes=['', '_to_remove'])\n", 183 | "cols_to_remove = [col for col in df.columns if '_to_remove' in col]\n", 184 | "df.drop(cols_to_remove, axis='columns', inplace=True)\n", 185 | "df.to_parquet('player_stats_112_2022.parquet')" 186 | ] 187 | } 188 | ], 189 | "metadata": { 190 | "kernelspec": { 191 | "display_name": "Python 3 (ipykernel)", 192 | "language": "python", 193 | "name": "python3" 194 | }, 195 | "language_info": { 196 | "codemirror_mode": { 197 | "name": "ipython", 198 | "version": 3 199 | }, 200 | "file_extension": ".py", 201 | "mimetype": "text/x-python", 202 | "name": "python", 203 | "nbconvert_exporter": "python", 204 | "pygments_lexer": "ipython3", 205 | "version": "3.12.2" 206 | } 207 | }, 208 | "nbformat": 4, 209 | "nbformat_minor": 5 210 | } 211 | 
-------------------------------------------------------------------------------- /pysport/03_turn_f24_data_to_actions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "2c2b202c-59b9-40f5-ad19-a897c72c7918", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd\n", 11 | "from mplsoccer import Pitch" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "id": "c81bc44e-35a5-432b-811c-22c58ef9d804", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "df = pd.read_parquet('opta_events_112_2022.parquet')\n", 22 | "p = Pitch(pitch_type='opta')" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "f8574cf9-d45a-4785-83bc-b8b444c17806", 28 | "metadata": {}, 29 | "source": [ 30 | "Add on the x/y coordinates in relation to the camera instead of left to right" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "id": "3f68fddc-d8ae-46f0-8a6f-37485c7b3248", 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "df_direction = df.loc[df.direction_of_play.notnull(), ['match_id', 'team_id', 'period_id', 'direction_of_play']].copy()\n", 41 | "df_direction['camera_right_to_left'] = df_direction['direction_of_play'] == 'Right to Left'\n", 42 | "df.drop('direction_of_play', axis='columns', inplace=True)\n", 43 | "df = df.merge(df_direction, on=['match_id', 'team_id', 'period_id'], how='left', validate='m:1')\n", 44 | "# fill missing\n", 45 | "df.loc[df['shot_goal_mouth_y_coordinate'].notnull(), 'shot_goal_mouth_x_coordinate'] = p.dim.right\n", 46 | "df.loc[df['type_name'] == 'foul_throw_in', 'end_x'] = df.loc[df['type_name'] == 'foul_throw_in', 'x']\n", 47 | "df.loc[df['type_name'] == 'foul_throw_in', 'end_y'] = df.loc[df['type_name'] == 'foul_throw_in', 'y']\n", 48 | "# flip coordinates for camera\n", 49 | "df['camera_x'], df['camera_y'] = 
p.flip_side(df['x'], df['y'], df['camera_right_to_left'])\n", 50 | "df['camera_pass_end_x'], df['camera_pass_end_y'] = p.flip_side(df['pass_end_x'], df['pass_end_y'], df['camera_right_to_left'])\n", 51 | "df['camera_shot_blocked_x_coordinate'], df['camera_shot_blocked_y_coordinate'] = p.flip_side(df['shot_blocked_x_coordinate'], df['shot_blocked_y_coordinate'], df['camera_right_to_left'])\n", 52 | "df['camera_shot_goal_mouth_x_coordinate'], df['camera_shot_goal_mouth_y_coordinate'] = p.flip_side(df['shot_goal_mouth_x_coordinate'], df['shot_goal_mouth_y_coordinate'], df['camera_right_to_left'])\n", 53 | "# end coordinates\n", 54 | "df['camera_end_x'] = (df['camera_pass_end_x']\n", 55 | " .fillna(df['camera_shot_blocked_x_coordinate'])\n", 56 | " .fillna(df['camera_shot_goal_mouth_x_coordinate'])\n", 57 | " )\n", 58 | "df['camera_end_y'] = (df['camera_pass_end_y']\n", 59 | " .fillna(df['camera_shot_blocked_y_coordinate'])\n", 60 | " .fillna(df['camera_shot_goal_mouth_y_coordinate'])\n", 61 | " )" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "id": "5a8b3368-e93e-435f-92c5-f6a5a2ff328f", 67 | "metadata": {}, 68 | "source": [ 69 | "Add on some boolean columns for pass/shot/goal/ and set pieces" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "id": "83bfc757-842c-46ff-8d58-61f3b7fbd5ec", 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "df['shot'] = df['type_name'].isin(['goal', 'attempt_saved', 'miss', 'post'])\n", 80 | "df['goal'] = df['type_name'] == 'goal'\n", 81 | "df['pass'] = df['type_name'].isin(['pass', 'offside_pass', 'foul_throw_in'])\n", 82 | "df['set_piece_taken'] = (df['free_kick_taken'] |\n", 83 | " df['free_kick'] | # includes tap pass can exclude with df['assisted'].isnull()\n", 84 | " df['corner_taken'] |\n", 85 | " df['shot_corner_direct'] |\n", 86 | " df['goalkeeper_goal_kick'] |\n", 87 | " df['kick_off'] |\n", 88 | " df['throw_in'] |\n", 89 | " (df['type_name'] == 'foul_throw_in')\n", 90 | 
" )" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "id": "e2056e99-c056-4412-9357-b471e2c30ec9", 96 | "metadata": {}, 97 | "source": [ 98 | "Remove some events that aren't the offensive team or aren't related to play (e.g. formations)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "id": "225e2369-ef21-4888-a226-8bce6c4e7c0d", 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "remove_events = ['attempted_tackle', 'card', 'challenge', 'chance_missed',\n", 109 | " 'coach_setup', 'collection_end', 'contentious_referee_decision',\n", 110 | " 'coverage_interruption', 'cross_not_claimed', 'delayed_start',\n", 111 | " 'deleted_after_review', 'end', 'end_delay',\n", 112 | " 'formation_change', 'good_skill', 'injury_time_announcement',\n", 113 | " 'obstacle', 'offside_provoked', 'penalty_faced',\n", 114 | " 'player_becomes_goalkeeper', 'player_off', 'player_on',\n", 115 | " 'player_retired', 'referee_drop_ball', 'shield_ball_opp', 'start',\n", 116 | " 'start_delay', 'take_on', 'team_set_up']\n", 117 | "df = df[~df['type_name'].isin(remove_events)].copy()\n", 118 | "# remove defensive duels\n", 119 | "df = df[df['duel_events_defensive'].isnull()].copy()" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "id": "6853f008-de31-4fb4-bc69-d57a61324488", 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "multi_outcome_events = ['aerial', '50_50', 'foul',\n", 130 | " 'corner_awarded', 'foul_throw_in', 'out', 'referee_drop_ball']\n", 131 | "mask_multi = (df['type_name'].isin(multi_outcome_events))\n", 132 | "mask_success = df['outcome'] == 1\n", 133 | "df = df[(~mask_multi) | (mask_multi & mask_success)].dropna(how='all', axis='columns').reset_index(drop=True).copy()" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "id": "48324fb7-3cfe-4b47-9f42-0bdd8fe604a4", 139 | "metadata": {}, 140 | "source": [ 141 | "Add 'out' events to the previous events 
and change the outcome of events where the pass or ball recovery went out / was offside but it looks successful" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "id": "2d582f70-e3e2-4591-9bcf-75df690b65ca", 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "df['out'] = df['type_name'].isin(['out', 'corner_awarded'])\n", 152 | "df['previous_out'] = df.groupby(['match_id', 'period_id'])['out'].shift(1) == True\n", 153 | "df['out'] = df.groupby(['match_id', 'period_id'])['out'].shift(-1) == True\n", 154 | "df['next_camera_x'] = df.groupby(['match_id', 'period_id'])['camera_x'].shift(-1)\n", 155 | "df['next_camera_y'] = df.groupby(['match_id', 'period_id'])['camera_y'].shift(-1)\n", 156 | "df = df[~df['type_name'].isin(['out', 'corner_awarded'])].dropna(how='all', axis='columns').reset_index(drop=True).copy()" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "id": "b16a9c1b-2768-4e2a-a772-b4b38dbff4e0", 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "mask_change_outcome = (df['out']) & (df['type_name'] == 'pass') & (df['outcome'] == 1)\n", 167 | "print('Number of outcomes changed:', mask_change_outcome.sum())\n", 168 | "df.loc[mask_change_outcome, 'outcome'] = 0\n", 169 | "# change outcome of offside pass to zero\n", 170 | "mask_change_outcome2 = df['type_name'].isin(['offside_pass', 'foul_throw_in'])\n", 171 | "print('Number of outcomes changed:', mask_change_outcome2.sum())\n", 172 | "df.loc[mask_change_outcome, 'outcome'] = 0\n", 173 | "mask_change_type = (df['type_name'] == 'ball_recovery') & (df['out'] == True)\n", 174 | "print('Number of type_name changed', mask_change_type.sum())\n", 175 | "df.loc[mask_change_type, 'type_name'] = 'ball_touch'" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "id": "af80f6a2-da3b-440a-b3f8-4a9f8ff0f8ff", 181 | "metadata": {}, 182 | "source": [ 183 | "Fix x/y end coordinates that went out but the event 
doesn't have the final coordinate" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "id": "526ce63b-00ce-4a2e-9364-d11503ca2b8e", 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "mask_missing = df['out'] & (df['camera_end_x'].isnull())\n", 194 | "mask_change = (df['out'] & \n", 195 | " (df['camera_end_x'] > 0) & (df['camera_end_x'] < 100) & (df['camera_end_y'] > 0) & (df['camera_end_y'] < 100) & \n", 196 | " ((df['next_camera_x'] <= 0) | (df['next_camera_x'] >= 0) | (df['next_camera_x'] >= 100) | (df['next_camera_y'] >= 100))\n", 197 | " )\n", 198 | "df.loc[mask_missing | mask_change, 'camera_end_x'] = df.loc[mask_missing | mask_change, 'next_camera_x']\n", 199 | "df.loc[mask_missing | mask_change, 'camera_end_y'] = df.loc[mask_missing | mask_change, 'next_camera_y']" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "id": "ac5ec3cb-d286-419b-aa2a-16c282852d4c", 205 | "metadata": {}, 206 | "source": [ 207 | "Work out if an event was a carry" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "id": "f5f7d008-a7d3-485c-a7f5-828c3c44b446", 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "df['previous_team_name'] = df.groupby(['match_id', 'period_id'])['team_name'].shift(1)\n", 218 | "df['previous_type_name'] = df.groupby(['match_id', 'period_id'])['type_name'].shift(1)\n", 219 | "df['previous_player_id'] = df.groupby(['match_id', 'period_id'])['player_id'].shift(1)\n", 220 | "df['previous_camera_end_x'] = df.groupby(['match_id', 'period_id'])['camera_end_x'].shift(1)\n", 221 | "df['previous_camera_end_y'] = df.groupby(['match_id', 'period_id'])['camera_end_y'].shift(1)\n", 222 | "df['previous_camera_x'] = df.groupby(['match_id', 'period_id'])['camera_x'].shift(1)\n", 223 | "df['previous_camera_y'] = df.groupby(['match_id', 'period_id'])['camera_y'].shift(1)\n", 224 | "df['previous_outcome'] = df.groupby(['match_id', 
'period_id'])['outcome'].shift(1)\n", 225 | "df['same_team'] = (df['previous_team_name'] == df['team_name']) | (df['previous_team_name'].isnull())\n", 226 | "df['same_player'] = (df['previous_player_id'] == df['player_id'])\n", 227 | "df['previous_defensive_touch_type_control'] = df.groupby(['match_id', 'period_id'])['defensive_touch_type_control'].shift(1)\n", 228 | "df['previous_timestamp_utc'] = df.groupby(['match_id', 'period_id'])['timestamp_utc'].shift(1)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "id": "08b35c74-c607-4ece-8101-6dfba96668c1", 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "mask1 = df['same_team'] & df['previous_type_name'].isin(['pass', 'ball_recovery', 'keeper_pick_up', 'drop_of_ball', 'miss', 'attempt_saved', 'smother', 'punch', 'post'])\n", 239 | "mask2 = df['previous_type_name'].isin(['50_50', 'tackle']) & (df['same_player'] == 1) & (df.type_name == 'pass') & df['duel_events_offensive'].isnull()\n", 240 | "mask3 = (df['previous_type_name'] == 'claim') & (df['same_player']) & (df['previous_outcome'] == 1)\n", 241 | "mask4 = (df['timestamp_utc'] - df['previous_timestamp_utc']) < pd.Timedelta(1, 'minute')\n", 242 | "mask_carry = ((mask1 | mask2 | mask3)\n", 243 | " & mask4 # rules out around 200 dribbles for being 1+ minutes\n", 244 | " & (df['set_piece_taken'].isnull())\n", 245 | " & (df['shot_first_touch'].isnull())\n", 246 | " & (df['shot_volley'].isnull())\n", 247 | " & (df['body_part_head'].isnull())\n", 248 | " & (df['body_part_other'].isnull())\n", 249 | " )\n", 250 | "df['carry_between'] = mask_carry\n", 251 | "df.index = df.index + df['carry_between'].cumsum()" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "id": "6753234d-ade8-4c18-9f64-aabbe29fd5b0", 257 | "metadata": {}, 258 | "source": [ 259 | "Create carry events and add to the other actions" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "id": 
"7f6859ad-9448-458e-bbbc-3a506259e2ea", 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [ 269 | "df_carry = df.loc[df['carry_between'], ['match_id', 'period_id', 'team_id', 'team_name', 'player_id', 'type_name', 'previous_camera_x', 'previous_camera_y',\n", 270 | " 'previous_camera_end_x', 'previous_camera_end_y', 'camera_x', 'camera_y']].copy()\n", 271 | "df_carry.rename({'camera_x': 'camera_end_x', 'camera_y': 'camera_end_y'}, axis='columns', inplace=True)\n", 272 | "df_carry['type_name'] = 'carry'\n", 273 | "df_carry['camera_x'], df_carry['camera_y'] = df_carry['previous_camera_end_x'].fillna(df['previous_camera_x']), df_carry['previous_camera_end_y'].fillna(df['previous_camera_y'])\n", 274 | "df_carry = df_carry.drop(['previous_camera_x', 'previous_camera_y', 'previous_camera_end_x', 'previous_camera_end_y'], axis='columns').copy()\n", 275 | "xstart, ystart = p.standardizer.transform(df_carry['camera_x'], df_carry['camera_y'])\n", 276 | "xend, yend = p.standardizer.transform(df_carry['camera_end_x'], df_carry['camera_end_y'])\n", 277 | "df_carry['angle'], df_carry['length'] = p.calculate_angle_and_distance(xstart, ystart, xend, yend, standardized=True)\n", 278 | "df_carry.index = df_carry.index - 1\n", 279 | "df_carry = df_carry[df_carry['length'] >= 2].copy() # rules out 51.1k for being less than 2 meters\n", 280 | "df_carry['pass'] = False\n", 281 | "df_carry['shot'] = False\n", 282 | "df_carry['goal'] = False\n", 283 | "df_carry['outcome'] = 1\n", 284 | "# add to the other actions\n", 285 | "df_actions = pd.concat([df, df_carry]).sort_index().reset_index(drop=True)\n", 286 | "df_actions['carry'] = df_actions['type_name'] == 'carry'" 287 | ] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "id": "5ff5860d-e28e-4e36-b191-e6f47e355526", 292 | "metadata": {}, 293 | "source": [ 294 | "Clean up and add the non-camera coordinates and save the final action file" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": null, 300 | 
"id": "5420e2fe-cc27-44cd-9708-a5d7da957910", 301 | "metadata": {}, 302 | "outputs": [], 303 | "source": [ 304 | "df_actions.drop(columns=['camera_right_to_left', 'shot_goal_mouth_x_coordinate', 'camera_pass_end_x', 'camera_pass_end_y', 'end_x', 'end_y',\n", 305 | " 'camera_shot_blocked_x_coordinate', 'camera_shot_blocked_y_coordinate', 'camera_shot_goal_mouth_x_coordinate',\n", 306 | " 'camera_shot_goal_mouth_y_coordinate', 'out', 'previous_out',\n", 307 | " 'next_camera_x', 'next_camera_y', 'previous_team_name', 'previous_type_name', 'previous_player_id', 'previous_camera_end_x',\n", 308 | " 'previous_camera_end_y', 'previous_camera_x', 'previous_camera_y', 'previous_outcome', 'same_team', 'same_player', \n", 309 | " 'previous_defensive_touch_type_control', 'previous_timestamp_utc', 'carry_between'], axis='columns', inplace=True)" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "id": "b8e9fba9-45ba-408b-ac4d-da419d56b2ff", 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [ 319 | "df_actions = df_actions.merge(df_direction, on=['match_id', 'team_id', 'period_id'], how='left', validate='m:1')\n", 320 | "new_x, new_y = p.flip_side(df_actions['camera_x'], df_actions['camera_y'], df_actions['camera_right_to_left'])\n", 321 | "df_actions.loc[df_actions['x'].isnull(), 'x'] = new_x[df_actions['x'].isnull()].round(1)\n", 322 | "df_actions.loc[df_actions['y'].isnull(), 'y'] = new_y[df_actions['y'].isnull()].round(1)\n", 323 | "df_actions['end_x'], df_actions['end_y'] = p.flip_side(df_actions['camera_end_x'], df_actions['camera_end_y'], df_actions['camera_right_to_left'])\n", 324 | "df_actions['end_x'] = df_actions['end_x'].round(1)\n", 325 | "df_actions['end_y'] = df_actions['end_y'].round(1)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "id": "44cbd222-31fe-4ec9-b82e-3b440152a396", 332 | "metadata": {}, 333 | "outputs": [], 334 | "source": [ 335 | 
"df_actions.to_parquet('opta_actions_112_2022.parquet')" 336 | ] 337 | } 338 | ], 339 | "metadata": { 340 | "kernelspec": { 341 | "display_name": "Python 3 (ipykernel)", 342 | "language": "python", 343 | "name": "python3" 344 | }, 345 | "language_info": { 346 | "codemirror_mode": { 347 | "name": "ipython", 348 | "version": 3 349 | }, 350 | "file_extension": ".py", 351 | "mimetype": "text/x-python", 352 | "name": "python", 353 | "nbconvert_exporter": "python", 354 | "pygments_lexer": "ipython3", 355 | "version": "3.12.2" 356 | } 357 | }, 358 | "nbformat": 4, 359 | "nbformat_minor": 5 360 | } 361 | -------------------------------------------------------------------------------- /pysport/data/f24/README.md: -------------------------------------------------------------------------------- 1 | # Data for analysis 2 | Placeholder for saving the Opta f24 data. 3 | -------------------------------------------------------------------------------- /pysport/data/fbref/README.md: -------------------------------------------------------------------------------- 1 | # Data for analysis 2 | Placeholder for saving the fbref data. 
3 | -------------------------------------------------------------------------------- /pysport/old_trafford_google_earth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewRowlinson/data-science/91bc237e20e27dcb62cb1525655790b0ab8c5e85/pysport/old_trafford_google_earth.png -------------------------------------------------------------------------------- /pysport/pysport_presentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewRowlinson/data-science/91bc237e20e27dcb62cb1525655790b0ab8c5e85/pysport/pysport_presentation.pdf -------------------------------------------------------------------------------- /simulation/simulate_composition_method.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "The composition method can be used to simulate complicated distributions, such as the double exponential. First, a random number is generated to identify which distribution to sample from. Then a random variable is selected from the chosen distribution." 
8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "#import seaborn\n", 21 | "import seaborn as sns\n", 22 | "# set to plot automatically\n", 23 | "%matplotlib inline\n", 24 | "# set font size of charts\n", 25 | "sns.set(font_scale=2)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "# Example: double exponential distribution" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "Set parameters" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": { 46 | "collapsed": true 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "# set samples\n", 51 | "samples = 1000\n", 52 | "# set lambda\n", 53 | "lam = 3" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "Generate random uniform number U~U(0,1) and V~U(0,1)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 3, 66 | "metadata": { 67 | "collapsed": true 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "U = np.random.uniform(0,1,(samples,2))" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "step a) If $U \\leq \\frac{1}{2}$ then generate $X$ from $\\frac{1}{\\lambda} \\log(V)$" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 4, 84 | "metadata": { 85 | "collapsed": true 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "# step a) filter to return V where U<=1/2\n", 90 | "a=U[:,1][np.where(U[:,0]<=0.5)]\n", 91 | "# calculate X1\n", 92 | "X1=(1/lam)*np.log(a)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "step b) If $U > \\frac{1}{2}$ then generate $X$ from $-\\frac{1}{\\lambda} \\log(1-V)$" 100 | ] 101 | }, 102 | { 103 | 
"cell_type": "code", 104 | "execution_count": 5, 105 | "metadata": { 106 | "collapsed": true 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "# step b) filter to return V where U>1/2\n", 111 | "b=U[:,1][np.where(U[:,0]>0.5)]\n", 112 | "# calculate X2\n", 113 | "X2=(-1/lam)*np.log(1-b)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 6, 119 | "metadata": { 120 | "collapsed": true 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "# combine X1 and X2 to form a single distribution\n", 125 | "X=np.concatenate([X1,X2])" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 7, 131 | "metadata": {}, 132 | "outputs": [ 133 | { 134 | "data": { 135 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfUAAAFTCAYAAAAgDZXiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X9Y1fX9//EHHAVMQPxBXoYgCkuv5owfjVq/NszVdHaJ\nmVJKijo7XgvxUlZqXrmyLrWuPjb8QSBmCOoSV+H6savNTMvWZVhJWxstDTKPP8YUElR+HA/fP/rC\nYhzwR284nBf323+93i/OeZ4Xxx7n9eL1Pi+fxsbGRgEAAK/n6+kCAACANQh1AAAMQagDAGAIQh0A\nAEMQ6gAAGIJQBwDAED08XcD3VVFR7ekSOlTfvlepsvKcp8swBuNpLcbTWoyntUwdz9DQoDavMVPv\n4nr0sHm6BKMwntZiPK3FeFqrO44noQ4AgCEIdQAADEGoAwBgCEIdAABDEOoAABiCUAcAwBCEOgAA\nhiDUAQAwBKEOAIAhCHUAAAxBqAMAYAhCHQAAQ3j9KW0APGPWqt1tXtu0eHQnVgKgCTN1AAAMQagD\nAGAIQh0AAEMQ6gAAGIJQBwDAEIQ6AACGINQBADAEoQ4AgCEIdQAADEGoAwBgCEIdAABDEOoAABiC\nUAcAwBCEOgAAhiDUAQAwxGWfp37y5EmNGzdO8+bNU2pqaqvrRUVFysvLU3l5uYKDgzV27Filp6er\nd+/erfru2bNHzz//vP71r38pICBAiYmJysjIUP/+/a/oxQCwVntnpgPoei5rpn727FnNmzdPNTU1\nbq/n5ORo0aJFcrlcSklJ0YgRI5SXl6fZs2ervr6+Rd/XX39ddrtdp06d0v3336+bbrpJr776qu67\n7z6dOXPmyl8RAADd1CXP1B0Oh+bNm6fPPvuszetr1qxRbGysCgoK1LNnT0lSZmamsrKyVFhYqJSU\nFEnffjhYvny5wsPDVVRUpMDAQEnSLbfcoqVLl+r555/XokWLvu9rAwCgW7mkmXpeXp7uvvtulZaW\n6qabbnLbp7CwUE6nU3a7vTnQJWnu3LkKDAzUjh07mtveeOMNffPNN0pNTW0OdEm69957NXToUL3y\nyiu6cOHClb4mAAC6pUsK9fz8fIWFhWnLli2aMGGC2z7FxcWSpISEhBbt/v7+iomJUWlpqaqrq1v0\nvfHGG1s9TkJCgqqqqvTFF19c+qsAAACXFupPPPGEioqKFBcX12afI0eOaMCAAW43xIWFhUmSysrK\nJElff/21JCk8PLxV38GDB7foCwA
ALs0lhfptt90mm83Wbp+qqioFBQW5vdbU3rTBrrKyUn5+fgoI\nCGjVt2k5vq3NeAAAwD3L7lN3Op3y8/Nze62pva6u7rL7AgCAS3PZ96m3JSAgQA0NDW6vNd3O1qtX\nr8vuezF9+16lHj3aX0XwdqGh7ldAcGUYz47HGF85xs5a3W08LQv14ODg5o1w/6upvWkZPjg4WHV1\ndaqvr281Y29adm9rKf9/VVaeu9KSvUJoaJAqKtyPKy4f49k5GOMrw/vTWqaOZ3sfVCxbfo+MjNSp\nU6dUW1vb6prD4ZCvr6+GDBnS3FeSjh492qpvU9vQoUOtKg0AgG7BslCPj4+Xy+XSgQMHWrTX1dXp\n4MGDio6Obt4EFx8fL+m/t7Z91/79+xUUFKSoqCirSgMAoFuwLNTHjx8vm82mdevWtfhK2OzsbNXU\n1Cg5Obm5bcyYMerdu7c2btyoqqqq5vY//OEPKi8v1+TJk+Xry1kzAABcDsv+ph4VFaVZs2YpNzdX\nSUlJSkxM1KFDh7Rnzx7FxcVpypQpzX1DQkL08MMP6/HHH1dSUpLGjh2rkydP6k9/+pMiIyNlt9ut\nKgvARXBoC2AOy0JdkjIyMjRo0CBt27ZN+fn5Cg0NVWpqqtLS0lptiLv//vvVp08fbdy4UVu3blWf\nPn2UlJSkBQsWKCQkxMqyAADoFnwaGxsbPV3E92HizsbvMnX3pqcwnq11xEx90+LRlj9md8D701qm\njmen7H4HAACeRagDAGAIQh0AAEMQ6gAAGIJQBwDAEIQ6AACGINQBADAEoQ4AgCEIdQAADEGoAwBg\nCEIdAABDEOoAABiCUAcAwBCEOgAAhiDUAQAwBKEOAIAhCHUAAAxBqAMAYAhCHQAAQxDqAAAYooen\nCwBgnlmrdrtt37R4dCdXAnQvzNQBADAEoQ4AgCEIdQAADEGoAwBgCEIdAABDEOoAABiCUAcAwBCE\nOgAAhiDUAQAwBKEOAIAhCHUAAAxBqAMAYAhCHQAAQxDqAAAYglAHAMAQhDoAAIbo0REPWllZqd/9\n7nfavXu3KisrdfXVV+sXv/iF5s2bp169erXoW1RUpLy8PJWXlys4OFhjx45Venq6evfu3RGlAQBg\nLMtn6mfPntXUqVP10ksvaejQoXrggQd09dVX64UXXtDMmTPldDqb++bk5GjRokVyuVxKSUnRiBEj\nlJeXp9mzZ6u+vt7q0gAAMJrlM/Xt27fryy+/1PTp07V06VJJUmNjox5++GG99tpreu211zRx4kQ5\nHA6tWbNGsbGxKigoUM+ePSVJmZmZysrKUmFhoVJSUqwuDwAAY1k+U//b3/4mSZo0aVJzm4+PjyZP\nnixJOnjwoCSpsLBQTqdTdru9OdAlae7cuQoMDNSOHTusLg0AAKNZHuohISGSpGPHjrVoP3nypCSp\nX79+kqTi4mJJUkJCQot+/v7+iomJUWlpqaqrq60uDwAAY1ke6pMmTVLPnj21cuVKffTRRzp//rz2\n79+vZ599VkFBQc0z+CNHjmjAgAFuN8SFhYVJksrKyqwuDwAAY1ke6iNHjtSLL76o2tpaTZ06VTEx\nMZo+fbpsNpt+//vfa/DgwZKkqqoqBQUFuX2MpvaamhqrywMAwFiWb5Q7deqUVq9erYqKCiUmJmro\n0KH6+9//rg8//FDLli1TTk6OgoOD5XQ65efn5/Yxmtrr6uou+nx9+16lHj1slr6GriY01P2HH1wZ\nxtNzGPuLY4ys1d3G0/JQz8jI0Mcff6znnntO48aNa27Py8vTypUr9dhjjykzM1MBAQFqaGhw+xhN\nt7P97z3t7lRWnrOm8C4qNDRIFRXsLbAK4+lZjH37eH9ay9TxbO+DiqXL7ydOnNAHH3ygH//4xy0C\nXZJSU1MVHR2tP//5z6qpqVFwcHCbG+Ga2ttangcAAK1ZGurHjx+XJA0bNszt9aioKLlcLp08eVKR\
nkZE6deqUamtrW/VzOBzy9fXVkCFDrCwPAACjWRrqAwYMkCSVl5e7vf7VV1/Jx8dH/fv3V3x8vFwu\nlw4cONCiT11dnQ4ePKjo6GgFBgZaWR4AAEaz9G/q4eHh+uEPf6gPP/xQu3bt0pgxY5qv7dixQ6Wl\npbrtttsUEhKi8ePHKycnR+vWrVNCQkLz5rjs7GzV1NQoOTnZytIAdAGzVu1u89qmxaM7sRLATJZv\nlFuxYoUeeOABzZs3r3n3++eff6733ntPoaGh+u1vfyvp26X4WbNmKTc3V0lJSUpMTNShQ4e0Z88e\nxcXFacqUKVaXBgCA0SwP9REjRujll1/W+vXr9f7772vv3r3q37+/kpOTlZaWpquvvrq5b0ZGhgYN\nGqRt27YpPz9foaGhSk1NVVpaWpu3uwEAAPc65OjViIgIPf300xft5+Pjo2nTpmnatGkdUQYAAN2K\n5d8oBwAAPINQBwDAEIQ6AACGINQBADAEoQ4AgCEIdQAADEGoAwBgCEIdAABDEOoAABiCUAcAwBCE\nOgAAhiDUAQAwBKEOAIAhCHUAAAxBqAMAYAhCHQAAQxDqAAAYglAHAMAQhDoAAIYg1AEAMAShDgCA\nIQh1AAAMQagDAGAIQh0AAEMQ6gAAGIJQBwDAEIQ6AACGINQBADAEoQ4AgCEIdQAADEGoAwBgCEId\nAABDEOoAABiih6cLANDxZq3a7ekSAHQCZuoAABiCmTqALqG91YRNi0d3YiWA9+qwmfof//hH3Xvv\nvbr++ut16623Kj09XWVlZa36FRUVKSkpSTExMbr99tu1cuVKnT17tqPKAgDAWB0S6s8995wefvhh\nVVdXa+rUqUpISNCuXbuUnJyso0ePNvfLycnRokWL5HK5lJKSohEjRigvL0+zZ89WfX19R5QGAICx\nLF9+//TTT5WTk6OEhATl5uYqICBAknTnnXdq/vz5Wr9+vVauXCmHw6E1a9YoNjZWBQUF6tmzpyQp\nMzNTWVlZKiwsVEpKitXlAQBgLMtn6lu3bpUkLV++vDnQJemuu+5ScnKyIiIiJEmFhYVyOp2y2+3N\ngS5Jc+fOVWBgoHbs2GF1aQAAGM3ymfq7776ra6+9VkOHDm3R7uPjo+XLlzf/d3FxsSQpISGhRT9/\nf3/FxMRo3759qq6uVlBQkNUlAgBgJEtn6qdOndLp06f1gx/8QIcPH1ZaWppuuOEGxcfHKz09XV9/\n/XVz3yNHjmjAgAHq3bt3q8cJCwuTJLcb6wAAgHuWhvq///1vSdLJkyc1efJkORwOTZo0SXFxcXrr\nrbeUnJwsh8MhSaqqqmpzFt7UXlNTY2V5AAAYzdLl93Pnzkn6dmk9KSlJK1askM1mkyQVFBToqaee\n0ooVK7R+/Xo5nU75+fm5fZym9rq6uos+Z9++V6lHD5tFr6BrCg3lTxBWYjy9T3f6nXWn19oZutt4\nWhrqvr7fTvxtNpuWLFnSHOiSNG3aNG3evFl79+7V+fPnFRAQoIaGBreP03Q7W69evS76nJWV5yyo\nvOsKDQ1SRUW1p8swBuPpnbrL74z3p7VMHc/2PqhYuvzetGweFhamkJCQlk/k66vhw4eroaFBx44d\nU3BwsKqr3Q92Uzub5AAAuHSWztTDw8Nls9nanIE7nU5J387AIyMjVVxcrNra2ha3vkmSw+GQr6+v\nhgwZYmV5gPE4uAXo3iydqfv7+2vkyJE6fvy4vvrqqxbXnE6nSktLFRISooEDByo+Pl4ul0sHDhxo\n0a+urk4HDx5UdHS0AgMDrSwPAACjWf7lM1OmTJEkPfXUUy1m7Js2bdKJEyeUlJQkm82m8ePHy2az\nad26dS2+EjY7O1s1NTVKTk62ujQAAIxm+ZfPTJo0Se+884527dqlpKQk3X777Tp8+LD27t2ryMhI\npaWlSZKioqI0a9Ys5ebmKikpSYmJiTp06JD27NmjuLi45g8HA
ADg0lg+U/fx8VFmZqaWLFkiSdqy\nZYv++c9/aurUqXrppZdabH7LyMjQsmXL5OPjo/z8fH3xxRdKTU3Vhg0b2rzdDQAAuOfT2NjY6Oki\nvg8Tb1f4LlNvyfAU08fT1I1y3eU8ddPfn53N1PHstFvaAACA5xDqAAAYglAHAMAQhDoAAIYg1AEA\nMAShDgCAIQh1AAAMQagDAGAIQh0AAEMQ6gAAGIJQBwDAEIQ6AACGINQBADAEoQ4AgCEIdQAADEGo\nAwBgCEIdAABDEOoAABiCUAcAwBCEOgAAhiDUAQAwBKEOAIAhCHUAAAxBqAMAYAhCHQAAQxDqAAAY\nglAHAMAQhDoAAIYg1AEAMAShDgCAIQh1AAAMQagDAGAIQh0AAEMQ6gAAGIJQBwDAEIQ6AACG6JRQ\nf/rppzV8+HDt37+/1bWioiIlJSUpJiZGt99+u1auXKmzZ892RlkAABilw0P9008/1ebNm91ey8nJ\n0aJFi+RyuZSSkqIRI0YoLy9Ps2fPVn19fUeXBgCAUXp05IPX19fr0Ucf1YULF1pdczgcWrNmjWJj\nY1VQUKCePXtKkjIzM5WVlaXCwkKlpKR0ZHkAABilQ2fq2dnZKi8v180339zqWmFhoZxOp+x2e3Og\nS9LcuXMVGBioHTt2dGRpAAAYp8NCvbS0VBs2bJDdbld0dHSr68XFxZKkhISEFu3+/v6KiYlRaWmp\nqqurO6o8AACM0yGhfuHCBS1dulRDhgyR3W532+fIkSMaMGCAevfu3epaWFiYJKmsrKwjygMAwEgd\n8jf1F154Qf/4xz+0bds2+fn5ue1TVVWlwYMHu70WFBQkSaqpqemI8gAAMJLloV5WVqZ169Zp6tSp\nio2NbbOf0+lsM/Cb2uvq6i76fH37XqUePWxXVqyXCA0N8nQJRmE8vU93+p11p9faGbrbeFoa6o2N\njVq6dKn69++vhQsXtts3ICBADQ0Nbq813c7Wq1eviz5nZeW5yy/Ui4SGBqmigr0FVmE8vVN3+Z3x\n/rSWqePZ3gcVS0N969at+uijj7Rhwwa3fyv/ruDg4DY3wjW1Ny3DAwCAi7M01N966y1J0oMPPuj2\n+vTp0yVJb7/9tiIjI1VcXKza2loFBAS06OdwOOTr66shQ4ZYWR4AAEazNNQnTpzY6hY1SXrvvfdU\nUlKiiRMnKiwsTMHBwYqPj9f+/ft14MAB3Xrrrc196+rqdPDgQUVHRyswMNDK8gAAMJqloX7PPfe4\nbT9z5kxzqN94442SpPHjxysnJ0fr1q1TQkJC8+a47Oxs1dTUKDk52crSAAAwXod+TWx7oqKiNGvW\nLOXm5iopKUmJiYk6dOiQ9uzZo7i4OE2ZMsVTpQEA4JU8FuqSlJGRoUGDBmnbtm3Kz89XaGioUlNT\nlZaW1ubtbgAAwD2fxsbGRk8X8X2YeLvCd5l6S4anmD6es1bt9nQJHWLT4tGeLqFTmP7+7Gymjmd7\nt7R1ynnqAACg4xHqAAAYglAHAMAQhDoAAIbw6O53AO61t+Gtu2waA3D5mKkDAGAIQh0AAEMQ6gAA\nGIJQBwDAEIQ6AACGYPc74GVM/SpYAN8fM3UAAAxBqAMAYAhCHQAAQxDqAAAYglAHAMAQhDoAAIYg\n1AEAMAShDgCAIQh1AAAMwTfKATASZ9KjO2KmDgCAIQh1AAAMwfI7AK/GATfAfzFTBwDAEIQ6AACG\nINQBADAEoQ4AgCHYKAegy2MzHHBpmKkDAGAIQh0AAEMQ6gAAGIJQBwDAEIQ6AACGINQBADAEoQ4A\ngCE65D71iooKrV27Vnv37tWpU6fUp08f/eQnP9H8+fMVHh7eom9RUZHy8vJUXl6u4OBgjR07Vunp\n6erdu3dHlAYAgLEsn6lXVFRo8uTJ2r59u6KiovTAAw/oRz/6kV5//XXde++9Ki8vb+6bk5OjRYsW\nyeVyKSUlRSNGjFBeXp5mz
56t+vp6q0sDAMBols/U165dq+PHj2vx4sWaOXNmc/vOnTv1yCOPaNWq\nVcrOzpbD4dCaNWsUGxurgoIC9ezZU5KUmZmprKwsFRYWKiUlxeryAAAwluUz9V27dqlfv36aMWNG\ni/YJEyYoIiJC+/btk8vlUmFhoZxOp+x2e3OgS9LcuXMVGBioHTt2WF0aAABGs3SmfuHCBdntdvXo\n0UO+vq0/L/j5+amhoUFOp1PFxcWSpISEhBZ9/P39FRMTo3379qm6ulpBQUFWlggAgLEsDXWbzdZq\nht7k8OHD+vLLLxURESE/Pz8dOXJEAwYMcLshLiwsTJJUVlamUaNGWVkiALR7QMymxaM7sRLAWp1y\nS5vL5dKTTz4pl8ulKVOmSJKqqqranIU3tdfU1HRGeQAAGKHDQ72xsVHLli3TBx98oJEjRzbP5J1O\np/z8/Nz+TFN7XV1dR5cHAIAxOvQ8dafTqccee0yvvPKKwsPDlZWV1RzYAQEBamhocPtzTbez9erV\n66LP0bfvVerRw2Zd0V1QaCj7CqzEeKI97b0/7s7Y6bb9tf+b0CnPj8vX3cazw0L9/Pnzmj9/vvbu\n3avIyEi9+OKLGjhwYPP14OBgVVdXu/3ZpvZL2SRXWXnOmoK7qNDQIFVUuB8nXD7GExdzJe8Pq95T\nvD+tZep4tvdBpUNC/ZtvvtGcOXNUUlKi6667Ths3blT//v1b9ImMjFRxcbFqa2sVEBDQ4prD4ZCv\nr6+GDBnSEeUBnaqtTVlsyAJgNcv/pl5XVye73a6SkhIlJCSooKCgVaBLUnx8vFwulw4cONDq5w8e\nPKjo6GgFBgZaXR4AAMayPNRXr16tTz75RLGxscrNzW0zmMePHy+bzaZ169a1+ErY7Oxs1dTUKDk5\n2erSAAAwmqXL7xUVFdq6daskadiwYcrNzXXb78EHH1RUVJRmzZql3NxcJSUlKTExUYcOHdKePXsU\nFxfXfOsbYKr27pUGgCthaaiXlJQ072h/+eWX2+w3Y8YM+fv7KyMjQ4MGDdK2bduUn5+v0NBQpaam\nKi0trc3b3QAAgHuWhvqYMWP0+eefX3J/Hx8fTZs2TdOmTbOyDAAAuqVO+UY5AADQ8Qh1AAAMQagD\nAGAIQh0AAEMQ6gAAGIJQBwDAEIQ6AACGINQBADAEoQ4AgCEIdQAADNEh56kDJmrvABbORgfQFTBT\nBwDAEIQ6AACGINQBADAEoQ4AgCEIdQAADEGoAwBgCEIdAABDEOoAABiCUAcAwBB8oxy6pba+HY5v\nhgPgzZipAwBgCEIdAABDsPwOAN/R3sE9Vj8ef+6B1ZipAwBgCEIdAABDsPwOWMDqJVt4F37/6CqY\nqQMAYAhm6vBqVm9CYsaFzsQmOliNmToAAIYg1AEAMATL7+hUnbncyFI6gO6GmToAAIYg1AEAMASh\nDgCAIQh1AAAMwUY5XBHurwU8g397aI/HZ+pOp1N5eXkaN26cRo0apTvuuEPr169XQ0ODp0sDAMCr\neDzUly9frpUrVyokJETTp0/XwIEDtWbNGmVkZHi6NAAAvIpHl98//vhjbd++XXfddZcyMzPl4+Oj\nxsZGLV68WEVFRXrnnXeUmJjoyRLRibivHPivK/n30BFL8209Jkv9XZNHZ+pbt26VJKWlpcnHx0eS\n5OPjo4ULF8rHx0c7duzwZHkAAHgVj87UDxw4oL59++raa69t0T5w4EBFRkaquLi402vqKt94ZvVh\nJO09HhtvgO7N6lWBK9XW/28687muVFf5/6jHZur19fU6ceKEIiIi3F4PCwvTmTNndPr06U6uDAAA\n7+SxUK+qqpIkBQUFub3e1F5dXd1pNQEA4M18GhsbGz3xxMeOHVNiYqJGjx6t559/vtX1Rx55RDt3\n7tRrr73WankeAAC05rGZekBAgCS1eT96fX29JKlXr16dVhMAAN7MY6EeGBgoX19f1dTUuL3
etOze\n1vI8AABoyWOh7ufnp2uuuUZHjx51e/3o0aPq16+fQkJCOrkyAAC8k0fvU4+Pj1dFRYXKyspatJ88\neVLl5eW6/vrrPVQZAADex6OhnpSUJEl67rnn5HK5JEmNjY1avXq1JCk5OdljtQEA4G08tvu9yYIF\nC/Tmm29q1KhRuvHGG/XJJ5/owIEDLb46FgAAXJzHD3R55plnlJ6ersrKSm3evFn/+c9/lJ6ermef\nfZZA/46//vWvmjlzpm644QaNHDlS48aN04YNG+R0Oj1dmtfbsmWLhg8frjNnzni6FK/B6Yod4+TJ\nk4qPj1deXp6nS/FaFRUVWrZsmX76059q5MiRuuWWW/Sb3/xGX3/9tadL6xQen6nj4nbu3KlFixap\nd+/euvPOOxUYGKj3339fhw8f1ujRo5WVlcUHoCtUXFysX/3qV6qtrVVxcbGCg4M9XZJXWLZsmbZv\n3674+HjFxcXp448/1kcffaS77rpLa9as8XR5Xuns2bOaOXOmSkpKtGTJEqWmpnq6JK9TUVGhyZMn\n6/jx47rllls0fPhwlZWVac+ePerTp4+2b9+uyMhIT5fZoTz63e+4uNraWq1YsUKBgYF69dVXFR4e\nLunb+/t//etfa/fu3frLX/6iO++808OVep833nhDS5cuVW1tradL8Sqcrmg9h8OhefPm6bPPPvN0\nKV5t7dq1On78uBYvXqyZM2c2t+/cuVOPPPKIVq1apezsbA9W2PE8vvyO9u3fv19VVVWaPHlyc6BL\nUs+ePWW32yVJ7777rqfK80qnT5/WQw89pIULF6pfv34aMmSIp0vyKpyuaK28vDzdfffdKi0t1U03\n3eTpcrzarl271K9fP82YMaNF+4QJExQREaF9+/Y1b8o2FaHexQ0ePFgLFy7Uz3/+81bX/Pz8JEnn\nzp3r7LK82hdffKG3335b99xzj4qKijRw4EBPl+RVuuLpit4sPz9fYWFh2rJliyZMmODpcrzWhQsX\nZLfblZaWJl/f1tHm5+enhoYG4/chsfzexUVFRSkqKsrttV27dkmSoqOjO7MkrxcREaGdO3dq+PDh\nni7F6zSdrtjWd0iEhYWprKxMp0+fVr9+/Tq5Ou/0xBNP6Oabb5bNZlN5ebmny/FaNput1Qy9yeHD\nh/Xll18qIiKieTJkKmbqXurw4cPKz8+Xn5+fJk6c6OlyvMqgQYMI9CvE6YrWu+2222Sz2TxdhrFc\nLpeefPJJuVwuTZkyxdPldDhm6h4yevRoORyOdvtMmzZNy5Yta9V+4sQJzZkzR+fPn9eSJUs0aNCg\njirTa3yf8cSla1q6bGu209ReV1fXaTUBbWlsbNSyZcv0wQcfaOTIkW3O5E1CqHvImDFjdPr06Xb7\njBo1qlXbV199pZkzZ8rhcOi+++7jtpf/70rHE5eH0xXhLZxOpx577DG98sorCg8PV1ZWlvFL7xKh\n7jGPPvroZf/Mp59+KrvdrtOnT+u+++7T448/bn1hXupKxhOXj9MV4Q3Onz+v+fPna+/evYqMjNSL\nL77YbTbEEupe4v3331daWprOnTunuXPnasGCBZ4uCd0Qpyuiq/vmm280Z84clZSU6LrrrtPGjRvV\nv39/T5fVadgo5wUOHjyohx56SOfPn9ejjz5KoMOjOF0RXVVdXZ3sdrtKSkqUkJCggoKCbhXoEqHe\n5Z09e1YLFizQ+fPntXjx4m6x0QNdG6croqtavXq1PvnkE8XGxio3N1eBgYGeLqnTsfzexRUWFurY\nsWMKCQlRdXW11q5d26rPsGHD9Mtf/tID1aE7uvnmmzVu3Di9+eabSk5ObnW64s9+9jNPl4huqKKi\novnbDocNG6bc3Fy3/R588EH5+/t3ZmmdilDv4pq+nauqqkrr1q1z2+eOO+4g1NGpnnnmGUVHR+vV\nV1/V5s2bdc011yg9PV1z5szhcCF4RElJSfNdGS+//HK
b/WbMmGF0qHNKGwAAhuBv6gAAGIJQBwDA\nEIQ6AADX8TGDAAAALUlEQVSGINQBADAEoQ4AgCEIdQAADEGoAwBgCEIdAABDEOoAABiCUAcAwBD/\nD4SbU4b4/S2pAAAAAElFTkSuQmCC\n", 136 | "text/plain": [ 137 | "" 138 | ] 139 | }, 140 | "metadata": {}, 141 | "output_type": "display_data" 142 | } 143 | ], 144 | "source": [ 145 | "# plot\n", 146 | "plt.hist(X,bins=60);" 147 | ] 148 | } 149 | ], 150 | "metadata": { 151 | "kernelspec": { 152 | "display_name": "Python [conda root]", 153 | "language": "python", 154 | "name": "conda-root-py" 155 | }, 156 | "language_info": { 157 | "codemirror_mode": { 158 | "name": "ipython", 159 | "version": 3 160 | }, 161 | "file_extension": ".py", 162 | "mimetype": "text/x-python", 163 | "name": "python", 164 | "nbconvert_exporter": "python", 165 | "pygments_lexer": "ipython3", 166 | "version": "3.5.3" 167 | } 168 | }, 169 | "nbformat": 4, 170 | "nbformat_minor": 2 171 | } 172 | -------------------------------------------------------------------------------- /simulation/simulate_exponential_random_variable_from_uniform.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Generate exponential random variables from the uniform distribution using the inverse transform method. However, note that the built-in numpy functions run faster." 
8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "# import libraries\n", 19 | "import numpy as np\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "import seaborn as sns\n", 22 | "from scipy.stats import expon\n", 23 | "# set to plot automatically\n", 24 | "%matplotlib inline" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "# Simulate values" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": { 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "# set lambda for exponential distribution\n", 43 | "lam=0.01\n", 44 | "# set random state so repeatable results\n", 45 | "np.random.seed(42)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 3, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "# exponential simulated from the inverse transform method: -1/lambda*ln(1-X) where X~U(0,1)\n", 57 | "exponential_sim=(-1/lam)*np.log(1-np.random.uniform(0,1,1000))" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "# Plot simulated values versus actual pdf" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 4, 70 | "metadata": { 71 | "collapsed": true 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "# create x and y values for the actual pdf so we can plot them\n", 76 | "x = np.linspace(expon.ppf(0.0001,scale=1/lam),expon.ppf(0.9999,scale=1/lam))\n", 77 | "y = expon.pdf(x,scale=1/lam)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 5, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYsAAAEFCAYAAAASWssjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmYXHWV//F3bb2lF7qTzgpIgHBAVEDACcomiqIzuA4u\nqLgBw4z7MjOOo4zj6Oj4G2R0lHFEgxvKqIgaQVHZZIuAAoIkBwIkECChk3TSnd5r+f3xvd1d3emu\nqoRUV3fV5/U89XTVvXWrT13CPf3dzo3lcjlEREQKiVc6ABERmf2ULEREpCglCxERKUrJQkREilKy\nEBGRopKVDqAcurp6n9EUr/b2Jrq7+/dVOHOWzkOg8xDoPATVfB46O1ti0+1Ty2IKyWSi0iHMCjoP\ngc5DoPMQ1Op5ULIQEZGilCxERKQoJQsRESlKyUJERIpSshARkaKULEREpKiyrbMwszhwCXAUMASc\n6+7r8/afCVwIpIFV7n5p3r6/AP7D3U+NXh8KfAvIAfcD73H3bLliFxGRicrZsngN0ODuJwAfAy4a\n3WFmKeBi4GXAKcD5ZrYo2vcPwDeAhrzP+iLwCXc/CYgBry5HwFu2xPjOd1LceGM5Pl1EZO4qZ7I4\nEfgVgLuvAY7L23cEsN7du919GLgFODna9zDwukmfdSxwU/T8l8BLyxHwr3+d5N5741xxRUgcIiIS\nlLPcRyuwM+91xsyS7p6eYl8v0Abg7lea2UGTPivm7rnJ751Oe3vTXq2ynDcPGhshl4Oenmae85w9\n/oiq09nZUukQZgWdh0DnIajF81DOZNED5J/ReJQoptrXAuwo8Fn54xPF3rvXdVuamxMMDCRpbKxj\n3boBVqxIFz+oinV2ttDV1VvpMCpO5yHQeQiq+TwUSoLl7Ia6FXglgJmtBO7L27cWWGFmHWZWR+iC\nur3AZ91tZqdGz18B3Lzvw4XFi8frDz71lLqhRERGlbNlcRVwupndRhiUfqeZnQ00u/vXzezDwLWE\nhLXK3Z8o8FkfAS6NEsta4MflCHhistCsYhGRUWVLFtHU1gsmbV6Xt381sHqaYzcAK/NeP0iYNVVW\nCxbkSKXC854e6OsL4xgiIrVOfz7niccnti42b1ZXlIgIKFnsZvHi8bF0dUWJiAS6Gk6iQW4Rkd0p\nWUyyZIkGuUVEJtPVcJIlS8a7obZsiZF7RnfzFhGpDkoWk7S0jM+AGhyE7u7KxiMiMhsoWUwSi8Gy\nZeOv1RUlIqJkMaX8ZKHpsyIiShZTUstCRGQiXQmnMDFZqGUhIqJkMYWlS8efP/10jHRtF58VEVGy\nmEpDA3R0hDmz2Sx0dal1ISK1TcliGqoRJSIyTsliGlrJLSIyTlfBaeSv5NYgt4jUOiWLaeS3LNQN\nJSK1TsliGgsW5EgkwvPt22MMDlY2HhGRSlKymEYyCZ2dKlcuIgJKFgXld0Vt2aJTJSK1S1fAAjTI\nLSISKFkUoLvmiYgEShYFTGxZxHUjJBGpWUoWBbS3h9IfAP390NNT2XhERColWekAZqMzP/Kzsefr\n1x5K/85w67xzP/MILfN7pzxm1cdOm5HYREQqQS2LIhqaxxdYDO5qqGAkIiKVo2RRhJKFiIiSRVEN\nLQNjzwd3NVYwEhGRylGyKGJiy6JeM6JEpCYpWRSRTGVI1o8AkMvGGe6vr3BEIiIzT8miBBq3EJFa\np2RRgsbm8XGLgV6NW4hI7VGyKEFj63iy6O9pqmAkIiKVoWRRgqa2vrHnAzubNMgtIjWnbCu4zSwO\nXAIcBQwB57r7+rz9ZwIXAmlglbtfOt0xZnY08LXovQ9G27PMkFTDCMm6NOnhJJl0guH+eurnDc3U\nrxcRqbhytixeAzS4+wnAx4CLRneYWQq4GHgZcApwvpktKnDMvwCfdvcTgXrgL8sY925iMWhq6x97\n3b9TXVEiUlvKmSxOBH4F4O5rgOPy9h0BrHf3bncfBm4BTi5wz
N1Ah5nFgBZgpIxxT6mxNS9ZaNxC\nRGpMOQsJtgI7815nzCzp7ukp9vUCbdMdAzwEfBX4RLT/xkK/uL29iWQy8Yy/QL6mvGQxMEWy6Oxs\n2ae/b7ao1u+1p3QeAp2HoBbPQzmTRQ+hFTAqHiWKqfa1ADumO8bMvgSc5O5/NrP3ELqn3jPdL+7u\n7p9u115rzOuGGuhtJJuNEY+Pj3R3dU1djXYu6+xsqcrvtad0HgKdh6Caz0OhJFjObqhbgVcCmNlK\n4L68fWuBFWbWYWZ1hC6o2wscs52QSACeBNrLGPeUkqkM9U1hUDuXjTHYq8V5IlI7ytmyuAo43cxu\nA2LAO83sbKDZ3b9uZh8GriUkrFXu/oSZ7XZM9FnnAleYWRoYBs4rY9zTamztZygq99G/cx5NbQNF\njhARqQ4lJQszey6wAsgSBqbvL3ZMNLX1gkmb1+XtXw2sLuEY3P0W4EWlxFpOTW397NgcGjVTjVuI\niFSraZNFNPPoAuCDhAHoxwizkJabWSvwJeB/Z3K9Q6VNmBGl6bMiUkMKtSx+DPwGWOnu3fk7zKwN\neDuhq+nV5QtvdmlsGSAWy5HLxRjqryczkiCRylQ6LBGRsiuULM5x976pdrj7TuDLZvbN8oQ1O8UT\nORpaBhnoCcUE+3saaZm/q8JRiYiU37SzoUYThZl9PlrrQPR6sZmtzn9PLWksst5CRKQalTJ1tgO4\nw8yebWZvBe4AbihvWLNXflFBjVuISK0oOhvK3c83szcB9wJbgRe6+6Nlj2yWasovVx5VoI3FKhiQ\niMgMKNqyMLN3Av8P+GdC3aYfRVVga1L9vEHiyTConR5OMTKUqnBEIiLlV8o6iwuA0919HYCZ/SXw\nU+CgMsY1a8VioXWxa3szEMYt6hp2FjlKRGRum7ZlYWaj9SxOGE0UAO5+NeF+E/nvqSlabyEitaZQ\nN9TlZnYeMG+KfdmooN8PyhPW7KZ7W4hIrSnUDXUW8LfAnWa2A9hEuFPdQcB8wgrus8od4Gw0efqs\nbrMqItWuULI40d2/CnzVzI5ivDbUw+5+74xEN0ul6kdI1o+QHkqRzcQZ6qvJ3jgRqSGFksVXgeea\n2R3u/gLC1FlhdJC7n56uNiCs5BYRqWaFksWTZrYJ6DSzR/K2x4Ccux9c3tBmt6a2vGSxc6phHRGR\n6lEoWbwC2J9QRvxVMxPO3KGyHyJSS6ZNFlHp8ceIpsnKRPkruQd7GxgZgZTW54lIlSrnbVWrWiL/\nNqu5GE88oZofIlK9lCyegfz1Fo89plMpItWrlNpQXzWz42cimLkmf9xi40YlCxGpXqXUhvo98Hkz\nWwh8B/iuu28ub1hzw7z28RsfrV8fVwVaEalaRf8cdvfvuPtLgFcSps3eZma/MLPXlD26Wa6heZBE\nKg3Arl2wZYsyhYhUp5L6TsxsOfCO6LGecO/tN5jZd8oW2RwQi0Fz+/jNkNavV1eUiFSnUsYsbgV+\nE708w91f5u7fBM4BXl7O4OaCyV1RIiLVqJQxi4vc/Sf5G8zsWe6+EVhUnrDmjuaO3rHnDz+scQsR\nqU7TJgszO4AwRvFpM7szej56zDXA4eUPb/arnzdEsm4EgP5+ePLJGMuWqQytiFSXQi2LfwVeDCwF\nfpe3PQ38opxBzSWxGMybNG6xbFmmghGJiOx7hcp9vAvAzP7R3f9j5kKae5o7Jo5bnHKKkoWIVJdC\n3VDnu/vXgQYzu3Dyfnf/dFkjm0OaO3ohalw88kicbBbiGusWkSpS6JIWm/R88kMidY3DtLWFcYrB\nQdi0SadHRKrLtMnC3f83+vmvwOein5cDdwH/NjPhzQ2xGBx6aHbstabQiki1KWWdxSeBb5jZgYSB\n7g8CXyt3YHNNfrJ4+GElCxGpLqVc1V4NnAecDXzP3U8Hnl/WqOag/GTx6KNxMhrjFpEqUkqySLj7\nEPBXwDVmFgd0H9FJOjqgo
yOMWwwNwWOPadxCRKpHKcniOjO7H6gjdEPdRLjVqkxyyCHqihKR6lS0\n3Ie7f9TMvgxscvesmb3P3e8pdlzUArmEcFvWIeBcd1+ft/9M4ELCIr9V7n7pdMdE5dEvBdqBBHCO\nuz+8p1+23FasyHLnnQkgDHK/9KXqixKR6lDKAPezgPcBl5rZKuD90c9iXgM0uPsJwMeAi/I+MwVc\nDLwMOAU438wWFTjmC8Dl7n4y8AlmaamR/HGLDRvipNMVDEZEZB8qpa/kh4R1FTcTuqBGH8WcCPwK\nwN3XAMfl7TsCWO/u3e4+DNwCnFzgmBcB+5vZb4G3ADeW8PtnXFsbdHaGcYuREdi4UeMWIlIdSqk6\nm3L3j+7FZ7cCO/NeZ8ws6e7pKfb1Am3THQMcBHS7+0uj1eT/SOjCmlJ7exPJZGIvQt57nZ0tADz/\n+fC7qJJWV1cdK1fOaBj73Oj3qnU6D4HOQ1CL56GUZHFLNL5wbdQKKFUPkH9G41GimGpfC7BjumPM\nbBvw82jbauCzhX5xd3d/od1l0dUVSpUvXBhnYCAFwB/+kGXlypEZj2Vf6exsGftetUznIdB5CKr5\nPBRKgqV0Q/018DNg0MwyZpY1s1JGbm8l3IoVM1sJ3Je3by2wwsw6zKyO0AV1e4FjbhndHr33zyX8\n/orInxG1cWOc4T1JryIis1Qps6GW7uVnXwWcbma3EcY83mlmZwPN7v51M/swcC0hYa1y9yfMbLdj\nos/6CGEV+d8SuqnO3suYyq6lBRYtyrFlS4xMJgx0H3ZYtviBIiKzWNFkEf3l/1HACLOiPgh8vliX\nlLtngQsmbV6Xt381k9ZrTHMM0V35Ti8W62yxYkWWLVvGp9AqWYjIXFdKN9RXgWbgWMKaiEOBb5Yz\nqLkufwrtQw9pcZ6IzH2lXMmOdfePAyPu3g+8HTimvGHNbQcfnB27n8Xjj8forc6xMBGpIaUki1zU\nFTV6Y+kFec9lCvPmwfLloXWRy8EDD8zsNF4RkX2tlGTxX8BvgcVm9l+E+1lcXNaoqsCRR453Rd1/\nv7qiRGRuK3oVc/fvEgadPws8Apzp7qWU+6hp+cnioYfiDA1VMBgRkWeo0D24z5m0abTn/WgzO9rd\nv1O+sOa+BQtyLF6cY/PmGCMj8OCDcZ77XM2KEpG5qdDU2RdHPw8hzIC6GsgAZxAWxSlZFHHkkVk2\nbw7jFX/+s5KFiMxd0yYLd38ngJndADzP3bdGr9uBn85MeHPbkUdmuO66kCzWrk2QzabHZkmJiMwl\npVy6lgLb8173AUvKE051OfDAHK2t4fmuXbBhg6rQisjcVEohwauB35jZTwjJ5Szg/8oa1Rz0rs9f\nP+X2TQ/vz/Yn5gPwwGeeZslhT03Yv+pjp5U9NhGRZ6qU2VAfJty97nBgBfCf7v7JcgdWLVoXjldc\n7+lqq2AkIiJ7r5SWBe5+JXBlmWOpSs3tu4gnsmQzcYb66xnsq6dhnubRisjcouHWMosncrTMH6/3\n0dPVWsFoRET2jpLFDGjtzOuKelpdUSIy95RSovwa4DLgp+4+d2/7VkEtnT0Qy0EuRn9PEyNDSVL1\n6eIHiojMEqW0LD5PWIj3kJl91cyOL3NMVSeZyjBvv77wIhejd6u6okRkbillNtTv3P3dwBHAGuBK\nM7vfzD5oZvVlj7BKtHb2jD3XrCgRmWtKGrMws1OBrwD/DvwK+ACwGPh52SKrMvnjFr3bmsmkNVwk\nInNHKWMWGwnVZi8D3uvuA9H2G4E7yxpdFalvGqaheZDBXQ3ksnH6upsntDZERGazUtZZ/KW735+/\nwcxWuvsa4PnlCas6tXbuZHBXAwA7n25TshCROaNQifIXAQngG2b2bmC0sFEK+B/gsPKHV11aO3t4\n+tFFAPRubSWn+w2KyBxRqGVxOnAKoWjgp/O2p4H/LWdQ1aqxtZ9k/QjpoRTp4SS921oqHZK
ISEkK\nlSj/FICZvS26W548Q7EY7Le4m60bFwLQ/WRHhSMSESlNoW6oT0UJ4zQze/Hk/e7+rnIGVq3al4wn\ni56uNvr7oampwkGJiBRRqBvqD9HPG2cgjprR2DJIY8sAA72N5LIx/vSnBCtXZiodlohIQYWSxb1m\ndiBww0wFUyval25nwJcBcOedShYiMvsVShY3ATnGZ0HlywEHlyWiGrDf4h089eBScrkYGzbE6OqK\n0dmpqVEiMnsVGuBePpOB1JJkXZqWBT1jZT/uuivBK16hwoIiMnsVHeA2s1VT7dcA9zPTvrR7LFn8\n4Q9xzjgjzJYSEZmNShngvmkmAqk1LQt6SKRCa6K7O8bDD8c49FB1RYnI7DRtNTt3Xx39/DbwS2A7\nsAVYHW2TZyAez7Hf4h1jr++6K1HBaERECita+tTMzgLuAd4OnA/cY2ZnlDuwWtC+dPvY8z/9KcGQ\nbs0tIrNUKXWyPwEc6+5/7e6vA04C/qO8YdWGxpYBFi0KXU9DQ3DffSpbLiKzUylVZ0eAzaMv3H2j\nmRWdumNmceAS4ChgCDjX3dfn7T8TuJBQa2qVu19awjFnA+9z9xNK+XKzXSwGxx2X4eqrw3+Gu+5K\ncNxx2QpHJSKyu0Kzoc6Jnj4KrDazbxMu7G8G7i3hs18DNLj7CWa2ErgIeHX02SngYuB4oA+41cx+\nDryowDHHAPnVb6vCscdmuOaaJLkcPPRQnO3boUMlo0RklinU7/Hi6LEL6AJeCbyKcHEv5YJ9IuGu\nekT3vjgub98RwHp373b3YeAW4OTpjjGz+YS79H2w1C82V7S1gdl4a+KPf9RAt4jMPoUW5b1zun1m\n1ljCZ7cCO/NeZ8ws6e7pKfb1Am3THFMPfBP4MDBQwu+lvb2JZHJuXHQ7O1s4/XTYuDG8fuCBOt70\nptmz5qKzU2XUQedhlM5DUIvnoZTbqr6eMLbQTGhRJIBGYGGRQ3uA/DMajxLFVPtagB1THUMYv1hB\nuOFSA/BsM/svd5+2ldHd3V8ktNmjq6uXZcsgl6tncDAkjTvuGObggyu/5qKzs4Wurt5Kh1FxOg+B\nzkNQzeehUBIsZfrNFwjdP2uBtxDuxf3DEo67ldB1RTT+cF/evrXACjPrMLM6QhfU7VMd4+53uPuR\n7n4q8CbggUKJYi5KpeCYY8aLCd54YynzDkREZk4pyaLb3W8A1gBt0T0uSpmNdBUwaGa3EQazP2Rm\nZ5vZ+e4+QuhWupaQJFa5+xNTHbPH32iOOumk8WTxwANxurpmST+UiAilTZ0dMLPDCK2BU83sesL4\nQkHungUumLR5Xd7+1cDqEo7J378BWFlCzHPOokU5jjgiy9q1cXI5uPnmBK97nYoLisjsUOqivM8A\nvwBeQij5cVU5g6pVp5wy3rq4444EfX0VDEZEJE/RloW738R4McHjzazd3bvLG1bteNfnrx97nsvB\nQw8fxmBvmGx234eeYtHBT+92zKqPnTZj8YmIQGm1ofY3s5+Y2XYz2wz8t5l1zkBsNScWg84Du8Ze\nb3t8Admsxi5EpPJK6YZaBfwGeBZwGKF0+WXlDKqWtS3eQap+BID0cIodm/ercEQiIqUNcHe6+//k\nvb7YzN5eroBqXTyeY/4BW9m8fgkAWzcupH1J96xZpCcitamUlsUdZvam0Rdm9lfAXeULSebvv414\nIpQAGdzVwK7ttbdaVERml0KFBLNAjrBq+zwz+yaQIazk7gbOnZEIa1AilaF96Xa2Pb4AgK6NnbTM\nr84VoyIyNxSqDaWbK1TQggO72LZpPuRi7NrWwkBvA40tg5UOS0RqVCm1oZqAfyGssUgC1wOfdHet\nAiij+qZh2jp72Pl0WP+49bFODjjy8QpHJSK1qpTWw1eAecC7CLdWrQO+Vs6gJFjwrPE1Fjs2tzMy\npJpRIlIZpVx9jnX3o/Jev9fMHihXQDKuqa2fprZ++nc
2kcvG6Nq4kKWHPVnpsESkBpXSsoib2dhk\n/+i5ihbNgFgMOg8ab11se3wBwwOpCkYkIrWqlJbFFwnTZ0eL/r0K+Fz5QpJ8rZ07J7Qutjy8pNIh\niUgNKqVlsRp4HfAIsAF4nbuvKmdQMi4Wg8Urxrueujfvx5NPaoWeiMysUloWN7v7EcD95Q5Gptbc\n3kdrZw89Xa2Qi3H11UnOO2+k0mGJSA0pJVnca2ZvA+4g7x7Y7v5Y2aKS3Sw+9Cl6trZALsa6dXEe\neijOihXZSoclIjWilGTxF9EjXw44eN+HI9NpaB6kfUk33U92AHD11Uk+8IFh1YwSkRlRyv0sls9E\nIFLcokM2s2NzOwCPPx7j3nvjHH20WhciUn6FakMtJSzIWwHcAvyTu++YqcBkd3UNIyw4oAtYBsAv\nf5nkuc8dJpGobFwiUv0KzYa6jHDP7L8HGoCLZyQiKWjh8qdpagrPt26NsWaNMoWIlF+hZLHM3T/u\n7r8CzgdeMEMxSQGJVIbTThtfE/nrXycZVH1BESmzQsliePSJu4/kv5bKOvHEDPvtlwNg1y644QbV\njBKR8tqTMuS5skUheySVgpe/fLx1cf31CZ56StOiRKR8Cv1JeqSZPZL3eln0Ogbk3F1TZyvo+OOz\n/P73OTZsiJHNwv/9X4r3v3+YuO5CIiJlUChZHDZjUcgei8XgDW8Y4YtfrCOdDlNpb7opwYtfnKl0\naCJShQrdKW/jTAYie27Rohwve1maa64J/xmvvTbJkUdmWbhQPYYism+p02KOO/XUDMuWheQwMgI/\n+lGSnHKFiOxjShZzXCIBb3zjyNhYxSOPxLntNq29EJF9S8miCixbluO008bHKq6+Osn27RUMSESq\njpJFlTj99DSLFoX+p6EhuPLKlLqjRGSfUbKoEslkmB01WoV23bo4d96p/7wism/oalJFDjoox0kn\njXdH/fSnKbZs0WI9EXnmlCyqzBlnpOnsHO+OuuyylGpHicgzpmRRZerr4ZxzRkilwuuurhhXXKHx\nCxF5ZspWgc7M4sAlwFHAEHCuu6/P238mcCGQBla5+6XTHWNmRwP/DWSi7ee4+5ZyxT7XLV2a4w1v\nGOHyy0PGuO++ODfckJgwY0pEZE+Us1zpa4AGdz/BzFYCFwGvBjCzFOH+GMcDfcCtZvZz4EXTHPMl\n4H3ufo+Z/Q3wj8CHyxj7rPauz19f0vue3L6UrY91AnDTvTm+v/9CDjtMd9YTkT1XzmRxIvArAHdf\nY2bH5e07Aljv7t0AZnYLcDJwwjTHvMndn8qLuWAvfHt7E8mkFqYtWfEkA72N9HU3Qy7GlVfO45//\nGebPL/0zOjtbyhfgHKLzEOg8BLV4HsqZLFqBnXmvM2aWdPf0FPt6gbYCxzwFYGYvBN5LSCzT6u7u\n3wfhz32xOBz43I2s//1hjAyl2Lp1mIsuyvHe9w6PjWkU0tnZQldXb/kDneV0HgKdh6Caz0OhJFjO\nAe4eIP83x6NEMdW+FmBHoWPM7I3A14C/dPeuskVdZVL1aQ583gZi8TDCvWlTjCuvVP0oEdkz5UwW\ntwKvBIjGH+7L27cWWGFmHWZWR2gp3D7dMWb2VkKL4lR3z7/HhpRg3n79LDnsibHXd96Z4Be/UMIQ\nkdKVM1lcBQya2W2EwewPmdnZZnZ+dJvWDwPXEpLEKnd/YppjEsCXCS2On5jZjWb2r2WMuyrN338b\nL3jB+GyoG29M8JvfaFxHREpTtjELd88CF0zavC5v/2pgdQnHAHTs8wBrTCwGZ52Vpr8/xv33h78R\nrr02SX09nHKKptSKSGFalFdD4nF429tGMBufPvvznydZs0YtDBEpTMmixiST8I53jLB8+XjC+PGP\nk/zxj/qnICLT0xWiBtXVwbvfPcIBB4QR7lwOfvCDFPfdp38OIjI1XR1qVGMjnHfeMIsXh4SRzcJ3\nv5tSC0NEpqQrQw2
bNw8uuGB4rEptJgOXX57iuusSmlYrIhOUcwW3zDLT1ZQaHkzx6EMHM9TXAMBN\n90DHt7ex7IhNXPZPp81kiCIyS6llIdQ1jHDo8euZ175rbNv2J+az4e6DdS8MEQGULCSSSGVY/vxH\n2G9J99i23m0tXHJJHTt3FjhQRGqCkoWMicdzHHDkYyxcPn6rkCeeiPHlL9excaNuzypSy5QsZIJY\nDBYfupn9n/04sVgY5d6xI8ZXvlLHddclyOp2GCI1SQPcMqWOZdtJNYzQULeMwcEwtfaaa5I8+GCc\ns88eoa2t0hGKyExSspBptczvZePAdTz26IH075wHhJlS3/pZmgOOfJzWzp5pj131Mc2iEqkm6oaS\nguoahznk+PVhHCPqlsqMJNlwz3KeWLeMbEZjGSK1QMlCihodxzj42IdJNYyMbd/2+AIevP1werpa\ntYhPpMopWUjJmtv7WLHSaVs4Ppd2eKCODfcsZ8M9yxnqr6tgdCJSTkoWskeSqQwHPm8D+z/7cRKp\n9Nj23q2tPHj74Wxev1hdUyJVSMlC9lgsFmZL2YvW0bH/trGxjFw2xtOPLsJvO5w//jGuabYiVUTJ\nQvZaMpVh/yM2cegLHqKprX9s+8hgHZdfnuILX6jjD39Q0hCpBkoW8ow1tQ5wyPEPsf+zHydZN941\n1dUV4/vfD0njrruUNETmMq2zkH1itGuqbeFOuh7rpKFh6VgRwq6uGD/4QYrf/jbHqadmOOaYDPX1\nlY1XRPaMkoXsU4lUhsWHbGbTyK/Z+vQCtj7WSSY9fo/vH/8GEskM+y3pZv7+W2loHhrbp4V8IrOX\nkoWURSKVYdEhW1hw4Fa2Ph4ljZGQNDLpBNseX8C2xxcwr30X8/ffRutClbYVmc2ULKSsEqkMiw7e\nwoIDu9j+xHy2b5rPUP94H1RfdzN93c0k69L8+MdJjjkmw/LlOeIaTROZVZQsZEYkklk6n9XFggO7\n2LW9me2bFkQrv8OajPRwkttvT3D77QlaW+GoozIcdVSGgw7KEdOyDZGKU7KQGRWLQcv8XbTM38XI\nYDK0Np6Yz8hQauw9PT1w880Jbr45wX775Tj88CyHH55lxYosDQ0VDF6khilZSMWkGtIsOmQLCw/e\nQt+OebzwBYv4058S7Bq/uys7dsRYsybBmjUJ4nE46KAsRxyRxSzL0qVqdYjMFCULqbhYLNSdev3r\n07z2tWnWr49z991x7r8/Qf/4Wj+yWXjkkTiPPBLn6quhsTEkj+XLw+OAA3KkUtP/HhHZe0oWMmu8\n6/PXT3gkCylDAAALKUlEQVSda4S+4Xns2tZC77ZWBnoadz/o9vGnsXiOxpZ+GlsHaGztp7FlgIZ5\ng8SmGCzXNF2RPaNkIbNWLB5aHM3tfSw+dDMjQ8kocbSwa3sz6eGJzYhcNkb/znljN2oKn5GjoXkg\nJI7mQRqaB6lvGiSXQ11YIntAyULmjFR9mval3bQv7SaXC+XR+3fMoy96DPXtPvqdy8YY6GlioKdp\nwvZPfrKehQtzLFyYZeHCHPPn5+joCD+bmnb7GJGap2Qhc1IsBvVNw9Q3DdO+tBuA9HCC/p3zGOht\nZKC3kcHeRoYHpr7HxsAAbNwYY+PGxG77mpqgoyNHe3t4POtZAHHa2nLRAxK7HyZS1ZQspGok6zK0\ndvZMuDd4eiTBYG8jAz2NDPY1MNTXwGBf4cJU/f3Q3x9j06bQT9XYCAMDocvrpnuehFiOZCpDsi5N\nsm4k/KyPnkfbE8kMibo0yVSGb33iZC0ylDlPyUKqWjKVobljF80d4/Nxczm48O8W8vTTcbZsibFt\nW4ytW2Ns3x6ej4wU+ECAXIz0cJL0cBIovvDjH3rqaWiAxsbc2M/GxvGfdXVQXx/21dfnqK8neuSo\nq4O6utGfkEpprEUqo2zJwsziwCXAUcAQcK67r8/bfyZwIZAGVrn7pdMdY2aHAt8Cc
sD9wHvcXQWv\nZa/EYtDWBm1tWVasmLgvl4Ndu2Dr1hg7doRHLlfHY49l2bkzRnLtSEgSudKv2Llc6PYaGBg9Zu+v\n9rEYJJMhaaRSIYmMvk4mc9HP0W05Eonx1+F52Db6SCYhHodEIjf2nnh8dNv4vlgM+vqguzs2tn/8\nEcqzxGJM+DnVtlhMyW6uKmfL4jVAg7ufYGYrgYuAVwOYWQq4GDge6ANuNbOfAy+a5pgvAp9w9xvN\n7GvRtqvKGLtUucnTdEv17JPDxX+0ZZEeSpEeTjIynCQ9nCIznCQ9kiAzkiQzkiA9kgxdV3vhlKOX\n7rYtl4OREaLWz+SrbnmvwqE7bt/cZ300aUyVSPKTyej28Dw34dj8904+bqrXk7cX+zn52FHNzdDX\nVzfpPbkp31vK5+3t9uksXZrjJS9J7/OJGuVMFicCvwJw9zVmdlzeviOA9e7eDWBmtwAnAydMc8yx\nwE3R818CL0PJQiokFgszs1L1aWgZLPr+XC5U2s2mE2RGEmTyf6YTZDNxMuk42eh5NpMgk45zxyPr\no9fjj1x29gx+TJXMSpXLhcee3RBrZpok+zK5V8KDD0Jra45TTsns088tZ7JoBfLrTmfMLOnu6Sn2\n9QJt0x0DxNw9N+m90+rsbHlG/6pWX/TqZ3K4iMxpB1U6gH1g37QA85Xzz5QeoCX/d0WJYqp9LcCO\nAsdkp3iviIjMkHImi1uBVwJE4w/35e1bC6wwsw4zqyN0Qd1e4Ji7zezU6PkrgJvLGLeIiEwSy+Vy\nxd+1F/JmNj2P0Nn4TuD5QLO7fz1vNlScMBvqq1Md4+7rzOww4FJC22otcJ6779sOORERmVbZkoWI\niFSP2TO1QkREZi0lCxERKUrJQkREilJtqEix8iTVKFpJv4owsbwe+AzwAFOUVjGz84C/IZRn+Yy7\n/6ISMZeTmS0E/gCcTvie36LGzoOZ/RPwKsJkkksIi2G/RQ2dh+j/i28T/r/IAOdRo/8e8qllMW6s\nPAnwMUKpkWr3VmCbu58EnAF8hfHSKicRZqS92swWA+8nlGN5OfA5MytcunWOiS4Q/wsMRJtq7jxE\n09NfSPh+pwAHUIPngTB9P+nuLwQ+DXyW2jwPEyhZjJtQngQ4rvDbq8KPgE9Gz2OEv44ml1Z5KfAC\n4FZ3H3L3ncB6wvTmavKfwNeA0VoPtXgeXk5Y23QVsBr4BbV5Hh4EklFvQyswQm2ehwmULMZNV2qk\narn7LnfvNbMW4MfAJ5i6tMp05Vmqgpm9A+hy92vzNtfceQAWEP5IOgu4ALicUEWh1s7DLkIX1DrC\n+q4vU5v/HiZQshhXqDxJ1TKzA4AbgO+6+/eZurTKdOVZqsW7gNPN7EbgaOA7wMK8/bVyHrYB17r7\nsLs7MMjEi1+tnIcPEc7DYYQxzG8zsdhSrZyHCZQsxhUqT1KVzGwR8GvgH919VbR5qtIqdwAnmVmD\nmbURqgbfP9Pxlou7n+zup7j7qcA9wDnAL2vtPAC3AGeYWczMlgLzgOtq8Dx0M95i2A6kqMH/LybT\nCu7IdKVGKhtVeZnZl4A3Eprboz5AaHZPKK0Szfo4n/AHxr+7+5UzHe9MiFoXFxBaWLuVmKn282Bm\nXwBeTPh+HwcepcbOg5k1E2YJLiF87y8Bd1Fj52EyJQsRESlK3VAiIlKUkoWIiBSlZCEiIkUpWYiI\nSFFKFiIiUlRVr1CW6mVmBxHKMjwQbRotzfBtd/+XffD57wBOdfd3PNPP2heidQ/fcPdXFnjPpwDc\n/VOTtp8KfCpaR1LK71pFWJT6juh1PWEd0qfd/ed7Hr1UAyULmcuedPejR19EF9SHzOwKd19bwbj2\nOXd/kmjR6Az4AHCPmb3W3a8iFNW8VYmitilZSDVZQlhQ2RvV9fof4DnAIsCB10XPryKstD0G2AKc\n5e7bzexthPpYPcBGQo2g0RX9XwIagK3A37j7+
mgB392EonKNwPsIVUiPBC5294tHA4vieQw4xt23\nmFlHFMOzCCWu30ZYMZ0F3ujua81sA/B7QgmStwE/dPeDzOw5wH8DzYSyJBe5+5ejX/UCM/t9tO/r\n7v6l/BNkZodG52U+0A+8z93vzn9PVC/sbcBPzKwdOCF6SA3TmIXMZUvN7B4zW2dmWwn343itu28i\nlNoejkrOH0q4mI/+ZX4U8EV3fw6hls9bolbJF4CTCRfGFgAzqwOuAN7r7kcRKtP+ID8Id38u8F3C\nBfz1wEnAhZPekyZU+T0r2vR64KdRXK8hdHk9J9r2d3mH/tLdDXg6b9u5hHsnHE9Ybf3ZvH1LgNOi\n7/BeMzuaib4N/IO7P5+w8viK3c5qiPc2wv0bvg682d2Hp3qf1A4lC5nLRruhnk24WNcB1wO4+++A\nS8zsPYRWwQrCX9sAT+f9NX0/0EFILre5+5bowv69aP9hQLe73xl97o+AQ6NaQBDKVUNoiaxx9353\n3wjsN0W83wXeFD1/M/A9d+8BzgbeZGafA87MixNCy2KyjwAN0Y2KPjvp/Ve4e1/0uasJ96UAxspY\nHA9cZmb3AN8Hms1s/uRfYGaJ6JxsBV42RQxSY5QsZM5z9yzw94Qupo8CmNmrCCW2+4HLgN8Ruqgg\nVFMdlYu255j4/8NoxeGp/h+JAYnoef5f3AWrFLv7XUCHmR0P7O/ut0VVf28nJJdfEv6aj+UdNrDb\nB8EPgdcSBvc/Pmlffgwxwr0YRiWAQXc/evQB/AWhWN5kFxIK6r0E+LSZHVHou0n1U7KQqhC1Bj4K\nfDy6g9lLCX38lwGbCd1LiQIfcQuw0syWRUUl3zj60cD86AKPmb0B2OjuU11gS3E54Y58o90/xwPr\no/GN3xMqmhaKE8JtXy90958RtRyilgDAX5tZfTTWcCah/Hz4IuEGPQ+Z2VujY04nJNEJzOwU4N3A\nu939z4S7xV0edclJjVKykKrh7r8C1hDGLi4F3mxmdwM/ibYvL3DsFsIA9W8Jpad7ou1DhMTxFTO7\nH3gv44lkb3yPMGA92s31ayBuZg9EMW4oFGfkU8AtZvZHwt3t8o/ZSJjmeguhCurkWWFvAc41sz8B\nnyMMpo9VE40G3r9LSBRbo81fIrQ+/m1PvqhUF1WdFRGRotSyEBGRopQsRESkKCULEREpSslCRESK\nUrIQEZGilCxERKQoJQsRESnq/wMMegB84K8KtQAAAABJRU5ErkJggg==\n", 88 | "text/plain": [ 89 | "" 90 | ] 91 | }, 92 | "metadata": {}, 93 | "output_type": "display_data" 94 | } 95 | ], 96 | "source": [ 97 | "#plt.hist(exponential_sim,bins=20)\n", 98 | "plt.hist(exponential_sim,bins=20,normed=True);\n", 99 | "plt.plot(x, y,'b-', lw=3, alpha=0.6, label='exponential pdf');\n", 100 | "plt.xlabel('Random variable X');\n", 101 | "plt.ylabel('Probability density f(x)');" 102 | ] 103 | } 104 | ], 105 | "metadata": { 106 | "kernelspec": { 107 | "display_name": "Python [conda root]", 108 | "language": "python", 109 | "name": "conda-root-py" 110 | }, 111 | "language_info": { 112 | "codemirror_mode": { 113 | "name": "ipython", 114 | "version": 3 115 | }, 116 | "file_extension": ".py", 117 | 
"mimetype": "text/x-python", 118 | "name": "python", 119 | "nbconvert_exporter": "python", 120 | "pygments_lexer": "ipython3", 121 | "version": "3.5.3" 122 | } 123 | }, 124 | "nbformat": 4, 125 | "nbformat_minor": 2 126 | } 127 | -------------------------------------------------------------------------------- /simulation/simulate_linearly_related_random_variables.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Simulated linearly related random variables" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Delivery times $X_1$ and $X_2$ for two products are related through a linear transformation:\n", 15 | "$\\begin{bmatrix} X_1 \\\\ X_2 \\end{bmatrix} = \\begin{bmatrix} 2 & 2 \\\\ 1 & 4 \\end{bmatrix} \\begin{bmatrix} Z_1 \\\\ Z_2 \\end{bmatrix} + \\begin{bmatrix} 15 \\\\ 20 \\end{bmatrix}$
\n", 16 | "where $Z_1$ and $Z_2$ both follow the standard normal distribution N(0,1) and are independent of each other. $Z_1$ and $Z_2$ indicate conditions related to certain raw materials. This notebook simulates the values $X_1$ and $X_2$." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "# import libraries\n", 28 | "import numpy as np\n", 29 | "import matplotlib.pyplot as plt\n", 30 | "# set to plot automatically\n", 31 | "%matplotlib inline" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": { 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "# set random state so repeatable results\n", 43 | "np.random.seed(42)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 3, 49 | "metadata": { 50 | "collapsed": true 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "# Draw conditions for materials\n", 55 | "samples=1000\n", 56 | "Z = np.random.randn(2,1000)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 4, 62 | "metadata": { 63 | "collapsed": true 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "# calculate X through dot product and addition (broadcasting)\n", 68 | "X = np.dot(np.array([[2,2],[1,4]]),Z)+np.array([[15],[20]])" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 5, 74 | "metadata": { 75 | "collapsed": true 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "# store values in variables for X1 and X2\n", 80 | "X1 = X[0]\n", 81 | "X2 = X[1]" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "# Detect outliers using Mahalanobis distance" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "The Mahalanobis distance can be defined as
\n", 96 | "$\\sqrt {({\\vec {x}}-{\\vec {\\mu }})^{T}S^{-1}({\\vec {x}}-{\\vec {\\mu }})}$
\n", 97 | "This can be used to detect outliers: for bivariate normal data the **squared** Mahalanobis distance approximately follows a chi-squared distribution with 2 degrees of freedom, whose 95th percentile is 5.99, so only 5% of **squared** Mahalanobis distances are expected to be greater than 5.99. See 'use of Mahalanobis distance for detecting outliers and clusters in markedly non-normal data', available at http://www.dtic.mil/dtic/tr/fulltext/u2/a545834.pdf" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 6, 103 | "metadata": { 104 | "collapsed": true 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "# calculate covariance matrix: cov_matrix\n", 109 | "cov_matrix=np.cov(X1,X2)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 7, 115 | "metadata": { 116 | "collapsed": true 117 | }, 118 | "outputs": [], 119 | "source": [ 120 | "# calculate inverse covariance matrix: inv_cov\n", 121 | "inv_cov_matrix=np.linalg.inv(cov_matrix)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 8, 127 | "metadata": { 128 | "collapsed": true 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "# calculate mean of random variable X: mean_X\n", 133 | "mean_X=X.mean(axis=1)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 9, 139 | "metadata": { 140 | "collapsed": true 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "# calculate X - mean: part1\n", 145 | "part1 = np.transpose(X)-mean_X" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 10, 151 | "metadata": { 152 | "collapsed": true 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "# dot product of part 1 with the inv_cov_matrix: part2\n", 157 | "part2 = np.dot(part1,inv_cov_matrix)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 11, 163 | "metadata": { 164 | "collapsed": true 165 | }, 166 | "outputs": [], 167 | "source": [ 168 | "# calculate squared mahalanobis distance: mahalanobis_distance_sq \n", 169 | "mahalanobis_distance_sq = np.sum(np.multiply(part2,part1),axis=1)" 170 | ] 171 | }, 172 | { 173 | "cell_type": 
"code", 174 | "execution_count": 12, 175 | "metadata": { 176 | "collapsed": true 177 | }, 178 | "outputs": [], 179 | "source": [ 180 | "# only 5% of squared mahalanobis distances are expected to be greater than 5.99\n", 181 | "# create mask for outliers\n", 182 | "mask = mahalanobis_distance_sq>5.99\n", 183 | "# filter outliers\n", 184 | "X1_outliers = X1[mask]\n", 185 | "X2_outliers = X2[mask]\n", 186 | "# remove outliers\n", 187 | "X1_new = X1[~mask]\n", 188 | "X2_new = X2[~mask]" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 13, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "data": { 198 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAFpCAYAAABnHGgVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xt4VPW1P/73mmEwA9UEEBUSEOzDgSrkgiFiAbVQhArK\npYpaPAdbLW2//dlq20BoBaP1W1BQrLanPWi9VK2HFDHihaICfhUeRYMJQURqVRQC5Z4oZDCTZP3+\n2DPDTLLnft95v54nT5I9e2Y+GcKaT9Zn7fURVQUREVmLLd0DICKixGNwJyKyIAZ3IiILYnAnIrIg\nBnciIgticCcisqCIg7uI2EWkVkRe9HzfW0ReFZGPPJ97JW+YREQUjWhm7j8HsNPv+woA61V1CID1\nnu+JiCgDRBTcRaQAwBQAj/gdngbgCc/XTwCYntihERFRrCKduT8AYB6Adr9jZ6vqfs/X/wZwdiIH\nRkREsesW7gQRmQrgoKpuFZHLzM5RVRUR0z4GIjIXwFwA6Nmz54XDhg2LY7hERF3P1q1bD6tq32ju\nEza4AxgD4CoRuQJADoAzROQpAAdEpJ+q7heRfgAOmt1ZVVcAWAEApaWlWlNTE834iIi6PBH5LNr7\nhE3LqOoCVS1Q1UEArgOwQVVvALAGwBzPaXMAPB/tkxMRUXLEU+e+BMBEEfkIwLc93xMRUQaIJC3j\no6qvA3jd8/URABMSPyQiIopXVMGdKFXcbjf27t2LkydPpnsoRCmTk5ODgoICOByOuB+LwZ0y0t69\ne3H66adj0KBBEJF0D4co6VQVR44cwd69ezF48OC4H4+9ZSgjnTx5En369GFgpy5DRNCnT5+E/bXK\n4E4Zi4GduppE/s4zuBPF6IorrkBjYyMaGxvx3//9377jr7/+OqZOnWp6n5tvvhkffPBBxM9x7Ngx\nzJgxA4WFhSgrK8P777/vu23QoEEYMWIEiouLUVpa6js+f/58FBYW4r/+6798x5566ik88MAD0fx4\nMbn++utRWFiI5cuXY9GiRXjttdc6nRPq9Ummffv24eqrrw573u9+97sUjAa47LLLkNTrflQ1ZR8X\nXnihEkXigw8+SPcQIvbpp5/qBRdc4Pt+48aNOmXKlIQ89q9+9SutrKxUVdWdO3fq+PHjfbede+65\neujQoYDzGxsb9dvf/raqqt50001aX1+vzc3NOn78eG1paUnImILZv3+/fv3rXw97XiJfn2To2bNn\n1PdpbW2N+j6XXnqpvvvuu52Om/3uA6jRKOMtZ+5kCdW1DRizZAMGV7yEMUs2oLq2Ia7HW7p0KR58\n8EEAwG233Ybx48cDADZs
2IDZs2cDMGbOhw8fRkVFBT7++GMUFxejvLwcAHD8+HFcffXVGDZsGGbP\nng3j/2fgbO1rX/safvOb36CoqAijR4/GgQMHOo3jgw8+8D33sGHDsHv3btPzvGw2G9xuN1QVzc3N\ncDgcWLZsGW655ZaQFRj33HMPRowYgaKiIlRUGA1e6+rqMHr0aBQWFmLGjBk4duyY72eYP38+ysrK\n8B//8R948803AQCXX345GhoaUFxcjDfffBM33ngjVq1aBQD4xz/+gWHDhmHkyJFYvXq173lPnDiB\nH/zgBygrK0NJSQmef964FvLxxx/HzJkzMXnyZAwZMgTz5s3z3ecf//gHRo4ciaKiIkyYMCHk4/jb\nvXs3hg8fHvLxKyoq4HK5UFxc7Pt3fuqpp1BWVobi4mL86Ec/Qltbm+/f75e//CWKioqwePFiXHPN\nNb7n8v/r5Cc/+QlKS0txwQUX4I477gj6b5Bw0b4bxPPBmTtFKpqZ+3Pv7dVht6/Vc+e/6PsYdvta\nfe69vTE//1tvvaVXX321qqqOHTtWR40apS0tLVpZWal//vOfVfXUzNls5n7GGWfonj17tK2tTUeP\nHq1vvvmmqgbO1gDomjVrVFW1vLxcf/vb33Yax4IFC/TWW29VVdUtW7ao3W7XmpoaVVUdNGiQFhUV\n6ciRI/V//ud/fPe55557tKioSH/xi1/ovn37ws6SX375Zb344ov1xIkTqqp65MgRVVUdMWKEvv76\n66qqunDhQv35z3/u+xl+8YtfqKrqSy+9pBMmTFDVzn/BzJkzR//+97+ry+XSgoIC/ec//6nt7e16\nzTXX+Ma0YMECffLJJ1VV9dixYzpkyBA9fvy4PvbYYzp48GBtbGxUl8ulAwcO1M8//1wPHjyoBQUF\n+sknnwSMNdjj+PMfX7DHVw2cuX/wwQc6depU3189P/nJT/SJJ57w/futXLlSVVXdbrcOGDDA95w/\n/vGPfePxjrG1tVUvvfRS3bZtm+915MydKISl63bB5W4LOOZyt2Hpul0xP+aFF16IrVu34osvvsBp\np52Giy++GDU1NXjzzTcxbty4sPcvKytDQUEBbDYbiouLsXv37k7ndO/e3Te7u/DCC03PqaioQGNj\nI4qLi/HQQw+hpKQEdrsdALBp0ybU1dVh7dq1+OMf/4g33ngDADBv3jzU1dXhvvvuw8KFC3HXXXfh\nkUcewaxZs3D33Xd3eo7XXnsN3//+99GjRw8AQO/evdHU1ITGxkZceumlAIA5c+b4Hh8AZs6cGXLc\n/j788EMMHjwYQ4YMgYjghhtu8N32yiuvYMmSJSguLsZll12GkydP4vPPPwcATJgwAbm5ucjJycH5\n55+Pzz77DG+//TYuueQSX6lg7969wz5OMGaP39H69euxdetWjBo1CsXFxVi/fj0++eQTAIDdbsd3\nv/tdAEC3bt0wefJkvPDCC2htbcVLL72EadOmAQCqqqowcuRIlJSUYMeOHVGtucSDde6U9fY1uqI6\nHgmHw4HBgwfj8ccfxze/+U0UFhZi48aN+Ne//oVvfOMbYe9/2mmn+b622+1obW01fQ5vdUSwc844\n4ww89thjAIy/sgcPHozzzjsPAJCfnw8AOOusszBjxgy88847uOSSS3z3ra2thapi6NChWLBgAdat\nW4fvf//7+OijjzBkyJAoXo3gP1+wcUdKVfHss89i6NChAce3bNkS0WsY7nFCieTxVRVz5szB4sWL\nO92Wk5Pje6MFgOuuuw5/+MMf0Lt3b5SWluL000/Hp59+imXLluHdd99Fr169cOONN6bswjzO3Cnr\n9c9zRnU8UuPGjcOyZctwySWXYNy4cfjzn/+MkpKSTuVqp59+Or788su4niuYxsZGtLS0AAAeeeQR\nXHLJJTjjjDNw4sQJ33OeOHECr7zyii+f7LVw4UL89re/hdvt9uWJbTYbmpubA86bOHEiHn
vsMd/x\no0ePIjc3F7169fLl05988knfLD5a3rWCjz/+GADwzDPP+G6bNGkSHnroId+aRG1tbcjHGj16NN54\n4w18+umnvrHG8jihOBwOuN1uAMbsftWqVTh48KDv+cxm+ABw6aWX4r333sPDDz+M6667DgDwxRdf\noGfPnsjNzcWBAwewdu3amMcVLQZ3ynrlk4bC6bAHHHM67CifFPkszsy4ceOwf/9+XHzxxTj77LOR\nk5NjmpLp06cPxowZg+HDh/sWVBNl586dGD58OIYOHYq1a9fi97//PQDgwIEDGDt2LIqKilBWVoYp\nU6Zg8uTJvvtVV1ejtLQU/fv3R15eHoqLizFixAicPHkSRUVFAc8xefJkXHXVVSgtLUVxcTGWLVsG\nAHjiiSdQXl6OwsJC1NXVYdGiRTH9DDk5OVixYgWmTJmCkSNH4qyzzvLdtnDhQrjdbhQWFuKCCy7A\nwoULQz5W3759sWLFCsycORNFRUW49tprY3qcUObOnYvCwkLMnj0b559/Pu6++25cfvnlKCwsxMSJ\nE7F//37T+9ntdkydOhVr1671pduKiopQUlKCYcOG4Xvf+x7GjBkT87iiJd53ulRgP3eK1M6dOyNK\nf3hV1zZg6bpd2NfoQv88J8onDcX0kvwkjpAoOcx+90Vkq6qWBrmLKebcyRKml+QzmBP5YVqGiMiC\nGNyJiCyIwZ2IyIIY3ImILIjBnYjIghjciRLg8ccfx759+3zf+zcI87YGJkolBneiBOgY3P29/PLL\nyMvLi/ixvFeTEsWDwZ2sob4KWD4cqMwzPtdXxf2Q999/P4YPH47hw4fjgQceCGgZCwDLli1DZWUl\nVq1ahZqaGsyePRvFxcVwuQJ72nhbAwORtY996623UFFRgfPPPx+FhYX41a9+FffPQl0Pgztlv/oq\n4IWfAU17AKjx+YWfxRXgt27disceewxbtmzB22+/jYcfftjXz7yjq6++GqWlpXj66adRV1cHp9O8\np83OnTuxcuVKbN68GXV1dbDb7Xj66acBGP1hLrroImzbtg3f+MY38Nxzz2HHjh2or6/H7bffHvPP\nQV0Xr1Cl7Lf+LsDdoQOk22UcL5wV00Nu2rQJM2bMQM+ePQEYLW69TbRiHqZf+1gAcLlcvj4r/u1j\nvW1ob7rpJkydOjUtW9JR9mNwp+zXtDe64zFqbGxEe3u77/toW7dG2j62W7dueOedd7B+/XqsWrUK\nf/jDH7Bhw4b4Bk9dDtMylP1yC6I7HoFx48ahuroazc3NOHHiBJ577jl85zvfwcGDB3HkyBF89dVX\nePHFF33nR9L2N9L2scePH0dTUxOuuOIKLF++HNu2bYv556CuizN3yn4TFhk5dv/UjMNpHI/RyJEj\nceONN6KsrAwAcPPNN2PUqFFYtGgRysrKkJ+fj2HDhvnOv/HGG/HjH/8YTqcTb731lulj+rePbW9v\nh8PhwB//+Eece+65Aed9+eWXmDZtGk6ePAlVxf333x/zz0FdF1v+UkaKtuUv6quMHHvTXmPGPmFR\nzPl2onRiy18if4WzGMyJ/DDnTkRkQQzuREQWxOBOGSuV60FEmSCRv/Nhg7uI5IjIOyKyTUR2iMid\nnuOVItIgInWejysSNirq8nJycnDkyBEGeOoyVBVHjhxBTk5OQh4vkgXVrwCMV9XjIuIAsElE1npu\nW66qyxIyEiI/BQUF2Lt3Lw4dOpTuoRClTE5ODgoKYr8+w1/Y4K7G1Om451uH54PTKUoqh8OBwYMH\np3sYRFkropy7iNhFpA7AQQCvquoWz023iEi9iDwqIr2C3HeuiNSISA1nYUREqRFRcFfVNlUtBlAA\noExEhgP4E4DzABQD2A/gviD3XaGqpapa2rdv3wQNm4iIQomqWkZVGwFsBDBZVQ94gn47gIcBlCVj\ngEREFL1IqmX6ikie52sngIkAPhSRfn6nzQDwfnKGSE
RE0YqkWqYfgCdExA7jzaBKVV8UkSdFpBjG\n4upuAD9K3jCJiCgakVTL1AMoMTn+n0kZERERxY1XqBIRWRCDOxGRBTG4ExFZEIM7EZEFMbgTEVkQ\ngzsRkQUxuBMRWRCDOxGRBTG4EyVKfRWwfDhQmWd8rq9K94ioC4uk/QARdVRfBay/C2jaC+QWAEMu\nB7b9DXC7jNub9gAv/Mz4unBW+sZJXRZn7kTRqq8yAnfTHgBqfK559FRg93K7jDcAojRgcCeK1vq7\nOgfyYJuTNe1N+nCIzDC4E0UrmoCdm5j9MImixeBOFK2gAVsCv3U4gQmLkj4cIjMM7kTRmrDICNz+\nHE6g9AdA7gAAYny+8kEuplLasFqGKFregO1fLTNhEQM5ZRQGd6JYFM5iMKeMxrQMEZEFMbgTpQKv\nXqUUY1qGKNm8Fz3x6lVKIc7ciZLN7KInXr1KScbgTpRswS564tWrlEQM7kTJFuyiJ169SknE4E6U\nbMEueuLVq5REXFAlioRZi9+PXonsIiZe9ERpIKpButklQWlpqdbU1KTs+YgSor4KeP6nQFtL8HMc\nzsxoN9DxTYhvIpYgIltVtTSa+zAtQxTO2vmhAzuQGdUvZn3mX/gZa+q7KAZ3onBcRyM7L93VLyy5\nJD8M7kSJku7qF5Zckh8Gd6JwnL3Dn5MJ1S8suSQ/DO5E4XznHsDm6HDQ5gn6GdS7nSWX5IelkEQm\nqmsbsHTdLuxrdKF/3pl4oPj/YtTHD2V2FQpLLslP2FJIEckB8AaA02C8GaxS1TtEpDeAlQAGAdgN\nYJaqHgv1WCyFpGxQXduABau3w+Vu8x1zOuxYPHMEppfkp3Fk1FUlqxTyKwDjVbUIQDGAySIyGkAF\ngPWqOgTAes/3RFlv6bpdAYEdAFzuNvyyahsGV7yEMUs2oLq2wbiBrXwpQ4VNy6gxtT/u+dbh+VAA\n0wBc5jn+BIDXAcxP+AiJUmxfo8v0eJvnr9yGRhcWrN6O/D0vYtT2O9jKlzJSRAuqImIXkToABwG8\nqqpbAJytqvs9p/wbwNlB7jtXRGpEpObQoUMJGTRRRGKcVffPc4Y9x+Vuw4D3lrKunDJWRAuqqtoG\noFhE8gA8JyLDO9yuImKavFfVFQBWAEbOPc7xEkXGbIOM1XOBz98Gpt4fsGCa63RABGhsdiOvhwMn\nO6RkgjlLDwFicgPryikDRFUto6qNIrIRwGQAB0Skn6ruF5F+MGb1RJnB7GpNKFDzKN5tH4oF757r\ny6s3uty+M441uxGpfXomCuRw5xvEZvy1wGoVSqOwaRkR6euZsUNEnAAmAvgQwBoAczynzQHwfLIG\nSRS1oLNnxYD3lnZaMI3Fva2z0KzdTZ6iDeztQukWSc69H4CNIlIP4F0YOfcXASwBMFFEPgLwbc/3\nRJkhxFWZZ6nJbDsGa9rHosJ9M/a2n4l2FUDsnU9iDp7SJJJqmXoAJSbHjwCYkIxBEcVtwiIjx47O\nyzwH5cyYH9Yu4quaAYwAj1bg193/jnM0SMEAc/CUBmw/QNZUOAso/QE6rXg6nNgzshxOh8ksOwyn\nw47rLxoQcN+rbJtwj+MRnIMQlWDs7UJpwOBO1jX1fmDmCqP3i18PmFFX/QiLZ44IeVcBkOd0oFcP\nBwRAfp4Ti2eOwN3TR2DxzBHIz3NCYMzYnRJmEw/2dqE04E5M1CV4Sx8bGl2+1ErHFItXfp4TmyvG\nR/bAlXkwS/0AMN5MWC1DCRBL+wE2DiPL69grxhvQzQK7APjWsL4dGoc5UT5pqHlfmdwCz85HHY8P\nAG57P5E/BlFUmJYhyzPrFROMAlj5zh6Ur9qGhkYXFKfaDfj6yfhjm13KUAzuZHnBesUE425XuNsC\nZ/UudxuWrtvV+eTCWUYv9w55faZiKN0Y3MnyIukVEwnTN4n6qsj6p7N7JKUYc+5kKWa58vJJQzv1\nZ49FpzeJYP1rVv
8wcDHV7Dx2j6QkY7UMWUaoTTYA+KplYiUCqBrVNOWThmL665PMF1O9HE4jRbP+\nLi66UlxiqZZhcKes51/maMYugvtmFfmqXTrO7mMJ+ALg49O+B5tZV0h/uQM8V6ia/T8ToLIx6uem\nroelkNTlmM3WO2pTxYLV2wEA00vyfR9eY5ZsiDrAK0J0hfTnzcWbztx55SolDxdUKatFWuYYtNoF\nQPmkoaZt2cMJ2hXSn3eRleWSlGKcuVNWi6bMseO5/umZWJKTa9rHAm5gXrcq5MthiAgC0i/eAO5d\nNI2kqoYoQZhzp6wWTUqlVw8HenTvhoZGF2wCtCfwVz8/z4nNVxxmAKekYM6dsptJzXh12xj8enU9\nmt3tAIyKldkXDcTd040KmPJJQ3HbyrqIZt5NzW7fTkuJDOwOm6B80lCgcDyDOWUM5twpM3hrwZv2\nwLuLUevzt+D1VX/wBXbAKEV86u3PcXv1qQXSSON0e/hTTEmYhPzXcrqZ950hSiMGd8oMJnuedms7\niV/Zza/kfGbLqeqT/DBXoMbSu91fuMxlYxT7rhKlCoM7ZQQNsltRfzliety/o2P5pKGdArh3sm0X\nifvKVHuYqXui2hsQJRKDO2WE/ehjenyfmh/3D7jTS/IDNtDIz3Ni9uiBcDrspm19o+GwS6fdl/w5\nHXYj306UYRjcKe2qaxuwpKVzzXizdse9reYLlHYbUHLXKxhc8RLGLNkAANhcMR7Lry0GYOTl452x\nA0DP7t0Cdl8CTr2xeHdnYr6dMhGrZSh5IuyYuHTdLjT41Yz3lyPYp31wb+sso5bcREubosWT6/b2\nW6/57Cie3dqQkKDu1eQynqPjVa1EmY7BnZIjik6I3ouL1rSPxZoW82Aejsvdhme27Ik7DdNRRPn0\nSNv+EqUQ0zKUHCbVL3C7jOMdJGpBMtGBPaJ8ukkJJ174Gfu1U9oxuFNyBKl+MTtuVu0Sb/livPKc\nDuQ4bLhtZR3GLNlgvsUeENWbGFEqMbhTcgTreBjkeI7j1K9intOBxTNHIM/pSMbIQvLW4DS5jKtZ\nw+6hGsWbGFEqMbhTcph0Qmy156DyxHd9FS7VtQ2+lr3H/C4E+qrVuJZ0alG/lAzVvye7dvjsFbSr\nZJRvYkSpwgVVSo4OnRCbnedg0YnvYlVLGYBTs+Ech61TdUuo9ryJZrcJbADaI8jXm3agnLAocOEY\nYDtfyggM7pQ8hbN8QX7ikg1oaAkMji53W9CyxXi2w4vG6ad1Q6MrsvYBpgu/bOdLGYrBnVIimr7r\nXh26oyecABEHdl/nRzN+b2JEmYI5d0q66toG2IL0Z+nhsAXdBSnZOw1E8/js/EjZhjN3Sprq2gZU\nrtkRcnbs3843mDynw/cY3WyC1kQ2Y48QOz9StuHMnZLCWwUTadojlKlF/bB7yRTcMHpgWgI7wM6P\nlH3CztxFZACAvwI4G8ZfsitU9fciUgnghwAOeU79taq+nKyBUnaJdOPqSDz99ucAjGZg6cDOj5SN\nIknLtAL4paq+JyKnA9gqIq96bluuqsuSNzzKVrEsoAajOBXgE0Uk/CYcgNH5sXzSUObbKeuEDe6q\nuh/Afs/XX4rITgD8TaeQ+uc5E1rOmOhkjGjox7xh9Kl9WomyUVQ5dxEZBKAEwBbPoVtEpF5EHhWR\nXkHuM1dEakSk5tChQ2anULarrwKWDwcq84zP9VWm/WIySahl3Dyng4Gdsl7EwV1EvgbgWQC3quoX\nAP4E4DwAxTBm9veZ3U9VV6hqqaqW9u3bNwFDpowSpCvidPvmgA0usoXTYUflVRecOmDyxkWUDSIK\n7iLigBHYn1bV1QCgqgdUtU1V2wE8DKAsecOkjBWiK+L0knxsrhiPG0YPTM/YYjByYO6p/Drb+VIW\nCxvcRUQA/AXATlW93++4f1enGQDeT/zwKONF0BXx7ukjMObrvVM0oPhs/vgozl+4
1ugAyXa+lMUi\nqZYZA+A/AWwXkTrPsV8DuF5EimGsS+0G8KOkjJAyW26BZ2ZrctyjurYB733elMJBxafZ3Y4Fq7dj\nmn2v+dWzbOdLWSCSaplNgOnvOGvaKaKuiImseU8Vl7sNB+xn4hyYFAGwnS9lAV6hSvEpnAVc+SCQ\nOwCAGJ+vfDCgkVYia95TaV1rETrNa9jOl7IEe8tQXKprG7B03ZnY13gP+uc5UX7ZUEwvzA+4Pent\nHZPgKtsmXGN/A4EDF6Doe+wASVmBM3eKmbd/TEOjy7cd3W0r63B79faA2xO8b3VKzOtWhR7S0uGo\nAh+9kpbxEEWLM3eKmVku3dsqoPTc3rjzhR1Zl2v36i+HzW/gYiplCc7cKWbBcukK4DfPBe6Lmm32\n40zzG7iYSlmCwZ1iFqoN7omW7Jqx222CPKcDAqNZ2L4L53Xa4JuLqZRNmJahmH1rWN+0teFNhPw8\nJ/Y1uoyF4E6dH8cDg3pxb1TKWgzuFJPq2gY8u7Uh3cOIWX6eE5srxoc+iXujUhZjWoZiko0XJvn7\n1jCTJnZsEkYWwpk7xSRbL0zyevrtz/HU25+f2ozDvjnwSltvkzCAs3fKSpy5U0yyfU9Rb+l9Q6ML\nC1ZvR/PaRWwSRpbCmTtFzLgadRf2NbqQ18MBh03gTtOG1ZGwCRDJ8FzuNuS4/m1+I+vaKUtx5k4R\n6Xg16rFmN1ozOLADkQV2r33tfcxvYF07ZSkGd4pIsKtRreLe1llw4bTAg6xrpyzGtAxFJNsXUMNZ\n0z4W0gL8vu8LrGsnS2Bwp4j0z3OiwSIBvofDhmZ35y2ya86YCNy2OA0jIko8pmUoIuWThsLpsKd7\nGHHr1cOB380s7PSzOB12lE8amqZRESUeZ+4UEe+l+UvX7cqKGbzDLoAioJrH6bDjjisvCPhZgrcf\nIMpuDO5kSe42Ra8eDqgCTS73qQBu3wwsvwvTm/Ziem4B8L1FQOGUdA+XKOEY3Cki3lLIbGo5cKzZ\nDafDjuXXFhuz8voqXoVKXYZoCrfJKS0t1ZqampQ9H0XH/yKljqmKMUs2ZEU6xkzP7nbk9eiOlc0/\nRIHNZBOO3AHAbe+nfmBEERKRrapaGs19OHPvSuqrTrWwdfYyjrmOAbkFePfrt2DBu+f6Zubey/IB\nI9+ezaWQJ1racKLFhf6ncXcl6jpYLdNVeFMSTXsAKOA6anxAgaY9GP7e7ZjY9v8C7uJyt2Hpul0A\ngLwejtSPOcH2KXdXoq6Dwb2rWH9X58ZYfpxowbxunVvcemfs2bjJdUf3ts5Cs3YPPMirUMmimJbp\nKiJIPfSXI52P5Tlxe/V2NLqydz9UrzXtYwE3MK9bFfrbjsDGq1DJwhjcu4rcAk9KJrh92rl51qA+\nzqzeSq+jNe1j8apeisXTRrCunSyNaRmr8+4u1LQHgAQ9rVm7497WwBlsrx4OvP3JsSQPMLny85y4\nYfRA5Oc5fZtfL57JwE7Wx5m7lXWs64bCCPAKOHsDANqbj2Gf9sG9rbOMtIWfO668ALeurEvpkBNF\ngFP17URdEIO7lZkuompAXfe4IPXrvXo4ML0kH7+s2oa2LFtNFQCzRw9kYKcujWkZKwu2iOp3PFhD\nsGPNbgyqeCnrArtdBLNHD8Td00ekeyhEacXgbmXB6rf9jk8vycfimSPQs3v2d3wEgDZVPLu1AdW1\nDekeClFahQ3uIjJARDaKyAciskNEfu453ltEXhWRjzyfeyV/uBSVCYuMOm5/JnXdNZ8dxYmW7OkZ\nE47/xVdEXVUkM/dWAL9U1fMBjAbwUxE5H0AFgPWqOgTAes/3lEkKZwFXPmjk2CHG5ysf7FTX/bct\n1il19MrmdglEiRB2QVVV9wPY7/n6SxHZCSAfwDQAl3lOewLA6wDmJ2WUFLvCWWEv0snwfa5j0j/P\nGf4kIgs/1cyvAAAWK0lEQVSLKucuIoMAlADY
AuBsT+AHgH8DODuhIyOKQJ7TAbstsH7fYRfuqkRd\nXsTBXUS+BuBZALeq6hf+t6nRN9h0/icic0WkRkRqDh06FNdgKTkcWbys3uhyo63jnx4W/EuEKFoR\n/bcWEQeMwP60qq72HD4gIv08t/cDcNDsvqq6QlVLVbW0b9++iRgzJUB1bQPGLNmAwRUvodViwdDd\nruYLqt6rdSvzjM/1nRulEVlF2Jy7iAiAvwDYqar3+920BsAcAEs8n59PyggpoaprG1C5ZkdgIzCL\nBXfAZEGVuzBRFxPJzH0MgP8EMF5E6jwfV8AI6hNF5CMA3/Z8TxnMu1WeFTo8htNpQdXsal23yzhO\nZEGRVMtsQvCOUxMSOxxKpqXrdmXVHqixcjrsnRdUI7hal8hK2FumCwlX+22T7CyLnGbbhPndq3CO\nHsZB6Ys9I8sxqmRy4EnBWh5zFyayqCyuk+hCErQQGK72O6db9v06XGXbhMWOR9Afh2ET4Bwcwqjt\nd3R+jSK8WpfIKrLvf3NX03HvU+9CYAwBPliTMK9md3scA02Ped2q0ENaAg+a5dIjvFqXyCpEU9j1\nr7S0VGtqalL2fJbg22ijA7+2vcFU1zZg6bpd2NfoQv88py8P7T1mE8m6ro8dfXLa92AzXRESoLIx\n1cMhSgoR2aqqpdHchzP3TBfjQqC3Mqah0QUF0NDowoLV2wEAmyvG49MlU9CeRYE92Ir+Pj3T/Abm\n0qmLY3DPdBG07TVjVhnTsVtirtMR9/BS5Ztf7418kzWDe1tnoVm7Bx5kLp2IwT3jxbgQaLa7EmBU\nzFTXNqD4zleyqt599xEXNleMx+4lU3DD6IGwizGXf0nH4cVzK3zbBgIAurFpGBFLITOdd8Fv/V1G\nKia3wAjsIRYCq2sbvDuldpLXw4EFq7dnXb27fxnn3dNHBO60VH8ceMHvzcx1lFefUpfH4J4NImjb\n62/pul1BOwo0utzIolS7T8gUUqirTxncqYtiWsaCQl2slI2BHQBOtLQG3zqPV58SdcLgbkFW3KjC\n3Rak0yMQ86IzkZUxLWMBHevZB/VxBl1QzTZX2TZhXrcq9JfD2Nd8JlC/uHOqZcKiwI6PACtmqMtj\ncM9y3np27wJpQ6PLMvuHXmXbhCWOR3xXoBbIYWD1XODzt4Gpft2nY1h0JrI6BvcsV7lmR6fKlyxN\nq3di2loACtQ8CgwcHRi8o1x0JrI65tyzWHVtQ1bVqkervxwOcouyDztRGAzuWSzoAqNF7EeQ1gIA\nK2GIwmBwz2JWya37s4tAAOTnObHvwnkI2lWGlTBEITHnnsX651mnKsarXRWfLpni+W48YNtl5Nj9\nVxJYCUMUFmfuWax80lA47MH6JWanTjX6U+8HZq5gH3aiKHHmnsWml+QDAO58YQeONYdfWO3Z3Y4T\nLZnbU8Z071OAlTBEMeDMPVNFsbVej+7dIAje8xww9kdtac3snZa+e2G+7w2LiOLDmXsm8m6t573i\n0ru1HuCbwVbXNqByzY6ISyHbFRm/OcfGDw+lewhElsGZeyYK1eUQp65KtVqNuxWrf4jShcE9E4Xp\ncmi2y5IVWLHhGVG6MLhnojBdDsPNcCULC2iCLqYSUUwY3DNRmK31Qs1wnQ47Zl80EA5b9kT4/Dwn\nFs8cwcVUogRicM9EhbOMWu4gtd3lk4bC6bB3uluvHg4snjkCGz88BHd7Zi+eAsYb0QPXFmNzxXgG\ndqIEY7VMpgpR2+0NhP493MsnDfUdv21lXcqGGav8DmMmosQSTWF5XGlpqdbU1KTs+bqKjpt1NLe0\nRnRRUzI4bIA7gnL63b4WA0QUjohsVdXSaO7DtEyW85ZFNjS6oDA26zh+sjVtbQn8A3uwEQgQfD9U\nIkoIpmUyVX1Vp52FqtvGBMzQvzWsL57ZsgdtHf76crcrejhsaG8H2lRhF+l0TioEe0aFkVJiSoYo\neRjcM5HJ
Faqtz9+CTe6b0dDyTQDGDP2ptz8P+hDNflPoNlUI0rtDU8BeqHomln4xC8D4NI6IyNrC\npmVE5FEROSgi7/sdqxSRBhGp83xckdxhdhHefjKrf9jpCtVubSdxK/435odWhO49kyx2Ed9eqAW2\nw7AJUGA7jCXd/xKyXw4RxSeSnPvjACabHF+uqsWej5cTO6wuyDtbb9oT9JT+ciSup1AYwTZVnA47\nrr9oAOY7Ou+F6sRX3CqPKInCBndVfQPA0RSMpWsz6yfTwT7tE9dT5Oc5cf1FA+J6jEjlOR3466jP\ncPen1wffC5Vb5RElTTzVMreISL0nbdMr2EkiMldEakSk5tAhdv0LKkyga7Xn4AFcF/PD28RoWxAq\nTx+tUH8DTLdvxqjtdwBNe4Kfx63yiJIm1uD+JwDnASgGsB/AfcFOVNUVqlqqqqV9+/aN8em6gFCB\nLncAuk17CGNn/B/kx9hcq10Tv6Aa6vFubnkq9F8i3CqPKKliqpZR1QPer0XkYQAvJmxEXdWERYEV\nMoARAP3aDkz3HL41zBWo6a6MAYD+thDrA7kDjJ+XuysRJU1MwV1E+qnqfs+3MwC8H+p8ioA30HWo\nbe8YAJeu2xXyYSK9QjSZHHbBSec56OHa3/nG3AHAbfx1IUq2sMFdRJ4BcBmAM0VkL4A7AFwmIsUw\nJoi7AfwoiWPsOkL0k/G2GGgI0+433YEdAKDAjm/cZuTcO6ZmWk4YlUGctRMlFXvLZAFvi4F0btDh\ndNijev78PCc2X3EYWDsfcHUotuqQbiKi0NhbxqKi3XnJrB1wvBbPHBHVYu6+RpcRvLv37Hyj35aB\nRJQcDO5ZIJq9RZ0OG07rduqftYfDhh6O+P+Zp5fkm/aRD1bm6NtQJMyWgUSUHAzuWSCavUW/am0P\n2Djb3ab43cxC7F4yBTeMHhhTCwKnw4YxSzbgtpV1yHHYkOd0QGCkXmaPHtgp4AdsmRdmy0AiSg4G\n9ywQbOeljgRGPbs/d7uics0OVNc24NmtDTGVSLa2q6+l8LFmN75qbcdyzw5Kd08f4UvZeAN+wJZ5\nYbYMJKLk4IJqlqiubcCdL+wIuglHuAXPPKcjYEYfKZt0fsPwing3JZP2xVxMJYpcLAuqDO5Zxr8k\n0tun3Rtkw13cFK1IKmScDjs3tyZKMlbLdAHehc38PCfa/QL79JJ89OrhiPvxe3a3B6RXwlXIuNxt\nYS+sIqLU42YdGarjvqjfGtYXGz88hIZGV0B7gYZGFxas3g4AuOPKC+KavffsbkdzS5tvw20AaG5p\nDXu/aKp5iCg1mJbJQLFctJSf58TmivEYXPFS1IumPRw2uNsV7rZT93TYBBAEHAv33ESUHLGkZThz\nzyCRthgws6/RheraBogA0b5fN5v0LHAHWUXt2JQsoOyRiDIGg3u6eSpJtGkvRmkfXOiehQaMjfph\n+uc5sXTdrqCVLYmiMGbq3nRRRNUyRJRyDO7p5LcRtgDIl8NY4ngEcANr2iMP8A67oHzSUNyW4GoZ\nM0zBEGUHVsukk8nWej2kBfO6RblxtGe2Hs2VrOE4bAKHPfB6VqZgiLIHg3s6BemvEu1G2O52xS+r\ntkWUqxcAD1xb3OmKV4dN0KvHqbYCS68pwtKri4JfeUpEGY1pmXTwXrEZpK6l40bY3kXM/Dxn0ADe\nFuEqqk2M2fjimSMCSi2D5c4ZzImyE4N7qvnl2c00a3fc2xp4ab43sG+uGI/iO1+JqY2AV5sqFqze\njsUzRzB3TmRhTMukmkme3Sd3ABa4bzZdTPWWOp6I4KKicHhVKZH1MbinWtA+5gLc9j5qzphoeqtN\nBJVrdkR0UVEkeFUpkbUxuKeas1fI498a1tf05jbViNMxDrv4eq4Hk8jKGiLKPAzuGWbjh4eivk+e\n0xFQ1bL06iLU3XE5vvn13qbn24BTJY31VcDy4UBlnvG5PsoyTCLKSFxQTT
XXsZDHo02XOB12VF51\nAQD4ql+WrtuFms+OYvPHR83vJJ4qmI6Lu017jO8B9lsnynKcuadamG3nIkmX2EUCas8BYMHq7b7d\nkhoaXXj67c+D3t/XosBscZebVxNZAoN7qk1YBNi7Bx6zd/dtOxfJlnrtqvh0yRRsrhiP6SX5WLpu\nV6cOkqGWXe2eWnduXk1kXQzu6dDxgiO/76eX5IfdJKPj7D7aVM7iITuN/HqwtwBuXk2U9RjcU239\nXUB7h6qXdndAKmR6ST42V4w3bRNg1t8lWCrHrFqm/Jw6zNq/1Mivm+Hm1USWwOCeakFTIXs6Vaz4\nz+JD9XcxS+U4HXbMHj0w4L4PXFuMn+ozIS+iwpUPcjGVyAJYLZNquQXBZ81Q04qVE1+1+hZK73xh\nB4DAni/eryPpFYPnQ19ERUTWwOCeahMWhewtA8BXsVLdNgblf98WsCvSsWY3yldtA9A5wEfU5CvY\nmwvz7ESWwrRMqhXOMlIfuQNgnhX3aNqLpet2mW53527T2HvDTFhk5NX9Mc9OZDkM7ulQOMtIgVQ2\neoK8idyCkFUwMfeG6fjmwjw7kSUxLZNuZmkaz0y6/8vB+7fH1RumcBaDOZHFceaebiFm0uWThsJh\n65y68e6ZSkQUTNiZu4g8CmAqgIOqOtxzrDeAlQAGAdgNYJaqBmmaQmEFmUl7F0gr1+zwdYTs1cOB\nO668gDskEVFIomG2ZxORSwAcB/BXv+B+L4CjqrpERCoA9FLV+eGerLS0VGtqahIwbCKirkNEtqpq\naTT3CZuWUdU3AHRsLzgNwBOer58AMD2aJyUiouSKNed+tqru93z9bwBnBztRROaKSI2I1Bw6FH2v\nciIiil7cC6pq5HWC5nZUdYWqlqpqad++5rsMERFRYsUa3A+ISD8A8Hw+mLghERFRvGIN7msAzPF8\nPQfA84kZThfHLe+IKEEiKYV8BsBlAM4Ukb0A7gCwBECViNwE4DMAvCImXtzyjogSKGxwV9Xrg9w0\nIcFj6Rrqq4ze7U17jWZdExYZwTvUlncM7kQUJbYfSKVQs3NueUdECcT2A6kUanYeZuNsIqJoMLin\nSn1V8E06mvayFS8RJRSDeyp40zHB5BawFS8RJRRz7qlglo7x8p+dewO5d8HVu2k2AzwRRYnBPdHM\nqmFCLYr6z85ZDklECcK0TCJ5g3PTHgRsdu3oYX6+s3dg0A614EpEFAXO3BMpWHCWCN9DWQ5JRAnC\n4J5IwYKwtpsfdx0F7uwNaJuxgOrsZRzriOWQRBQlpmUSKVgQFnvw+2ib8blpD9ByHLA5Am9nOSQR\nxYDBPZGC1apfeGPn42baWoDTTmc5JBHFjWmZROpYyujfOwYAtj5+aqYejOsYMP/TpA6TiKyPwT3R\nzDa7rq8Ctv0tfGAHmF8nooRgWiYVQl3EFECYXyeihGBwT4WIShkFKP0B8+tElBBMy6RCboF50zCx\nG2WSHXPzRERxYnBPhQmLAtsKAEb1DCthiChJmJZJBXZ8JKIU48w9VcyqaIiIkoQzdyIiC2JwJyKy\nIAZ3IiILYnAnIrIgBnciIgticCcisiAGdyIiC2JwJyKyIAZ3IiILYnAnIrIgBnciIgticCcisiAG\ndyIiC4qrK6SI7AbwJYA2AK2qWpqIQRERUXwS0fL3W6p6OAGPQ0RECcK0DBGRBcUb3BXAayKyVUTm\nmp0gInNFpEZEag4dOhTn0xERUSTiDe5jVbUYwHcA/FRELul4gqquUNVSVS3t27dvnE9HRESRiCu4\nq2qD5/NBAM8BKEvEoIiIKD4xB3cR6Skip3u/BnA5gPcTNTAiIopdPNUyZwN4TkS8j/M3Vf1HQkZF\nRERxiTm4q+onAIoSOBYiIkoQlkISEVkQgzsRkQUxuBMRWRCDOxGRBTG4ExFZEIM7EZEFMbgTEVkQ\ngzsRkQUxuBMRWRCDOxGRBTG4ExFZEI
M7EZEFMbgTEVkQgzsRkQUxuBMRWRCDOxGRBTG4ExFZEIM7\nEZEFMbgTEVkQgzsRkQUxuBMRWRCDOxGRBTG4ExFZEIM7EZEFMbgTEVkQgzsRkQUxuBMRWRCDOxGR\nBTG4ExFZEIM7EZEFMbgTEVlQXMFdRCaLyC4R+ZeIVCRqUEREFJ+Yg7uI2AH8EcB3AJwP4HoROT9R\nAyMiotjFM3MvA/AvVf1EVVsA/C+AaYkZFhERxSOe4J4PYI/f93s9x4iIKM26JfsJRGQugLmeb78S\nkfeT/ZwJcCaAw+keRAQ4zsTJhjECHGeiZcs4h0Z7h3iCewOAAX7fF3iOBVDVFQBWAICI1KhqaRzP\nmRIcZ2JlwzizYYwAx5lo2TTOaO8TT1rmXQBDRGSwiHQHcB2ANXE8HhERJUjMM3dVbRWR/w/AOgB2\nAI+q6o6EjYyIiGIWV85dVV8G8HIUd1kRz/OlEMeZWNkwzmwYI8BxJpplxymqmoyBEBFRGrH9ABGR\nBaUkuGdLmwIR2S0i20WkLpbV6WQRkUdF5KB/GamI9BaRV0XkI8/nXukco2dMZuOsFJEGz2taJyJX\npHOMnjENEJGNIvKBiOwQkZ97jmfUaxpinBnzmopIjoi8IyLbPGO803M8017LYOPMmNfSn4jYRaRW\nRF70fB/165n0tIynTcE/AUyEcaHTuwCuV9UPkvrEMRCR3QBKVTWj6l5F5BIAxwH8VVWHe47dC+Co\nqi7xvGH2UtX5GTjOSgDHVXVZOsfmT0T6Aeinqu+JyOkAtgKYDuBGZNBrGmKcs5Ahr6mICICeqnpc\nRBwANgH4OYCZyKzXMtg4JyNDXkt/IvILAKUAzlDVqbH8f0/FzJ1tCuKkqm8AONrh8DQAT3i+fgLG\nf/q0CjLOjKOq+1X1Pc/XXwLYCePq6ox6TUOMM2Oo4bjnW4fnQ5F5r2WwcWYcESkAMAXAI36Ho349\nUxHcs6lNgQJ4TUS2eq6szWRnq+p+z9f/BnB2OgcTxi0iUu9J26Q9feRPRAYBKAGwBRn8mnYYJ5BB\nr6knhVAH4CCAV1U1I1/LIOMEMui19HgAwDwA7X7Hon49uaAaaKyqFsPodPlTT5oh46mRW8vIWQiA\nPwE4D0AxgP0A7kvvcE4Rka8BeBbArar6hf9tmfSamowzo15TVW3z/L8pAFAmIsM73J4Rr2WQcWbU\naykiUwEcVNWtwc6J9PVMRXCPqE1BJlDVBs/ngwCeg5FSylQHPDlZb272YJrHY0pVD3j+U7UDeBgZ\n8pp68q7PAnhaVVd7Dmfca2o2zkx9TVW1EcBGGHnsjHstvfzHmYGv5RgAV3nW//4XwHgReQoxvJ6p\nCO5Z0aZARHp6Fq0gIj0BXA4gk5ucrQEwx/P1HADPp3EsQXl/IT1mIANeU8/i2l8A7FTV+/1uyqjX\nNNg4M+k1FZG+IpLn+doJo3DiQ2Tea2k6zkx6LQFAVReoaoGqDoIRKzeo6g2I5fVU1aR/ALgCRsXM\nxwB+k4rnjGGM5wHY5vnYkUnjBPAMjD8Z3TDWLG4C0AfAegAfAXgNQO8MHeeTALYDqPf8gvbLgHGO\nhfFnbT2AOs/HFZn2moYYZ8a8pgAKAdR6xvI+gEWe45n2WgYbZ8a8liZjvgzAi7G+nrxClYjIgrig\nSkRkQQzuREQWxOBORGRBDO5ERBbE4E5EZEEM7kREFsTgTkRkQQzuREQW9P8DDCQA+SmX00wAAAAA\nSUVORK5CYII=\n", 199 | "text/plain": [ 200 | "" 201 | ] 202 | }, 203 | "metadata": {}, 204 | "output_type": "display_data" 205 | } 206 | ], 207 | "source": [ 208 | "fig = plt.figure(figsize=(6,6));\n", 209 | 
"plt.scatter(X1_new,X2_new,label='within 95% confidence interval');\n", 210 | "plt.scatter(X1_outliers,X2_outliers, label='outliers');\n", 211 | "plt.legend()\n", 212 | "plt.xlim(0,40);\n", 213 | "plt.ylim(0,40);" 214 | ] 215 | } 216 | ], 217 | "metadata": { 218 | "kernelspec": { 219 | "display_name": "Python [conda root]", 220 | "language": "python", 221 | "name": "conda-root-py" 222 | }, 223 | "language_info": { 224 | "codemirror_mode": { 225 | "name": "ipython", 226 | "version": 3 227 | }, 228 | "file_extension": ".py", 229 | "mimetype": "text/x-python", 230 | "name": "python", 231 | "nbconvert_exporter": "python", 232 | "pygments_lexer": "ipython3", 233 | "version": "3.5.3" 234 | } 235 | }, 236 | "nbformat": 4, 237 | "nbformat_minor": 2 238 | } 239 | --------------------------------------------------------------------------------