├── CNN - Music Genre
    ├── CNN_Model.ipynb
    └── Data Creation.ipynb
├── Champion Classes - UMAP and KMeans
    ├── champ_classification.ipynb
    └── champ_data.ipynb
├── Classification
    ├── Classification.ipynb
    ├── vertebral_column_data.txt
    └── vertebral_column_metadata.txt
├── Data Analysis and API - LoL Tilt
    ├── desktop.ini
    ├── phase3.csv
    └── tilt_trend.ipynb
├── Data Creation - Jungle Stats per Champion
    ├── champ_parse.py
    ├── get_match_ids.py
    ├── main.py
    └── match_data.py
├── MCMC - Enigma Thanos
    ├── BreakEnigma.ipynb
    ├── WikiData.ipynb
    └── marvel_data.txt
├── PCA for Recommendation
    └── Champ Recommendation and PCA.ipynb
├── Principle Component Analysis
    ├── ANSUR_II_FEMALE_Public.csv
    └── Principle Component Analysis.ipynb
├── README.md
└── RNN - Marvel Character Generator
    ├── RNN_Marvel_Generator.ipynb
    └── Wiki_Data.ipynb


/Champion Classes - UMAP and KMeans/champ_classification.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 2,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import pandas as pd\n",
 10 |     "import numpy as np\n",
 11 |     "import umap\n",
 12 |     "from sklearn.preprocessing import StandardScaler\n",
 13 |     "from sklearn.decomposition import PCA\n",
 14 |     "from sklearn.compose import ColumnTransformer\n",
 15 |     "import seaborn as sns\n",
 16 |     "import matplotlib.pyplot as plt\n",
 17 |     "import matplotlib.ticker as mtick\n",
 18 |     "sns.set_style(\"whitegrid\", {'axes.grid' : False})\n",
 19 |     "\n",
 20 |     "from sklearn.cluster import KMeans"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 3,
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "df = pd.read_csv('final_data2.csv')\n",
 30 |     "try:\n",
 31 |     "    df.drop('Unnamed: 0', axis= 1, inplace = True)\n",
 32 |     "except:\n",
 33 |     "    pass\n",
 34 |     "avg_lane_stats = df.groupby('Lane').mean()\n",
 35 |     "champ_ids = pd.read_csv('champ_ids.csv')\n",
 36 |     "champ_ids = champ_ids[['Champ ID', 'Champion']]\n",
 37 |     "count_df = df.groupby(['Champ ID', 'Lane']).count()\n",
 38 |     "vol_champs = list(count_df[count_df['Tier'] > 200].index)\n",
 39 |     "agg_df = df.groupby(['Champ ID', 'Lane']).mean()\n",
 40 |     "final_df = agg_df[agg_df.index.isin(vol_champs)].reset_index()\n",
 41 |     "final_df.set_index('Champ ID', drop= True, inplace= True)"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 22,
 47 |    "metadata": {
 48 |     "scrolled": true
 49 |    },
 50 |    "outputs": [
 51 |     {
 52 |      "data": {
 53 |       "text/plain": [
 54 |        "Index(['Champ ID', 'Lane', 'Tier', 'Result', 'kills', 'deaths', 'assists',\n",
 55 |        "       'largestKillingSpree', 'largestMultiKill', 'killingSprees',\n",
 56 |        "       'longestTimeSpentLiving', 'totalDamageDealt', 'magicDamageDealt',\n",
 57 |        "       'physicalDamageDealt', 'trueDamageDealt', 'largestCriticalStrike',\n",
 58 |        "       'totalDamageDealtToChampions', 'magicDamageDealtToChampions',\n",
 59 |        "       'trueDamageDealtToChampions', 'totalHeal', 'damageSelfMitigated',\n",
 60 |        "       'damageDealtToObjectives', 'damageDealtToTurrets', 'timeCCingOthers',\n",
 61 |        "       'totalDamageTaken', 'goldEarned', 'turretKills', 'inhibitorKills',\n",
 62 |        "       'totalMinionsKilled', 'totalTimeCrowdControlDealt',\n",
 63 |        "       'neutralMinionsKilled', 'neutralMinionsKilledTeamJungle',\n",
 64 |        "       'neutralMinionsKilledEnemyJungle', 'firstBloodKill', 'firstBloodAssist',\n",
 65 |        "       'firstTowerKill', 'firstTowerAssist', 'first10_xpm', 'first10_gpm',\n",
 66 |        "       'soloKills', 'earlyGanks', 'drakesKilled'],\n",
 67 |        "      dtype='object')"
 68 |       ]
 69 |      },
 70 |      "execution_count": 22,
 71 |      "metadata": {},
 72 |      "output_type": "execute_result"
 73 |     }
 74 |    ],
 75 |    "source": [
 76 |     "df.columns"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": 23,
 82 |    "metadata": {},
 83 |    "outputs": [
 84 |     {
 85 |      "data": {
 86 |       "text/plain": [
 87 |        "42"
 88 |       ]
 89 |      },
 90 |      "execution_count": 23,
 91 |      "metadata": {},
 92 |      "output_type": "execute_result"
 93 |     }
 94 |    ],
 95 |    "source": [
 96 |     "len(df.columns)"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": 5,
102 |    "metadata": {},
103 |    "outputs": [],
104 |    "source": [
105 |     "top_df = final_df[final_df['Lane'] == 'top'].drop('Lane', axis = 1)\n",
106 |     "jung_df = final_df[final_df['Lane'] == 'jungle'].drop('Lane', axis = 1)\n",
107 |     "mid_df = final_df[final_df['Lane'] == 'middle'].drop('Lane', axis = 1)\n",
108 |     "adc_df = final_df[final_df['Lane'] == 'adc'].drop('Lane', axis = 1)\n",
109 |     "sup_df = final_df[final_df['Lane'] == 'support'].drop('Lane', axis = 1)\n",
110 |     "sup_df = sup_df[sup_df['totalMinionsKilled']<100]"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": 14,
116 |    "metadata": {},
117 |    "outputs": [],
118 |    "source": [
119 |     "kills = mid_df['kills']\n",
120 |     "gold = mid_df['goldEarned']"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": 21,
126 |    "metadata": {},
127 |    "outputs": [
128 |     {
129 |      "data": {
130 |       "text/plain": [
131 |        "<matplotlib.axes._subplots.AxesSubplot at 0x19283ac1088>"
132 |       ]
133 |      },
134 |      "execution_count": 21,
135 |      "metadata": {},
136 |      "output_type": "execute_result"
137 |     },
138 |     {
139 |      "data": {
140 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZAAAAEECAYAAAAGSGKZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3de3zT9b0/8FcuTZp7Wu6Xltu4VBRpwR2PIkcmEy8oWASSTpjTc1RU9pvnOAEnyIaKbNizjR034THPOasuaRVFj3pU3BgFnJehQMXKoIDSUi7SW5K2SZN8f3948qWXJE3SfPNN2tfzL/nybfNBSl75fD+f9+etEARBABERUZyUcg+AiIgyEwOEiIgSwgAhIqKEMECIiCghDBAiIkqIWu4BSOnAgQPQarVyD4OIKKN4vV5Mnz691/v6dYBotVoUFBTIPQwiooxSXV0d0318hEVERAlhgBARUUIYIERElBAGCBERJYQBQkRECWGAEBFRQhggRESUEAYIERElRLIAOXjwIJYtWwYAOHbsGOx2O2w2G9avX49AIAAAqKioQHFxMZYsWYJdu3YBABoaGnDXXXehpKQEP/rRj9DW1hbxXiIiSi5BEBBrmyhJAmTbtm147LHH4PV6AQClpaX413/9VzidTrS3t+PPf/4zzp8/j7KyMjidTvz+979HaWkpfD4fnn32WcyfPx9//OMfcckll6C8vDzivURElDwulwu1tbXyBkh+fj62bNki/nrLli244oor4PP5cP78eQwaNAiHDh1CYWEhNBoNTCYT8vPz8cUXX2D//v245pprAACzZ8/G+++/H/FeIiLqO5fLhVOnTuHMmTNob2+P+eskCZB58+ZBrb54zJZKpUJdXR3mz5+PxsZGjBs3Dm63GyaTSbzHYDDA7XZ3uW4wGOByuSLeS0REiXO73WJwtLW1xTzzCEnZIvqoUaPw7rvvwm634+mnn4bRaITH4xF/3+PxwGQydbnu8XhgNpsj3ktERPFrbW1FbW0t6uvrEwqOkJQEyH333YeTJ08C+Gb2oFQqMW3aNOzfvx9erxculws1NTWYNGkSioqKsHv3bgBAZWUlZsyYEfFeIiKKXVtbG+rq6lBXV4fW1taEgyMkJce533PPPVi9ejWysrKg0+nwxBNPYMiQIVi2bBlKSkogCAIeeughaLVarFixAqtWrUJFRQVycnLwzDPPQK/Xh72XiIh6197ejsbGRrjd7j6HRmcKIZnfLc1UV1ezHwgRDVherxeNjY1wuVwxB4dCoYDP58PUqVN7vbdfN5QiIhqIfD4fmpqa0NLSgmAwKNnrMECIiPqJjo4ONDU1obm5WdLgCGGAEBFlOL/fLwZH6KSPVGCAEBFlqEAggKamJjQ1NaU0OEIYIEREGSYYDIrB4ff7ZRsHA4SIKEMEg0G0tLSgqakpLc4DZIAQEaU5QRDQ0tKCxsbGtAiOEAYIEVGaCgVHU1OTeLp5OmGAEFGfVde3oKquGZeNsqBghFnu4WS8YDAIl8uVdjOO7hggRNQn1fUtuP/FT9DhDyBLrcKz3ytiiCQoGWscNefcOHLWhcnDTJgw1JjkEXbFACGiPqmqa0aHP4AcgxaNHi+q6poZIHFK1uJ4zTk31v/PYfj9QajVSqy/ZWpcISIIAk6cOIHRo0fHdD8DhIj65LJRFmSpVWj0eJGlVuGyURa5h5Qxkr2r6shZF/z+IMz6LLS0duDIWVdMAdLS0oIdO3bA6XSipqYGn332WUyvxwAhoj4pGGHGs98r4hpIHKTaVTV5mAlqtRItrR1Qq5WYPCx636Sqqio4HA68+eabcXUiDGGAEFGfFYwwMzhiIAiCuDguxa6qCUONWH/L1KhrIK2trXjzzTfhcDhw+PBh8bper8ctt9yCkpKSmF+PAUJEJLFUbsedMNQYNjhqamrgcDiwY8cOuFwu8fqkSZNgs9mwYMECGI
1G8Tj3WDBAiIgkInfluM/nw86dO+F0OvHRRx+J17OysnDDDTfAbrejqKgICoUioe/PACEiSjK5g6O2thYVFRV4+eWXceHCBfF6Xl4ebDYbiouLkZub2+fXYYAQESWJnMERCASwZ88eOBwO7N69W+xAqFQq8Z3vfAc2mw1XX301lEpl0l6TAUJE1EdyBsfXX3+Nl19+GRUVFairqxOvDx06FIsXL8aSJUswfPhwSV6bAUJElCC5gkMQBHz00UdwOp3YuXMnOjo6xN+7+uqrYbPZMGfOHGRlZUk6DgYIEVGc5AqO7gV/IVarFcXFxVi6dCnGjh2bsvEwQIiIYiTXseqRCv4KCwths9lwww03IDs7O2XjCWGAEBH1Qo7gaGtrwxtvvAGn09nlaJHOBX9TpkxJyVgiYYAQEUUgR3DEWvCXDhggRETdpLqRk8/nw3vvvQeHw9Gj4O/GG2+EzWbrU8GfVBggRCS7dGlIJQgC3G43GhoaUhIckQr+8vPzsXTp0qQV/EmFAUJEskqXhlQejwcNDQ1ob28Xi/Ck0FvBn91ux1VXXZXUgj+pMECISFZyN6Rqb29HQ0MDPB6PpMFx/vx5bN++vUfB35AhQ7BkyRJJC/6kwgAhIlnJ1ZDK5/OhsbERLS0tkgVHqODP4XBg586d8Pv94u+lsuBPKgwQIpJVqhtSeb1eNDc3o6WlBcFgUJLXSLeCP6kwQIhIdqloSOX1etHU1ASXyyVZcBw6dAhOp7NHwd/06dNht9tlK/iTCgOEiPq19vZ2NDU1we12SxIcvXX4s9vtKCgoSPrrpgMGCBGlzTbaZGpraxODQ4o1jmgFf3a7HbfeemvaFPxJhQFCNMClyzbaZGltbUVTU5Mku6qk7vAnJ6VSCZ1OB71ej/r6+pi+hgFCNMBJsY1WjhmNlNtxM73gLxyFQoGsrCzo9XoxOFQqFQDgzJkzMX0PBgjRANfXbbTdwyLVMxqv14vGxka4XK6kBkcgEEBlZSUcDgcqKytT0uFPap1nGTqdrs8L+pIFyMGDB7F582aUlZWhuroaGzZsgEqlgkajwaZNmzB48GBUVFTA6XRCrVZjxYoVmDNnDhoaGvDwww+jvb0dQ4cOxcaNG6HT6cLeS0R915dttOHCIlWFgR0dHWIdRzIXx8+fPy92+Dt9+rR4PRUd/pIt2iwjGSQJkG3btuH111+HTqcDADz55JNYu3YtCgoK4HQ6sW3bNvzzP/8zysrKsH37dni9XpSUlODqq6/Gs88+i/nz56O4uBhbt25FeXk5br755rD3ajQaKYZPFLdMX4ROdBttuLCQujAwEAigqakJTU1NCAQCSfme/angL9mzjGgkCZD8/Hxs2bIFjzzyCACgtLQUQ4cOBfDNX75Wq8WhQ4dQWFgIjUYDjUaD/Px8fPHFF9i/fz/uvfdeAMDs2bNRWlqKvLy8sPdOmzZNiuETxaW/LULHI1xYSFUYGAwGxeDo/AbfF83NzWLB3/Hjx8XrmVTwJ/UsIxpJAmTevHmora0Vfx0Kj08++QQvvPACXnzxRezZswcmk0m8x2AwwO12w+12i9cNBgNcLleXa53vJUoHcp/lJKdIYZHMwkApenIcOnQIDocDb731VtgOfzfeeCO0Wm1SXksKCoUCWq0WBoMBBoNBtuLElC2iv/XWW/jtb3+LrVu3Ijc3F0ajER6PR/x9j8cDk8kkXs/OzobH44HZbI54L1E6kOssp1Tp7fGcVFXkye7J0VvBXzp0+ItGqVQiOzsbBoMBer0+LQIuJQHy2muvoby8HGVlZbBarQCAadOm4Ze//CW8Xi98Ph9qamowadIkFBUVYffu3SguLkZlZSVmzJgR8V6idJDqs5wikWIdRo7Hc8FgEC6XK2nBcezYMTidzowr+FMoFFAqlWJg6HS6tFuDkTxAAoEAnnzySYwYMQIrV64EAFxxxRX44Q9/iGXLlqGkpASCIOChhx6CVqvFihUrsGrVKlRUVCAnJwfPPPMM9H
p92HuJ0kUqznKKRqo3+lQ+ngsEAmhpaUFzc3OfH1VFK/hL5w5/SqUSGo0GBoMBOp0OOp0u7cbYmUKQ8gB8mVVXV/fbM2ho4Ao306j42yn8+r2/i2/0P5w7CUtm5iXltaSegfj9fjQ3NydlV1WmFfyFZhl6vV6cZaTD7tJY3ztZSEiUQSK9oUu1DiPl4zm/34+mpiY0Nzf3KThCBX9Op7NLhz+VSoU5c+akXYe/7rOM7OzstBlbvBggRBkk0iMlKd/ok/14LrQdt7GxsU/BEa3gb8mSJVi8eHFaFPyl6ywjGRggRBkk2kxD7nWY3iRjjaO3gj+73Y5rr71W9sXm/jTLiIYBQpRB0mXHVzxCaxzNzc0JFwCme8Fff55lRMMAIcow6T7TCOnrkSOCIKCqqiptC/5CFeCdt9n2x1lGNAwQol5k+jlXqdbXI0eiFfzdeuutsNvtKS34qznnxpGzLkweZsK3hpmQlZUFk8kkawV4umCAEEUxkM+5ipcgCOJ23ETWOCIV/E2ePBk2my1pBX+dA2HC0Ojf7/h5D5557yi0Gg3er+vAugV5mDhmSJ/H0F8wQIiiGMjnXMVKEAS4XC40NjbGXTkeKvhzOBz4+OOPxeuhgj+73Y7CwsKkFdPVnHNj/f8cht8fhFqtxPpbpvYIkc6PpjyNSmTnDINOq0F9Uxs+P9+OS8ckZSj9AgOEKIr+fs5VX/TlrKpoBX82mw233XabJAV/R8664PcHYdZnoaW1A0fOujBhqLHLOVOdj0C/xK+FZ08tmtra+PcfBgOEKIpM3PXUXbLXcBI9HTdShz+VSiV2+JO64G/yMBPUaiW8viAGmfX4h8mjMGLEMOh0OqjVPd8O+8Pfv5R4lAlRP5bMNZxgMCjOOOIJjnQp+FMqldBqtTjXJuBkUwcuzR+CycMZCOHwKBMiSsoaTiIzDkEQ8OGHH8LpdMpW8KdQKKDRaKDX65GdnS3OMvIAzJDsVQcWBghRP9aXNZxEFsebm5vx6quvwul04sSJE+L1VBT8hYr5Ordz5and0mKAEKVQqmtKEnmGHwqOpqamLsV70e6Xq+AvtPjduf93Oh9/3t8wQIhSRK6aklgr1wVBgNvtFmccvS2PylXwp1KpoNfrxQrwcIvflBr8P0+UIulaUxJvB8Bjx47B4XBgx44dcLvd4vXJkyfDbrfjlltuSWqHv9CjKaPRKAZHLDu1eIKA9BggRCmSbjUl8eyqilTwp9FoxA5/ySz4Cy2Ah+oy9Hp9XN+bJwikBgOEKEX6WlOQrE/U8RyrXltbi/Lycmzfvl3Sgr/Op9mGWrn2Zc0kXWd7/Q0DhCiFuq9HxBoKyfhEHWvr2GgFf3PmzEFJSQn+8R//sc8FfwqFAmq1WlzL0Ov1SSsiTLfZXn/FAKGMl6nPuuMJhVg+UUf6/+Dz+dDc3IyWlpaowRGt4G/p0qVYvHgxhg0blvCfNzTL6HxciFTbbOWqIM/Un8VEMUAoo2Xys+54HrP09om6+/+H391RhDFWDZqbm+FyuRAMBsN+31gK/ubMmZPwTieFQgGVSgWTySTOMlK1zTbVfVMy+WcxUQwQymjJftadyk+Q8Txm6e0TdVVdMwKBAPJy9fC2ufHXqqNQjjVH3IorZYe/0KMpo9EozjYGQm3GQFx3YYBQRkvms+5Uf4KM9zFLtE/Ul40wYrhOwNf1tWj1+jBsxqAe4SFlwd9ADY3OBuK6CwOEMloyn3XL8Qmyr49ZvF4vWlpaoGlvwQ+KcnHkbFaPRknRCv4WLFgAm80Wd8Ff6NFU6JwprVY74KvAB+LJvQwQynjJetadSZ8gW1tb0dzcDI/HI65vTBhq7BIc0Tr8JVrwF1oEP98OHGv04xKdHlOs/f+NMlaZ0q8+WaIGyLJlyyJ+ovjDH/4gyYCI5JLunyADgQBcLhdaWloiHjXi8/nw7rvvwul0hi34s9vtmD59elwzhdABhaHHU8fOt+L/bR9Yi8UUXtQA+elPfwoA+I//+A
9cd911mDFjBg4dOoRdu3alZHBEqdJ58XzJzDy5h9NFW1sbXC4XXC4XjtY344sw/bxPnToldvhraGgQr48cnYeZ35mPkiW3o3Di6LDfP1yPcIVCAa1WC5PJBKPR2OXY9UMDcLGYwosaIOPHjwcAfP3117jpppsAAN/97ndRVlYm/ciIkqS3nVXpuP3S7/fD7XZ3mW107+e99qYpqK3eD4fDgT179vTo8PdPNy7Ea/UG1AaA0so6rLdYe/T/Dn3PYECASa/B4wun4dIxw2AwGMS2rt1l0qM+klbMayAvvfQSpk2bhk8//RQ6nU7KMRGFlcgW21jCIV22XwqCAI/HA7fbDbfb3aN2I9TPWxv04OS+P2H58/vQ+PVZ8fdDHf6WLFmCYcOG4a2qegRqT/bo/93ZiQutGGwxYuigXFxoC+ICzBg0aFDUcab7oz5KnZgCZPPmzXj++eexc+dOjB8/Hv/+7/8u9biIukh0lhBLOMj9ibq9vR0ejwculwsdHR1h1zYEQUB73Rc4+ebzaDi6HwherCiPVPAX6v/d0toBtVqJycNMAL55PJWdnQ2TyYRr9YPxPycP42ijD1lqVcxhMNAWiym8mAJkyJAhuOaaazBu3DhMmzYNKpVK6nERdZHoLCGWcOjLJ+pECw+DwSAOnTiDA8frMdKowthB+rD3RSr4M1ssuH3RIthsNowZMybs104YasT6W6biyFkXCoabMWmkVVzTCD2eslqB0iWXx/VnGGjHdVBkMQVIaWkpzpw5g5qaGmRlZWHr1q0oLS2VemxEokRnCbGGQyKfqBOZFbW1tcHtduPg8Xqs23EIXl8AarUS62+ZKj5eSmbB3+SRVsyYNFo8sDDc7qt4/uyJ/JnlChwGnfRiCpD9+/fjxRdfxLJly3DbbbfB4XBIPS6iLvoyS5DqcUuss6Jwj6g+q22E1xfosj4xwqiMu8NfpB1UOp0OFosFRqMxqcV98c4E5dqgkI4bI/qjmAIkEAjA6/VCoVAgEAgk7chlonikw3P3zp9qo82KAoGAuIuqvb29x7pG5/UJX9Np/PmP/4u1774ZV4e/zruytBoVfrZwGi4fPwImkyniDqq+incmKNcGhXTZGNHfxRQg3//+91FcXIyGhgYsXrwYd955p8TDIko/4T7Vdp4VTRluEndQeTyeqEen51k1uFZ3Ci9XlON49SF89n/X4yn4O3LWBY1KiXHDBqFDlQVXVg6GDBmS5D91V7HOBENBa9SoZdmgIPfGiIEipgC58cYbcdVVV+HLL7/E6NGjk9KBjCjTdP9U+9npZtxeNBpjLGp4PB6cOPE1AoFAxBNwgW8K/kId/joX/I0ZMwY2mw0LFy6M6d+XSqXCVZfkY289cMEnwN8BTBmRmjfJ3maC3YP2x9dPhtvnT+laBLcap0ZMAVJdXY3y8nJ4vV7x2saNG6N+zcGDB7F58+YuRYdPPfUUxo0bB7vdDgCoqKiA0+mEWq3GihUrMGfOHDQ0NODhhx9Ge3s7hg4dio0bN0Kn04W9l6THhciLLhtlgTZLDb8/gOFGFcYZg/jyyy8jbr0NCQQC2L17d9iCv+uuuw42my2mDn+h6nCz2QyTyYQJKhU2WAel3d9P96B1+/yyVPenwyPP/i6mAFm9ejXuuOMODB8+PKZvum3bNrz++utiwWFDQwMeeeQRnDx5EnfffTeAb7qflZWVYfv27fB6vSgpKcHVV1+NZ599FvPnz0dxcTG2bt2K8vJy3HzzzWHv1Wg0Cf6xKRZciLwoEAgg36zCpvljcfD4GeRZtDArvIjWUvz8+fN46aWXUFFRgfr6evH6sGHDsGTJkpg7/KlUKhiNRrEpU2fp+CbJx0cDR0wBMnjwYCxevDjmb5qfn48tW7bgkUceAQB4PB6sXLkSlZWV4j2HDh1CYWEhNBoNNBoN8vPz8cUXX2D//v249957AQCzZ89GaWkp8vLywt47bdq0eP6sFKeBvBApCALa29vR2tqKtrY2cSHcrB
BwzYScqF/34YcfwuFw4L333ku4w59SqRQPLzQYDBm1cYWPjwaOmAJk1KhR2Lp1KwoKCsRFvVmzZkW8f968eaitrRV/nZeXh7y8vC4B4na7YTKZxF8bDAZxATJ03WAwwOVyRbyXpDXQPkkGg0F4PB60traKi+DRHk111tzcjFdffRVOpxMnTpwQr4c6/EUr+OtMpVLBbDbDYrFk9Aw7HWdGlHwxBUhHRwdOnDjR5R9GtACJhdFohMfjEX/t8XjEKlmPx4Ps7Gx4PB6YzeaI95K05PwkmYq1lyNnWvB5XRPG56gxTK/sdedUd50L/t58880ua4SFhYWw2+244YYbei34UygUyMrKgtlshtlsTrj/OFGqxfSTajabsWbNmqS+8LRp0/DLX/4SXq8XPp8PNTU1mDRpEoqKirB7924UFxejsrISM2bMiHgvSU+OT5JSr720t7fj86/OY/2rn6C5xYOOYBBrb76kx0GDkbS2tuKNN96A0+mMueAvHKVSCb1eD7PZDIPBMKC7+VFmiilAampq0NLSArM5ef+IhwwZgmXLlqGkpASCIOChhx6CVqvFihUrsGrVKlRUVCAnJwfPPPMM9Hp92Hupf0r22ksgEEBbWxva2trg8XjQ0dGByqrTqDvXBLM+C972QNiTars7evQonnu+DDvffgPtrRdnxPF2+FOr1bBYLDCZTBn9mIpIIcTwkHfOnDk4c+YMcnNzxU9Je/fulXxwfVVdXY2CggK5h0Fx6usMJLQAHgqNtra2Hkejd++t0fksqs4idfhTqLIwZ+71uOcHy2Lq8KdQKMTZRrKPFyFKtljfO2MKkEzFAMlc8a6BdHR0iIvfocDo7Uc73DlSIZEK/rTWYRg98zoYJl6FH1x3KW66bETU1wgtikt5vAhRssX63hnTI6wDBw7glVdeQUdHBwDg3Llz+P3vf9+3ERJF0dvaSyAQQHt7uzjTaG9v7zHL6M2EocYuwREIBPCXv/wFTqczbMHfP924EDtO6xEIoEt/jXBBFHpMZbFYuChO/VZMP9lPPPEE7rzzTrzzzjuYNGkSfNGqp4gkEKkuIxkT6HPnzuHll1+OqeCvsFtYdH4UptOq8cSi6Zg+YRTMZnNG1W4QJSLmXVjz58/Hvn37sHLlStxxxx1Sj4syULK33naeYbS2tsb0WCpWgiDggw8+gMPhwJ/+9KcuBX9FV1yJ6XNuxm03Xo9JI61dvq77rOXIWRdUUGDsyFx0qLPRqLDAau36NUT9VUwBolAocPToUbS1teH48eM4f/681OOiDJOMrbfhdksle4mut4K/q757K7Z+2oID/iA++98j4uJ6pL4bM781HO/XC2jqAHwdwJQBVDzHc9Io5rOwjh49imXLluHhhx8WD0MkCklk620wGBQfRyX7sVRn8RT8vVVVD7+/qUujJwBddmxtWHAZLh07FDk5OZio02HQ0BEZ/0YabxjwnDQCegkQQRCgUCgwceJETJw4EQDwyiuvoK6uLiWDo8wR67EnPp9PfCSV7MdS3bW2tsbd4a9zo6fQQvmRsy74/UEMtWRDlaWFO8uMkSNHil+T6cd2JBIGA/mcNLooaoB8//vfxx/+8AcAwKZNm7Bq1SoAwJo1a8TrREDkY08EQYDX6xXPmPJ6vXHvlorX0aNH4XQ6sWPHjrg6/AHfrHGsv2Wq+LgKAFra/Rg5NBdqnQFeIQsFowdLOv5USyQMBto5aRRerzOQkM6f4Ppx6Qj1QeiTeCAQQEtLi7iWEc+hhIkKFfw5HA787W9/E6/H0+EvJLRQfqqpHc+9X492hQYelQkLLhmJ66cO73eftBMJA564S0CMayBA19BgFS115/P5ehTypUJvHf5uu+025OREPn69O4VCgezsbFgsFhx2N+Oc72uYdFnw+b0YYdWhYIQ5pYvHqXitRMMg0x/dUd9FDZDOQcHQoM6CwaBYl9Ha2gqfz5fU0IhWJd5bwV+sHf46UygU0Ol0sFqt4uOtKSMAhVLZ5Z
N5KhePpXyt7sHEMKBERA2Qw4cPw2azQRAEHDt2TPzvmpqaVI2P0kjouBCpF8AjnVMVa8FfzTk33j58Nmz4dBcuOELCfTKv+NspyRaPu7+pS7VQzR1UlCxRA+T1119P1TgoDXWu/k7VAjgAcdeTWZ+FZo8Pb7y3GzUfvNOj4G/WrFmw2+249tprxeNCYj0kUalUwmAwwGq1iq2Xw+n+yVyqxeNwb+rRXqsvj7a4g4qSJWqAdD59tLtRo0YlfTAkv0AgIAZGqhbAu5s8zAR0tOLonj24cGgXPm08I/6e1WrFokWLYLPZkJ+f3+Xras658fIntWjz+THIqBXrODoHSOhwQ7PZnFBLAKkWj8O9qS+ZmRf2tfo6g+AOKkqWqAESelR14MAB6HQ6FBYWoqqqCn6/HwsXLkzJAEl6nY8LSeUCeHeCIODQoUNwOp347I034fNdLPgrKiqC3W7HvHnzwr7xh2YebR0BNLV+M0vRadTiVlyVSgWr1ZqUww2lWC+I9KYe7rX6OoPgDipKlqj/kv7t3/4NAHD33Xdj69at4vW77rpL2lGRpEIV4KFttj6fT9at2aEOfw6HA59//rl4Xa/XY8GCBbDZbL12+As99hpk+KZB0xVjc3F70WhMGmFJWnBIKZ439WTMILhoTskQ07+ohoYGsSNhY2MjmpqapB4XJZnP50N7e7tY0CdlBXis+lLw113nCnKjVo2SK8dh+oSRMJvNyMrKkuqPkFSxvqlzBkHpIqYAue+++7Bo0SIYjUa43W489dRTUo+L+sjv94vnS7W2tqKjo0O2R1OdJbPgr7MJQ4342a2X4rTbj+kTRuLy8SP69XHqnEFQOoirI+GFCxdgtVqhUqmkHFPSDKSOhH6/Xzz+XIq6jL5KdsFfd9nZ2bBarTCZTKxZIuqjpHQkXLp0acR/jE6nM7GRUVIIgiCuY6Ryi208QgV/DocDe/fu7VHwZ7fbceWVVyY8U1AoFNBqtQwOIplEDZDS0tJUjYNi0NHRIQaGx+NJi3WMcM6dO4eXXnoJL730Uq8d/hIROm4kVPzH4CCSR9QACdV6nDlzBk899RRqamowduxYrFmzJiWDI3Qp5Euk73eqROvwF67gLxEKhQJ6vR5WqxUGgyEZwyaiPojpX1qfrvIAABJhSURBVPNjjz0Gu92OK664Ah999BF+8pOf4L//+7+lHtuAlIqufMnU1NSEHTt2wOFw4OTJk+L13gr+Ip1zFY5CoYDRaOy1apyIUiumAPF6vbjuuusAAHPnzsV//ud/SjqogSR0XEgoNOQs5ItV54K/7h3+Yi346+2oEeCb4DAYDMjNzUV2drZkf554sZUr0TdiCpBAIIAjR45g8uTJOHLkCJ8590GowVLn6u90XcvozuPx4I033oDT6exS8GcwGMSCv8mTJ0f9Hp3PuQp31AhwMThycnLSbsbBgwiJLoopQNauXYuf/OQnOHfuHIYNG4YNGzZIPa5+pXtgyHG+VF8cPXoUDocDr732WpeCvylTpsBut2P+/PkJFfyFWsaGpHNwhPAgQqKLYgqQe+65BxcuXEBubi4+//xzlJSUYPDgwXj88cdx9dVXSz3GjNPR0dElMNJ9HSOcaAV/N910E+x2Oy6//PKECv46t4ydMNQoLo7n5uambXCE8CBCootiCpArrrgCDz74IMaPH4+vvvoKv/nNb/DAAw/gxz/+MQMEFxe+07WILx5SF/wBF1vGhrbj5ubmZsyuKh4jQnRRTAFy5swZjB8/HgCQn5+P+vp6jBkzJmMq0pMtdBhhaJbR3t4OQRAybpYRInXBXzharRY5OTkwm9PzDTjaQjmPESH6RkwBMmTIEGzevBmFhYX49NNPMXjwYOzbty9jDqnrq3A7pTI5MEJi7fCXTBqNRgyOdN2MwYVyotjEFCA///nPUV5ejsrKSkyaNAkrV67E559/3q8r1UPnSmXaTqnepKLgLxy1Wo2cnBxYLJa0P+SQC+VEsYnpXUKr1WL58u
VdrhUWFkoyILmEjjsPBYbf7+8XgRESreCvuLgYNpsNY8aMSfrrqtVqsR9Hpjzy5EI5UWzSt8OOxNL1uPNkChX8ORwOvPXWW10K/goLC2G323HDDTck1Nq1N6EOgJl0enMIF8qJYjNgAqTzTqm2tra0PL02WXor+Fu6dGmvHf4SpVKpYLF80wUwnTsA9oYL5US9y9x/4TEIBoO4cOFCv9gpFYu///3vYoc/j8cjXi8oKIDdbsfNN98cc8FfvJRKpRgcA2VzBdFANyACpD/z+Xx455134HQ6uxT8abVascNfIgV/sVIqlTCZTMjJyYFGo5HkNYgoPUkWIAcPHsTmzZtRVlaGL7/8EqtXr4ZCocDEiRPx+OOPQ6lU4je/+Q3+8pe/QK1W49FHH8W0adPiuncgO3XqFJxOJ7Zv347Gxkbx+tixY8WCP6vVKtnrh07IzcnJSauDDokodSQJkG3btuH1118Xj6XYuHEjfvSjH+Ef/uEfsG7dOvzpT3/CyJEj8dFHH4lNh1auXInt27fHde9AE63gb+7cubDZbEkv+OsuE86rIqLUkCRA8vPzsWXLFjzyyCMAgMOHD+Pb3/42AGD27NnYt28fxo0bh1mzZkGhUGDkyJEIBAJoaGiI697c3Fwphp92pO7wF4vQeVU5OTnQ6/WSvhYRZQZJAmTevHmora0Vfy0IgvgM3mAwwOVywe12d3nEEroez739OUDkKvjrLjTjsFqtDA4i6iIli+idH6l4PB6YzWYYjcYuO4U8Hg9MJlNc9/ZHTU1NePXVV+F0OrsU/OXk5GDRokVYunRpjw5/UuCjKiLqTUrOlLjkkkvw4YcfAgAqKysxc+ZMFBUVYe/evQgGgzh9+jSCwSByc3Pjure/EAQBBw8exOrVqzF79mw8/fTTYngUFRXhF7/4BXbv3o0f//jHkodHaHF89OjRGDlyJMODiCJKyQxk1apVWLt2LUpLSzF+/HjMmzcPKpUKM2fOxNKlSxEMBrFu3bq47810kQr+9Ho9FixYALvd3muHv2Rh33EiipdC6MeVdVVVVZIc09FXkQr+Eunw11dKpVIMjnTfjste5ESpUV1djYKCgl7v69eFhOnE5/Ph7bffhtPpxP79+8Xrfe3wl6hQAaDFYkn74AB4xDpROmKASKy3gr+FCxf2ucNfPJRKJcxmM6xWa0ZVjvOIdaL0wwCRgN/v71LwFxIq+At1+EtlQ6XQjMNqtablY73e8Ih1ovTDAEmiUMFfRUUFzpw5I15PZcFfd6EZh8ViycjgCOER60TphwHSR+lS8NddfwmOznjEOlF6YYAkKFrBX6jDXyoK/rrL9EdVRJQ5GCBxCBX8OZ3OHh3+ioqKxA5/cixO83RcIko1BkgMQgV/DocD1dXV4vVQhz+bzZaygr/uFAoFdDodcnNzeVYVEaUUAySKv//973A4HHjttddS3uEvFlqtFrm5uf32XDAiSm8MkG5CBX8OhwOffPKJeF2r1eKmm26CzWZLacFfOGq1Grm5ubBYLLKOg4gGNgbI//nqq69QXl4uW4e/WGg0GlgsFlgsFkmbRhERxWJAB0g6FvyFo9FokJOT0+O4eyIiOQ3IADl79qzY4a9zwd/w4cOxZMkS3H777Skv+OtOoVB0CQ65Q4yIqLsBEyDBYBAffvghHA4H3nvvPQQCAfH35Cz4606hUECr1SInJwdGo5HBQURpq98HSG8d/mw2G/Ly8uQb4P9RKBTIzs4Wg4OIKN316wD56quvUFxcDJ/PJ16bMWMG7HY75s2blxan0TI4iChT9esAaWhogM/ng8FgwMKFC7F06VLZCv666/yoinUcRJSJ+nWAGAwG/OxnP8P8+fNhMBjkHo4oVADINQ4iymT9OkAmTpyISy+9VO5hiLRaLaxWK8xmM4ODiDJevw6QdBHajhstONjvm4gyDQNEQrEEB8B+30SUmRggEog1OELY75uIMhEDJImysrLE4IjnyBH2+yaiTMQASQK1Wg2r1Q
qr1ZrQWVXs901EmYgB0gcqlUoMDpVK1afvxX7fRJRpGCAJCAWHxWKR/ewsIiK58N0vDiqVChaLBVarlcFBRAMe3wVjoFQqxeDIysqSezhERGmBARJFKDgsFktaHLxIRJROGCBhKJVKmEwm5OTkMDiIiCJggHSiUCjE4NBqtXIPh4gorTFA8E1w6PV65ObmQqfTyT0cIqKMMKADRKFQQKfTIScnJ62OeyciygQDMkBCzZxCPTmIiCh+Ay5A2MyJiCg5BkyAaDQa5ObmwmQyMTiIiJIgZQHi8/mwZs0anDp1CkajEevWrUNTUxOefPJJqFQqzJo1Cw8++CCCwSDWr1+PI0eOQKPR4IknnsCYMWNw4MCBHvfGQqPRiMeOMDiIiJInZQFSUVEBvV6PiooKHD9+HBs2bMDXX3+NLVu2IC8vD/fccw8OHz6Muro6+Hw+lJeX48CBA3j66afx29/+Fo8//niPe6dOnRr1NZVKJfLz8xM6IZeIiKJL2TvrsWPHMHv2bADA+PHjUVVVBZ/Ph/z8fCgUCsyaNQt//etfsX//flxzzTUAgOnTp+Ozzz6D2+0Oe29vlEolw4OISCIpe3ctKCjArl27IAgCDhw4AJfLBb1eL/6+wWCAy+WC2+3usjNKpVL1uBa6l4iI5JOyAFm0aBGMRiOWL1+OXbt2YcqUKWhraxN/3+PxwGw2w2g0wuPxiNeDwWCPa6F7iYhIPikLkKqqKsyYMQNlZWWYO3cuxo4di6ysLHz11VcQBAF79+7FzJkzUVRUhMrKSgDAgQMHMGnSJBiNxrD3EhGRfFK2iD5mzBj86le/wvPPPw+TyYQnn3wS9fX1ePjhhxEIBDBr1ixcfvnluOyyy7Bv3z7YbDYIgoCnnnoKAPDTn/60x71ERCQfhSAIgtyDkEp1dTUKCgrkHgYRUUaJ9b2TW5SIiCghDBAiIkoIA4SIiBLCAEmS6voWVPztFKrrW+QeChFRSgyYwxSlVF3fgvtf/AQd/gCy1Co8+70iFIxgnQoR9W+cgSRBVV0zOvwB5Bi06PAHUFXXLPeQiIgkxwBJgstGWZClVqHR40WWWoXLRlnkHhIRkeT4CCsJCkaY8ez3ilBV14zLRln4+IqIBgQGSJIUjDAzOIhoQOEjLCIiSggDhIiIEsIAISKihDBAwmBRIBFR77iI3g2LAomIYsMZSDcsCiQiig0DpBsWBRIRxYaPsLphUSARUWwYIGGwKJCIqHd8hEVERAlhgBARUUIYIERElBAGCBERJYQBQkRECWGAEBFRQvr1Nl6v14vq6mq5h0FElFG8Xm9M9ykEQRAkHgsREfVDfIRFREQJYYAQEVFCGCBERJQQBggRESWEAUJERAlhgBARUUL6dR2InAKBAB577DGcOHECKpUKGzduRH5+vtzDiujChQsoLi7G888/jwkTJsg9nIgWLlwIk8kEABg9ejQ2btwo84gie+655/DnP/8ZHR0dsNvtWLx4sdxDCuuVV17Bq6++CuBi7dS+fftgNqdXS4OOjg6sXr0adXV1UCqV2LBhQ9r+rPp8PqxZswanTp2C0WjEunXrMHbsWLmH1cPBgwexefNmlJWV4csvv8Tq1auhUCgwceJEPP7441Aqe5ljCCSJnTt3CqtXrxYEQRA++OAD4b777pN5RJH5fD7h/vvvF66//nrh2LFjcg8novb2dmHBggVyDyMmH3zwgXDvvfcKgUBAcLvdwq9//Wu5hxST9evXC06nU+5hhLVz507hhz/8oSAIgrB3717hwQcflHlEkZWVlQmPPfaYIAiCUFNTI9x1110yj6inrVu3CvPnzxcWL14sCIIg3HvvvcIHH3wgCIIgrF27Vnj33Xd7/R58hCWRuXPnYsOGDQCA06dPY/DgwTKPKLJNmzbBZrNh6NChcg8lqi+++AJtbW246667sHz5chw4cEDuIUW0d+9eTJo0CQ888ADuu+8+XHvttXIPqVdVVVU4duwYli5dKvdQwho3bhwCgQ
CCwSDcbjfU6vR9gHLs2DHMnj0bADB+/HjU1NTIPKKe8vPzsWXLFvHXhw8fxre//W0AwOzZs/H+++/3+j3S92+gH1Cr1Vi1ahV27tyJX//613IPJ6xXXnkFubm5uOaaa7B161a5hxNVdnY27r77bixevBgnT57Ev/zLv+Dtt99OyzeSxsZGnD59Gr/73e9QW1uLFStW4O2334ZCoZB7aBE999xzeOCBB+QeRkR6vR51dXW48cYb0djYiN/97ndyDymigoIC7Nq1C3PnzsXBgwdx9uxZBAIBqFQquYcmmjdvHmpra8VfC4Ig/nwaDAa4XK5evwdnIBLbtGkT3nnnHaxduxatra1yD6eH7du34/3338eyZctQXV2NVatW4fz583IPK6xx48bh1ltvhUKhwLhx42C1WtN2rFarFbNmzYJGo8H48eOh1WrR0NAg97AiamlpwfHjx3HllVfKPZSI/uu//guzZs3CO++8g9deew2rV6+O+cymVFu0aBGMRiOWL1+OXbt2YerUqWkVHuF0Xu/weDwxrYExQCSyY8cOPPfccwAAnU4HhUKRlj9AL774Il544QWUlZWhoKAAmzZtwpAhQ+QeVlgvv/wynn76aQDA2bNn4Xa703asM2bMwJ49eyAIAs6ePYu2tjZYrVa5hxXRxx9/jKuuukruYURlNpvFDRQWiwV+vx+BQEDmUYVXVVWFGTNmoKysDHPnzkVeXp7cQ+rVJZdcgg8//BAAUFlZiZkzZ/b6Nek39+8nrr/+eqxZswbf+9734Pf78eijj0Kr1co9rIx2++23Y82aNbDb7VAoFHjqqafS8vEVAMyZMwcff/wxbr/9dgiCgHXr1qXlB4iQEydOYPTo0XIPI6o777wTjz76KEpKStDR0YGHHnoIer1e7mGFNWbMGPzqV7/C888/D5PJhCeffFLuIfVq1apVWLt2LUpLSzF+/HjMmzev16/habxERJQQPsIiIqKEMECIiCghDBAiIkoIA4SIiBLCACEiooQwQIgk8sorr2Dz5s3irysrK1FeXo7a2losWbIEAPCd73wnbYvhiHqTnpvoifqh0NlInY+PIMpkDBAiiTU0NOD+++/HokWL8OWXX8Jms/W4591338W2bdugVqsxatQo/PznP+/9KG0imfEnlEhCFy5cwIoVK7BmzZqolehvvPEG7rzzTjgcDsyaNQtutzuFoyRKDAOESEJ79uyBz+dDMBiMet+aNWvw8ccf44477sAnn3zC2QdlBP6UEklo4cKF+MUvfoHHHnsMbW1tEe8rLy/HypUr8cILLwAAdu7cmaohEiWMAUIksW9961u49dZbo7bfnTZtGn7wgx9g+fLlOH/+fEY0oCLiYYpERJQQzkCIiCghDBAiIkoIA4SIiBLCACEiooQwQIiIKCEMECIiSggDhIiIEvL/AcrdsxAMBz4+AAAAAElFTkSuQmCC\n",
141 |       "text/plain": [
142 |        "<Figure size 432x288 with 1 Axes>"
143 |       ]
144 |      },
145 |      "metadata": {},
146 |      "output_type": "display_data"
147 |     }
148 |    ],
149 |    "source": [
150 |     "sns.regplot(kills, gold, marker = '.', line_kws={\"color\": \"black\"})"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "code",
155 |    "execution_count": null,
156 |    "metadata": {},
157 |    "outputs": [],
158 |    "source": [
159 |     "plt.figure(figsize=(4,4))\n",
160 |     "sns.regplot(list(Xs[i]), list(Ys[i]), data= final_df, marker='.')\n",
161 |     "plt.xlabel(X_labels[i])\n",
162 |     "plt.ylabel(Y_labels[i])\n",
163 |     "plt.tight_layout()"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "code",
168 |    "execution_count": 6,
169 |    "metadata": {
170 |     "scrolled": true
171 |    },
172 |    "outputs": [],
173 |    "source": [
174 |     "def standardise_features(df):\n",
175 |     "    scaler = StandardScaler()\n",
176 |     "    df_std = pd.DataFrame(scaler.fit_transform(df))\n",
177 |     "    df_std.columns = df.columns\n",
178 |     "    return df_std, scaler\n",
179 |     "\n",
180 |     "def fit_pca(df_std, num_comps = False):\n",
181 |     "    if num_comps == False:\n",
182 |     "        num_comps = len(df_std.columns)\n",
183 |     "    pca = PCA(n_components= num_comps)\n",
184 |     "    principalComponents = pca.fit_transform(df_std)\n",
185 |     "    pca_str = pca.explained_variance_ratio_\n",
186 |     "    return pca, pca_str\n",
187 |     "\n",
188 |     "def fit_umap(df_std, num_comps = False):\n",
189 |     "    if num_comps == False:\n",
190 |     "        num_comps = 2\n",
191 |     "    reducer = umap.UMAP(n_components = num_comps)\n",
192 |     "    reducer.fit_transform(df_std)\n",
193 |     "    return reducer, None\n",
194 |     "    \n",
195 |     "\n",
196 |     "def draw_component_str(df_std, pca_str):\n",
197 |     "    num_features = len(df_std.columns)\n",
198 |     "    # Plot the variance of each PC\n",
199 |     "    plt.figure(figsize=(12,8))\n",
200 |     "    plt.bar(np.arange(1, num_features + 1), pca_str, color=(0.2, 0.4, 0.6, 0.6),edgecolor='black')\n",
201 |     "    plt.xticks(np.arange(1, num_features + 1))\n",
202 |     "    plt.xlabel('Number of Components',fontweight='bold')\n",
203 |     "    plt.ylabel('Variance (%)',fontweight='bold')\n",
204 |     "    plt.title('Individual Explained Variance',fontweight='bold')\n",
205 |     "    plt.show()\n",
206 |     "    \n",
207 |     "def draw_component_features(df_std, pca, feature = 1):\n",
208 |     "    # Create a table of all the feature strengths for each component\n",
209 |     "    all_comps = []\n",
210 |     "    for i in range(feature):\n",
211 |     "        all_comps.append(pca.components_[i])\n",
212 |     "    var_str = pd.DataFrame(all_comps, columns= df_std.columns, index=range(1, feature + 1)).abs().transpose()\n",
213 |     "\n",
214 |     "    # Plot the variables in PC1\n",
215 |     "    plt.figure(figsize=(12,8))\n",
216 |     "    plt.bar(var_str.sort_values(feature, ascending=False)[1].keys(),var_str.sort_values(1,ascending=False)[1],color=(0.2, 0.4, 0.6, 0.6),edgecolor='black')\n",
217 |     "    plt.xticks(rotation=90)\n",
218 |     "    plt.xlabel('Variables',fontweight='bold')\n",
219 |     "    plt.ylabel('Variable Influence',fontweight='bold')\n",
220 |     "    plt.title('Variable Influence on Principle Component 1',fontweight='bold')\n",
221 |     "    \n",
222 |     "    \n",
223 |     "def fit_kmeans(df_std, n_comps, n_clusters, drt= 'PCA'):\n",
224 |     "    if drt == 'PCA':\n",
225 |     "        reducer, pca_str = fit_pca(df_std, num_comps= n_comps)\n",
226 |     "    if drt == 'UMAP':\n",
227 |     "        reducer, pca_str = fit_umap(df_std, num_comps= n_comps)\n",
228 |     "    components = reducer.fit_transform(df_std)\n",
229 |     "    kmeans = KMeans(init=\"random\", n_clusters= n_clusters, n_init= 10, max_iter= 30, random_state= 42)\n",
230 |     "    kmeans.fit(components)\n",
231 |     "    return kmeans, components\n",
232 |     "\n",
233 |     "\n",
234 |     "def predict_classes(kmeans, components, lane_df, champ_ids):\n",
235 |     "    classes = kmeans.predict(components)\n",
236 |     "    champs = lane_df.index\n",
237 |     "    lane_classified = pd.DataFrame({'Class': classes, 'Champ ID': champs})\n",
238 |     "    lane_classified = pd.merge(champ_ids, lane_classified, on = 'Champ ID')\n",
239 |     "    return lane_classified\n",
240 |     "\n",
241 |     "\n",
242 |     "def classify_lane(lane_df, n_comps, n_clusters, drt= 'PCA', vis= False):\n",
243 |     "    df_std, scaler = standardise_features(lane_df)\n",
244 |     "    df_std.drop(['Result'], axis= 1, inplace = True)\n",
245 |     "    if vis != False:\n",
246 |     "        draw_component_features(df_std, pca, 1)\n",
247 |     "        draw_component_str(df_std, pca_str)\n",
248 |     "    kmeans, components = fit_kmeans(df_std, n_comps, n_clusters, drt= drt)\n",
249 |     "    classisfied = predict_classes(kmeans, components, lane_df, champ_ids)\n",
250 |     "    champ_stats_df = pd.merge(lane_df, classisfied, on = 'Champ ID')\n",
251 |     "    class_stats_df = champ_stats_df.groupby('Class').mean()\n",
252 |     "    class_stats_df.drop('Champ ID', axis = 1, inplace = True)\n",
253 |     "    return champ_stats_df, class_stats_df, kmeans\n",
254 |     "\n",
255 |     "\n",
256 |     "def sort_four_lists(list_to_sort, list2, list3, list4):\n",
257 |     "    sorted_list, list2, list3, list4 = map(list, zip(*sorted(zip(list_to_sort, list2, list3, list4), reverse = True)))\n",
258 |     "    return sorted_list, list2, list3, list4\n",
259 |     "\n",
260 |     "\n",
261 |     "# Order the stats by the ratio of the class average to the lane average, largest first\n",
262 |     "def sort_stats_by_perc_diff(class_stats_df, avg_lane_stats, lane, class_num):\n",
263 |     "    cols = list(class_stats_df.columns)\n",
264 |     "    class_avg_stats, lane_avg_stats, perc_diffs = [], [], []\n",
265 |     "    for col in cols:\n",
266 |     "        class_avg_stat = class_stats_df[col][class_num]\n",
267 |     "        lane_avg_stat = avg_lane_stats[col][lane]\n",
268 |     "        class_avg_stats.append(class_avg_stat)\n",
269 |     "        lane_avg_stats.append(lane_avg_stat)\n",
270 |     "        perc_diffs.append(class_avg_stat / lane_avg_stat)\n",
271 |     "    perc_diffs, cols, class_avg_stats, lane_avg_stats = sort_four_lists(perc_diffs, \n",
272 |     "                                                                                          class_stats_df.columns, \n",
273 |     "                                                                                          class_avg_stats, \n",
274 |     "                                                                                          lane_avg_stats)\n",
275 |     "    col_dict = {}\n",
276 |     "    for i in range(len(cols)):\n",
277 |     "        col_dict[cols[i]] = round(perc_diffs[i], 2), round(class_avg_stats[i], 4), round(lane_avg_stats[i], 4)\n",
278 |     "    return col_dict"
279 |    ]
280 |   },
281 |   {
282 |    "cell_type": "code",
283 |    "execution_count": 7,
284 |    "metadata": {},
285 |    "outputs": [],
286 |    "source": [
287 |     "# To change the lane, switch adc_df to mid_df, and so on.\n",
288 |     "lane_to_classify = adc_df\n",
289 |     "# Number of components the features are compressed to during dimensionality reduction\n",
290 |     "n_comps = 5\n",
291 |     "# Number of classes created\n",
292 |     "n_clusters = 4\n",
293 |     "# Technique used (PCA or UMAP)\n",
294 |     "technique = 'UMAP'\n",
295 |     "# Turn off plot output; plots can be enabled only with PCA (UMAP can't provide variable-strength metrics)\n",
296 |     "vis = False\n",
297 |     "mega_df, stats_df, kmeans = classify_lane(lane_to_classify, n_comps, n_clusters, techinique, vis)"
298 |    ]
299 |   },
300 |   {
301 |    "cell_type": "code",
302 |    "execution_count": 25,
303 |    "metadata": {},
304 |    "outputs": [
305 |     {
306 |      "data": {
307 |       "text/html": [
308 |        "<div>\n",
309 |        "<style scoped>\n",
310 |        "    .dataframe tbody tr th:only-of-type {\n",
311 |        "        vertical-align: middle;\n",
312 |        "    }\n",
313 |        "\n",
314 |        "    .dataframe tbody tr th {\n",
315 |        "        vertical-align: top;\n",
316 |        "    }\n",
317 |        "\n",
318 |        "    .dataframe thead th {\n",
319 |        "        text-align: right;\n",
320 |        "    }\n",
321 |        "</style>\n",
322 |        "<table border=\"1\" class=\"dataframe\">\n",
323 |        "  <thead>\n",
324 |        "    <tr style=\"text-align: right;\">\n",
325 |        "      <th></th>\n",
326 |        "      <th>Result</th>\n",
327 |        "      <th>kills</th>\n",
328 |        "      <th>deaths</th>\n",
329 |        "      <th>assists</th>\n",
330 |        "      <th>largestKillingSpree</th>\n",
331 |        "      <th>largestMultiKill</th>\n",
332 |        "      <th>killingSprees</th>\n",
333 |        "      <th>longestTimeSpentLiving</th>\n",
334 |        "      <th>totalDamageDealt</th>\n",
335 |        "      <th>magicDamageDealt</th>\n",
336 |        "      <th>...</th>\n",
337 |        "      <th>neutralMinionsKilledEnemyJungle</th>\n",
338 |        "      <th>firstBloodKill</th>\n",
339 |        "      <th>firstBloodAssist</th>\n",
340 |        "      <th>firstTowerKill</th>\n",
341 |        "      <th>firstTowerAssist</th>\n",
342 |        "      <th>first10_xpm</th>\n",
343 |        "      <th>first10_gpm</th>\n",
344 |        "      <th>soloKills</th>\n",
345 |        "      <th>earlyGanks</th>\n",
346 |        "      <th>drakesKilled</th>\n",
347 |        "    </tr>\n",
348 |        "    <tr>\n",
349 |        "      <th>Class</th>\n",
350 |        "      <th></th>\n",
351 |        "      <th></th>\n",
352 |        "      <th></th>\n",
353 |        "      <th></th>\n",
354 |        "      <th></th>\n",
355 |        "      <th></th>\n",
356 |        "      <th></th>\n",
357 |        "      <th></th>\n",
358 |        "      <th></th>\n",
359 |        "      <th></th>\n",
360 |        "      <th></th>\n",
361 |        "      <th></th>\n",
362 |        "      <th></th>\n",
363 |        "      <th></th>\n",
364 |        "      <th></th>\n",
365 |        "      <th></th>\n",
366 |        "      <th></th>\n",
367 |        "      <th></th>\n",
368 |        "      <th></th>\n",
369 |        "      <th></th>\n",
370 |        "      <th></th>\n",
371 |        "    </tr>\n",
372 |        "  </thead>\n",
373 |        "  <tbody>\n",
374 |        "    <tr>\n",
375 |        "      <th>0</th>\n",
376 |        "      <td>0.534768</td>\n",
377 |        "      <td>7.231082</td>\n",
378 |        "      <td>6.141935</td>\n",
379 |        "      <td>6.601940</td>\n",
380 |        "      <td>3.196620</td>\n",
381 |        "      <td>1.657331</td>\n",
382 |        "      <td>1.710934</td>\n",
383 |        "      <td>523.961056</td>\n",
384 |        "      <td>128455.950023</td>\n",
385 |        "      <td>61338.246133</td>\n",
386 |        "      <td>...</td>\n",
387 |        "      <td>2.415889</td>\n",
388 |        "      <td>0.094317</td>\n",
389 |        "      <td>0.0</td>\n",
390 |        "      <td>0.071985</td>\n",
391 |        "      <td>0.055026</td>\n",
392 |        "      <td>362.639191</td>\n",
393 |        "      <td>282.433367</td>\n",
394 |        "      <td>0.692657</td>\n",
395 |        "      <td>0.050133</td>\n",
396 |        "      <td>0.197316</td>\n",
397 |        "    </tr>\n",
398 |        "    <tr>\n",
399 |        "      <th>1</th>\n",
400 |        "      <td>0.510697</td>\n",
401 |        "      <td>6.870552</td>\n",
402 |        "      <td>5.394268</td>\n",
403 |        "      <td>6.212223</td>\n",
404 |        "      <td>3.324403</td>\n",
405 |        "      <td>1.573970</td>\n",
406 |        "      <td>1.605745</td>\n",
407 |        "      <td>563.996556</td>\n",
408 |        "      <td>138491.540696</td>\n",
409 |        "      <td>9218.265972</td>\n",
410 |        "      <td>...</td>\n",
411 |        "      <td>2.728653</td>\n",
412 |        "      <td>0.142591</td>\n",
413 |        "      <td>0.0</td>\n",
414 |        "      <td>0.124285</td>\n",
415 |        "      <td>0.056086</td>\n",
416 |        "      <td>366.214734</td>\n",
417 |        "      <td>284.773831</td>\n",
418 |        "      <td>0.686490</td>\n",
419 |        "      <td>0.043162</td>\n",
420 |        "      <td>0.263578</td>\n",
421 |        "    </tr>\n",
422 |        "    <tr>\n",
423 |        "      <th>2</th>\n",
424 |        "      <td>0.504749</td>\n",
425 |        "      <td>5.696161</td>\n",
426 |        "      <td>5.253136</td>\n",
427 |        "      <td>7.917039</td>\n",
428 |        "      <td>2.830109</td>\n",
429 |        "      <td>1.406265</td>\n",
430 |        "      <td>1.343797</td>\n",
431 |        "      <td>581.407458</td>\n",
432 |        "      <td>118958.017767</td>\n",
433 |        "      <td>58119.092096</td>\n",
434 |        "      <td>...</td>\n",
435 |        "      <td>1.505958</td>\n",
436 |        "      <td>0.089813</td>\n",
437 |        "      <td>0.0</td>\n",
438 |        "      <td>0.078280</td>\n",
439 |        "      <td>0.065374</td>\n",
440 |        "      <td>365.780578</td>\n",
441 |        "      <td>281.929186</td>\n",
442 |        "      <td>0.596190</td>\n",
443 |        "      <td>0.028411</td>\n",
444 |        "      <td>0.140793</td>\n",
445 |        "    </tr>\n",
446 |        "    <tr>\n",
447 |        "      <th>3</th>\n",
448 |        "      <td>0.502236</td>\n",
449 |        "      <td>5.939398</td>\n",
450 |        "      <td>5.303801</td>\n",
451 |        "      <td>7.105783</td>\n",
452 |        "      <td>2.970148</td>\n",
453 |        "      <td>1.503534</td>\n",
454 |        "      <td>1.381198</td>\n",
455 |        "      <td>577.887791</td>\n",
456 |        "      <td>145684.131736</td>\n",
457 |        "      <td>21112.003760</td>\n",
458 |        "      <td>...</td>\n",
459 |        "      <td>2.320029</td>\n",
460 |        "      <td>0.101799</td>\n",
461 |        "      <td>0.0</td>\n",
462 |        "      <td>0.139872</td>\n",
463 |        "      <td>0.055545</td>\n",
464 |        "      <td>366.575414</td>\n",
465 |        "      <td>283.231749</td>\n",
466 |        "      <td>0.495683</td>\n",
467 |        "      <td>0.034015</td>\n",
468 |        "      <td>0.195676</td>\n",
469 |        "    </tr>\n",
470 |        "  </tbody>\n",
471 |        "</table>\n",
472 |        "<p>4 rows × 39 columns</p>\n",
473 |        "</div>"
474 |       ],
475 |       "text/plain": [
476 |        "         Result     kills    deaths   assists  largestKillingSpree  \\\n",
477 |        "Class                                                                \n",
478 |        "0      0.534768  7.231082  6.141935  6.601940             3.196620   \n",
479 |        "1      0.510697  6.870552  5.394268  6.212223             3.324403   \n",
480 |        "2      0.504749  5.696161  5.253136  7.917039             2.830109   \n",
481 |        "3      0.502236  5.939398  5.303801  7.105783             2.970148   \n",
482 |        "\n",
483 |        "       largestMultiKill  killingSprees  longestTimeSpentLiving  \\\n",
484 |        "Class                                                            \n",
485 |        "0              1.657331       1.710934              523.961056   \n",
486 |        "1              1.573970       1.605745              563.996556   \n",
487 |        "2              1.406265       1.343797              581.407458   \n",
488 |        "3              1.503534       1.381198              577.887791   \n",
489 |        "\n",
490 |        "       totalDamageDealt  magicDamageDealt  ...  \\\n",
491 |        "Class                                      ...   \n",
492 |        "0         128455.950023      61338.246133  ...   \n",
493 |        "1         138491.540696       9218.265972  ...   \n",
494 |        "2         118958.017767      58119.092096  ...   \n",
495 |        "3         145684.131736      21112.003760  ...   \n",
496 |        "\n",
497 |        "       neutralMinionsKilledEnemyJungle  firstBloodKill  firstBloodAssist  \\\n",
498 |        "Class                                                                      \n",
499 |        "0                             2.415889        0.094317               0.0   \n",
500 |        "1                             2.728653        0.142591               0.0   \n",
501 |        "2                             1.505958        0.089813               0.0   \n",
502 |        "3                             2.320029        0.101799               0.0   \n",
503 |        "\n",
504 |        "       firstTowerKill  firstTowerAssist  first10_xpm  first10_gpm  soloKills  \\\n",
505 |        "Class                                                                          \n",
506 |        "0            0.071985          0.055026   362.639191   282.433367   0.692657   \n",
507 |        "1            0.124285          0.056086   366.214734   284.773831   0.686490   \n",
508 |        "2            0.078280          0.065374   365.780578   281.929186   0.596190   \n",
509 |        "3            0.139872          0.055545   366.575414   283.231749   0.495683   \n",
510 |        "\n",
511 |        "       earlyGanks  drakesKilled  \n",
512 |        "Class                            \n",
513 |        "0        0.050133      0.197316  \n",
514 |        "1        0.043162      0.263578  \n",
515 |        "2        0.028411      0.140793  \n",
516 |        "3        0.034015      0.195676  \n",
517 |        "\n",
518 |        "[4 rows x 39 columns]"
519 |       ]
520 |      },
521 |      "execution_count": 25,
522 |      "metadata": {},
523 |      "output_type": "execute_result"
524 |     }
525 |    ],
526 |    "source": [
527 |     "# The average statistics for each class\n",
528 |     "stats_df"
529 |    ]
530 |   },
531 |   {
532 |    "cell_type": "code",
533 |    "execution_count": 9,
534 |    "metadata": {},
535 |    "outputs": [
536 |     {
537 |      "name": "stderr",
538 |      "output_type": "stream",
539 |      "text": [
540 |       "C:\\Users\\Jack Wills\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:96: RuntimeWarning: invalid value encountered in double_scalars\n"
541 |      ]
542 |     },
543 |     {
544 |      "data": {
545 |       "text/plain": [
546 |        "{'neutralMinionsKilledTeamJungle': (2.75, 9.286, 3.3726),\n",
547 |        " 'largestCriticalStrike': (2.44, 201.8767, 82.8721),\n",
548 |        " 'magicDamageDealtToChampions': (2.04, 11468.2898, 5616.3885),\n",
549 |        " 'trueDamageDealtToChampions': (1.55, 1989.5481, 1282.797),\n",
550 |        " 'damageDealtToObjectives': (1.36, 10418.1406, 7660.4017),\n",
551 |        " 'firstBloodAssist': (nan, 0.0, 0.0),\n",
552 |        " 'drakesKilled': (2.33, 0.1973, 0.0847),\n",
553 |        " 'magicDamageDealt': (1.81, 61338.2461, 33948.6284),\n",
554 |        " 'neutralMinionsKilled': (1.67, 15.2795, 9.1418),\n",
555 |        " 'killingSprees': (1.47, 1.7109, 1.1612),\n",
556 |        " 'kills': (1.42, 7.2311, 5.0916),\n",
557 |        " 'totalDamageDealtToChampions': (1.29, 21258.0022, 16516.245),\n",
558 |        " 'largestMultiKill': (1.26, 1.6573, 1.315),\n",
559 |        " 'largestKillingSpree': (1.25, 3.1966, 2.5641),\n",
560 |        " 'trueDamageDealt': (1.21, 9114.2616, 7538.5123),\n",
561 |        " 'assists': (1.2, 6.6019, 5.5171),\n",
562 |        " 'deaths': (1.18, 6.1419, 5.2029),\n",
563 |        " 'goldEarned': (1.08, 11693.7486, 10832.7723),\n",
564 |        " 'inhibitorKills': (1.06, 0.1863, 0.176),\n",
565 |        " 'Result': (1.05, 0.5348, 0.5074),\n",
566 |        " 'totalTimeCrowdControlDealt': (1.04, 231.8851, 223.9104),\n",
567 |        " 'totalDamageDealt': (1.04, 128455.95, 124052.5547),\n",
568 |        " 'totalMinionsKilled': (1.02, 173.0637, 170.2526),\n",
569 |        " 'first10_gpm': (1.0, 282.4334, 282.6026),\n",
570 |        " 'first10_xpm': (1.0, 362.6392, 363.6761),\n",
571 |        " 'firstTowerAssist': (1.0, 0.055, 0.0552),\n",
572 |        " 'neutralMinionsKilledEnemyJungle': (0.97, 2.4159, 2.4913),\n",
573 |        " 'firstBloodKill': (0.96, 0.0943, 0.098),\n",
574 |        " 'longestTimeSpentLiving': (0.9, 523.9611, 580.9603),\n",
575 |        " 'turretKills': (0.88, 1.2216, 1.3915),\n",
576 |        " 'damageDealtToTurrets': (0.82, 3185.1619, 3887.3995),\n",
577 |        " 'totalHeal': (0.81, 4162.844, 5160.4172),\n",
578 |        " 'totalDamageTaken': (0.8, 19029.3924, 23926.664),\n",
579 |        " 'timeCCingOthers': (0.77, 15.9375, 20.7292),\n",
580 |        " 'physicalDamageDealt': (0.7, 58002.5764, 82564.6542),\n",
581 |        " 'firstTowerKill': (0.67, 0.072, 0.1068),\n",
582 |        " 'earlyGanks': (0.6, 0.0501, 0.0841),\n",
583 |        " 'damageSelfMitigated': (0.41, 9816.7249, 23879.6685),\n",
584 |        " 'soloKills': (0.4, 0.6927, 1.741)}"
585 |       ]
586 |      },
587 |      "execution_count": 9,
588 |      "metadata": {},
589 |      "output_type": "execute_result"
590 |     }
591 |    ],
592 |    "source": [
593 |     "# All statistics for the chosen class, each with how far the class differs from the lane average, ordered from furthest above the mean to furthest below\n",
594 |     "sort_stats_by_perc_diff(stats_df, avg_lane_stats, 'top', 0)"
595 |    ]
596 |   },
597 |   {
598 |    "cell_type": "code",
599 |    "execution_count": 10,
600 |    "metadata": {},
601 |    "outputs": [
602 |     {
603 |      "data": {
604 |       "text/plain": [
605 |        "['Twitch', 'Karthus', 'Vayne', 'Cassiopeia', 'KogMaw']"
606 |       ]
607 |      },
608 |      "execution_count": 10,
609 |      "metadata": {},
610 |      "output_type": "execute_result"
611 |     }
612 |    ],
613 |    "source": [
614 |     "# To see the Champs in the Class\n",
615 |     "mega_df[mega_df['Class']== 0]['Champion'].to_list()"
616 |    ]
617 |   },
618 |   {
619 |    "cell_type": "code",
620 |    "execution_count": null,
621 |    "metadata": {},
622 |    "outputs": [],
623 |    "source": []
624 |   },
625 |   {
626 |    "cell_type": "code",
627 |    "execution_count": null,
628 |    "metadata": {},
629 |    "outputs": [],
630 |    "source": []
631 |   }
632 |  ],
633 |  "metadata": {
634 |   "kernelspec": {
635 |    "display_name": "Python 3",
636 |    "language": "python",
637 |    "name": "python3"
638 |   },
639 |   "language_info": {
640 |    "codemirror_mode": {
641 |     "name": "ipython",
642 |     "version": 3
643 |    },
644 |    "file_extension": ".py",
645 |    "mimetype": "text/x-python",
646 |    "name": "python",
647 |    "nbconvert_exporter": "python",
648 |    "pygments_lexer": "ipython3",
649 |    "version": "3.7.5"
650 |   }
651 |  },
652 |  "nbformat": 4,
653 |  "nbformat_minor": 2
654 | }
655 | 


--------------------------------------------------------------------------------
/Champion Classes - UMAP and KMeans/champ_data.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import requests\n",
 10 |     "import pandas as pd\n",
 11 |     "import numpy as np\n",
 12 |     "import datetime\n",
 13 |     "import time as t\n",
 14 |     "import seaborn as sns\n",
 15 |     "import matplotlib.pyplot as plt\n",
 16 |     "import matplotlib.ticker as mtick\n",
 17 |     "from sklearn.preprocessing import StandardScaler\n",
 18 |     "from sklearn.decomposition import PCA\n",
 19 |     "sns.set_style(\"whitegrid\", {'axes.grid' : False})\n",
 20 |     "import os\n",
 21 |     "import math\n",
 22 |     "import time as t"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 7,
 28 |    "metadata": {},
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "# For any URL, return the JSON\n",
 32 |     "def return_json(URL, session):\n",
 33 |     "    while True:\n",
 34 |     "        response = session.get(URL)\n",
 35 |     "        try:\n",
 36 |     "            # Check for 404 error and quit if received\n",
 37 |     "            if response.json()['status']['status_code'] == 404:\n",
 38 |     "                return \"error - status code 404\"\n",
 39 |     "            # Check for 429 (too many requests made), sleep if received\n",
 40 |     "            elif response.json()['status']['status_code'] == 429:\n",
 41 |     "                t.sleep(10)\n",
 42 |     "                continue\n",
 43 |     "            else:\n",
 44 |     "                return \"error - unknown reason\"\n",
 45 |     "        except:\n",
 46 |     "            break\n",
 47 |     "    return response.json()\n",
 48 |     "\n",
 49 |     "# Provide the match-id & region, receive the json of match timeline (1 minute interval of match data)\n",
 50 |     "def get_matchTimeline(matchId, region, key, session):\n",
 51 |     "    URL = 'https://' + region + '.api.riotgames.com/lol/match/v4/timelines/by-match/' + str(\n",
 52 |     "        matchId) + '/?api_key=' + key\n",
 53 |     "    json = return_json(URL, session)\n",
 54 |     "    return json\n",
 55 |     "\n",
 56 |     "\n",
 57 |     "# Provide the match-id & region, receive the match information (game length, participants etc..)\n",
 58 |     "def get_gameInfo(matchId, region, key, session):\n",
 59 |     "    URL = 'https://' + region + '.api.riotgames.com/lol/match/v4/matches/' + str(matchId) + '/?api_key=' + key\n",
 60 |     "    json = return_json(URL, session)\n",
 61 |     "    return json\n",
 62 |     "\n",
 63 |     "# Decide how many pages of summoners to request for each tier (elo)\n",
 64 |     "def set_volume(tier):\n",
 65 |     "    tier_list = {\n",
 66 |     "        'DIAMOND': 20,\n",
 67 |     "        'GOLD': 1,\n",
 68 |     "        'SILVER': 1\n",
 69 |     "    }\n",
 70 |     "    size = tier_list[tier]\n",
 71 |     "    return size\n",
 72 |     "\n",
 73 |     "# Loop to get summoner IDs from given regions / tiers\n",
 74 |     "def get_summoners(fullRegionList, tierList, key, session):\n",
 75 |     "    summonerIds, summonerRegions, summonerTier = [], [], []\n",
 76 |     "    for y in fullRegionList:\n",
 77 |     "        for z in range(len(tierList)):\n",
 78 |     "            size = set_volume(tierList[z][0])\n",
 79 |     "            for x in range(size):\n",
 80 |     "                page = x + 1\n",
 81 |     "                URL_ids = ('https://' + y + '.api.riotgames.com/lol/league-exp/v4/entries/RANKED_SOLO_5x5/' +\n",
 82 |     "                           tierList[z][0] + '/' + tierList[z][1] + '/?page=' + str(page) + '&api_key=' + key)\n",
 83 |     "                json = return_json(URL_ids, session)\n",
 84 |     "                for x in range(0, len(json)):\n",
 85 |     "                    summonerIds.append(json[x]['summonerId'])\n",
 86 |     "                    summonerRegions.append(y)\n",
 87 |     "                    summonerTier.append(tierList[z][0])\n",
 88 |     "    return summonerIds, summonerRegions, summonerTier\n",
 89 |     "\n",
 90 |     "\n",
 91 |     "# Convert a list of summoner IDs to account IDs\n",
 92 |     "def name_to_id(selectedIds, selectedRegions, selectedTiers,  key, session):\n",
 93 |     "    accountIds, accountRegions, accountTiers = [], [], []\n",
 94 |     "    for i in range(len(selectedIds)):\n",
 95 |     "        URL = 'https://' + selectedRegions[i] + '.api.riotgames.com/lol/summoner/v4/summoners/' + selectedIds[\n",
 96 |     "            i] + '/?api_key=' + key\n",
 97 |     "        json = return_json(URL, session)\n",
 98 |     "        account_id = json['accountId']\n",
 99 |     "        accountIds.append(account_id)\n",
100 |     "        accountRegions.append(selectedRegions[i])\n",
101 |     "        accountTiers.append(selectedTiers[i])\n",
102 |     "    return accountIds, accountRegions, accountTiers\n",
103 |     "\n",
104 |     "\n",
105 |     "# Remove duplicate entries from list1, dropping the matching items of list2 and list3 so the three lists stay aligned\n",
106 |     "def remove_duplicates(list1, list2, list3):\n",
107 |     "    final_list1 = []\n",
108 |     "    final_list2 = []\n",
109 |     "    final_list3 = []\n",
110 |     "    for i in range(len(list1)):\n",
111 |     "        if list1[i] not in final_list1:\n",
112 |     "            final_list1.append(list1[i])\n",
113 |     "            final_list2.append(list2[i])\n",
114 |     "            final_list3.append(list3[i])\n",
115 |     "    return final_list1, final_list2, final_list3\n",
116 |     "\n",
117 |     "\n",
118 |     "def time_conv(yyyy, mm, dd):\n",
119 |     "    sd = datetime.date(yyyy, mm, dd) - datetime.timedelta(7)\n",
120 |     "    time = t.mktime(sd.timetuple())\n",
121 |     "    time = str(int(time)) + \"000\"\n",
122 |     "    return time\n",
123 |     "    \n",
124 |     "def get_matchIds(accountIds, accountRegions, accountTiers, key, session):\n",
125 |     "    matchIds, matchTiers, matchRegions = [], [], []\n",
126 |     "    start_time = time_conv(2020, 10, 19)\n",
127 |     "    for i in range(len(accountIds)):\n",
128 |     "        URL = 'https://' + accountRegions[i] + '.api.riotgames.com/lol/match/v4/matchlists/by-account/' + accountIds[\n",
129 |     "            i] + '/?beginTime=' + start_time + '&queue=420' + '&api_key=' + key\n",
130 |     "        try:\n",
131 |     "            match_json = return_json(URL, session) \n",
132 |     "            for match in match_json['matches'][:10]:\n",
133 |     "                matchIds.append(match['gameId'])\n",
134 |     "                matchRegions.append(accountRegions[i])\n",
135 |     "                matchTiers.append(accountTiers[i])\n",
136 |     "        except:\n",
137 |     "            pass\n",
138 |     "    return matchIds, matchRegions, matchTiers\n",
139 |     "\n",
140 |     "def main(fullRegionList, tierList, key, session):\n",
141 |     "    summonerIds, summonerRegions, summonerTiers = get_summoners(fullRegionList, tierList, key, session)\n",
142 |     "    print(str(len(summonerIds)) + \" summoners found. Transforming to account IDs.\")\n",
143 |     "    accountIds, accountRegions, accountTiers = name_to_id(summonerIds, summonerRegions, summonerTiers,  key, session)\n",
144 |     "    print(str(len(accountIds)) + \" account IDs successfully transformed. Getting match IDs.\")\n",
145 |     "    matchIds, matchRegions, matchTiers = get_matchIds(accountIds, accountRegions, accountTiers, key, session)\n",
146 |     "    print(str(len(matchIds)) + \" game IDs found, converting to data.\")\n",
147 |     "    return matchIds, matchRegions, matchTiers"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "code",
152 |    "execution_count": 8,
153 |    "metadata": {},
154 |    "outputs": [],
155 |    "source": [
156 |     "# Return the first lane within dist of the given position, or False if no lane is close enough\n",
157 |     "def current_lane(pos, dist):\n",
158 |     "    for lane in lane_locs:\n",
159 |     "        if lane_distance(pos, lane) < dist:\n",
160 |     "            return lane\n",
161 |     "    return False\n",
162 |     "        \n",
163 |     "# Find the distance between jungler at a given time, and a given lane\n",
164 |     "def lane_distance(pos, lane):\n",
165 |     "    loc_X = pos['x']\n",
166 |     "    loc_Y = pos['y']\n",
167 |     "    current_loc = [loc_X, loc_Y]\n",
168 |     "    shortest_dist = 99999\n",
169 |     "    for i in range(2):\n",
170 |     "        lane_loc = lane_locs[lane][i]\n",
171 |     "        dist = distance(current_loc, lane_loc)\n",
172 |     "        if dist < shortest_dist:\n",
173 |     "            shortest_dist = dist\n",
174 |     "    return shortest_dist\n",
175 |     "\n",
176 |     "# Calculate the distance between point 1 (x1, y1) and point 2 (x2, y2)\n",
177 |     "def distance(p1, p2):\n",
178 |     "    return math.sqrt(((p1[0] - p2[0]) ** 2) + ((p1[1] - p2[1]) ** 2))\n",
179 |     "\n",
180 |     "# Create dictionary of lane co-ordinates, which can be used to check for ganks\n",
181 |     "lane_locs = {\"top\": [[2250, 12750],\n",
182 |     "                      [1500, 12000],\n",
183 |     "                      [3500, 13500]],\n",
184 |     "             \"middle\": [[7500, 7500],\n",
185 |     "                     [8500, 8500],\n",
186 |     "                     [6500, 6750]],\n",
187 |     "             \"adc\": [[12750, 2250],\n",
188 |     "                     [13500, 3500],\n",
189 |     "                     [11500, 1500],\n",
190 |     "                     [10500, 800],\n",
191 |     "                     [14100, 4500]]}\n",
192 |     "\n",
193 |     "\n",
194 |     "def determine_lane(lane_cs, jungle_cs, coord_2, coord_3):\n",
195 |     "    if jungle_cs > 5:\n",
196 |     "        lane = \"jungle\"\n",
197 |     "    elif lane_cs < 12:\n",
198 |     "        lane = \"support\"\n",
199 |     "    elif lane_cs >= 13:\n",
200 |     "        lane = position_lane_check(coord_2, coord_3, 1800)\n",
201 |     "    else:\n",
202 |     "        lane = False\n",
203 |     "    return lane\n",
204 |     "\n",
205 |     "\n",
206 |     "def position_lane_check(coord_2, coord_3, dist):\n",
207 |     "    lane_check_one = current_lane(coord_2, dist)\n",
208 |     "    lane_check_two = current_lane(coord_3, dist)\n",
209 |     "\n",
210 |     "    if lane_check_one == lane_check_two and lane_check_one != False:\n",
211 |     "        lane = lane_check_one\n",
212 |     "    elif lane_check_one == False and lane_check_two != False:\n",
213 |     "        lane = lane_check_two\n",
214 |     "    elif lane_check_one != False and lane_check_two == False:\n",
215 |     "        lane = lane_check_one\n",
216 |     "    else:\n",
217 |     "        lane = False\n",
218 |     "    return lane\n",
219 |     "\n",
220 |     "\n",
221 |     "def create_id_lane_dict(match_timeline):\n",
222 |     "    id_lane_dict = {}\n",
223 |     "    for i in range(1, 11):\n",
224 |     "        part_id = match_timeline['frames'][0]['participantFrames'][str(i)]['participantId']\n",
225 |     "        lane_cs = match_timeline['frames'][4]['participantFrames'][str(i)]['minionsKilled']\n",
226 |     "        jungle_cs = match_timeline['frames'][4]['participantFrames'][str(i)]['jungleMinionsKilled']\n",
227 |     "        coord_2 = match_timeline['frames'][3]['participantFrames'][str(i)]['position']\n",
228 |     "        coord_3 = match_timeline['frames'][4]['participantFrames'][str(i)]['position']\n",
229 |     "        lane = determine_lane(lane_cs, jungle_cs, coord_2, coord_3)\n",
230 |     "        id_lane_dict[part_id] = lane\n",
231 |     "    return id_lane_dict\n",
232 |     "\n",
233 |     "\n",
234 |     "def parse_event_info(match_timeline):\n",
235 |     "    killer, victim, assists, timestamp, pos = [], [], [], [], []\n",
236 |     "    all_kills = []\n",
237 |     "    drake_kills_list = []\n",
238 |     "    for i in range(len(match_timeline['frames'])):\n",
239 |     "        for event in match_timeline['frames'][i]['events']:\n",
240 |     "            if event['type'] == 'CHAMPION_KILL':\n",
241 |     "                kill_info = [event['killerId']]\n",
242 |     "                kill_info.append(event['victimId'])\n",
243 |     "                kill_info.append(event['assistingParticipantIds'])\n",
244 |     "                kill_info.append(event['timestamp'])\n",
245 |     "                kill_info.append(event['position'])\n",
246 |     "                all_kills.append(kill_info)\n",
247 |     "            if event['type'] == 'ELITE_MONSTER_KILL' and event['monsterType'] == 'DRAGON':\n",
248 |     "                drake_kills_list.append(event['killerId'])\n",
249 |     "    kill_df = pd.DataFrame(all_kills, columns = ['Killer', 'Victim', 'Assists', 'Timestamp','Position'])\n",
250 |     "    solo_kills_df = kill_df[kill_df['Assists'].map(lambda d: len(d)) == 0].groupby('Killer').count()\n",
251 |     "    \n",
252 |     "    early_kills_df = kill_df[kill_df['Timestamp'] < 60000 * 10]\n",
253 |     "    lanes = []\n",
254 |     "    for i in range(len(early_kills_df)):\n",
255 |     "        kill_lane = current_lane(early_kills_df.loc[i]['Position'], 1750)\n",
256 |     "        lanes.append(kill_lane)\n",
257 |     "    early_kills_df['Lanes'] = lanes\n",
258 |     "    \n",
259 |     "    return solo_kills_df, early_kills_df, drake_kills_list\n",
260 |     "\n",
261 |     "\n",
262 |     "def get_solo_kills(solo_kills_df, part_id):\n",
263 |     "    if part_id in solo_kills_df.index:\n",
264 |     "        solo_kills = solo_kills_df.loc[part_id].Victim\n",
265 |     "    else:\n",
266 |     "        solo_kills = 0\n",
267 |     "    return solo_kills\n",
268 |     "\n",
269 |     "\n",
270 |     "def get_early_ganks(early_kills_df, part_id, lane):\n",
271 |     "    early_ganks = early_kills_df[(early_kills_df['Lanes'] != False) &\n",
272 |     "                   (early_kills_df['Killer'] == part_id) &\n",
273 |     "                  (early_kills_df['Lanes'] != lane)].count()['Victim']\n",
274 |     "    return early_ganks\n",
275 |     "    \n",
276 |     "\n",
277 |     "def blue_win_check(game_info):\n",
278 |     "    if game_info['teams'][0]['win'] == 'Win':\n",
279 |     "        return 1\n",
280 |     "    else:\n",
281 |     "        return 0\n",
282 |     "    \n",
283 |     "def get_team_data(features, matchId, matchRegion, matchTier, key, session):  \n",
284 |     "    game_info = get_gameInfo(matchId, matchRegion, key, session)\n",
285 |     "    match_timeline = get_matchTimeline(matchId, matchRegion, key, session)\n",
286 |     "    solo_kills_df, early_kills_df, drake_kills_list = parse_event_info(match_timeline)\n",
287 |     "    id_lane_dict = create_id_lane_dict(match_timeline)\n",
288 |     "    game_duration = game_info['gameDuration']\n",
289 |     "    all_team_stats = []\n",
290 |     "    blue_result = blue_win_check(game_info)\n",
291 |     "    red_result = 1 - blue_result\n",
292 |     "    for player in game_info['participants']:\n",
293 |     "        part_id = player['participantId']\n",
294 |     "        lane = id_lane_dict[part_id]\n",
295 |     "        player_stats = []\n",
296 |     "        for stat in features:\n",
297 |     "            player_stats.append(player['stats'][stat])\n",
298 |     "        first10_xp = game_info['participants'][0]['timeline']['xpPerMinDeltas']['0-10']\n",
299 |     "        first10_gold = game_info['participants'][0]['timeline']['goldPerMinDeltas']['0-10']\n",
300 |     "        solo_kills = get_solo_kills(solo_kills_df, part_id) \n",
301 |     "        early_ganks = get_early_ganks(early_kills_df, part_id, lane)\n",
302 |     "        drake_kills = drake_kills_list.count(part_id)\n",
303 |     "        player_stats.append(first10_xp)\n",
304 |     "        player_stats.append(first10_gold)\n",
305 |     "        player_stats.append(solo_kills)\n",
306 |     "        player_stats.append(early_ganks)\n",
307 |     "        player_stats.append(drake_kills)\n",
308 |     "        if part_id <= 5:\n",
309 |     "            player_stats.insert(0, blue_result)\n",
310 |     "        else:\n",
311 |     "            player_stats.insert(0, red_result)\n",
312 |     "        player_stats.insert(0, matchTier)\n",
313 |     "        player_stats.insert(0, lane)\n",
314 |     "        player_stats.insert(0, player['championId'])\n",
315 |     "\n",
316 |     "        all_team_stats.append(player_stats)\n",
317 |     "    return all_team_stats\n",
318 |     "\n",
319 |     "\n",
320 |     "def gameIds_to_data(matchIds, matchRegions, matchTiers, key, session):\n",
321 |     "\n",
322 |     "    features = ['kills', 'deaths', 'assists', 'largestKillingSpree', 'largestMultiKill', 'killingSprees', \n",
323 |     "                'longestTimeSpentLiving',  'totalDamageDealt', 'magicDamageDealt', 'physicalDamageDealt', \n",
324 |     "                'trueDamageDealt', 'largestCriticalStrike', 'totalDamageDealtToChampions', \n",
325 |     "                'magicDamageDealtToChampions', 'trueDamageDealtToChampions', 'totalHeal', 'damageSelfMitigated',\n",
326 |     "                'damageDealtToObjectives', 'damageDealtToTurrets', 'timeCCingOthers', 'totalDamageTaken', \n",
327 |     "                'goldEarned', 'turretKills', 'inhibitorKills', 'totalMinionsKilled', 'totalTimeCrowdControlDealt',\n",
328 |     "               'neutralMinionsKilled', 'neutralMinionsKilledTeamJungle', 'neutralMinionsKilledEnemyJungle',\n",
329 |     "                'firstBloodKill', 'firstBloodAssist', 'firstTowerKill', 'firstTowerAssist']\n",
330 |     "    \n",
331 |     "    all_stats = []\n",
332 |     "    for i in range(len(matchIds)):\n",
333 |     "        if i % 1000 == 0:\n",
334 |     "            print(i)\n",
335 |     "        try:\n",
336 |     "            all_stats.extend(get_team_data(features, matchIds[i], matchRegions[i], matchTiers[i], key, session))\n",
337 |     "        except:\n",
338 |     "            pass\n",
339 |     "    col = ['Champ ID', 'Lane', 'Tier', 'Result'] + features + ['first10_xpm', 'first10_gpm', 'soloKills', 'earlyGanks', 'drakesKilled']\n",
340 |     "    df = pd.DataFrame(all_stats, columns = col) \n",
341 |     "    bool_cols = ['firstBloodKill', 'firstBloodAssist', 'firstTowerKill', 'firstTowerAssist']\n",
342 |     "    for col in bool_cols:\n",
343 |     "        df[col] = df[col].astype(int)\n",
344 |     "    return df\n",
345 |     "\n",
346 |     "def full(fullRegionList, tierList, key, session):\n",
347 |     "    start = t.time()\n",
348 |     "    matchIds, matchRegions, matchTiers = main(fullRegionList, tierList, key, session)\n",
349 |     "    end = t.time()\n",
350 |     "    run_time = end - start\n",
351 |     "    print(\"Match ID finding run time:\", run_time)\n",
352 |     "    start = t.time()\n",
353 |     "    df = gameIds_to_data(matchIds, matchRegions, matchTiers, key, session)\n",
354 |     "    end = t.time()\n",
355 |     "    run_time = end - start\n",
356 |     "    print(\"ID to DF run time:\", run_time)\n",
357 |     "    return df"
358 |    ]
359 |   },
360 |   {
361 |    "cell_type": "code",
362 |    "execution_count": 9,
363 |    "metadata": {},
364 |    "outputs": [
365 |     {
366 |      "name": "stdout",
367 |      "output_type": "stream",
368 |      "text": [
369 |       "12300 summoners found. Transforming to account IDs.\n",
370 |       "12300 account IDs successfully transformed. Getting match IDs.\n",
371 |       "68415 game IDs found, converting to data.\n",
372 |       "Match ID finding run time: 4306.0135061740875\n",
373 |       "0\n"
374 |      ]
375 |     },
376 |     {
377 |      "name": "stderr",
378 |      "output_type": "stream",
379 |      "text": [
380 |       "C:\\Users\\Jack Wills\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:102: SettingWithCopyWarning: \n",
381 |       "A value is trying to be set on a copy of a slice from a DataFrame.\n",
382 |       "Try using .loc[row_indexer,col_indexer] = value instead\n",
383 |       "\n",
384 |       "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
385 |       "C:\\Users\\Jack Wills\\Anaconda3\\lib\\site-packages\\pandas\\core\\ops\\__init__.py:1115: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n",
386 |       "  result = method(y)\n"
387 |      ]
388 |     },
389 |     {
390 |      "name": "stdout",
391 |      "output_type": "stream",
392 |      "text": [
393 |       "1000\n",
394 |       "2000\n",
395 |       "3000\n",
396 |       "4000\n",
397 |       "5000\n",
398 |       "6000\n",
399 |       "7000\n",
400 |       "8000\n",
401 |       "9000\n",
402 |       "10000\n",
403 |       "11000\n",
404 |       "12000\n",
405 |       "13000\n",
406 |       "14000\n",
407 |       "15000\n",
408 |       "16000\n",
409 |       "17000\n",
410 |       "18000\n",
411 |       "19000\n",
412 |       "20000\n",
413 |       "21000\n",
414 |       "22000\n",
415 |       "23000\n",
416 |       "24000\n",
417 |       "25000\n",
418 |       "26000\n",
419 |       "27000\n",
420 |       "28000\n",
421 |       "29000\n",
422 |       "30000\n",
423 |       "31000\n",
424 |       "32000\n",
425 |       "33000\n",
426 |       "34000\n",
427 |       "35000\n",
428 |       "36000\n",
429 |       "37000\n",
430 |       "38000\n",
431 |       "39000\n",
432 |       "40000\n",
433 |       "41000\n",
434 |       "42000\n",
435 |       "43000\n",
436 |       "44000\n",
437 |       "45000\n",
438 |       "46000\n",
439 |       "47000\n",
440 |       "48000\n",
441 |       "49000\n",
442 |       "50000\n",
443 |       "51000\n",
444 |       "52000\n",
445 |       "53000\n",
446 |       "54000\n",
447 |       "55000\n",
448 |       "56000\n",
449 |       "57000\n",
450 |       "58000\n",
451 |       "59000\n",
452 |       "60000\n",
453 |       "61000\n",
454 |       "62000\n",
455 |       "63000\n",
456 |       "64000\n",
457 |       "65000\n",
458 |       "66000\n",
459 |       "67000\n",
460 |       "68000\n",
461 |       "ID to DF run time: 52127.05663204193\n"
462 |      ]
463 |     }
464 |    ],
465 |    "source": [
466 |     "fullRegionList = ['euw1']\n",
467 |     "tierList = [['DIAMOND', 'III'], ['DIAMOND', 'II'], ['DIAMOND', 'I']]\n",
468 |     "key = 'YOUR_KEY (see Riot API site for details)'\n",
469 |     "session = requests.Session()\n",
470 |     "\n",
471 |     "df = full(fullRegionList, tierList, key, session)"
472 |    ]
473 |   },
474 |   {
475 |    "cell_type": "code",
476 |    "execution_count": 12,
477 |    "metadata": {},
478 |    "outputs": [],
479 |    "source": [
480 |     "df.to_csv('final_data2.csv')\n",
481 |     "\n",
482 |     "# 12,300 summoners\n",
483 |     "# 65 seconds per batch, 60 batches = 1 hour 5 minutes\n",
484 |     "# ~~ 92,852 games\n",
485 |     "# 92,852 games * 0.685 secs per game = ~~63,418 seconds\n",
486 |     "# 17 hours 36 minutes + 1 hour 5 minutes = ~~18 hours 41 minutes\n",
487 |     "# 21:08 start time\n",
488 |     "# 15:49 end time\n",
489 |     "\n",
490 |     "# gameIds were found by 22:17\n",
491 |     "# 22:17 to 16:27 = 18 hours 10 minutes = 65,400 seconds\n",
492 |     "# 65,400 seconds / 95,000 batch  = 0.688 secs per game\n",
493 |     "\n",
494 |     "# 18,000 batches * 0.688 = 12,384 / 60 / 60 =  3 hours 26 minutes\n",
495 |     "# 09:03 + 3 hours 26 minutes = 12:44 end time\n",
496 |     "# 12,000 batches * 0.688 = 8,256 / 60 / 60 = 2 hours 17 minutes\n",
497 |     "# 10:32 + 2 hours 17 minutes = 12:49 end time\n",
498 |     "\n",
499 |     "# 11,000 batches * 0.688 = 2 hours 6 minutes\n",
500 |     "# 13:06 + 2 hours 6 minutes = 15:12 ????\n",
501 |     "\n",
502 |     "# 4,500 batches * 0.688 = 51 minutes\n",
503 |     "\n"
504 |    ]
505 |   }
506 |  ],
507 |  "metadata": {
508 |   "kernelspec": {
509 |    "display_name": "Python 3",
510 |    "language": "python",
511 |    "name": "python3"
512 |   },
513 |   "language_info": {
514 |    "codemirror_mode": {
515 |     "name": "ipython",
516 |     "version": 3
517 |    },
518 |    "file_extension": ".py",
519 |    "mimetype": "text/x-python",
520 |    "name": "python",
521 |    "nbconvert_exporter": "python",
522 |    "pygments_lexer": "ipython3",
523 |    "version": "3.7.5"
524 |   }
525 |  },
526 |  "nbformat": 4,
527 |  "nbformat_minor": 2
528 | }
529 | 


--------------------------------------------------------------------------------
/Classification/vertebral_column_data.txt:
--------------------------------------------------------------------------------
  1 | 63.03 22.55 39.61 40.48 98.67 -0.25 AB
  2 | 39.06 10.06 25.02 29 114.41 4.56 AB
  3 | 68.83 22.22 50.09 46.61 105.99 -3.53 AB
  4 | 69.3 24.65 44.31 44.64 101.87 11.21 AB
  5 | 49.71 9.65 28.32 40.06 108.17 7.92 AB
  6 | 40.25 13.92 25.12 26.33 130.33 2.23 AB
  7 | 53.43 15.86 37.17 37.57 120.57 5.99 AB
  8 | 45.37 10.76 29.04 34.61 117.27 -10.68 AB
  9 | 43.79 13.53 42.69 30.26 125 13.29 AB
 10 | 36.69 5.01 41.95 31.68 84.24 0.66 AB
 11 | 49.71 13.04 31.33 36.67 108.65 -7.83 AB
 12 | 31.23 17.72 15.5 13.52 120.06 0.5 AB
 13 | 48.92 19.96 40.26 28.95 119.32 8.03 AB
 14 | 53.57 20.46 33.1 33.11 110.97 7.04 AB
 15 | 57.3 24.19 47 33.11 116.81 5.77 AB
 16 | 44.32 12.54 36.1 31.78 124.12 5.42 AB
 17 | 63.83 20.36 54.55 43.47 112.31 -0.62 AB
 18 | 31.28 3.14 32.56 28.13 129.01 3.62 AB
 19 | 38.7 13.44 31 25.25 123.16 1.43 AB
 20 | 41.73 12.25 30.12 29.48 116.59 -1.24 AB
 21 | 43.92 14.18 37.83 29.74 134.46 6.45 AB
 22 | 54.92 21.06 42.2 33.86 125.21 2.43 AB
 23 | 63.07 24.41 54 38.66 106.42 15.78 AB
 24 | 45.54 13.07 30.3 32.47 117.98 -4.99 AB
 25 | 36.13 22.76 29 13.37 115.58 -3.24 AB
 26 | 54.12 26.65 35.33 27.47 121.45 1.57 AB
 27 | 26.15 10.76 14 15.39 125.2 -10.09 AB
 28 | 43.58 16.51 47 27.07 109.27 8.99 AB
 29 | 44.55 21.93 26.79 22.62 111.07 2.65 AB
 30 | 66.88 24.89 49.28 41.99 113.48 -2.01 AB
 31 | 50.82 15.4 42.53 35.42 112.19 10.87 AB
 32 | 46.39 11.08 32.14 35.31 98.77 6.39 AB
 33 | 44.94 17.44 27.78 27.49 117.98 5.57 AB
 34 | 38.66 12.99 40 25.68 124.91 2.7 AB
 35 | 59.6 32 46.56 27.6 119.33 1.47 AB
 36 | 31.48 7.83 24.28 23.66 113.83 4.39 AB
 37 | 32.09 6.99 36 25.1 132.26 6.41 AB
 38 | 35.7 19.44 20.7 16.26 137.54 -0.26 AB
 39 | 55.84 28.85 47.69 27 123.31 2.81 AB
 40 | 52.42 19.01 35.87 33.41 116.56 1.69 AB
 41 | 35.49 11.7 15.59 23.79 106.94 -3.46 AB
 42 | 46.44 8.4 29.04 38.05 115.48 2.05 AB
 43 | 53.85 19.23 32.78 34.62 121.67 5.33 AB
 44 | 66.29 26.33 47.5 39.96 121.22 -0.8 AB
 45 | 56.03 16.3 62.28 39.73 114.02 -2.33 AB
 46 | 50.91 23.02 47 27.9 117.42 -2.53 AB
 47 | 48.33 22.23 36.18 26.1 117.38 6.48 AB
 48 | 41.35 16.58 30.71 24.78 113.27 -4.5 AB
 49 | 40.56 17.98 34 22.58 121.05 -1.54 AB
 50 | 41.77 17.9 20.03 23.87 118.36 2.06 AB
 51 | 55.29 20.44 34 34.85 115.88 3.56 AB
 52 | 74.43 41.56 27.7 32.88 107.95 5 AB
 53 | 50.21 29.76 36.1 20.45 128.29 5.74 AB
 54 | 30.15 11.92 34 18.23 112.68 11.46 AB
 55 | 41.17 17.32 33.47 23.85 116.38 -9.57 AB
 56 | 47.66 13.28 36.68 34.38 98.25 6.27 AB
 57 | 43.35 7.47 28.07 35.88 112.78 5.75 AB
 58 | 46.86 15.35 38 31.5 116.25 1.66 AB
 59 | 43.2 19.66 35 23.54 124.85 -2.92 AB
 60 | 48.11 14.93 35.56 33.18 124.06 7.95 AB
 61 | 74.38 32.05 78.77 42.32 143.56 56.13 AB
 62 | 89.68 32.7 83.13 56.98 129.96 92.03 AB
 63 | 44.53 9.43 52 35.1 134.71 29.11 AB
 64 | 77.69 21.38 64.43 56.31 114.82 26.93 AB
 65 | 76.15 21.94 82.96 54.21 123.93 10.43 AB
 66 | 83.93 41.29 62 42.65 115.01 26.59 AB
 67 | 78.49 22.18 60 56.31 118.53 27.38 AB
 68 | 75.65 19.34 64.15 56.31 95.9 69.55 AB
 69 | 72.08 18.95 51 53.13 114.21 1.01 AB
 70 | 58.6 -0.26 51.5 58.86 102.04 28.06 AB
 71 | 72.56 17.39 52 55.18 119.19 32.11 AB
 72 | 86.9 32.93 47.79 53.97 135.08 101.72 AB
 73 | 84.97 33.02 60.86 51.95 125.66 74.33 AB
 74 | 55.51 20.1 44 35.42 122.65 34.55 AB
 75 | 72.22 23.08 91 49.14 137.74 56.8 AB
 76 | 70.22 39.82 68.12 30.4 148.53 145.38 AB
 77 | 86.75 36.04 69.22 50.71 139.41 110.86 AB
 78 | 58.78 7.67 53.34 51.12 98.5 51.58 AB
 79 | 67.41 17.44 60.14 49.97 111.12 33.16 AB
 80 | 47.74 12.09 39 35.66 117.51 21.68 AB
 81 | 77.11 30.47 69.48 46.64 112.15 70.76 AB
 82 | 74.01 21.12 57.38 52.88 120.21 74.56 AB
 83 | 88.62 29.09 47.56 59.53 121.76 51.81 AB
 84 | 81.1 24.79 77.89 56.31 151.84 65.21 AB
 85 | 76.33 42.4 57.2 33.93 124.27 50.13 AB
 86 | 45.44 9.91 45 35.54 163.07 20.32 AB
 87 | 59.79 17.88 59.21 41.91 119.32 22.12 AB
 88 | 44.91 10.22 44.63 34.7 130.08 37.36 AB
 89 | 56.61 16.8 42 39.81 127.29 24.02 AB
 90 | 71.19 23.9 43.7 47.29 119.86 27.28 AB
 91 | 81.66 28.75 58.23 52.91 114.77 30.61 AB
 92 | 70.95 20.16 62.86 50.79 116.18 32.52 AB
 93 | 85.35 15.84 71.67 69.51 124.42 76.02 AB
 94 | 58.1 14.84 79.65 43.26 113.59 50.24 AB
 95 | 94.17 15.38 67.71 78.79 114.89 53.26 AB
 96 | 57.52 33.65 50.91 23.88 140.98 148.75 AB
 97 | 96.66 19.46 90.21 77.2 120.67 64.08 AB
 98 | 74.72 19.76 82.74 54.96 109.36 33.31 AB
 99 | 77.66 22.43 93.89 55.22 123.06 61.21 AB
100 | 58.52 13.92 41.47 44.6 115.51 30.39 AB
101 | 84.59 30.36 65.48 54.22 108.01 25.12 AB
102 | 79.94 18.77 63.31 61.16 114.79 38.54 AB
103 | 70.4 13.47 61.2 56.93 102.34 25.54 AB
104 | 49.78 6.47 53 43.32 110.86 25.34 AB
105 | 77.41 29.4 63.23 48.01 118.45 93.56 AB
106 | 65.01 27.6 50.95 37.41 116.58 7.02 AB
107 | 65.01 9.84 57.74 55.18 94.74 49.7 AB
108 | 78.43 33.43 76.28 45 138.55 77.16 AB
109 | 63.17 6.33 63 56.84 110.64 42.61 AB
110 | 68.61 15.08 63.01 53.53 123.43 39.5 AB
111 | 63.9 13.71 62.12 50.19 114.13 41.42 AB
112 | 85 29.61 83.35 55.39 126.91 71.32 AB
113 | 42.02 -6.55 67.9 48.58 111.59 27.34 AB
114 | 69.76 19.28 48.5 50.48 96.49 51.17 AB
115 | 80.99 36.84 86.96 44.14 141.09 85.87 AB
116 | 129.83 8.4 48.38 121.43 107.69 418.54 AB
117 | 70.48 12.49 62.42 57.99 114.19 56.9 AB
118 | 86.04 38.75 47.87 47.29 122.09 61.99 AB
119 | 65.54 24.16 45.78 41.38 136.44 16.38 AB
120 | 60.75 15.75 43.2 45 113.05 31.69 AB
121 | 54.74 12.1 41 42.65 117.64 40.38 AB
122 | 83.88 23.08 87.14 60.8 124.65 80.56 AB
123 | 80.07 48.07 52.4 32.01 110.71 67.73 AB
124 | 65.67 10.54 56.49 55.12 109.16 53.93 AB
125 | 74.72 14.32 32.5 60.4 107.18 37.02 AB
126 | 48.06 5.69 57.06 42.37 95.44 32.84 AB
127 | 70.68 21.7 59.18 48.97 103.01 27.81 AB
128 | 80.43 17 66.54 63.43 116.44 57.78 AB
129 | 90.51 28.27 69.81 62.24 100.89 58.82 AB
130 | 77.24 16.74 49.78 60.5 110.69 39.79 AB
131 | 50.07 9.12 32.17 40.95 99.71 26.77 AB
132 | 69.78 13.78 58 56 118.93 17.91 AB
133 | 69.63 21.12 52.77 48.5 116.8 54.82 AB
134 | 81.75 20.12 70.56 61.63 119.43 55.51 AB
135 | 52.2 17.21 78.09 34.99 136.97 54.94 AB
136 | 77.12 30.35 77.48 46.77 110.61 82.09 AB
137 | 88.02 39.84 81.77 48.18 116.6 56.77 AB
138 | 83.4 34.31 78.42 49.09 110.47 49.67 AB
139 | 72.05 24.7 79.87 47.35 107.17 56.43 AB
140 | 85.1 21.07 91.73 64.03 109.06 38.03 AB
141 | 69.56 15.4 74.44 54.16 105.07 29.7 AB
142 | 89.5 48.9 72 40.6 134.63 118.35 AB
143 | 85.29 18.28 100.74 67.01 110.66 58.88 AB
144 | 60.63 20.6 64.54 40.03 117.23 104.86 AB
145 | 60.04 14.31 58.04 45.73 105.13 30.41 AB
146 | 85.64 42.69 78.75 42.95 105.14 42.89 AB
147 | 85.58 30.46 78.23 55.12 114.87 68.38 AB
148 | 55.08 -3.76 56 58.84 109.92 31.77 AB
149 | 65.76 9.83 50.82 55.92 104.39 39.31 AB
150 | 79.25 23.94 40.8 55.3 98.62 36.71 AB
151 | 81.11 20.69 60.69 60.42 94.02 40.51 AB
152 | 48.03 3.97 58.34 44.06 125.35 35 AB
153 | 63.4 14.12 48.14 49.29 111.92 31.78 AB
154 | 57.29 15.15 64 42.14 116.74 30.34 AB
155 | 41.19 5.79 42.87 35.39 103.35 27.66 AB
156 | 66.8 14.55 72.08 52.25 82.46 41.69 AB
157 | 79.48 26.73 70.65 52.74 118.59 61.7 AB
158 | 44.22 1.51 46.11 42.71 108.63 42.81 AB
159 | 57.04 0.35 49.2 56.69 103.05 52.17 AB
160 | 64.27 12.51 68.7 51.77 95.25 39.41 AB
161 | 92.03 35.39 77.42 56.63 115.72 58.06 AB
162 | 67.26 7.19 51.7 60.07 97.8 42.14 AB
163 | 118.14 38.45 50.84 79.7 81.02 74.04 AB
164 | 115.92 37.52 76.8 78.41 104.7 81.2 AB
165 | 53.94 9.31 43.1 44.64 124.4 25.08 AB
166 | 83.7 20.27 77.11 63.43 125.48 69.28 AB
167 | 56.99 6.87 57.01 50.12 109.98 36.81 AB
168 | 72.34 16.42 59.87 55.92 70.08 12.07 AB
169 | 95.38 24.82 95.16 70.56 89.31 57.66 AB
170 | 44.25 1.1 38 43.15 98.27 23.91 AB
171 | 64.81 15.17 58.84 49.64 111.68 21.41 AB
172 | 78.4 14.04 79.69 64.36 104.73 12.39 AB
173 | 56.67 13.46 43.77 43.21 93.69 21.11 AB
174 | 50.83 9.06 56.3 41.76 79 23.04 AB
175 | 61.41 25.38 39.1 36.03 103.4 21.84 AB
176 | 56.56 8.96 52.58 47.6 98.78 50.7 AB
177 | 67.03 13.28 66.15 53.75 100.72 33.99 AB
178 | 80.82 19.24 61.64 61.58 89.47 44.17 AB
179 | 80.65 26.34 60.9 54.31 120.1 52.47 AB
180 | 68.72 49.43 68.06 19.29 125.02 54.69 AB
181 | 37.9 4.48 24.71 33.42 157.85 33.61 AB
182 | 64.62 15.23 67.63 49.4 90.3 31.33 AB
183 | 75.44 31.54 89.6 43.9 106.83 54.97 AB
184 | 71 37.52 84.54 33.49 125.16 67.77 AB
185 | 81.06 20.8 91.78 60.26 125.43 38.18 AB
186 | 91.47 24.51 84.62 66.96 117.31 52.62 AB
187 | 81.08 21.26 78.77 59.83 90.07 49.16 AB
188 | 60.42 5.27 59.81 55.15 109.03 30.27 AB
189 | 85.68 38.65 82.68 47.03 120.84 61.96 AB
190 | 82.41 29.28 77.05 53.13 117.04 62.77 AB
191 | 43.72 9.81 52 33.91 88.43 40.88 AB
192 | 86.47 40.3 61.14 46.17 97.4 55.75 AB
193 | 74.47 33.28 66.94 41.19 146.47 124.98 AB
194 | 70.25 10.34 76.37 59.91 119.24 32.67 AB
195 | 72.64 18.93 68 53.71 116.96 25.38 AB
196 | 71.24 5.27 86 65.97 110.7 38.26 AB
197 | 63.77 12.76 65.36 51.01 89.82 56 AB
198 | 58.83 37.58 125.74 21.25 135.63 117.31 AB
199 | 74.85 13.91 62.69 60.95 115.21 33.17 AB
200 | 75.3 16.67 61.3 58.63 118.88 31.58 AB
201 | 63.36 20.02 67.5 43.34 131 37.56 AB
202 | 67.51 33.28 96.28 34.24 145.6 88.3 AB
203 | 76.31 41.93 93.28 34.38 132.27 101.22 AB
204 | 73.64 9.71 63 63.92 98.73 26.98 AB
205 | 56.54 14.38 44.99 42.16 101.72 25.77 AB
206 | 80.11 33.94 85.1 46.17 125.59 100.29 AB
207 | 95.48 46.55 59 48.93 96.68 77.28 AB
208 | 74.09 18.82 76.03 55.27 128.41 73.39 AB
209 | 87.68 20.37 93.82 67.31 120.94 76.73 AB
210 | 48.26 16.42 36.33 31.84 94.88 28.34 AB
211 | 38.51 16.96 35.11 21.54 127.63 7.99 NO
212 | 54.92 18.97 51.6 35.95 125.85 2 NO
213 | 44.36 8.95 46.9 35.42 129.22 4.99 NO
214 | 48.32 17.45 48 30.87 128.98 -0.91 NO
215 | 45.7 10.66 42.58 35.04 130.18 -3.39 NO
216 | 30.74 13.35 35.9 17.39 142.41 -2.01 NO
217 | 50.91 6.68 30.9 44.24 118.15 -1.06 NO
218 | 38.13 6.56 50.45 31.57 132.11 6.34 NO
219 | 51.62 15.97 35 35.66 129.39 1.01 NO
220 | 64.31 26.33 50.96 37.98 106.18 3.12 NO
221 | 44.49 21.79 31.47 22.7 113.78 -0.28 NO
222 | 54.95 5.87 53 49.09 126.97 -0.63 NO
223 | 56.1 13.11 62.64 43 116.23 31.17 NO
224 | 69.4 18.9 75.97 50.5 103.58 -0.44 NO
225 | 89.83 22.64 90.56 67.2 100.5 3.04 NO
226 | 59.73 7.72 55.34 52 125.17 3.24 NO
227 | 63.96 16.06 63.12 47.9 142.36 6.3 NO
228 | 61.54 19.68 52.89 41.86 118.69 4.82 NO
229 | 38.05 8.3 26.24 29.74 123.8 3.89 NO
230 | 43.44 10.1 36.03 33.34 137.44 -3.11 NO
231 | 65.61 23.14 62.58 42.47 124.13 -4.08 NO
232 | 53.91 12.94 39 40.97 118.19 5.07 NO
233 | 43.12 13.82 40.35 29.3 128.52 0.97 NO
234 | 40.68 9.15 31.02 31.53 139.12 -2.51 NO
235 | 37.73 9.39 42 28.35 135.74 13.68 NO
236 | 63.93 19.97 40.18 43.96 113.07 -11.06 NO
237 | 61.82 13.6 64 48.22 121.78 1.3 NO
238 | 62.14 13.96 58 48.18 133.28 4.96 NO
239 | 69 13.29 55.57 55.71 126.61 10.83 NO
240 | 56.45 19.44 43.58 37 139.19 -1.86 NO
241 | 41.65 8.84 36.03 32.81 116.56 -6.05 NO
242 | 51.53 13.52 35 38.01 126.72 13.93 NO
243 | 39.09 5.54 26.93 33.55 131.58 -0.76 NO
244 | 34.65 7.51 43 27.14 123.99 -4.08 NO
245 | 63.03 27.34 51.61 35.69 114.51 7.44 NO
246 | 47.81 10.69 54 37.12 125.39 -0.4 NO
247 | 46.64 15.85 40 30.78 119.38 9.06 NO
248 | 49.83 16.74 28 33.09 121.44 1.91 NO
249 | 47.32 8.57 35.56 38.75 120.58 1.63 NO
250 | 50.75 20.24 37 30.52 122.34 2.29 NO
251 | 36.16 -0.81 33.63 36.97 135.94 -2.09 NO
252 | 40.75 1.84 50 38.91 139.25 0.67 NO
253 | 42.92 -5.85 58 48.76 121.61 -3.36 NO
254 | 63.79 21.35 66 42.45 119.55 12.38 NO
255 | 72.96 19.58 61.01 53.38 111.23 0.81 NO
256 | 67.54 14.66 58 52.88 123.63 25.97 NO
257 | 54.75 9.75 48 45 123.04 8.24 NO
258 | 50.16 -2.97 42 53.13 131.8 -8.29 NO
259 | 40.35 10.19 37.97 30.15 128.01 0.46 NO
260 | 63.62 16.93 49.35 46.68 117.09 -0.36 NO
261 | 54.14 11.94 43 42.21 122.21 0.15 NO
262 | 74.98 14.92 53.73 60.05 105.65 1.59 NO
263 | 42.52 14.38 25.32 28.14 128.91 0.76 NO
264 | 33.79 3.68 25.5 30.11 128.33 -1.78 NO
265 | 54.5 6.82 47 47.68 111.79 -4.41 NO
266 | 48.17 9.59 39.71 38.58 135.62 5.36 NO
267 | 46.37 10.22 42.7 36.16 121.25 -0.54 NO
268 | 52.86 9.41 46.99 43.45 123.09 1.86 NO
269 | 57.15 16.49 42.84 40.66 113.81 5.02 NO
270 | 37.14 16.48 24 20.66 125.01 7.37 NO
271 | 51.31 8.88 57 42.44 126.47 -2.14 NO
272 | 42.52 16.54 42 25.97 120.63 7.88 NO
273 | 39.36 7.01 37 32.35 117.82 1.9 NO
274 | 35.88 1.11 43.46 34.77 126.92 -1.63 NO
275 | 43.19 9.98 28.94 33.22 123.47 1.74 NO
276 | 67.29 16.72 51 50.57 137.59 4.96 NO
277 | 51.33 13.63 33.26 37.69 131.31 1.79 NO
278 | 65.76 13.21 44 52.55 129.39 -1.98 NO
279 | 40.41 -1.33 30.98 41.74 119.34 -6.17 NO
280 | 48.8 18.02 52 30.78 139.15 10.44 NO
281 | 50.09 13.43 34.46 36.66 119.13 3.09 NO
282 | 64.26 14.5 43.9 49.76 115.39 5.95 NO
283 | 53.68 13.45 41.58 40.24 113.91 2.74 NO
284 | 49 13.11 51.87 35.88 126.4 0.54 NO
285 | 59.17 14.56 43.2 44.6 121.04 2.83 NO
286 | 67.8 16.55 43.26 51.25 119.69 4.87 NO
287 | 61.73 17.11 46.9 44.62 120.92 3.09 NO
288 | 33.04 -0.32 19.07 33.37 120.39 9.35 NO
289 | 74.57 15.72 58.62 58.84 105.42 0.6 NO
290 | 44.43 14.17 32.24 30.26 131.72 -3.6 NO
291 | 36.42 13.88 20.24 22.54 126.08 0.18 NO
292 | 51.08 14.21 35.95 36.87 115.8 6.91 NO
293 | 34.76 2.63 29.5 32.12 127.14 -0.46 NO
294 | 48.9 5.59 55.5 43.32 137.11 19.85 NO
295 | 46.24 10.06 37 36.17 128.06 -5.1 NO
296 | 46.43 6.62 48.1 39.81 130.35 2.45 NO
297 | 39.66 16.21 36.67 23.45 131.92 -4.97 NO
298 | 45.58 18.76 33.77 26.82 116.8 3.13 NO
299 | 66.51 20.9 31.73 45.61 128.9 1.52 NO
300 | 82.91 29.89 58.25 53.01 110.71 6.08 NO
301 | 50.68 6.46 35 44.22 116.59 -0.21 NO
302 | 89.01 26.08 69.02 62.94 111.48 6.06 NO
303 | 54.6 21.49 29.36 33.11 118.34 -1.47 NO
304 | 34.38 2.06 32.39 32.32 128.3 -3.37 NO
305 | 45.08 12.31 44.58 32.77 147.89 -8.94 NO
306 | 47.9 13.62 36 34.29 117.45 -4.25 NO
307 | 53.94 20.72 29.22 33.22 114.37 -0.42 NO
308 | 61.45 22.69 46.17 38.75 125.67 -2.71 NO
309 | 45.25 8.69 41.58 36.56 118.55 0.21 NO
310 | 33.84 5.07 36.64 28.77 123.95 -0.2 NO
311 | 


--------------------------------------------------------------------------------
/Classification/vertebral_column_metadata.txt:
--------------------------------------------------------------------------------
 1 | Information from:
 2 | https://archive.ics.uci.edu/ml/datasets/Vertebral+Column
 3 | 
 4 | Data set containing values for six biomechanical features used to classify
 5 | orthopaedic patients into 2 classes (normal or abnormal).
 6 | 
 7 | Data Set Information:
 8 | 
 9 | Biomedical data set built by Dr. Henrique da Mota during a medical residence
10 | period in the Group of Applied Research in Orthopaedics (GARO) of the Centre
 11 | Médico-Chirurgical de Réadaptation des Massues, Lyon, France.
12 | 
13 | 
14 | Attribute Information:
15 | 
16 | Each patient is represented in the data set by six biomechanical attributes
17 | derived from the shape and orientation of the pelvis and lumbar spine (in this
18 | order): pelvic incidence, pelvic tilt, lumbar lordosis angle, sacral slope,
19 | pelvic radius and grade of spondylolisthesis. The following convention is used
20 | for the class labels: Normal (NO) and Abnormal (AB).
21 | 
22 | 


--------------------------------------------------------------------------------
/Data Analysis and API - LoL Tilt/desktop.ini:
--------------------------------------------------------------------------------
1 | [LocalizedFileNames]
2 | phase3.csv=@phase3,0
3 | 


--------------------------------------------------------------------------------
/Data Creation - Jungle Stats per Champion/champ_parse.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | from math import floor
  3 | import numpy as np
  4 | import numpy.ma as ma
  5 | from itertools import zip_longest
  6 | 
  7 | 
  8 | def create_groupby(df):
  9 |     champ_stats = df.groupby(['champ_id', 'tier']).mean()
 10 |     alt_mean_cols = ['spell1', 'spell2', 'perk_main', 'perk_second']
 11 |     champ_stats.drop(alt_mean_cols, axis=1, inplace=True)
 12 |     return champ_stats
 13 | 
 14 | 
 15 | def count_groupby(df, champ_stats):
 16 |     count = df.groupby(['champ_id', 'tier']).count()
 17 |     count.reset_index(inplace = True)
 18 |     count = count[['champ_id', 'tier', 'game_duration']]
 19 |     count.rename(columns={'game_duration': 'count'}, inplace=True)
 20 |     champ_stats = pd.merge(champ_stats, count, on=['champ_id', 'tier'])
 21 |     return champ_stats
 22 | 
 23 | 
 24 | def database_champs(df):
 25 |     champs = df['champ_id'].unique()
 26 |     return champs
 27 | 
 28 | 
 29 | def average(a_list):
 30 |     average_list = [np.ma.average(ma.masked_values(temp_list, None)) for temp_list in zip_longest(*a_list)]
 31 |     return average_list
 32 | 
 33 | def average_list(df, champ_stats, champs, column):
 34 |     stat_dict = {}
 35 |     for champ in champs:
 36 |         champ_lists = df[df['champ_id'] == champ][column]
 37 |         stat_dict[champ] = list(average(champ_lists))
 38 |     champ_stats[column] = df['champ_id'].map(stat_dict)
 39 |     return champ_stats
 40 | 
 41 | 
 42 | def average_all_lists(df, champ_stats, champs):
 43 |     columns = ['gpm', 'xpm', 'cpm']
 44 |     for column in columns:
 45 |         champ_stats = average_list(df, champ_stats, champs, column)
 46 |     return champ_stats
 47 | 
 48 | 
 49 | def popular(df, champ_id, perk_name):
 50 |     filtered = df[df['champ_id'] == champ_id]
 51 |     pop_id = filtered[perk_name].value_counts().idxmax()
 52 |     pop_value = filtered[perk_name].value_counts().max()
 53 |     return pop_id, pop_value
 54 | 
 55 | 
 56 | def add_popular(df, champ_stats, champs, perk_name):
 57 |     id_dict = {}
 58 |     value_dict = {}
 59 |     for champ in champs:
 60 |         pop_id, pop_value = popular(df, champ, perk_name)
 61 |         id_dict[champ] = pop_id
 62 |         value_dict[champ] = pop_value
 63 |     champ_stats[perk_name] = champ_stats['champ_id'].map(id_dict)
 64 |     champ_stats[perk_name + '_count'] = champ_stats['champ_id'].map(value_dict)
 65 |     return champ_stats
 66 | 
 67 | 
 68 | def popular_all(df, champ_stats, champs):
 69 |     columns = ['spell1', 'spell2', 'perk_main', 'perk_second', 'perks', 'runes', 'items']
 70 |     for column in columns:
 71 |         champ_stats = add_popular(df, champ_stats, champs, column)
 72 |     return champ_stats
 73 | 
 74 | 
 75 | def create_interval_times(min_time, max_time):
 76 |     min_times = []
 77 |     max_times = []
 78 |     for i in range(min_time, max_time + 1, 5):
 79 |         max_times.append(i * 60)
 80 |         min_times.append((i - 5) * 60)
 81 |     max_times.append(120 * 60)
 82 |     min_times[0] = 0
 83 |     return min_times, max_times
 84 | 
 85 | 
 86 | def wr_by_time(df, champ_id, interval_times):
 87 |     wrs = []
 88 |     for j in range(len(interval_times[0])):
 89 |         filtered = df[(df['champ_id'] == champ_id) &
 90 |                       (df['game_duration'] <= interval_times[1][j]) &
 91 |                       (df['game_duration'] > interval_times[0][j])
 92 |                       ]
 93 |         wr = filtered['result'].mean()
 94 |         wrs.append(wr)
 95 |     return wrs
 96 | 
 97 | 
 98 | def wr_by_time_all(df, champ_stats, champs):
 99 |     wr_dict = {}
100 |     interval_times = create_interval_times(20, 40)
101 |     for champ in champs:
102 |         wrs = wr_by_time(df, champ, interval_times)
103 |         wr_dict[champ] = wrs
104 |     champ_stats['wr_time'] = champ_stats['champ_id'].map(wr_dict)
105 |     return champ_stats
106 | 
107 | 
def sec_to_min(num):
    """Format a duration in seconds as the string ``'<minutes>.<seconds>'``.

    The seconds part is the whole number of seconds left after removing
    full minutes and is not zero-padded (65 -> ``'1.5'``), matching the
    existing output format.

    Fix: the seconds used to be derived as ``((num / 60) % 1) * 60``, whose
    floating-point round trip truncated exact values (61 gave ``'1.0'``
    instead of ``'1.1'``).  ``divmod`` keeps the remainder exact.

    :param num: duration in seconds (int or float).
    :return: the formatted string.
    """
    whole_minutes, leftover_seconds = divmod(num, 60)
    return str(int(whole_minutes)) + '.' + str(int(leftover_seconds))
113 | 
114 | 
def pm_calc(val, secs):
    """Convert a total ``val`` accumulated over ``secs`` seconds to a per-minute rate."""
    return val / secs * 60
119 | 
120 | 
def extra_features(df):
    """Add derived columns: formatted duration, champion name and per-minute rates.

    Steps, in order:
      * ``game_minutes`` -- ``game_duration`` formatted by ``sec_to_min``;
      * ``champion`` -- merged in from ``champ_ids.csv`` on ``champ_id``;
      * per-minute rates computed with ``pm_calc`` over ``game_duration``;
      * ``total_jungle`` (enemy + friendly jungle + scuttles) and its rate.

    :param df: frame with ``game_duration``, ``champ_id`` and the raw stat
        columns referenced below.
    :return: the merged frame with the new columns.
    """
    df['game_minutes'] = df['game_duration'].apply(sec_to_min)
    champion_names = pd.read_csv('champ_ids.csv')[['champ_id', 'champion']]
    df = pd.merge(df, champion_names, on='champ_id')

    # (new rate column, raw total column), kept in the original column order.
    rate_sources = (
        ('kpm', 'kills'),
        ('depm', 'deaths'),
        ('apm', 'assists'),
        ('dapm', 'damage_dealt'),
        ('vpm', 'vision_score'),
        ('gold_pm', 'gold_earnt'),
        ('enemy_jpm', 'enemy_jungle'),
        ('friendly_jpm', 'friendly_jungle'),
    )
    for rate_column, raw_column in rate_sources:
        df[rate_column] = pm_calc(df[raw_column], df['game_duration'])

    df['total_jungle'] = df['enemy_jungle'] + df['friendly_jungle'] + df['scuttles_killed']
    df['total_jpm'] = pm_calc(df['total_jungle'], df['game_duration'])
    return df
136 | 
137 | 
def final_tweaks(df):
    """Reorder columns, sort by sample size and drop thinly sampled rows.

    ``champion`` is moved to column position 1 and ``count`` to position 2;
    rows are then sorted by ``count`` (largest first) and only rows with
    ``count`` greater than 100 are kept.

    :return: the reordered, sorted and filtered frame.
    """
    column_order = list(df)
    for position, column_name in ((1, 'champion'), (2, 'count')):
        column_order.insert(position, column_order.pop(column_order.index(column_name)))
    ranked = df.loc[:, column_order].sort_values('count', ascending=False)
    return ranked[ranked['count'] > 100]
148 | 
149 | 
150 | 
def champ_parse(df):
    """Turn the per-game frame ``df`` into the per-champion statistics table.

    Pipeline: collect champion ids, aggregate means and counts, then enrich
    with list averages, win rate by game length and most popular build
    choices, add derived features and apply the final ordering/filtering.

    :return: the finished champion statistics frame.
    """
    champs = database_champs(df)
    champ_stats = count_groupby(df, create_groupby(df))
    for enrich in (average_all_lists, wr_by_time_all, popular_all):
        champ_stats = enrich(df, champ_stats, champs)
    return final_tweaks(extra_features(champ_stats))
161 | 
def read_files():
    """Load and return the pickled match data from ``match_data.pkl``.

    NOTE(review): unpickling executes arbitrary code, so this file must
    come from a trusted source.
    """
    return pd.read_pickle('match_data.pkl')
165 | 
# Script body: these statements run whenever the module is executed or
# imported. They load the match data, build the champion table and write it
# to disk.
df = read_files()
champ_stats = champ_parse(df)
# Persist the result in both pickle and CSV form.
champ_stats.to_pickle('champ_stats2.pkl')
champ_stats.to_csv('champ_stats2.csv')
170 | 
171 | 


--------------------------------------------------------------------------------
/Data Creation - Jungle Stats per Champion/get_match_ids.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import time as t
  3 | import datetime
  4 | from multiprocessing import Pool
  5 | 
  6 | 
  7 | # Python code to remove duplicate elements
  8 | def remove_duplicates(list1, list2, list3):
  9 |     final_list1 = []
 10 |     final_list2 = []
 11 |     final_list3 = []
 12 |     for i in range(len(list1)):
 13 |         if list1[i] not in final_list1:
 14 |             final_list1.append(list1[i])
 15 |             final_list2.append(list2[i])
 16 |             final_list3.append(list3[i])
 17 |     return final_list1, final_list2, final_list3
 18 | 
 19 | 
 20 | def return_json(URL, session):
 21 |     while True:
 22 |         response = session.get(URL)
 23 |         try:
 24 |             if response.json()['status']['status_code'] == 404:
 25 |                 break
 26 |             elif response.json()['status']['status_code'] == 429:
 27 |                 t.sleep(10)
 28 |                 continue
 29 |             else:
 30 |                 break
 31 |         except:
 32 |             break
 33 |     return response.json()
 34 | 
 35 | 
 36 | def set_volume(tier):
 37 |     if tier == 'CHALLENGER':
 38 |         size = 2
 39 |     elif tier == 'GRANDMASTER':
 40 |         size = 4
 41 |     elif tier == 'MASTER':
 42 |         size = 4
 43 |     else:
 44 |         size = 10
 45 |     return size
 46 | 
 47 | 
 48 | def get_summoners(fullRegionList, tierList, key, session):
 49 |     summonerIds, summonerRegions, summonerTier = [], [], []
 50 |     for y in fullRegionList:
 51 |         for z in range(len(tierList)):
 52 |             size = set_volume(tierList[z][0])
 53 |             for x in range(size):
 54 |                 page = x + 1
 55 |                 URL_ids = ('https://' + y + '.api.riotgames.com/lol/league-exp/v4/entries/RANKED_SOLO_5x5/' +
 56 |                            tierList[z][0] + '/' + tierList[z][1] + '/?page=' + str(page) + '&api_key=' + key)
 57 |                 json = return_json(URL_ids, session)
 58 |                 for x in range(0, len(json)):
 59 |                     summonerIds.append(json[x]['summonerId'])
 60 |                     summonerRegions.append(y)
 61 |                     summonerTier.append(tierList[z][0])
 62 |     return summonerIds, summonerRegions, summonerTier
 63 | 
 64 | 
 65 | def name_to_id(selectedIds, selectedRegions, selectedTiers,  key, session):
 66 |     accountIds, accountRegions, accountTiers = [], [], []
 67 |     for i in range(len(selectedIds)):
 68 |         URL = 'https://' + selectedRegions[i] + '.api.riotgames.com/lol/summoner/v4/summoners/' + selectedIds[
 69 |             i] + '/?api_key=' + key
 70 |         json = return_json(URL, session)
 71 |         account_id = json['accountId']
 72 |         accountIds.append(account_id)
 73 |         accountRegions.append(selectedRegions[i])
 74 |         accountTiers.append(selectedTiers[i])
 75 |     return accountIds, accountRegions, accountTiers
 76 | 
 77 | 
 78 | def find_time_interval(yyyy, mm, dd):
 79 |     # Set week period prior to given date
 80 |     ed = datetime.date(yyyy, mm, dd)
 81 |     endTime = t.mktime(ed.timetuple())
 82 |     endTime = str(int(endTime)) + "000"
 83 |     sd = datetime.date(yyyy, mm, dd) - datetime.timedelta(7)
 84 |     startTime = t.mktime(sd.timetuple())
 85 |     startTime = str(int(startTime)) + "000"
 86 |     return startTime, endTime
 87 | 
 88 | 
 89 | def id_to_match(accountIds, accountRegions, accountTiers, yyyy, mm, dd, key, session):
 90 |     startTime, endTime = find_time_interval(yyyy, mm, dd)
 91 |     gameIds, regions, tiers = [], [], []
 92 |     for i in range(len(accountIds)):
 93 |         URL = 'https://' + accountRegions[i] + '.api.riotgames.com/lol/match/v4/matchlists/by-account/' + accountIds[
 94 |             i] + '/?endTime=' + endTime + '&beginTime=' + startTime + '&api_key=' + key
 95 |         try:
 96 |             json = return_json(URL, session)
 97 |             len_matches = len(json['matches'])
 98 |             if len_matches > 20:
 99 |                 len_matches = 20
100 |             for j in range(0, len_matches):
101 |                 if json['matches'][j]['queue'] == 420:
102 |                     gameId = json['matches'][j]['gameId']
103 |                     gameIds.append(gameId)
104 |                     regions.append(accountRegions[i])
105 |                     tiers.append(accountTiers[i])
106 |         except:
107 |             pass
108 |     return gameIds, regions, tiers
109 | 
def create_args(regions, tiers, yyyy, mm, dd, key, session):
    """Build one argument list per (region, tier) combination.

    Each argument list has the shape
    ``[[region], [tier], yyyy, mm, dd, key, session]`` so it can be unpacked
    into ``single_run``, which expects lists of regions and tiers.
    """
    shared_tail = [yyyy, mm, dd, key, session]
    return [
        [[region], [tier]] + shared_tail
        for region in regions
        for tier in tiers
    ]
122 | 
def single_run(regions, tiers, yyyy, mm, dd, key, session):
    """Run the full id pipeline for the given regions and tiers.

    Chains ``get_summoners`` -> ``name_to_id`` -> ``id_to_match`` and returns
    the resulting (game ids, regions, tiers) triple.
    """
    summoner_ids, summoner_regions, summoner_tiers = get_summoners(regions, tiers, key, session)
    account_ids, account_regions, account_tiers = name_to_id(
        summoner_ids, summoner_regions, summoner_tiers, key, session)
    game_ids, game_regions, game_tiers = id_to_match(
        account_ids, account_regions, account_tiers, yyyy, mm, dd, key, session)
    return game_ids, game_regions, game_tiers
128 | 
129 | 
def get_match_ids(regions, tiers, yyyy, mm, dd, key, session):
    """Gather de-duplicated match ids for every region/tier combination.

    With the hard-coded toggle on, one ``single_run`` task per (region, tier)
    pair is fanned out over a pool of 8 processes and the per-task results are
    concatenated. With it off, ``single_run`` is called once for everything.
    The combined lists are passed through ``remove_duplicates`` (defined
    elsewhere in this module) before being returned.
    """
    args = create_args(regions, tiers, yyyy, mm, dd, key, session)
    use_pool = True  # hard-coded switch between pooled and sequential execution
    if use_pool:
        with Pool(processes=8) as pool:
            results = pool.starmap(single_run, args)
        gameIds, all_regions, all_tiers = [], [], []
        for task_result in results:
            gameIds.extend(task_result[0])
            all_regions.extend(task_result[1])
            all_tiers.extend(task_result[2])
    else:
        gameIds, all_regions, all_tiers = single_run(regions, tiers, yyyy, mm, dd, key, session)
    dedup_gameIds, dedup_regions, dedup_tiers = remove_duplicates(gameIds, all_regions, all_tiers)
    return dedup_gameIds, dedup_regions, dedup_tiers
145 | 


--------------------------------------------------------------------------------
/Data Creation - Jungle Stats per Champion/main.py:
--------------------------------------------------------------------------------
 1 | # Imports
 2 | import requests
 3 | import time as t
 4 | import get_match_ids
 5 | import match_data
 6 | import champ_parse
 7 | 
 8 | 
 9 | def main(regions, tiers, yyyy, mm, dd, key, session):
10 |     start = t.time()
11 |     print("Starting process")
12 |     print("Getting match IDs..")
13 |     match_ids, regions, tiers = get_match_ids.get_match_ids(regions, tiers, yyyy, mm, dd, key, session)
14 |     print(str(len(match_ids)), " match IDs found")
15 |     print("Building match data")
16 |     data = match_data.all_runs(match_ids, regions, tiers, key, session)
17 |     print("Match data built, saving progress")
18 |     data.to_pickle('match_data2.pkl')
19 |     print("Aggregating champion level information")
20 |     champ_stats = champ_parse.champ_parse(data)
21 |     print("Data aggregated, saving progress")
22 |     champ_stats.to_pickle('champ_stats2.pkl')
23 |     champ_stats.to_csv('champ_stats2.csv')
24 |     finish = t.time()
25 |     run_time = finish - start
26 |     print("Process complete, total run time: "+ str(int(run_time / 60)) + " minutes")
27 |     return data, champ_stats
28 | 
29 | 
if __name__ == '__main__':
    # Script entry point: configure one run and hand everything to main().
    # Regions to query; the commented-out entries are currently disabled.
    regions = ['EUW1',
               #'KR',
               #'NA1'
               ]
    # Each entry is [tier, division]; the commented-out entries are currently disabled.
    tiers = [
        ['CHALLENGER', 'I'],
        ['GRANDMASTER', 'I'],
        #['MASTER', 'I'],
        #['DIAMOND', 'II'],
        #['PLATINUM', 'II'],
        #['GOLD', 'II'],
        #['SILVER', 'II'],
        #['BRONZE', 'II'],
        #['IRON', 'II']
    ]
    # Date (year, month, day) forwarded to main().
    yyyy = 2020
    mm = 4
    dd = 19
    # NOTE(review): looks like a placeholder — presumably a real API key is substituted before running.
    key = 'SECURE-KEY'
    # A single requests session is created here and passed down through main().
    session = requests.Session()
    main(regions, tiers, yyyy, mm, dd, key, session)
52 | 
53 | 


--------------------------------------------------------------------------------
/Data Creation - Jungle Stats per Champion/match_data.py:
--------------------------------------------------------------------------------
  1 | # Imports
  2 | import pandas as pd
  3 | from multiprocessing import Pool
  4 | 
  5 | 
  6 | # For any URL, return the JSON
  7 | def return_json(URL, session):
  8 |     while True:
  9 |         response = session.get(URL)
 10 |         try:
 11 |             # Check for 404 error and quit if received
 12 |             if response.json()['status']['status_code'] == 404:
 13 |                 return "error - status code 404"
 14 |             # Check for 429 (too many requests made), sleep if received
 15 |             elif response.json()['status']['status_code'] == 429:
 16 |                 t.sleep(10)
 17 |                 continue
 18 |             else:
 19 |                 return "error - unknown reason"
 20 |         except:
 21 |             break
 22 |     return response.json()
 23 | 
 24 | 
 25 | # Provide the match-id & region, receive the json of match timeline (1 minute interval of match data)
 26 | def get_matchTimeline(matchId, region, key, session):
 27 |     URL = 'https://' + region + '.api.riotgames.com/lol/match/v4/timelines/by-match/' + str(
 28 |         matchId) + '/?api_key=' + key
 29 |     json = return_json(URL, session)
 30 |     return json
 31 | 
 32 | 
 33 | # Provide the match-id & region, receive the match information (game length, participants etc..)
 34 | def get_gameInfo(matchId, region, key, session):
 35 |     URL = 'https://' + region + '.api.riotgames.com/lol/match/v4/matches/' + str(matchId) + '/?api_key=' + key
 36 |     json = return_json(URL, session)
 37 |     return json
 38 | 
 39 | 
 40 | # Provide the match data json and return the jungler + jungler participant number
 41 | def find_jungler(json, side):
 42 |     # The most jungle camps cleared so far, starting at none
 43 |     mostCamps = 0
 44 |     # Limit to blue side participants
 45 |     if side == 'Blue':
 46 |         min_id = 1
 47 |         max_id = 6
 48 |     # Limit to red side participants
 49 |     if side == 'Red':
 50 |         min_id = 6
 51 |         max_id = 11
 52 |     # For each player, check how much Jungle CS they have at 4 minutes
 53 |     for i in range(min_id, max_id):
 54 |         jungle_cs = json['frames'][4]['participantFrames'][str(i)]['jungleMinionsKilled']
 55 |         # If it's the most so far, make them the jungler and set the new record at their Jungle CS value
 56 |         if jungle_cs > mostCamps:
 57 |             jungler = i
 58 |             # Find their participant ID
 59 |             parti_jungler = json['frames'][1]['participantFrames'][str(i)]['participantId']
 60 |             mostCamps = jungle_cs
 61 |     # If no one has Jungle CS, there's an error
 62 |     if mostCamps == 0:
 63 |         return "No blue side jungler detected"
 64 |     else:
 65 |         return jungler, parti_jungler
 66 | 
 67 | 
 68 | def find_champion(json, parti_jungler):
 69 |     champion = json['participants'][parti_jungler - 1]['championId']
 70 |     return champion
 71 | 
 72 | 
 73 | def check_team_pos(parti_jungler):
 74 |     if parti_jungler <= 5:
 75 |         team_pos = 0
 76 |     else:
 77 |         team_pos = 1
 78 |     return team_pos
 79 | 
 80 | 
 81 | def check_result(result):
 82 |     if result == 'Win' or result == 'True' or result == True:
 83 |         result = 1
 84 |     else:
 85 |         result = 0
 86 |     return result
 87 | 
 88 | 
 89 | def create_var_list(variable, length, player_data):
 90 |     var_list = []
 91 |     for i in range(length):
 92 |         var_list.append(player_data[variable + str(i)])
 93 |     return var_list
 94 | 
 95 | 
 96 | def check_parti(variable, player_data):
 97 |     try:
 98 |         if check_result(player_data[variable + 'Kill']) or check_result(player_data[variable + 'Assist']):
 99 |             parti = 1
100 |         else:
101 |             parti = 0
102 |     except:
103 |         parti = 0
104 |     return parti
105 | 
106 | 
def total_kd(game_json, parti_jungler):
    """Return (total kills, total deaths) for the team of ``parti_jungler``.

    Participant ids up to 5 use participants at list positions 0-4; higher
    ids use positions 5-9.
    """
    first_index = 0 if parti_jungler <= 5 else 5
    team_stats = [
        game_json['participants'][index]['stats']
        for index in range(first_index, first_index + 5)
    ]
    total_kills = sum(stats['kills'] for stats in team_stats)
    total_deaths = sum(stats['deaths'] for stats in team_stats)
    return total_kills, total_deaths
122 | 
123 | 
def game_info(game_json, parti_jungler):
    """Return team-level facts for the team of ``parti_jungler``.

    The list is, in order: game duration, result, first baron, first dragon,
    first rift herald (all four flags normalised to 1/0 by ``check_result``),
    then the team's baron, dragon and rift herald kill counts.
    """
    team_pos = check_team_pos(parti_jungler)
    game_duration = game_json['gameDuration']
    team_data = game_json['teams'][team_pos]
    flags = [check_result(team_data[field])
             for field in ('win', 'firstBaron', 'firstDragon', 'firstRiftHerald')]
    counts = [team_data[field]
              for field in ('baronKills', 'dragonKills', 'riftHeraldKills')]
    return [game_duration] + flags + counts
137 | 
138 | 
def player_info(game_json, parti_jungler, neutrals):
    """Collect end-of-game statistics for one participant.

    Args:
        game_json: Match payload containing a ``participants`` list.
        parti_jungler: 1-based participant id of the player to describe.
        neutrals: Iterable of numbers; the caller in this file passes the last
            three values of ``game_info`` (team baron, dragon and rift herald
            kill counts).

    Returns:
        A list, in order: spell1, spell2, items (7), kills, deaths, assists,
        kill participation, share of team deaths, damage dealt to champions,
        vision score, damage taken, gold earned, lane minions, enemy-jungle
        monsters, own-jungle monsters, estimated scuttles killed, vision wards
        bought, first-blood participation, first-tower participation,
        perks (6), primary perk style, secondary perk style, stat perks (3).
    """
    participant = game_json['participants'][parti_jungler - 1]
    spell1 = participant['spell1Id']
    spell2 = participant['spell2Id']
    player_data = participant['stats']
    items = create_var_list('item', 7, player_data)
    kills = player_data['kills']
    deaths = player_data['deaths']
    assists = player_data['assists']
    total_kills, total_deaths = total_kd(game_json, parti_jungler)
    # Guard on the team total, not the player's own kills: previously a player
    # with assists but zero kills was reported with a kill participation of 0.
    if total_kills > 0:
        kp = (kills + assists) / total_kills
    else:
        kp = 0
    if deaths > 0:
        death_perc = deaths / total_deaths
    else:
        death_perc = 0
    damage_dealt = player_data['totalDamageDealtToChampions']
    vision_score = player_data['visionScore']
    damage_taken = player_data['totalDamageTaken']
    gold_earnt = player_data['goldEarned']
    lane_minions = player_data['totalMinionsKilled']
    enemy_jungle = player_data['neutralMinionsKilledEnemyJungle']
    friendly_jungle = player_data['neutralMinionsKilledTeamJungle']
    # NOTE(review): presumably an estimate of the neutral-minion credit the
    # player got from the team's epic monsters — confirm the 0.8 and 4 factors.
    neutral_jungle = (sum(neutrals) * 0.8) * 4
    # Whatever neutral credit is left after both jungles and the estimate
    # above is attributed to scuttles, at 4 per scuttle.
    scuttles_killed = round((player_data['neutralMinionsKilled'] - enemy_jungle - friendly_jungle - neutral_jungle) / 4)
    vision_wards = player_data['visionWardsBoughtInGame']
    fb_parti = check_parti('firstBlood', player_data)
    ft_parti = check_parti('firstTower', player_data)
    perks = create_var_list('perk', 6, player_data)
    perk_main = player_data['perkPrimaryStyle']
    perk_second = player_data['perkSubStyle']
    runes = create_var_list('statPerk', 3, player_data)
    info = [spell1, spell2, items, kills, deaths, assists, kp, death_perc, damage_dealt, vision_score, damage_taken,
            gold_earnt, lane_minions, enemy_jungle, friendly_jungle, scuttles_killed, vision_wards,
            fb_parti, ft_parti, perks, perk_main, perk_second, runes]
    return info
177 | 
178 | 
def find_frame(timeline_json, parti_jungler):
    """Return the participant-frame key (as a string) holding ``parti_jungler``.

    Keys '1'..'N' of the first frame's ``participantFrames`` are checked in
    order; ``None`` is returned when no frame carries that participant id.
    """
    player_frames = timeline_json['frames'][0]['participantFrames']
    for slot in range(1, len(player_frames) + 1):
        frame_key = str(slot)
        if player_frames[frame_key]['participantId'] == parti_jungler:
            return frame_key
    return None
187 | 
188 | 
def kill_check(event, parti_jungler):
    """Report whether ``parti_jungler`` was the killer or an assister in ``event``.

    Returns ``(True, event timestamp)`` when involved, otherwise ``(False, 0)``.
    """
    involved = (event['killerId'] == parti_jungler
                or parti_jungler in event['assistingParticipantIds'])
    if not involved:
        return False, 0
    return True, event['timestamp']
197 | 
198 | 
def timeline_info(timeline_json, parti_jungler):
    """Extract per-frame stats and relevant events for one participant.

    For every frame the participant's ``totalGold``, ``xp`` and
    ``jungleMinionsKilled`` are recorded. Events are split into:

    * CHAMPION_KILL events the participant was involved in (per
      ``kill_check``) -> their timestamps go to ``kp_time``;
    * other events whose ``participantId`` equals ``parti_jungler`` -> kept
      whole in ``player_events``.

    Events missing the keys needed for that decision are skipped.

    Returns:
        (gpm, xpm, cpm, kp_time, total_early_kills, player_events), where
        ``total_early_kills`` counts ``kp_time`` entries below 600000.
    """
    frame = find_frame(timeline_json, parti_jungler)
    gpm, xpm, cpm = [], [], []
    player_events, kp_time = [], []
    for frame_data in timeline_json['frames']:
        participant_frame = frame_data['participantFrames'][frame]
        gpm.append(participant_frame['totalGold'])
        xpm.append(participant_frame['xp'])
        cpm.append(participant_frame['jungleMinionsKilled'])
        for event in frame_data['events']:
            try:
                if event['type'] == 'CHAMPION_KILL':
                    jung_inv, kill_time = kill_check(event, parti_jungler)
                    if jung_inv:
                        kp_time.append(kill_time)
                elif event['participantId'] == parti_jungler:
                    player_events.append(event)
            except (KeyError, TypeError):
                # Not every event carries these keys (e.g. no 'participantId');
                # such events are irrelevant here. Other errors now surface
                # instead of being silently dropped by a bare except.
                continue
    total_early_kills = sum(stamp < 600000 for stamp in kp_time)
    return gpm, xpm, cpm, kp_time, total_early_kills, player_events
229 | 
230 | 
def parse_player_events(player_events):
    """Split a player's events into purchases, undos and skill-ups.

    Returns:
        item_purchase: dict ``{n: [itemId, timestamp]}`` numbered from 0 in
            order of appearance (ITEM_PURCHASED events).
        item_undo: dict ``{n: [beforeId, timestamp]}`` numbered the same way
            (ITEM_UNDO events).
        skill_up: list of ``skillSlot`` values (SKILL_LEVEL_UP events).
    Any other event type is ignored.
    """
    item_purchase, item_undo, skill_up = {}, {}, []
    for event in player_events:
        kind = event['type']
        if kind == 'ITEM_PURCHASED':
            # Keys stay sequential, so the current size is the next index.
            item_purchase[len(item_purchase)] = [event['itemId'], event['timestamp']]
        elif kind == 'ITEM_UNDO':
            item_undo[len(item_undo)] = [event['beforeId'], event['timestamp']]
        elif kind == 'SKILL_LEVEL_UP':
            skill_up.append(event['skillSlot'])
    return item_purchase, item_undo, skill_up
251 | 
252 | 
def remove_undos(item_purchase, item_undo):
    """Drop purchases that were undone and return the remaining item ids.

    For each undo, the first not-yet-cancelled purchase of the same item whose
    timestamp is within 30000 of the undo is cancelled. Cancelled entries are
    deleted from ``item_purchase`` in place, and the item ids of the remaining
    purchases are returned in dictionary order.
    """
    cancelled = []
    for undo in item_undo.values():
        undone_item, undone_at = undo[0], undo[1]
        for purchase_key, purchase in item_purchase.items():
            same_item = purchase[0] == undone_item
            if same_item and abs(undone_at - purchase[1]) < 30000 and purchase_key not in cancelled:
                cancelled.append(purchase_key)
                break
    for purchase_key in cancelled:
        del item_purchase[purchase_key]
    return [purchase[0] for purchase in item_purchase.values()]
270 | 
271 | 
def individual_data(gameinfo_json, timeline_json, jungler):
    """Build the full stat row for one jungler.

    The row is ``game_info`` + ``player_info`` followed by, in this order:
    gpm, xpm, cpm, kp_time, total_early_kills, item purchases (with undone
    purchases removed) and skill-up order.
    """
    row = game_info(gameinfo_json, jungler)
    # The last three game_info values are handed to player_info as `neutrals`.
    row.extend(player_info(gameinfo_json, jungler, row[-3:]))
    gpm, xpm, cpm, kp_time, total_early_kills, player_events = timeline_info(timeline_json, jungler)
    purchases, undos, skill_up = parse_player_events(player_events)
    kept_purchases = remove_undos(purchases, undos)
    # diff_values reads this tail by position: keep the order in sync with it.
    row.extend([gpm, xpm, cpm, kp_time, total_early_kills, kept_purchases, skill_up])
    return row
283 | 
284 | 
def diff_values(blue_info, red_info):
    """Return blue-minus-red camp and gold differences at two frame indices.

    Both inputs are rows whose 5th-from-last element is the per-frame camp
    list (cpm) and whose 7th-from-last element is the per-frame gold list
    (gpm); this positional layout must match the tail appended in
    ``individual_data``.

    Returns:
        ``[cpm_diff_10, cpm_diff_15, gpm_diff_10, gpm_diff_15]``. The "10"
        values are read at frame index 11 and the "15" values at index 16.
        When a row or series is too short for an index, both differences for
        that index are 0.
    """
    def _diffs_at(frame_index):
        # Camp and gold are computed together so one missing value zeroes both.
        try:
            cpm_diff = blue_info[-5][frame_index] - red_info[-5][frame_index]
            gpm_diff = blue_info[-7][frame_index] - red_info[-7][frame_index]
        except (IndexError, TypeError):
            return 0, 0
        return cpm_diff, gpm_diff

    cpm_diff_10, gpm_diff_10 = _diffs_at(11)
    cpm_diff_15, gpm_diff_15 = _diffs_at(16)
    return [cpm_diff_10, cpm_diff_15, gpm_diff_10, gpm_diff_15]
302 | 
303 | 
def match_info(gameinfo_json, timeline_json, blue_jungler, red_jungler):
    """Build both junglers' rows and append their head-to-head differences.

    Each value from ``diff_values`` is appended as-is to the blue row and
    negated to the red row.
    """
    blue_info = individual_data(gameinfo_json, timeline_json, blue_jungler)
    red_info = individual_data(gameinfo_json, timeline_json, red_jungler)
    for difference in diff_values(blue_info, red_info):
        blue_info.append(difference)
        red_info.append(-1 * difference)
    return blue_info, red_info
312 | 
313 | 
def match_run(matchId, region, tier, key, session):
    """Fetch and parse one match into a blue and a red jungler row.

    Returns:
        ``([blue_info, red_info], matchId, region)`` on success. Each row
        starts with the champion id and ``tier`` followed by the values from
        ``match_info``.
        ``('Match Error', matchId, region)`` when anything fails while
        fetching or parsing. That includes the case where ``find_jungler``
        returns its message string instead of a tuple, because the lookups
        that follow then raise.
    """
    try:
        timeline_json = get_matchTimeline(matchId, region, key, session)
        gameinfo_json = get_gameInfo(matchId, region, key, session)
        blue_jungler = find_jungler(timeline_json, 'Blue')[1]
        red_jungler = find_jungler(timeline_json, 'Red')[1]
        blue_champ = find_champion(gameinfo_json, blue_jungler)
        red_champ = find_champion(gameinfo_json, red_jungler)
        blue_info, red_info = match_info(gameinfo_json, timeline_json, blue_jungler, red_jungler)
        blue_info.insert(0, blue_champ)
        red_info.insert(0, red_champ)
        blue_info.insert(1, tier)
        red_info.insert(1, tier)
        return [blue_info, red_info], matchId, region
    except Exception:
        # Per-match boundary: one bad match must not abort the batch.
        # `Exception` (not a bare except) keeps KeyboardInterrupt/SystemExit
        # able to stop the worker.
        return 'Match Error', matchId, region
330 | 
331 | 
def create_df(data):
    """Wrap the collected jungler rows in a DataFrame with named columns.

    ``data`` is a list of rows; the 44 column names below are applied in
    order.
    """
    identity_cols = ['champ_id', 'tier']
    team_cols = ['game_duration', 'result', 'first_baron', 'first_dragon', 'first_herald',
                 'total_barons', 'total_dragons', 'total_rifts']
    player_cols = ['spell1', 'spell2', 'items', 'kills', 'deaths', 'assists', 'kp', 'death_perc',
                   'damage_dealt', 'vision_score', 'damage_taken', 'gold_earnt', 'lane_minions',
                   'enemy_jungle', 'friendly_jungle', 'scuttles_killed', 'vision_wards',
                   'fb_parti', 'ft_parti', 'perks', 'perk_main', 'perk_second', 'runes']
    timeline_cols = ['gpm', 'xpm', 'cpm', 'kp_time', 'total_early_kills', 'item_purchase', 'skill_up']
    diff_cols = ['cpm_diff_10', 'cpm_diff_15', 'gpm_diff_10', 'gpm_diff_15']
    column_names = identity_cols + team_cols + player_cols + timeline_cols + diff_cols
    return pd.DataFrame(data, columns=column_names)
345 | 
def get_args(matchIds, regions, tiers, key, session):
    """Return a zip of ``(matchId, region, tier, key, session)`` tuples.

    ``key`` and ``session`` are repeated once per match id so every tuple
    carries the same values.
    """
    match_count = len(matchIds)
    return zip(matchIds, regions, tiers, [key] * match_count, [session] * match_count)
351 | 
352 | 
def all_runs(matchIds, regions, tiers, key, session):
    """Process every match in a pool of 8 processes and return one DataFrame.

    Each match goes through ``match_run``. Successful results contribute their
    rows to the frame; results flagged 'Match Error' have their match id and
    region noted in local lists and contribute no rows.
    """
    args = get_args(matchIds, regions, tiers, key, session)
    data, fail_id, fail_region = [], [], []
    with Pool(processes=8) as pool:
        for info, matchId, region in pool.starmap(match_run, args):
            if info == 'Match Error':
                fail_id.append(matchId)
                fail_region.append(region)
                continue
            for row in info:
                data.append(row)
    return create_df(data)
366 | 
367 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Jack J Williams Portfolio
 2 | 
 3 | Contains publicly available projects that I have been working on. No project/data contains sensitive information.
 4 | 
 5 | Note that some of these projects were created at the start of my Python career and will not reflect my current coding standards!
 6 | 
 7 | A private directory is available that contains the productionised code used to create the dataset for the League of Legends Analytics site Jung.GG, but permission is granted only to those with agreeable circumstances (recruiters/colleagues/friends) given my desire to keep our methods safe from competitors!
 8 | 
 9 | ## Current Portfolio:
10 | 
 11 | **CNN - Music Genre Classification**: Building a CNN model capable of classifying the genre of a song based on a 30-second extract. First transforms the audio files into mel spectrograms (images of sound over time). Builds a base LeNet model (50% accuracy). Optimizes the model through iterative parameter tuning and architecture changes to a final model of 65% accuracy (95% for 2 of the 5 genres).
12 | 
 13 | **RNN - Marvel Character Generator**: An RNN model that takes the text from the Marvel Wiki entries and uses it to generate a backstory for a character. Results vary and the model still requires better tuning, but it was an interesting introduction!
14 | 
15 | **MCMC - Breaking the Enigma Code**: Using MCMC algorithm to break the enigma code. Used for creating the Medium article: 
16 | 
 17 | **Data Creation - Jungle Stats per Champion**: Builds a full dataset of each League of Legends champion's average jungle stats per ranked tier. First creates a dataset of match IDs for each tier, loops through these to fetch the match JSONs, then parses each JSON to pull and aggregate key information. All performed with parallel processing. 
18 | 
 19 | **Principle Component Analysis**: Exploring techniques to reduce the dimensionality of the 2012 U.S. Army Anthropometric Survey (ANSUR-2). Using predominantly PCA, but a small comparison to the more complex techniques of UMAP and t-SNE is also performed. 
20 | 
21 | **Principle Component Analysis & Recommendation Engine**: Use PCA to convert data about LoL Champions into 2D space to be used for Euclidean distance "Content-based recommendation" engine. 
22 | 
23 | **Classification**: Using medical data to classify patients. Transformed using several dimensionality reduction techniques (UMAP/PCA/TSNE) then classified through Supervised Learning (Gaussian Naive Bayes/K-Nearest Neighbors/SVC/Decision Trees), scored on T1 errors through cross-validation to pick optimal model. Further Unsupervised Learning (K-Means / Gaussian Mixture) used to test for further improvements in classification (whether additional categories are beneficial). 
24 | 
 25 | **Data Analysis and API**: Using the Riot API to gather data about LoL players, attempting to prove whether "tilted" players are more likely to lose their next game. Used for creating the Medium article:
26 | 
27 | **Champion Classification using UMAP and KMeans**: Using Champion statistics alongside UMAP dimensionality reduction to classify LoL champions into four per lane. Used for article and future work (reference to follow)


--------------------------------------------------------------------------------
/RNN - Marvel Character Generator/RNN_Marvel_Generator.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Marvel_Generator.ipynb",
  7 |       "provenance": [],
  8 |       "collapsed_sections": [],
  9 |       "machine_shape": "hm"
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     },
 15 |     "accelerator": "GPU"
 16 |   },
 17 |   "cells": [
 18 |     {
 19 |       "cell_type": "code",
 20 |       "metadata": {
 21 |         "id": "ggUwY8AgaHqE",
 22 |         "colab_type": "code",
 23 |         "outputId": "5ff35c03-3c85-405f-bfbe-814b5ee89ad6",
 24 |         "colab": {
 25 |           "base_uri": "https://localhost:8080/",
 26 |           "height": 34
 27 |         }
 28 |       },
 29 |       "source": [
 30 |         "from google.colab import drive\n",
 31 |         "drive.mount('/content/drive')\n",
 32 |         "import os\n",
 33 |         "os.chdir('drive/My Drive')"
 34 |       ],
 35 |       "execution_count": 0,
 36 |       "outputs": [
 37 |         {
 38 |           "output_type": "stream",
 39 |           "text": [
 40 |             "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
 41 |           ],
 42 |           "name": "stdout"
 43 |         }
 44 |       ]
 45 |     },
 46 |     {
 47 |       "cell_type": "code",
 48 |       "metadata": {
 49 |         "id": "gLfWWhWpatZf",
 50 |         "colab_type": "code",
 51 |         "colab": {}
 52 |       },
 53 |       "source": [
 54 |         "from __future__ import print_function\n",
 55 |         "from keras.callbacks import LambdaCallback\n",
 56 |         "from keras.utils import to_categorical\n",
 57 |         "from keras.models import Sequential\n",
 58 |         "from keras.layers import Dense, Activation\n",
 59 |         "from keras.optimizers import RMSprop\n",
 60 |         "from keras.preprocessing.text import Tokenizer\n",
 61 |         "from keras.layers import LSTM\n",
 62 |         "from keras.layers import Embedding\n",
 63 |         "from collections import Counter\n",
 64 |         "from keras.preprocessing.sequence import pad_sequences\n",
 65 |         "\n",
 66 |         "import numpy as np\n",
 67 |         "import random\n",
 68 |         "import sys\n",
 69 |         "import io\n",
 70 |         "import string"
 71 |       ],
 72 |       "execution_count": 0,
 73 |       "outputs": []
 74 |     },
 75 |     {
 76 |       "cell_type": "code",
 77 |       "metadata": {
 78 |         "id": "CIrhuYIaazUx",
 79 |         "colab_type": "code",
 80 |         "outputId": "7bffd353-61a5-4a9e-aa10-a54099e42435",
 81 |         "colab": {
 82 |           "base_uri": "https://localhost:8080/",
 83 |           "height": 34
 84 |         }
 85 |       },
 86 |       "source": [
 87 |         "# Read data and check length\n",
 88 |         "text = open('marvel_data.txt', encoding=\"utf8\").read().lower()\n",
 89 |         "print('text length', len(text))"
 90 |       ],
 91 |       "execution_count": 0,
 92 |       "outputs": [
 93 |         {
 94 |           "output_type": "stream",
 95 |           "text": [
 96 |             "text length 28213380\n"
 97 |           ],
 98 |           "name": "stdout"
 99 |         }
100 |       ]
101 |     },
102 |     {
103 |       "cell_type": "code",
104 |       "metadata": {
105 |         "id": "Z7H6YIGeFscJ",
106 |         "colab_type": "code",
107 |         "colab": {}
108 |       },
109 |       "source": [
110 |         "# Reduce (stop memory crashes - may be worth looking into memory reduction techniques instead)\n",
111 |         "reduced_text = text[:1000000]"
112 |       ],
113 |       "execution_count": 0,
114 |       "outputs": []
115 |     },
116 |     {
117 |       "cell_type": "code",
118 |       "metadata": {
119 |         "id": "sqTbcbasa7Is",
120 |         "colab_type": "code",
121 |         "outputId": "2e345eff-679a-4664-cf57-b435611f9051",
122 |         "colab": {
123 |           "base_uri": "https://localhost:8080/",
124 |           "height": 54
125 |         }
126 |       },
127 |       "source": [
128 |         "# Random line\n",
129 |         "print(text[20086:20480])"
130 |       ],
131 |       "execution_count": 0,
132 |       "outputs": [
133 |         {
134 |           "output_type": "stream",
135 |           "text": [
136 |             " she then tied him up until the police arrived on the scene and showed them images of chord and the dealings of the ten rings which led to his arrest. midnight's fire has very subtle abilities, due to the energies of the well of all things. his physical attributes are at the peak of human possibility. he has enhanced speed, strength, agility, and sensory perception on par with black panther.\n"
137 |           ],
138 |           "name": "stdout"
139 |         }
140 |       ]
141 |     },
142 |     {
143 |       "cell_type": "code",
144 |       "metadata": {
145 |         "id": "pdjB_ACI79rv",
146 |         "colab_type": "code",
147 |         "colab": {}
148 |       },
149 |       "source": [
150 |         "# Required cleans\n",
151 |         "def clean_doc(doc):\n",
152 |         "\t# replace '--' with a space ' '\n",
153 |         "\tdoc = doc.replace('--', ' ')\n",
154 |         "\t# split into tokens by white space\n",
155 |         "\ttokens = doc.split()\n",
156 |         "\t# remove punctuation from each token\n",
157 |         "\ttable = str.maketrans('', '', string.punctuation)\n",
158 |         "\ttokens = [w.translate(table) for w in tokens]\n",
159 |         "\t# remove remaining tokens that are not alphabetic\n",
160 |         "\ttokens = [word for word in tokens if word.isalpha()]\n",
161 |         "\t# make lower case\n",
162 |         "\ttokens = [word.lower() for word in tokens]\n",
163 |         "\treturn tokens\n",
164 |         "\n",
165 |         "\t\n",
166 |         "tokens = clean_doc(reduced_text)"
167 |       ],
168 |       "execution_count": 0,
169 |       "outputs": []
170 |     },
171 |     {
172 |       "cell_type": "code",
173 |       "metadata": {
174 |         "id": "Wqu3YMYA9YzC",
175 |         "colab_type": "code",
176 |         "outputId": "f0931034-340f-4277-9794-4f2d14a2c9c6",
177 |         "colab": {
178 |           "base_uri": "https://localhost:8080/",
179 |           "height": 88
180 |         }
181 |       },
182 |       "source": [
183 |         "# Example of tokens\n",
184 |         "print(tokens[:200])\n",
185 |         "print('Total Tokens: %d' % len(tokens))\n",
186 |         "print('Unique Tokens: %d' % len(set(tokens)))"
187 |       ],
188 |       "execution_count": 0,
189 |       "outputs": [
190 |         {
191 |           "output_type": "stream",
192 |           "text": [
193 |             "['unnamed', 'cousin', 'due', 'to', 'false', 'accusation', 'spread', 'by', 'afari', 'against', 'him', 'and', 'the', 'black', 'spears', 'commander', 'amboola', 'they', 'were', 'both', 'suspected', 'by', 'queen', 'tananda', 'of', 'plotting', 'against', 'her', 'and', 'were', 'arrested', 'amboola', 'was', 'killed', 'by', 'a', 'demon', 'released', 'by', 'the', 'sorcerer', 'muru', 'agent', 'of', 'the', 'actual', 'plotter', 'tuthmes', 'but', 'aahmes', 'fate', 'remained', 'unknown', 'aaidan', 'blomfield', 'was', 'the', 'third', 'man', 'to', 'call', 'himself', 'the', 'unicorn', 'he', 'was', 'an', 'agent', 'of', 'stockpile', 'attempting', 'to', 'raid', 'stark', 'enterprises', 'for', 'morgan', 'stark', 'after', 'the', 'apparent', 'death', 'of', 'tony', 'stark', 'he', 'claimed', 'to', 'be', 'an', 'old', 'foe', 'of', 'iron', 'mans', 'but', 'it', 'is', 'unknown', 'if', 'he', 'really', 'was', 'or', 'was', 'just', 'riding', 'on', 'the', 'reputation', 'of', 'the', 'original', 'unicorn', 'he', 'displayed', 'superhuman', 'strength', 'and', 'toughness', 'enough', 'to', 'resist', 'iron', 'mans', 'repulsors', 'and', 'war', 'blasts', 'aala', 'was', 'the', 'sea', 'goddess', 'of', 'balsagoth', 'kyrie', 'was', 'worshiped', 'as', 'aala', 'and', 'learn', 'to', 'use', 'that', 'belief', 'until', 'high', 'priest', 'gothan', 'had', 'her', 'exiled', 'and', 'those', 'faithful', 'to', 'her', 'slaughtered', 'unborn', 'child', 'aala', 'was', 'the', 'wife', 'of', 'galan', 'in', 'the', 'sixth', 'iteration', 'of', 'she', 'was', 'pregnant', 'at', 'the', 'time', 'of', 'the', 'end', 'of', 'the', 'universe', 'aalbort', 'was', 'accountant', 'for', 'the', 'starship', 'principle', 'of', 'reasonable', 'interest', 'and', 'had', 'been', 'working', 'for']\n",
194 |             "Total Tokens: 167600\n",
195 |             "Unique Tokens: 15115\n"
196 |           ],
197 |           "name": "stdout"
198 |         }
199 |       ]
200 |     },
201 |     {
202 |       "cell_type": "code",
203 |       "metadata": {
204 |         "id": "NW7Fl7QD_08F",
205 |         "colab_type": "code",
206 |         "outputId": "66966bd5-480e-45c6-f070-652a90d5e304",
207 |         "colab": {
208 |           "base_uri": "https://localhost:8080/",
209 |           "height": 187
210 |         }
211 |       },
212 |       "source": [
213 |         "# Count the tokens\n",
214 |         "Counter(tokens).most_common(10)"
215 |       ],
216 |       "execution_count": 0,
217 |       "outputs": [
218 |         {
219 |           "output_type": "execute_result",
220 |           "data": {
221 |             "text/plain": [
222 |               "[('the', 11274),\n",
223 |               " ('to', 6773),\n",
224 |               " ('and', 5352),\n",
225 |               " ('of', 4666),\n",
226 |               " ('a', 3580),\n",
227 |               " ('was', 3255),\n",
228 |               " ('his', 2787),\n",
229 |               " ('in', 2429),\n",
230 |               " ('he', 2349),\n",
231 |               " ('by', 1586)]"
232 |             ]
233 |           },
234 |           "metadata": {
235 |             "tags": []
236 |           },
237 |           "execution_count": 9
238 |         }
239 |       ]
240 |     },
241 |     {
242 |       "cell_type": "code",
243 |       "metadata": {
244 |         "id": "lX7R2Vs391nT",
245 |         "colab_type": "code",
246 |         "outputId": "c2cb95dd-7d8b-49ca-9ed7-ef229ccb0f5f",
247 |         "colab": {
248 |           "base_uri": "https://localhost:8080/",
249 |           "height": 34
250 |         }
251 |       },
252 |       "source": [
253 |         "# Organise the token stream into overlapping fixed-size windows\n",
254 |         "# (every window is later joined into one space-separated training line)\n",
255 |         "length = 41\n",
256 |         "window_starts = range(len(tokens) - length)\n",
257 |         "sequences = [\n",
258 |         "    # take `length` consecutive tokens and glue them back into a line\n",
259 |         "    ' '.join(tokens[start:start + length])\n",
260 |         "    for start in window_starts\n",
261 |         "]\n",
262 |         "# report how many training lines were produced\n",
263 |         "print('Total Sequences: %d' % len(sequences))"
264 |       ],
265 |       "execution_count": 0,
266 |       "outputs": [
267 |         {
268 |           "output_type": "stream",
269 |           "text": [
270 |             "Total Sequences: 167559\n"
271 |           ],
272 |           "name": "stdout"
273 |         }
274 |       ]
275 |     },
276 |     {
277 |       "cell_type": "code",
278 |       "metadata": {
279 |         "id": "znUAJGSWedzm",
280 |         "colab_type": "code",
281 |         "colab": {}
282 |       },
283 |       "source": [
284 |         "# Encode the sequences\n",
285 |         "tokenizer = Tokenizer()\n",
286 |         "tokenizer.fit_on_texts(sequences)\n",
287 |         "sequences = tokenizer.texts_to_sequences(sequences)"
288 |       ],
289 |       "execution_count": 0,
290 |       "outputs": []
291 |     },
292 |     {
293 |       "cell_type": "code",
294 |       "metadata": {
295 |         "id": "hg3ueeQOAmGe",
296 |         "colab_type": "code",
297 |         "colab": {}
298 |       },
299 |       "source": [
300 |         "# Check the size of the vocab\n",
301 |         "vocab_size = len(tokenizer.word_index) + 1\n",
302 |         "print(vocab_size)"
303 |       ],
304 |       "execution_count": 0,
305 |       "outputs": []
306 |     },
307 |     {
308 |       "cell_type": "code",
309 |       "metadata": {
310 |         "id": "_DW0e5k8bKsI",
311 |         "colab_type": "code",
312 |         "colab": {}
313 |       },
314 |       "source": [
315 |         "# Separate every encoded row into its inputs (all ids but the last) and its target (the last id)\n",
316 |         "sequences = np.array(sequences)\n",
317 |         "X = sequences[:, :-1]\n",
318 |         "y = to_categorical(sequences[:, -1], num_classes=vocab_size)  # target column expanded to vocab_size classes\n",
319 |         "seq_length = X.shape[1]  # number of input ids per sample"
320 |       ],
321 |       "execution_count": 0,
322 |       "outputs": []
323 |     },
324 |     {
325 |       "cell_type": "code",
326 |       "metadata": {
327 |         "id": "rJ8hWf5pHeZK",
328 |         "colab_type": "code",
329 |         "outputId": "b249ded9-6f84-4351-84cf-d61bfb66eaa6",
330 |         "colab": {
331 |           "base_uri": "https://localhost:8080/",
332 |           "height": 408
333 |         }
334 |       },
335 |       "source": [
336 |         "# Create the RNN: embedding -> two stacked LSTM layers -> dense hidden layer -> softmax over the vocab\n",
337 |         "model = Sequential([\n",
338 |         "    Embedding(vocab_size, 50, input_length=seq_length),\n",
339 |         "    LSTM(100, return_sequences=True),\n",
340 |         "    LSTM(100),\n",
341 |         "    Dense(100, activation='relu'),\n",
342 |         "    Dense(vocab_size, activation='softmax')])\n",
343 |         "print(model.summary())\n"
344 |       ],
345 |       "execution_count": 0,
346 |       "outputs": [
347 |         {
348 |           "output_type": "stream",
349 |           "text": [
350 |             "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:541: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.\n",
351 |             "\n",
352 |             "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4432: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.\n",
353 |             "\n",
354 |             "Model: \"sequential_2\"\n",
355 |             "_________________________________________________________________\n",
356 |             "Layer (type)                 Output Shape              Param #   \n",
357 |             "=================================================================\n",
358 |             "embedding_1 (Embedding)      (None, 40, 50)            755800    \n",
359 |             "_________________________________________________________________\n",
360 |             "lstm_1 (LSTM)                (None, 40, 100)           60400     \n",
361 |             "_________________________________________________________________\n",
362 |             "lstm_2 (LSTM)                (None, 100)               80400     \n",
363 |             "_________________________________________________________________\n",
364 |             "dense_1 (Dense)              (None, 100)               10100     \n",
365 |             "_________________________________________________________________\n",
366 |             "dense_2 (Dense)              (None, 15116)             1526716   \n",
367 |             "=================================================================\n",
368 |             "Total params: 2,433,416\n",
369 |             "Trainable params: 2,433,416\n",
370 |             "Non-trainable params: 0\n",
371 |             "_________________________________________________________________\n",
372 |             "None\n"
373 |           ],
374 |           "name": "stdout"
375 |         }
376 |       ]
377 |     },
378 |     {
379 |       "cell_type": "code",
380 |       "metadata": {
381 |         "id": "Tyqtm-MyHk6w",
382 |         "colab_type": "code",
383 |         "outputId": "d4269baf-87c2-45a3-d868-d48d8e7e0dc0",
384 |         "colab": {
385 |           "base_uri": "https://localhost:8080/",
386 |           "height": 1000
387 |         }
388 |       },
389 |       "source": [
390 |         "# Compile & Fit\n",
391 |         "model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
392 |         "model.fit(X, y, batch_size=128, epochs=100)"
393 |       ],
394 |       "execution_count": 0,
395 |       "outputs": [
396 |         {
397 |           "output_type": "stream",
398 |           "text": [
399 |             "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:793: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.\n",
400 |             "\n",
401 |             "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3576: The name tf.log is deprecated. Please use tf.math.log instead.\n",
402 |             "\n",
403 |             "WARNING:tensorflow:From /tensorflow-1.15.0/python3.6/tensorflow_core/python/ops/math_grad.py:1424: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n",
404 |             "Instructions for updating:\n",
405 |             "Use tf.where in 2.0, which has the same broadcast rule as np.where\n",
406 |             "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1033: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.\n",
407 |             "\n",
408 |             "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1020: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.\n",
409 |             "\n",
410 |             "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3005: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.\n",
411 |             "\n",
412 |             "Epoch 1/100\n",
413 |             "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:190: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.\n",
414 |             "\n",
415 |             "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:197: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.\n",
416 |             "\n",
417 |             "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:207: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.\n",
418 |             "\n",
419 |             "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:216: The name tf.is_variable_initialized is deprecated. Please use tf.compat.v1.is_variable_initialized instead.\n",
420 |             "\n",
421 |             "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:223: The name tf.variables_initializer is deprecated. Please use tf.compat.v1.variables_initializer instead.\n",
422 |             "\n",
423 |             "167559/167559 [==============================] - 215s 1ms/step - loss: 7.1368 - acc: 0.0711\n",
424 |             "Epoch 2/100\n",
425 |             "167559/167559 [==============================] - 206s 1ms/step - loss: 6.6924 - acc: 0.0886\n",
426 |             "Epoch 3/100\n",
427 |             "167559/167559 [==============================] - 207s 1ms/step - loss: 6.4756 - acc: 0.1114\n",
428 |             "Epoch 4/100\n",
429 |             "167559/167559 [==============================] - 206s 1ms/step - loss: 6.2979 - acc: 0.1272\n",
430 |             "Epoch 5/100\n",
431 |             "167559/167559 [==============================] - 206s 1ms/step - loss: 6.1424 - acc: 0.1353\n",
432 |             "Epoch 6/100\n",
433 |             "167559/167559 [==============================] - 205s 1ms/step - loss: 6.0633 - acc: 0.1382\n",
434 |             "Epoch 7/100\n",
435 |             "167559/167559 [==============================] - 203s 1ms/step - loss: 5.9131 - acc: 0.1457\n",
436 |             "Epoch 8/100\n",
437 |             "167559/167559 [==============================] - 204s 1ms/step - loss: 5.7953 - acc: 0.1525\n",
438 |             "Epoch 9/100\n",
439 |             "167559/167559 [==============================] - 204s 1ms/step - loss: 5.6751 - acc: 0.1578\n",
440 |             "Epoch 10/100\n",
441 |             "167559/167559 [==============================] - 203s 1ms/step - loss: 5.5536 - acc: 0.1646\n",
442 |             "Epoch 11/100\n",
443 |             "167559/167559 [==============================] - 205s 1ms/step - loss: 5.6142 - acc: 0.1557\n",
444 |             "Epoch 12/100\n",
445 |             "167559/167559 [==============================] - 204s 1ms/step - loss: 5.4753 - acc: 0.1643\n",
446 |             "Epoch 13/100\n",
447 |             "167559/167559 [==============================] - 204s 1ms/step - loss: 5.3795 - acc: 0.1707\n",
448 |             "Epoch 14/100\n",
449 |             "167559/167559 [==============================] - 206s 1ms/step - loss: 5.2967 - acc: 0.1750\n",
450 |             "Epoch 15/100\n",
451 |             "167559/167559 [==============================] - 207s 1ms/step - loss: 5.2149 - acc: 0.1783\n",
452 |             "Epoch 16/100\n",
453 |             "167559/167559 [==============================] - 205s 1ms/step - loss: 5.1418 - acc: 0.1816\n",
454 |             "Epoch 17/100\n",
455 |             "167559/167559 [==============================] - 205s 1ms/step - loss: 5.0752 - acc: 0.1849\n",
456 |             "Epoch 18/100\n",
457 |             "167559/167559 [==============================] - 206s 1ms/step - loss: 5.0045 - acc: 0.1884\n",
458 |             "Epoch 19/100\n",
459 |             "167559/167559 [==============================] - 204s 1ms/step - loss: 4.9423 - acc: 0.1915\n",
460 |             "Epoch 20/100\n",
461 |             "167559/167559 [==============================] - 204s 1ms/step - loss: 4.8819 - acc: 0.1944\n",
462 |             "Epoch 21/100\n",
463 |             "167559/167559 [==============================] - 202s 1ms/step - loss: 4.8225 - acc: 0.1974\n",
464 |             "Epoch 22/100\n",
465 |             "167559/167559 [==============================] - 203s 1ms/step - loss: 4.7677 - acc: 0.1999\n",
466 |             "Epoch 23/100\n",
467 |             "167559/167559 [==============================] - 204s 1ms/step - loss: 4.7145 - acc: 0.2037\n",
468 |             "Epoch 24/100\n",
469 |             "167559/167559 [==============================] - 205s 1ms/step - loss: 4.6622 - acc: 0.2070\n",
470 |             "Epoch 25/100\n",
471 |             "167559/167559 [==============================] - 203s 1ms/step - loss: 4.6136 - acc: 0.2103\n",
472 |             "Epoch 26/100\n",
473 |             "167559/167559 [==============================] - 205s 1ms/step - loss: 4.5589 - acc: 0.2145\n",
474 |             "Epoch 27/100\n",
475 |             "167559/167559 [==============================] - 202s 1ms/step - loss: 4.5107 - acc: 0.2183\n",
476 |             "Epoch 28/100\n",
477 |             "167559/167559 [==============================] - 203s 1ms/step - loss: 4.4760 - acc: 0.2213\n",
478 |             "Epoch 29/100\n",
479 |             "167559/167559 [==============================] - 203s 1ms/step - loss: 4.4323 - acc: 0.2242\n",
480 |             "Epoch 30/100\n",
481 |             "167559/167559 [==============================] - 204s 1ms/step - loss: 4.3693 - acc: 0.2294\n",
482 |             "Epoch 31/100\n",
483 |             "167559/167559 [==============================] - 207s 1ms/step - loss: 4.3243 - acc: 0.2338\n",
484 |             "Epoch 32/100\n",
485 |             "167559/167559 [==============================] - 205s 1ms/step - loss: 4.2787 - acc: 0.2367\n",
486 |             "Epoch 33/100\n",
487 |             "167559/167559 [==============================] - 202s 1ms/step - loss: 4.2361 - acc: 0.2409\n",
488 |             "Epoch 34/100\n",
489 |             "167559/167559 [==============================] - 203s 1ms/step - loss: 4.1923 - acc: 0.2452\n",
490 |             "Epoch 35/100\n",
491 |             "167559/167559 [==============================] - 205s 1ms/step - loss: 4.1540 - acc: 0.2497\n",
492 |             "Epoch 36/100\n",
493 |             "167559/167559 [==============================] - 204s 1ms/step - loss: 4.1097 - acc: 0.2530\n",
494 |             "Epoch 37/100\n",
495 |             "167559/167559 [==============================] - 204s 1ms/step - loss: 4.0662 - acc: 0.2577\n",
496 |             "Epoch 38/100\n",
497 |             "167559/167559 [==============================] - 206s 1ms/step - loss: 4.0269 - acc: 0.2614\n",
498 |             "Epoch 39/100\n",
499 |             "167559/167559 [==============================] - 207s 1ms/step - loss: 3.9873 - acc: 0.2666\n",
500 |             "Epoch 40/100\n",
501 |             "167559/167559 [==============================] - 205s 1ms/step - loss: 3.9478 - acc: 0.2698\n",
502 |             "Epoch 41/100\n",
503 |             "167559/167559 [==============================] - 205s 1ms/step - loss: 3.9111 - acc: 0.2736\n",
504 |             "Epoch 42/100\n",
505 |             "167559/167559 [==============================] - 202s 1ms/step - loss: 3.8727 - acc: 0.2778\n",
506 |             "Epoch 43/100\n",
507 |             "167559/167559 [==============================] - 203s 1ms/step - loss: 3.8360 - acc: 0.2814\n",
508 |             "Epoch 44/100\n",
509 |             "167559/167559 [==============================] - 203s 1ms/step - loss: 3.7987 - acc: 0.2858\n",
510 |             "Epoch 45/100\n",
511 |             "167559/167559 [==============================] - 204s 1ms/step - loss: 3.7646 - acc: 0.2903\n",
512 |             "Epoch 46/100\n",
513 |             "167559/167559 [==============================] - 202s 1ms/step - loss: 3.7287 - acc: 0.2940\n",
514 |             "Epoch 47/100\n",
515 |             "167559/167559 [==============================] - 202s 1ms/step - loss: 3.6968 - acc: 0.2981\n",
516 |             "Epoch 48/100\n",
517 |             "167559/167559 [==============================] - 204s 1ms/step - loss: 3.6597 - acc: 0.3019\n",
518 |             "Epoch 49/100\n",
519 |             "167559/167559 [==============================] - 205s 1ms/step - loss: 3.6270 - acc: 0.3057\n",
520 |             "Epoch 50/100\n",
521 |             "167559/167559 [==============================] - 205s 1ms/step - loss: 3.5942 - acc: 0.3099\n",
522 |             "Epoch 51/100\n",
523 |             "167559/167559 [==============================] - 205s 1ms/step - loss: 3.5606 - acc: 0.3133\n",
524 |             "Epoch 52/100\n",
525 |             "167559/167559 [==============================] - 203s 1ms/step - loss: 3.5287 - acc: 0.3182\n",
526 |             "Epoch 53/100\n",
527 |             "167559/167559 [==============================] - 202s 1ms/step - loss: 3.4990 - acc: 0.3214\n",
528 |             "Epoch 54/100\n",
529 |             "167559/167559 [==============================] - 203s 1ms/step - loss: 3.4684 - acc: 0.3265\n",
530 |             "Epoch 55/100\n",
531 |             "167559/167559 [==============================] - 199s 1ms/step - loss: 3.4368 - acc: 0.3304\n",
532 |             "Epoch 56/100\n",
533 |             "167559/167559 [==============================] - 201s 1ms/step - loss: 3.4096 - acc: 0.3340\n",
534 |             "Epoch 57/100\n",
535 |             "167559/167559 [==============================] - 201s 1ms/step - loss: 3.3778 - acc: 0.3384\n",
536 |             "Epoch 58/100\n",
537 |             "167559/167559 [==============================] - 202s 1ms/step - loss: 3.3517 - acc: 0.3415\n",
538 |             "Epoch 59/100\n",
539 |             "167559/167559 [==============================] - 202s 1ms/step - loss: 3.3187 - acc: 0.3451\n",
540 |             "Epoch 60/100\n",
541 |             "167559/167559 [==============================] - 202s 1ms/step - loss: 3.2917 - acc: 0.3506\n",
542 |             "Epoch 61/100\n",
543 |             "167559/167559 [==============================] - 202s 1ms/step - loss: 3.2603 - acc: 0.3540\n",
544 |             "Epoch 62/100\n",
545 |             "167559/167559 [==============================] - 202s 1ms/step - loss: 3.2377 - acc: 0.3573\n",
546 |             "Epoch 63/100\n",
547 |             "167559/167559 [==============================] - 202s 1ms/step - loss: 3.2076 - acc: 0.3611\n",
548 |             "Epoch 64/100\n",
549 |             "167559/167559 [==============================] - 203s 1ms/step - loss: 3.1829 - acc: 0.3649\n",
550 |             "Epoch 65/100\n",
551 |             "167559/167559 [==============================] - 205s 1ms/step - loss: 3.1530 - acc: 0.3687\n",
552 |             "Epoch 66/100\n",
553 |             "167559/167559 [==============================] - 205s 1ms/step - loss: 3.1248 - acc: 0.3732\n",
554 |             "Epoch 67/100\n",
555 |             "167559/167559 [==============================] - 205s 1ms/step - loss: 3.0994 - acc: 0.3762\n",
556 |             "Epoch 68/100\n",
557 |             "167559/167559 [==============================] - 204s 1ms/step - loss: 3.0788 - acc: 0.3789\n",
558 |             "Epoch 69/100\n",
559 |             "167559/167559 [==============================] - 206s 1ms/step - loss: 3.0460 - acc: 0.3842\n",
560 |             "Epoch 70/100\n",
561 |             "167559/167559 [==============================] - 204s 1ms/step - loss: 3.0242 - acc: 0.3869\n",
562 |             "Epoch 71/100\n",
563 |             "167559/167559 [==============================] - 201s 1ms/step - loss: 3.0044 - acc: 0.3893\n",
564 |             "Epoch 72/100\n",
565 |             "167559/167559 [==============================] - 203s 1ms/step - loss: 2.9720 - acc: 0.3943\n",
566 |             "Epoch 73/100\n",
567 |             "167559/167559 [==============================] - 203s 1ms/step - loss: 2.9492 - acc: 0.3987\n",
568 |             "Epoch 74/100\n",
569 |             "167559/167559 [==============================] - 204s 1ms/step - loss: 2.9266 - acc: 0.4012\n",
570 |             "Epoch 75/100\n",
571 |             "167559/167559 [==============================] - 205s 1ms/step - loss: 2.9071 - acc: 0.4049\n",
572 |             "Epoch 76/100\n",
573 |             "167559/167559 [==============================] - 205s 1ms/step - loss: 2.8845 - acc: 0.4083\n",
574 |             "Epoch 77/100\n",
575 |             "167559/167559 [==============================] - 207s 1ms/step - loss: 2.8655 - acc: 0.4113\n",
576 |             "Epoch 78/100\n",
577 |             "167559/167559 [==============================] - 206s 1ms/step - loss: 2.8333 - acc: 0.4165\n",
578 |             "Epoch 79/100\n",
579 |             "167559/167559 [==============================] - 206s 1ms/step - loss: 2.8169 - acc: 0.4183\n",
580 |             "Epoch 80/100\n",
581 |             "167559/167559 [==============================] - 207s 1ms/step - loss: 2.7922 - acc: 0.4223\n",
582 |             "Epoch 81/100\n",
583 |             "167559/167559 [==============================] - 207s 1ms/step - loss: 2.7705 - acc: 0.4260\n",
584 |             "Epoch 82/100\n",
585 |             "167559/167559 [==============================] - 206s 1ms/step - loss: 2.7572 - acc: 0.4271\n",
586 |             "Epoch 83/100\n",
587 |             "167559/167559 [==============================] - 206s 1ms/step - loss: 2.7332 - acc: 0.4323\n",
588 |             "Epoch 84/100\n",
589 |             "167559/167559 [==============================] - 207s 1ms/step - loss: 2.7124 - acc: 0.4355\n",
590 |             "Epoch 85/100\n",
591 |             "167559/167559 [==============================] - 207s 1ms/step - loss: 2.6912 - acc: 0.4375\n",
592 |             "Epoch 86/100\n",
593 |             "167559/167559 [==============================] - 207s 1ms/step - loss: 2.6701 - acc: 0.4412\n",
594 |             "Epoch 87/100\n",
595 |             "167559/167559 [==============================] - 208s 1ms/step - loss: 2.6540 - acc: 0.4431\n",
596 |             "Epoch 88/100\n",
597 |             "167559/167559 [==============================] - 208s 1ms/step - loss: 2.6384 - acc: 0.4449\n",
598 |             "Epoch 89/100\n",
599 |             "167559/167559 [==============================] - 208s 1ms/step - loss: 2.6155 - acc: 0.4498\n",
600 |             "Epoch 90/100\n",
601 |             "167559/167559 [==============================] - 208s 1ms/step - loss: 2.5999 - acc: 0.4524\n",
602 |             "Epoch 91/100\n",
603 |             "167559/167559 [==============================] - 207s 1ms/step - loss: 2.5810 - acc: 0.4551\n",
604 |             "Epoch 92/100\n",
605 |             "167559/167559 [==============================] - 212s 1ms/step - loss: 2.5627 - acc: 0.4588\n",
606 |             "Epoch 93/100\n",
607 |             "167559/167559 [==============================] - 211s 1ms/step - loss: 2.5443 - acc: 0.4624\n",
608 |             "Epoch 94/100\n",
609 |             "167559/167559 [==============================] - 209s 1ms/step - loss: 2.5269 - acc: 0.4650\n",
610 |             "Epoch 95/100\n",
611 |             "167559/167559 [==============================] - 207s 1ms/step - loss: 2.5104 - acc: 0.4673\n",
612 |             "Epoch 96/100\n",
613 |             "167559/167559 [==============================] - 208s 1ms/step - loss: 2.4977 - acc: 0.4709\n",
614 |             "Epoch 97/100\n",
615 |             "167559/167559 [==============================] - 209s 1ms/step - loss: 2.4763 - acc: 0.4743\n",
616 |             "Epoch 98/100\n",
617 |             "167559/167559 [==============================] - 210s 1ms/step - loss: 2.4588 - acc: 0.4768\n",
618 |             "Epoch 99/100\n",
619 |             "167559/167559 [==============================] - 210s 1ms/step - loss: 2.4403 - acc: 0.4798\n",
620 |             "Epoch 100/100\n",
621 |             "167559/167559 [==============================] - 211s 1ms/step - loss: 2.4310 - acc: 0.4802\n"
622 |           ],
623 |           "name": "stdout"
624 |         },
625 |         {
626 |           "output_type": "execute_result",
627 |           "data": {
628 |             "text/plain": [
629 |               "<keras.callbacks.History at 0x7f36ecadf6a0>"
630 |             ]
631 |           },
632 |           "metadata": {
633 |             "tags": []
634 |           },
635 |           "execution_count": 22
636 |         }
637 |       ]
638 |     },
639 |     {
640 |       "cell_type": "code",
641 |       "metadata": {
642 |         "id": "JuMik4xeH268",
643 |         "colab_type": "code",
644 |         "colab": {}
645 |       },
646 |       "source": [
647 |         "from pickle import dump\n",
648 |         "model.save('marvel_model1.h5')  # save the trained model to an .h5 file\n",
649 |         "with open('tokenizer.pkl', 'wb') as tokenizer_file:  # context manager closes the handle even if pickling fails\n",
650 |         "    dump(tokenizer, tokenizer_file)  # keep the fitted tokenizer so new text can be encoded with the same mapping"
651 |       ],
652 |       "execution_count": 0,
653 |       "outputs": []
654 |     },
655 |     {
656 |       "cell_type": "code",
657 |       "metadata": {
658 |         "id": "dXhkiQ8UZeT5",
659 |         "colab_type": "code",
660 |         "colab": {}
661 |       },
662 |       "source": [
663 |         "# Ask Martyn to write me a 40 word opening sentence; the encoded seed must hold exactly seq_length ids or the model rejects its shape (see the ValueError recorded in the next cell)\n",
664 |         "seed_text = 'Born in 1993, bullied in school for having small feet Martyn grew up on a local wheat farm. Lifting bails made him a strong man, far stronger than the average human. One day during a solar eclipse something odd happened'\n",
665 |         "encoded = tokenizer.texts_to_sequences([seed_text])[0]  # may come back shorter/longer than the word count of the seed - the tokenizer decides\n",
666 |         "encoded_array = np.array([([0] * seq_length + encoded)[-seq_length:]])  # left-pad with id 0 when too short, keep only the last seq_length ids when too long\n",
667 |         "encoded_array.shape"
668 |       ],
669 |       "execution_count": 0,
670 |       "outputs": []
671 |     },
672 |     {
673 |       "cell_type": "code",
674 |       "metadata": {
675 |         "id": "AfZKpKSqaN3S",
676 |         "colab_type": "code",
677 |         "outputId": "d33ef5a2-17c0-47ad-acb2-0a3e7797b014",
678 |         "colab": {
679 |           "base_uri": "https://localhost:8080/",
680 |           "height": 306
681 |         }
682 |       },
683 |       "source": [
684 |         "# Predict the output\n",
685 |         "predicted = model.predict_classes(encoded_array, verbose=0)"
686 |       ],
687 |       "execution_count": 0,
688 |       "outputs": [
689 |         {
690 |           "output_type": "error",
691 |           "ename": "ValueError",
692 |           "evalue": "ignored",
693 |           "traceback": [
694 |             "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
695 |             "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
696 |             "\u001b[0;32m<ipython-input-90-6ff0b94914d3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpredicted\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_classes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mencoded_array\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
697 |             "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras/engine/sequential.py\u001b[0m in \u001b[0;36mpredict_classes\u001b[0;34m(self, x, batch_size, verbose)\u001b[0m\n\u001b[1;32m    266\u001b[0m             \u001b[0mA\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0marray\u001b[0m \u001b[0mof\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0mpredictions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    267\u001b[0m         \"\"\"\n\u001b[0;32m--> 268\u001b[0;31m         \u001b[0mproba\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    269\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mproba\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    270\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mproba\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
698 |             "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, x, batch_size, verbose, steps, callbacks, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m   1378\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1379\u001b[0m         \u001b[0;31m# Case 2: Symbolic tensors or Numpy array-like.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1380\u001b[0;31m         \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_standardize_user_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1381\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstateful\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1382\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
699 |             "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras/engine/training.py\u001b[0m in \u001b[0;36m_standardize_user_data\u001b[0;34m(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)\u001b[0m\n\u001b[1;32m    755\u001b[0m             \u001b[0mfeed_input_shapes\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    756\u001b[0m             \u001b[0mcheck_batch_axis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m  \u001b[0;31m# Don't enforce the batch size.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 757\u001b[0;31m             exception_prefix='input')\n\u001b[0m\u001b[1;32m    758\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    759\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0my\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
700 |             "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras/engine/training_utils.py\u001b[0m in \u001b[0;36mstandardize_input_data\u001b[0;34m(data, names, shapes, check_batch_axis, exception_prefix)\u001b[0m\n\u001b[1;32m    139\u001b[0m                             \u001b[0;34m': expected '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' to have shape '\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    140\u001b[0m                             \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' but got array with shape '\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 141\u001b[0;31m                             str(data_shape))\n\u001b[0m\u001b[1;32m    142\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    143\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
701 |             "\u001b[0;31mValueError\u001b[0m: Error when checking input: expected embedding_1_input to have shape (40,) but got array with shape (36,)"
702 |           ]
703 |         }
704 |       ]
705 |     },
706 |     {
707 |       "cell_type": "code",
708 |       "metadata": {
709 |         "id": "jQ2igHzKaSo7",
710 |         "colab_type": "code",
711 |         "colab": {}
712 |       },
713 |       "source": [
714 |         "# Generate a sequence\n",
715 |         "def generate_seq(model, tokenizer, seq_length, seed_text, n_words):\n",
716 |         "\tresult = list()\n",
717 |         "\tin_text = seed_text\n",
718 |         "\t# generate a fixed number of words\n",
719 |         "\tfor _ in range(n_words):\n",
720 |         "\t\t# encode the text as integer\n",
721 |         "\t\tencoded = tokenizer.texts_to_sequences([in_text])[0]\n",
722 |         "\t\t# pad or truncate the sequence to the fixed length the model expects\n",
723 |         "\t\tencoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')\n",
724 |         "\t\t# predict the class index of the most likely next word\n",
725 |         "\t\tyhat = model.predict_classes(encoded, verbose=0)\n",
726 |         "\t\t# map predicted word index to word\n",
727 |         "\t\tout_word = ''\n",
728 |         "\t\tfor word, index in tokenizer.word_index.items():\n",
729 |         "\t\t\tif index == yhat:\n",
730 |         "\t\t\t\tout_word = word\n",
731 |         "\t\t\t\tbreak\n",
732 |         "\t\t# append to input\n",
733 |         "\t\tin_text += ' ' + out_word\n",
734 |         "\t\tresult.append(out_word)\n",
735 |         "\treturn ' '.join(result)\n"
736 |       ],
737 |       "execution_count": 0,
738 |       "outputs": []
739 |     },
740 |     {
741 |       "cell_type": "code",
742 |       "metadata": {
743 |         "id": "E5SMunCrj5Ri",
744 |         "colab_type": "code",
745 |         "outputId": "e588bbc8-0726-49b0-b399-b09e1f0e9213",
746 |         "colab": {
747 |           "base_uri": "https://localhost:8080/",
748 |           "height": 54
749 |         }
750 |       },
751 |       "source": [
752 |         "generated = generate_seq(model, tokenizer, seq_length, seed_text, 50)\n",
753 |         "print(generated)"
754 |       ],
755 |       "execution_count": 0,
756 |       "outputs": [
757 |         {
758 |           "output_type": "stream",
759 |           "text": [
760 |             "to the rigellians and nick fury will triggered his colorblindness and exchanging their japanese timeline grumlin was to combat fields hurani is a urgent detector radiate the wicked arno stark were being controlled by terror in the surface cap fell together facing shiv as part owner anubis fought for leaking\n"
761 |           ],
762 |           "name": "stdout"
763 |         }
764 |       ]
765 |     },
766 |     {
767 |       "cell_type": "code",
768 |       "metadata": {
769 |         "id": "4CCh3-QYj9BS",
770 |         "colab_type": "code",
771 |         "colab": {}
772 |       },
773 |       "source": [
774 |         "# Full sentence, with a few improvements (capital letters / commas)\n",
775 |         "\n",
776 |         "\"[HUMAN MADE:] Born in 1993, bullied in school for having small feet, Martyn grew up on a local wheat farm. Lifting bales made him a strong man, far stronger than the average human. \\n\n",
777 |         "One day during a solar eclipse something odd happened [MODEL MADE:] to the Rigellians and Nick Fury's will triggered his colorblindness and exchanging their Japanese timeline, \\n\n",
778 |         "Grumlin was to combat fields. Hurani is an urgent detector, radiate the wicked Arno Stark were being controlled by terror in the surface. Cap fell together facing Shiv as part owner Anubis fought for leaking\""
779 |       ],
780 |       "execution_count": 0,
781 |       "outputs": []
782 |     },
783 |     {
784 |       "cell_type": "code",
785 |       "metadata": {
786 |         "id": "NxdNEIpBjYjV",
787 |         "colab_type": "code",
788 |         "colab": {}
789 |       },
790 |       "source": [
791 |         "# We're not there yet!"
792 |       ],
793 |       "execution_count": 0,
794 |       "outputs": []
795 |     }
796 |   ]
797 | }


--------------------------------------------------------------------------------