├── CNN - Music Genre ├── CNN_Model.ipynb └── Data Creation.ipynb ├── Champion Classes - UMAP and KMeans ├── champ_classification.ipynb └── champ_data.ipynb ├── Classification ├── Classification.ipynb ├── vertebral_column_data.txt └── vertebral_column_metadata.txt ├── Data Analysis and API - LoL Tilt ├── desktop.ini ├── phase3.csv └── tilt_trend.ipynb ├── Data Creation - Jungle Stats per Champion ├── champ_parse.py ├── get_match_ids.py ├── main.py └── match_data.py ├── MCMC - Enigma Thanos ├── BreakEnigma.ipynb ├── WikiData.ipynb └── marvel_data.txt ├── PCA for Recommendation └── Champ Recommendation and PCA.ipynb ├── Principle Component Analysis ├── ANSUR_II_FEMALE_Public.csv └── Principle Component Analysis.ipynb ├── README.md └── RNN - Marvel Character Generator ├── RNN_Marvel_Generator.ipynb └── Wiki_Data.ipynb /Champion Classes - UMAP and KMeans/champ_classification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "import umap\n", 12 | "from sklearn.preprocessing import StandardScaler\n", 13 | "from sklearn.decomposition import PCA\n", 14 | "from sklearn.compose import ColumnTransformer\n", 15 | "import seaborn as sns\n", 16 | "import matplotlib.pyplot as plt\n", 17 | "import matplotlib.ticker as mtick\n", 18 | "sns.set_style(\"whitegrid\", {'axes.grid' : False})\n", 19 | "\n", 20 | "from sklearn.cluster import KMeans" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "df = pd.read_csv('final_data2.csv')\n", 30 | "try:\n", 31 | " df.drop('Unnamed: 0', axis= 1, inplace = True)\n", 32 | "except:\n", 33 | " pass\n", 34 | "avg_lane_stats = df.groupby('Lane').mean()\n", 35 | "champ_ids = pd.read_csv('champ_ids.csv')\n", 36 | "champ_ids = 
champ_ids[['Champ ID', 'Champion']]\n", 37 | "count_df = df.groupby(['Champ ID', 'Lane']).count()\n", 38 | "vol_champs = list(count_df[count_df['Tier'] > 200].index)\n", 39 | "agg_df = df.groupby(['Champ ID', 'Lane']).mean()\n", 40 | "final_df = agg_df[agg_df.index.isin(vol_champs)].reset_index()\n", 41 | "final_df.set_index('Champ ID', drop= True, inplace= True)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 22, 47 | "metadata": { 48 | "scrolled": true 49 | }, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/plain": [ 54 | "Index(['Champ ID', 'Lane', 'Tier', 'Result', 'kills', 'deaths', 'assists',\n", 55 | " 'largestKillingSpree', 'largestMultiKill', 'killingSprees',\n", 56 | " 'longestTimeSpentLiving', 'totalDamageDealt', 'magicDamageDealt',\n", 57 | " 'physicalDamageDealt', 'trueDamageDealt', 'largestCriticalStrike',\n", 58 | " 'totalDamageDealtToChampions', 'magicDamageDealtToChampions',\n", 59 | " 'trueDamageDealtToChampions', 'totalHeal', 'damageSelfMitigated',\n", 60 | " 'damageDealtToObjectives', 'damageDealtToTurrets', 'timeCCingOthers',\n", 61 | " 'totalDamageTaken', 'goldEarned', 'turretKills', 'inhibitorKills',\n", 62 | " 'totalMinionsKilled', 'totalTimeCrowdControlDealt',\n", 63 | " 'neutralMinionsKilled', 'neutralMinionsKilledTeamJungle',\n", 64 | " 'neutralMinionsKilledEnemyJungle', 'firstBloodKill', 'firstBloodAssist',\n", 65 | " 'firstTowerKill', 'firstTowerAssist', 'first10_xpm', 'first10_gpm',\n", 66 | " 'soloKills', 'earlyGanks', 'drakesKilled'],\n", 67 | " dtype='object')" 68 | ] 69 | }, 70 | "execution_count": 22, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": [ 76 | "df.columns" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 23, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "data": { 86 | "text/plain": [ 87 | "42" 88 | ] 89 | }, 90 | "execution_count": 23, 91 | "metadata": {}, 92 | "output_type": "execute_result" 93 | } 94 | ], 95 | "source": [ 
96 | "len(df.columns)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "top_df = final_df[final_df['Lane'] == 'top'].drop('Lane', axis = 1)\n", 106 | "jung_df = final_df[final_df['Lane'] == 'jungle'].drop('Lane', axis = 1)\n", 107 | "mid_df = final_df[final_df['Lane'] == 'middle'].drop('Lane', axis = 1)\n", 108 | "adc_df = final_df[final_df['Lane'] == 'adc'].drop('Lane', axis = 1)\n", 109 | "sup_df = final_df[final_df['Lane'] == 'support'].drop('Lane', axis = 1)\n", 110 | "sup_df = sup_df[sup_df['totalMinionsKilled']<100]" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 14, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "kills = mid_df['kills']\n", 120 | "gold = mid_df['goldEarned']" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 21, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "text/plain": [ 131 | "" 132 | ] 133 | }, 134 | "execution_count": 21, 135 | "metadata": {}, 136 | "output_type": "execute_result" 137 | }, 138 | { 139 | "data": { 140 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZAAAAEECAYAAAAGSGKZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3de3zT9b0/8FcuTZp7Wu6Xltu4VBRpwR2PIkcmEy8oWASSTpjTc1RU9pvnOAEnyIaKbNizjR034THPOasuaRVFj3pU3BgFnJehQMXKoIDSUi7SW5K2SZN8f3948qWXJE3SfPNN2tfzL/nybfNBSl75fD+f9+etEARBABERUZyUcg+AiIgyEwOEiIgSwgAhIqKEMECIiCghDBAiIkqIWu4BSOnAgQPQarVyD4OIKKN4vV5Mnz691/v6dYBotVoUFBTIPQwiooxSXV0d0318hEVERAlhgBARUUIYIERElBAGCBERJYQBQkRECWGAEBFRQhggRESUEAYIERElRLIAOXjwIJYtWwYAOHbsGOx2O2w2G9avX49AIAAAqKioQHFxMZYsWYJdu3YBABoaGnDXXXehpKQEP/rRj9DW1hbxXiIiSi5BEBBrmyhJAmTbtm147LHH4PV6AQClpaX413/9VzidTrS3t+PPf/4zzp8/j7KyMjidTvz+979HaWkpfD4fnn32WcyfPx9//OMfcckll6C8vDzivURElDwulwu1tbXyBkh+fj62bNki/nrLli244oor4PP5cP78eQwaNAiHDh1CYWEhNBoNTCYT8vPz8cUXX2D//v245pprAACzZ8/G+++/H/FeIiLqO5fLhVOnTuHMmTNob2+P+eskCZB58+ZBrb54zJZKpUJdXR3mz5+PxsZGjBs3Dm63GyaTSbzHYDDA7XZ3uW4wGOByuSLeS0REiXO73WJwtLW1xTzzCEnZIvqoUaPw7rvvwm634+mnn4bRaITH4xF/3+PxwGQydbnu8XhgNpsj3ktERPFrbW1FbW0t6uvrEwqOkJQEyH333YeTJ08C+Gb2oFQqMW3aNOzfvx9erxculws1NTWYNGkSioqKsHv3bgBAZWUlZsyYEfFeIiKKXVtbG+rq6lBXV4fW1taEgyMkJce533PPPVi9ejWysrKg0+nwxBNPYMiQIVi2bBlKSkogCAIeeughaLVarFixAqtWrUJFRQVycnLwzDPPQK/Xh72XiIh6197ejsbGRrjd7j6HRmcKIZnfLc1UV1ezHwgRDVherxeNjY1wuVwxB4dCoYDP58PUqVN7vbdfN5QiIhqIfD4fmpqa0NLSgmAwKNnrMECIiPqJjo4ONDU1obm5WdLgCGGAEBFlOL/fLwZH6KSPVGCAEBFlqEAggKamJjQ1NaU0OEIYIEREGSYYDIrB4ff7ZRsHA4SIKEMEg0G0tLSgqakpLc4DZIAQEaU5QRDQ0tKCxsbGtAiOEAYIEVGaCgVHU1OTeLp5OmGAEFGfVde3oKquGZeNsqBghFnu4WS8YDAIl8uVdjOO7hggRNQn1fUtuP/FT9DhDyBLrcKz3ytiiCQoGWscNefcOHLWhcnDTJgw1JjkEXbFACGiPqmqa0aHP4AcgxaNHi+q6poZIHFK1uJ4zTk31v/PYfj9QajVSqy/ZWpcISIIAk6cOIHRo0fHdD8DhIj65LJRFmSpVWj0eJGlVuGyURa5h5Qxkr2r6shZF/z+IMz6LLS0duDIWVdMAdLS0oIdO3bA6XSipqYGn332WUyvxwAhoj4pGGHGs98r4hpIHKTaVTV5mAlqtRItrR1Qq5WYPCx636Sqqio4HA68+eabcXUiDGGAEFGfFYwwMzhiIAiCuDguxa6qCUONWH/L1KhrIK2trXjzzTfhcDhw+PBh8bper8ctt9yCkpKSmF+PAUJEJLFUbsedMNQYNjhqamrgcDiwY8cOuFwu8fqkSZNgs9mwYMECGI1G8Tj3WDBAiIgkInfluM/nw86
dO+F0OvHRRx+J17OysnDDDTfAbrejqKgICoUioe/PACEiSjK5g6O2thYVFRV4+eWXceHCBfF6Xl4ebDYbiouLkZub2+fXYYAQESWJnMERCASwZ88eOBwO7N69W+xAqFQq8Z3vfAc2mw1XX301lEpl0l6TAUJE1EdyBsfXX3+Nl19+GRUVFairqxOvDx06FIsXL8aSJUswfPhwSV6bAUJElCC5gkMQBHz00UdwOp3YuXMnOjo6xN+7+uqrYbPZMGfOHGRlZUk6DgYIEVGc5AqO7gV/IVarFcXFxVi6dCnGjh2bsvEwQIiIYiTXseqRCv4KCwths9lwww03IDs7O2XjCWGAEBH1Qo7gaGtrwxtvvAGn09nlaJHOBX9TpkxJyVgiYYAQEUUgR3DEWvCXDhggRETdpLqRk8/nw3vvvQeHw9Gj4O/GG2+EzWbrU8GfVBggRCS7dGlIJQgC3G43GhoaUhIckQr+8vPzsXTp0qQV/EmFAUJEskqXhlQejwcNDQ1ob28Xi/Ck0FvBn91ux1VXXZXUgj+pMECISFZyN6Rqb29HQ0MDPB6PpMFx/vx5bN++vUfB35AhQ7BkyRJJC/6kwgAhIlnJ1ZDK5/OhsbERLS0tkgVHqODP4XBg586d8Pv94u+lsuBPKgwQIpJVqhtSeb1eNDc3o6WlBcFgUJLXSLeCP6kwQIhIdqloSOX1etHU1ASXyyVZcBw6dAhOp7NHwd/06dNht9tlK/iTCgOEiPq19vZ2NDU1we12SxIcvXX4s9vtKCgoSPrrpgMGCBGlzTbaZGpraxODQ4o1jmgFf3a7HbfeemvaFPxJhQFCNMClyzbaZGltbUVTU5Mku6qk7vAnJ6VSCZ1OB71ej/r6+pi+hgFCNMBJsY1WjhmNlNtxM73gLxyFQoGsrCzo9XoxOFQqFQDgzJkzMX0PBgjRANfXbbTdwyLVMxqv14vGxka4XK6kBkcgEEBlZSUcDgcqKytT0uFPap1nGTqdrs8L+pIFyMGDB7F582aUlZWhuroaGzZsgEqlgkajwaZNmzB48GBUVFTA6XRCrVZjxYoVmDNnDhoaGvDwww+jvb0dQ4cOxcaNG6HT6cLeS0R915dttOHCIlWFgR0dHWIdRzIXx8+fPy92+Dt9+rR4PRUd/pIt2iwjGSQJkG3btuH111+HTqcDADz55JNYu3YtCgoK4HQ6sW3bNvzzP/8zysrKsH37dni9XpSUlODqq6/Gs88+i/nz56O4uBhbt25FeXk5br755rD3ajQaKYZPFLdMX4ROdBttuLCQujAwEAigqakJTU1NCAQCSfme/angL9mzjGgkCZD8/Hxs2bIFjzzyCACgtLQUQ4cOBfDNX75Wq8WhQ4dQWFgIjUYDjUaD/Px8fPHFF9i/fz/uvfdeAMDs2bNRWlqKvLy8sPdOmzZNiuETxaW/LULHI1xYSFUYGAwGxeDo/AbfF83NzWLB3/Hjx8XrmVTwJ/UsIxpJAmTevHmora0Vfx0Kj08++QQvvPACXnzxRezZswcmk0m8x2AwwO12w+12i9cNBgNcLleXa53vJUoHcp/lJKdIYZHMwkApenIcOnQIDocDb731VtgOfzfeeCO0Wm1SXksKCoUCWq0WBoMBBoNBtuLElC2iv/XWW/jtb3+LrVu3Ijc3F0ajER6PR/x9j8cDk8kkXs/OzobH44HZbI54L1E6kOssp1Tp7fGcVFXkye7J0VvBXzp0+ItGqVQiOzsbBoMBer0+LQIuJQHy2muvoby8HGVlZbBarQCAadOm4Ze//CW8Xi98Ph9qamowadIkFBUVYffu3SguLkZlZSVmzJgR8V6idJDqs5wikWIdRo7Hc8FgEC6XK2nBcezYMTidzowr+FMoFFAqlWJg6HS6tFuDkTxAAoEAnnzySYwYMQIrV64EAFxxxRX44Q9/iGXLlqGkpASCIOChhx6CVqvFihUrsGrVKlRUVCAnJwfPPPMM9Hp92HuJ0kUqznKKRqo3+lQ+ngs
EAmhpaUFzc3OfH1VFK/hL5w5/SqUSGo0GBoMBOp0OOp0u7cbYmUKQ8gB8mVVXV/fbM2ho4Ao306j42yn8+r2/i2/0P5w7CUtm5iXltaSegfj9fjQ3NydlV1WmFfyFZhl6vV6cZaTD7tJY3ztZSEiUQSK9oUu1DiPl4zm/34+mpiY0Nzf3KThCBX9Op7NLhz+VSoU5c+akXYe/7rOM7OzstBlbvBggRBkk0iMlKd/ok/14LrQdt7GxsU/BEa3gb8mSJVi8eHFaFPyl6ywjGRggRBkk2kxD7nWY3iRjjaO3gj+73Y5rr71W9sXm/jTLiIYBQpRB0mXHVzxCaxzNzc0JFwCme8Fff55lRMMAIcow6T7TCOnrkSOCIKCqqiptC/5CFeCdt9n2x1lGNAwQol5k+jlXqdbXI0eiFfzdeuutsNvtKS34qznnxpGzLkweZsK3hpmQlZUFk8kkawV4umCAEEUxkM+5ipcgCOJ23ETWOCIV/E2ePBk2my1pBX+dA2HC0Ojf7/h5D5557yi0Gg3er+vAugV5mDhmSJ/H0F8wQIiiGMjnXMVKEAS4XC40NjbGXTkeKvhzOBz4+OOPxeuhgj+73Y7CwsKkFdPVnHNj/f8cht8fhFqtxPpbpvYIkc6PpjyNSmTnDINOq0F9Uxs+P9+OS8ckZSj9AgOEKIr+fs5VX/TlrKpoBX82mw233XabJAV/R8664PcHYdZnoaW1A0fOujBhqLHLOVOdj0C/xK+FZ08tmtra+PcfBgOEKIpM3PXUXbLXcBI9HTdShz+VSiV2+JO64G/yMBPUaiW8viAGmfX4h8mjMGLEMOh0OqjVPd8O+8Pfv5R4lAlRP5bMNZxgMCjOOOIJjnQp+FMqldBqtTjXJuBkUwcuzR+CycMZCOHwKBMiSsoaTiIzDkEQ8OGHH8LpdMpW8KdQKKDRaKDX65GdnS3OMvIAzJDsVQcWBghRP9aXNZxEFsebm5vx6quvwul04sSJE+L1VBT8hYr5Ordz5and0mKAEKVQqmtKEnmGHwqOpqamLsV70e6Xq+AvtPjduf93Oh9/3t8wQIhSRK6aklgr1wVBgNvtFmccvS2PylXwp1KpoNfrxQrwcIvflBr8P0+UIulaUxJvB8Bjx47B4XBgx44dcLvd4vXJkyfDbrfjlltuSWqHv9CjKaPRKAZHLDu1eIKA9BggRCmSbjUl8eyqilTwp9FoxA5/ySz4Cy2Ah+oy9Hp9XN+bJwikBgOEKEX6WlOQrE/U8RyrXltbi/Lycmzfvl3Sgr/Op9mGWrn2Zc0kXWd7/Q0DhCiFuq9HxBoKyfhEHWvr2GgFf3PmzEFJSQn+8R//sc8FfwqFAmq1WlzL0Ov1SSsiTLfZXn/FAKGMl6nPuuMJhVg+UUf6/+Dz+dDc3IyWlpaowRGt4G/p0qVYvHgxhg0blvCfNzTL6HxciFTbbOWqIM/Un8VEMUAoo2Xys+54HrP09om6+/+H391RhDFWDZqbm+FyuRAMBsN+31gK/ubMmZPwTieFQgGVSgWTySTOMlK1zTbVfVMy+WcxUQwQymjJftadyk+Q8Txm6e0TdVVdMwKBAPJy9fC2ufHXqqNQjjVH3IorZYe/0KMpo9EozjYGQm3GQFx3YYBQRkvms+5Uf4KM9zFLtE/Ul40wYrhOwNf1tWj1+jBsxqAe4SFlwd9ADY3OBuK6CwOEMloyn3XL8Qmyr49ZvF4vWlpaoGlvwQ+KcnHkbFaPRknRCv4WLFgAm80Wd8Ff6NFU6JwprVY74KvAB+LJvQwQynjJetadSZ8gW1tb0dzcDI/HI65vTBhq7BIc0Tr8JVrwF1oEP98OHGv04xKdHlOs/f+NMlaZ0q8+WaIGyLJlyyJ+ovjDH/4gyYCI5JLunyADgQBcLhdaWloiHjXi8/nw7rvvwul0hi34s9vtmD59elwzhdABhaHHU8fOt+L/bR9Yi8UUXtQA+elPfwoA+I//+A9cd911mDFjBg4dOoRdu3alZHB
EqdJ58XzJzDy5h9NFW1sbXC4XXC4XjtY344sw/bxPnToldvhraGgQr48cnYeZ35mPkiW3o3Di6LDfP1yPcIVCAa1WC5PJBKPR2OXY9UMDcLGYwosaIOPHjwcAfP3117jpppsAAN/97ndRVlYm/ciIkqS3nVXpuP3S7/fD7XZ3mW107+e99qYpqK3eD4fDgT179vTo8PdPNy7Ea/UG1AaA0so6rLdYe/T/Dn3PYECASa/B4wun4dIxw2AwGMS2rt1l0qM+klbMayAvvfQSpk2bhk8//RQ6nU7KMRGFlcgW21jCIV22XwqCAI/HA7fbDbfb3aN2I9TPWxv04OS+P2H58/vQ+PVZ8fdDHf6WLFmCYcOG4a2qegRqT/bo/93ZiQutGGwxYuigXFxoC+ICzBg0aFDUcab7oz5KnZgCZPPmzXj++eexc+dOjB8/Hv/+7/8u9biIukh0lhBLOMj9ibq9vR0ejwculwsdHR1h1zYEQUB73Rc4+ebzaDi6HwherCiPVPAX6v/d0toBtVqJycNMAL55PJWdnQ2TyYRr9YPxPycP42ijD1lqVcxhMNAWiym8mAJkyJAhuOaaazBu3DhMmzYNKpVK6nERdZHoLCGWcOjLJ+pECw+DwSAOnTiDA8frMdKowthB+rD3RSr4M1ssuH3RIthsNowZMybs104YasT6W6biyFkXCoabMWmkVVzTCD2eslqB0iWXx/VnGGjHdVBkMQVIaWkpzpw5g5qaGmRlZWHr1q0oLS2VemxEokRnCbGGQyKfqBOZFbW1tcHtduPg8Xqs23EIXl8AarUS62+ZKj5eSmbB3+SRVsyYNFo8sDDc7qt4/uyJ/JnlChwGnfRiCpD9+/fjxRdfxLJly3DbbbfB4XBIPS6iLvoyS5DqcUuss6Jwj6g+q22E1xfosj4xwqiMu8NfpB1UOp0OFosFRqMxqcV98c4E5dqgkI4bI/qjmAIkEAjA6/VCoVAgEAgk7chlonikw3P3zp9qo82KAoGAuIuqvb29x7pG5/UJX9Np/PmP/4u1774ZV4e/zruytBoVfrZwGi4fPwImkyniDqq+incmKNcGhXTZGNHfxRQg3//+91FcXIyGhgYsXrwYd955p8TDIko/4T7Vdp4VTRluEndQeTyeqEen51k1uFZ3Ci9XlON49SF89n/X4yn4O3LWBY1KiXHDBqFDlQVXVg6GDBmS5D91V7HOBENBa9SoZdmgIPfGiIEipgC58cYbcdVVV+HLL7/E6NGjk9KBjCjTdP9U+9npZtxeNBpjLGp4PB6cOPE1AoFAxBNwgW8K/kId/joX/I0ZMwY2mw0LFy6M6d+XSqXCVZfkY289cMEnwN8BTBmRmjfJ3maC3YP2x9dPhtvnT+laBLcap0ZMAVJdXY3y8nJ4vV7x2saNG6N+zcGDB7F58+YuRYdPPfUUxo0bB7vdDgCoqKiA0+mEWq3GihUrMGfOHDQ0NODhhx9Ge3s7hg4dio0bN0Kn04W9l6THhciLLhtlgTZLDb8/gOFGFcYZg/jyyy8jbr0NCQQC2L17d9iCv+uuuw42my2mDn+h6nCz2QyTyYQJKhU2WAel3d9P96B1+/yyVPenwyPP/i6mAFm9ejXuuOMODB8+PKZvum3bNrz++utiwWFDQwMeeeQRnDx5EnfffTeAb7qflZWVYfv27fB6vSgpKcHVV1+NZ599FvPnz0dxcTG2bt2K8vJy3HzzzWHv1Wg0Cf6xKRZciLwoEAgg36zCpvljcfD4GeRZtDArvIjWUvz8+fN46aWXUFFRgfr6evH6sGHDsGTJkpg7/KlUKhiNRrEpU2fp+CbJx0cDR0wBMnjwYCxevDjmb5qfn48tW7bgkUceAQB4PB6sXLkSlZWV4j2HDh1CYWEhNBoNNBoN8vPz8cUXX2D//v249957AQCzZ89GaWkp8vLywt47bdq0eP6sFKeBvBApCALa29vR2tqKtrY2cSHcrBBwzYScqF/34YcfwuFw4L333ku
4w59SqRQPLzQYDBm1cYWPjwaOmAJk1KhR2Lp1KwoKCsRFvVmzZkW8f968eaitrRV/nZeXh7y8vC4B4na7YTKZxF8bDAZxATJ03WAwwOVyRbyXpDXQPkkGg0F4PB60traKi+DRHk111tzcjFdffRVOpxMnTpwQr4c6/EUr+OtMpVLBbDbDYrFk9Aw7HWdGlHwxBUhHRwdOnDjR5R9GtACJhdFohMfjEX/t8XjEKlmPx4Ps7Gx4PB6YzeaI95K05PwkmYq1lyNnWvB5XRPG56gxTK/sdedUd50L/t58880ua4SFhYWw2+244YYbei34UygUyMrKgtlshtlsTrj/OFGqxfSTajabsWbNmqS+8LRp0/DLX/4SXq8XPp8PNTU1mDRpEoqKirB7924UFxejsrISM2bMiHgvSU+OT5JSr720t7fj86/OY/2rn6C5xYOOYBBrb76kx0GDkbS2tuKNN96A0+mMueAvHKVSCb1eD7PZDIPBMKC7+VFmiilAampq0NLSArM5ef+IhwwZgmXLlqGkpASCIOChhx6CVqvFihUrsGrVKlRUVCAnJwfPPPMM9Hp92Hupf0r22ksgEEBbWxva2trg8XjQ0dGByqrTqDvXBLM+C972QNiTars7evQonnu+DDvffgPtrRdnxPF2+FOr1bBYLDCZTBn9mIpIIcTwkHfOnDk4c+YMcnNzxU9Je/fulXxwfVVdXY2CggK5h0Fx6usMJLQAHgqNtra2Hkejd++t0fksqs4idfhTqLIwZ+71uOcHy2Lq8KdQKMTZRrKPFyFKtljfO2MKkEzFAMlc8a6BdHR0iIvfocDo7Uc73DlSIZEK/rTWYRg98zoYJl6FH1x3KW66bETU1wgtikt5vAhRssX63hnTI6wDBw7glVdeQUdHBwDg3Llz+P3vf9+3ERJF0dvaSyAQQHt7uzjTaG9v7zHL6M2EocYuwREIBPCXv/wFTqczbMHfP924EDtO6xEIoEt/jXBBFHpMZbFYuChO/VZMP9lPPPEE7rzzTrzzzjuYNGkSfNGqp4gkEKkuIxkT6HPnzuHll1+OqeCvsFtYdH4UptOq8cSi6Zg+YRTMZnNG1W4QJSLmXVjz58/Hvn37sHLlStxxxx1Sj4syULK33naeYbS2tsb0WCpWgiDggw8+gMPhwJ/+9KcuBX9FV1yJ6XNuxm03Xo9JI61dvq77rOXIWRdUUGDsyFx0qLPRqLDAau36NUT9VUwBolAocPToUbS1teH48eM4f/681OOiDJOMrbfhdksle4mut4K/q757K7Z+2oID/iA++98j4uJ6pL4bM781HO/XC2jqAHwdwJQBVDzHc9Io5rOwjh49imXLluHhhx8WD0MkCklk620wGBQfRyX7sVRn8RT8vVVVD7+/qUujJwBddmxtWHAZLh07FDk5OZio02HQ0BEZ/0YabxjwnDQCegkQQRCgUCgwceJETJw4EQDwyiuvoK6uLiWDo8wR67EnPp9PfCSV7MdS3bW2tsbd4a9zo6fQQvmRsy74/UEMtWRDlaWFO8uMkSNHil+T6cd2JBIGA/mcNLooaoB8//vfxx/+8AcAwKZNm7Bq1SoAwJo1a8TrREDkY08EQYDX6xXPmPJ6vXHvlorX0aNH4XQ6sWPHjrg6/AHfrHGsv2Wq+LgKAFra/Rg5NBdqnQFeIQsFowdLOv5USyQMBto5aRRerzOQkM6f4Ppx6Qj1QeiTeCAQQEtLi7iWEc+hhIkKFfw5HA787W9/E6/H0+EvJLRQfqqpHc+9X492hQYelQkLLhmJ66cO73eftBMJA564S0CMayBA19BgFS115/P5ehTypUJvHf5uu+025OREPn69O4VCgezsbFgsFhx2N+Oc72uYdFnw+b0YYdWhYIQ5pYvHqXitRMMg0x/dUd9FDZDOQcHQoM6CwaBYl9Ha2gqfz5fU0IhWJd5bwV+sHf46UygU0Ol0sFqt4uOtKSMAhVLZ5ZN5KhePpXyt7sHEMKBERA2Qw4c
Pw2azQRAEHDt2TPzvmpqaVI2P0kjouBCpF8AjnVMVa8FfzTk33j58Nmz4dBcuOELCfTKv+NspyRaPu7+pS7VQzR1UlCxRA+T1119P1TgoDXWu/k7VAjgAcdeTWZ+FZo8Pb7y3GzUfvNOj4G/WrFmw2+249tprxeNCYj0kUalUwmAwwGq1iq2Xw+n+yVyqxeNwb+rRXqsvj7a4g4qSJWqAdD59tLtRo0YlfTAkv0AgIAZGqhbAu5s8zAR0tOLonj24cGgXPm08I/6e1WrFokWLYLPZkJ+f3+Xras658fIntWjz+THIqBXrODoHSOhwQ7PZnFBLAKkWj8O9qS+ZmRf2tfo6g+AOKkqWqAESelR14MAB6HQ6FBYWoqqqCn6/HwsXLkzJAEl6nY8LSeUCeHeCIODQoUNwOp347I034fNdLPgrKiqC3W7HvHnzwr7xh2YebR0BNLV+M0vRadTiVlyVSgWr1ZqUww2lWC+I9KYe7rX6OoPgDipKlqj/kv7t3/4NAHD33Xdj69at4vW77rpL2lGRpEIV4KFttj6fT9at2aEOfw6HA59//rl4Xa/XY8GCBbDZbL12+As99hpk+KZB0xVjc3F70WhMGmFJWnBIKZ439WTMILhoTskQ07+ohoYGsSNhY2MjmpqapB4XJZnP50N7e7tY0CdlBXis+lLw113nCnKjVo2SK8dh+oSRMJvNyMrKkuqPkFSxvqlzBkHpIqYAue+++7Bo0SIYjUa43W489dRTUo+L+sjv94vnS7W2tqKjo0O2R1OdJbPgr7MJQ4342a2X4rTbj+kTRuLy8SP69XHqnEFQOoirI+GFCxdgtVqhUqmkHFPSDKSOhH6/Xzz+XIq6jL5KdsFfd9nZ2bBarTCZTKxZIuqjpHQkXLp0acR/jE6nM7GRUVIIgiCuY6Ryi208QgV/DocDe/fu7VHwZ7fbceWVVyY8U1AoFNBqtQwOIplEDZDS0tJUjYNi0NHRIQaGx+NJi3WMcM6dO4eXXnoJL730Uq8d/hIROm4kVPzH4CCSR9QACdV6nDlzBk899RRqamowduxYrFmzJiWDI3Qp5Euk73eqROvwF67gLxEKhQJ6vR5WqxUGgyEZwyaiPojpX1qfrvIAABJhSURBVPNjjz0Gu92OK664Ah999BF+8pOf4L//+7+lHtuAlIqufMnU1NSEHTt2wOFw4OTJk+L13gr+Ip1zFY5CoYDRaOy1apyIUiumAPF6vbjuuusAAHPnzsV//ud/SjqogSR0XEgoNOQs5ItV54K/7h3+Yi346+2oEeCb4DAYDMjNzUV2drZkf554sZUr0TdiCpBAIIAjR45g8uTJOHLkCJ8590GowVLn6u90XcvozuPx4I033oDT6exS8GcwGMSCv8mTJ0f9Hp3PuQp31AhwMThycnLSbsbBgwiJLoopQNauXYuf/OQnOHfuHIYNG4YNGzZIPa5+pXtgyHG+VF8cPXoUDocDr732WpeCvylTpsBut2P+/PkJFfyFWsaGpHNwhPAgQqKLYgqQe+65BxcuXEBubi4+//xzlJSUYPDgwXj88cdx9dVXSz3GjNPR0dElMNJ9HSOcaAV/N910E+x2Oy6//PKECv46t4ydMNQoLo7n5uambXCE8CBCootiCpArrrgCDz74IMaPH4+vvvoKv/nNb/DAAw/gxz/+MQMEFxe+07WILx5SF/wBF1vGhrbj5ubmZsyuKh4jQnRRTAFy5swZjB8/HgCQn5+P+vp6jBkzJmMq0pMtdBhhaJbR3t4OQRAybpYRInXBXzharRY5OTkwm9PzDTjaQjmPESH6RkwBMmTIEGzevBmFhYX49NNPMXjwYOzbty9jDqnrq3A7pTI5MEJi7fCXTBqNRgyOdN2MwYVyotjEFCA///nPUV5ejsrKSkyaNAkrV67E559/3q8r1UPnSmXaTqnepKLgLxy1Wo2cnBxYLJa0P+SQC+VEsYnpXUKr1WL58uVdrhUWFkoyILmEjjsPBYbf7+8
XgRESreCvuLgYNpsNY8aMSfrrqtVqsR9Hpjzy5EI5UWzSt8OOxNL1uPNkChX8ORwOvPXWW10K/goLC2G323HDDTck1Nq1N6EOgJl0enMIF8qJYjNgAqTzTqm2tra0PL02WXor+Fu6dGmvHf4SpVKpYLF80wUwnTsA9oYL5US9y9x/4TEIBoO4cOFCv9gpFYu///3vYoc/j8cjXi8oKIDdbsfNN98cc8FfvJRKpRgcA2VzBdFANyACpD/z+Xx455134HQ6uxT8abVascNfIgV/sVIqlTCZTMjJyYFGo5HkNYgoPUkWIAcPHsTmzZtRVlaGL7/8EqtXr4ZCocDEiRPx+OOPQ6lU4je/+Q3+8pe/QK1W49FHH8W0adPiuncgO3XqFJxOJ7Zv347Gxkbx+tixY8WCP6vVKtnrh07IzcnJSauDDokodSQJkG3btuH1118Xj6XYuHEjfvSjH+Ef/uEfsG7dOvzpT3/CyJEj8dFHH4lNh1auXInt27fHde9AE63gb+7cubDZbEkv+OsuE86rIqLUkCRA8vPzsWXLFjzyyCMAgMOHD+Pb3/42AGD27NnYt28fxo0bh1mzZkGhUGDkyJEIBAJoaGiI697c3Fwphp92pO7wF4vQeVU5OTnQ6/WSvhYRZQZJAmTevHmora0Vfy0IgvgM3mAwwOVywe12d3nEEroez739OUDkKvjrLjTjsFqtDA4i6iIli+idH6l4PB6YzWYYjcYuO4U8Hg9MJlNc9/ZHTU1NePXVV+F0OrsU/OXk5GDRokVYunRpjw5/UuCjKiLqTUrOlLjkkkvw4YcfAgAqKysxc+ZMFBUVYe/evQgGgzh9+jSCwSByc3Pjure/EAQBBw8exOrVqzF79mw8/fTTYngUFRXhF7/4BXbv3o0f//jHkodHaHF89OjRGDlyJMODiCJKyQxk1apVWLt2LUpLSzF+/HjMmzcPKpUKM2fOxNKlSxEMBrFu3bq47810kQr+9Ho9FixYALvd3muHv2Rh33EiipdC6MeVdVVVVZIc09FXkQr+Eunw11dKpVIMjnTfjste5ESpUV1djYKCgl7v69eFhOnE5/Ph7bffhtPpxP79+8Xrfe3wl6hQAaDFYkn74AB4xDpROmKASKy3gr+FCxf2ucNfPJRKJcxmM6xWa0ZVjvOIdaL0wwCRgN/v71LwFxIq+At1+EtlQ6XQjMNqtablY73e8Ih1ovTDAEmiUMFfRUUFzpw5I15PZcFfd6EZh8ViycjgCOER60TphwHSR+lS8NddfwmOznjEOlF6YYAkKFrBX6jDXyoK/rrL9EdVRJQ5GCBxCBX8OZ3OHh3+ioqKxA5/cixO83RcIko1BkgMQgV/DocD1dXV4vVQhz+bzZaygr/uFAoFdDodcnNzeVYVEaUUAySKv//973A4HHjttddS3uEvFlqtFrm5uf32XDAiSm8MkG5CBX8OhwOffPKJeF2r1eKmm26CzWZLacFfOGq1Grm5ubBYLLKOg4gGNgbI//nqq69QXl4uW4e/WGg0GlgsFlgsFkmbRhERxWJAB0g6FvyFo9FokJOT0+O4eyIiOQ3IADl79qzY4a9zwd/w4cOxZMkS3H777Skv+OtOoVB0CQ65Q4yIqLsBEyDBYBAffvghHA4H3nvvPQQCAfH35Cz4606hUECr1SInJwdGo5HBQURpq98HSG8d/mw2G/Ly8uQb4P9RKBTIzs4Wg4OIKN316wD56quvUFxcDJ/PJ16bMWMG7HY75s2blxan0TI4iChT9esAaWhogM/ng8FgwMKFC7F06VLZCv666/yoinUcRJSJ+nWAGAwG/OxnP8P8+fNhMBjkHo4oVADINQ4iymT9OkAmTpyISy+9VO5hiLRaLaxWK8xmM4ODiDJevw6QdBHajhstONjvm4gyDQNEQrEEB8B+30SUmRggEog1OELY75uIMhEDJImysrLE4IjnyBH2+yaiTMQASQK1Wg2r1Qqr1ZrQWVXs901EmYgB0gcqlUo
MDpVK1afvxX7fRJRpGCAJCAWHxWKR/ewsIiK58N0vDiqVChaLBVarlcFBRAMe3wVjoFQqxeDIysqSezhERGmBARJFKDgsFktaHLxIRJROGCBhKJVKmEwm5OTkMDiIiCJggHSiUCjE4NBqtXIPh4gorTFA8E1w6PV65ObmQqfTyT0cIqKMMKADRKFQQKfTIScnJ62OeyciygQDMkBCzZxCPTmIiCh+Ay5A2MyJiCg5BkyAaDQa5ObmwmQyMTiIiJIgZQHi8/mwZs0anDp1CkajEevWrUNTUxOefPJJqFQqzJo1Cw8++CCCwSDWr1+PI0eOQKPR4IknnsCYMWNw4MCBHvfGQqPRiMeOMDiIiJInZQFSUVEBvV6PiooKHD9+HBs2bMDXX3+NLVu2IC8vD/fccw8OHz6Muro6+Hw+lJeX48CBA3j66afx29/+Fo8//niPe6dOnRr1NZVKJfLz8xM6IZeIiKJL2TvrsWPHMHv2bADA+PHjUVVVBZ/Ph/z8fCgUCsyaNQt//etfsX//flxzzTUAgOnTp+Ozzz6D2+0Oe29vlEolw4OISCIpe3ctKCjArl27IAgCDhw4AJfLBb1eL/6+wWCAy+WC2+3usjNKpVL1uBa6l4iI5JOyAFm0aBGMRiOWL1+OXbt2YcqUKWhraxN/3+PxwGw2w2g0wuPxiNeDwWCPa6F7iYhIPikLkKqqKsyYMQNlZWWYO3cuxo4di6ysLHz11VcQBAF79+7FzJkzUVRUhMrKSgDAgQMHMGnSJBiNxrD3EhGRfFK2iD5mzBj86le/wvPPPw+TyYQnn3wS9fX1ePjhhxEIBDBr1ixcfvnluOyyy7Bv3z7YbDYIgoCnnnoKAPDTn/60x71ERCQfhSAIgtyDkEp1dTUKCgrkHgYRUUaJ9b2TW5SIiCghDBAiIkoIA4SIiBLCAEmS6voWVPztFKrrW+QeChFRSgyYwxSlVF3fgvtf/AQd/gCy1Co8+70iFIxgnQoR9W+cgSRBVV0zOvwB5Bi06PAHUFXXLPeQiIgkxwBJgstGWZClVqHR40WWWoXLRlnkHhIRkeT4CCsJCkaY8ez3ilBV14zLRln4+IqIBgQGSJIUjDAzOIhoQOEjLCIiSggDhIiIEsIAISKihDBAwmBRIBFR77iI3g2LAomIYsMZSDcsCiQiig0DpBsWBRIRxYaPsLphUSARUWwYIGGwKJCIqHd8hEVERAlhgBARUUIYIERElBAGCBERJYQBQkRECWGAEBFRQvr1Nl6v14vq6mq5h0FElFG8Xm9M9ykEQRAkHgsREfVDfIRFREQJYYAQEVFCGCBERJQQBggRESWEAUJERAlhgBARUUL6dR2InAKBAB577DGcOHECKpUKGzduRH5+vtzDiujChQsoLi7G888/jwkTJsg9nIgWLlwIk8kEABg9ejQ2btwo84gie+655/DnP/8ZHR0dsNvtWLx4sdxDCuuVV17Bq6++CuBi7dS+fftgNqdXS4OOjg6sXr0adXV1UCqV2LBhQ9r+rPp8PqxZswanTp2C0WjEunXrMHbsWLmH1cPBgwexefNmlJWV4csvv8Tq1auhUCgwceJEPP7441Aqe5ljCCSJnTt3CqtXrxYEQRA++OAD4b777pN5RJH5fD7h/vvvF66//nrh2LFjcg8novb2dmHBggVyDyMmH3zwgXDvvfcKgUBAcLvdwq9//Wu5hxST9evXC06nU+5hhLVz507hhz/8oSAIgrB3717hwQcflHlEkZWVlQmPPfaYIAiCUFNTI9x1110yj6inrVu3CvPnzxcWL14sCIIg3HvvvcIHH3wgCIIgrF27Vnj33Xd7/R58hCWRuXPnYsOGDQCA06dPY/DgwTKPKLJNmzbBZrNh6NChcg8lqi+++AJtbW246667sHz5chw4cEDuIUW0d+9eTJo0CQ888ADuu+8+XHvttXIPqVdVVVU4duwYli5dKvdQwho3bhwCgQCCwSDcbjfU6vR9gHLs2DHMnj0
bADB+/HjU1NTIPKKe8vPzsWXLFvHXhw8fxre//W0AwOzZs/H+++/3+j3S92+gH1Cr1Vi1ahV27tyJX//613IPJ6xXXnkFubm5uOaaa7B161a5hxNVdnY27r77bixevBgnT57Ev/zLv+Dtt99OyzeSxsZGnD59Gr/73e9QW1uLFStW4O2334ZCoZB7aBE999xzeOCBB+QeRkR6vR51dXW48cYb0djYiN/97ndyDymigoIC7Nq1C3PnzsXBgwdx9uxZBAIBqFQquYcmmjdvHmpra8VfC4Ig/nwaDAa4XK5evwdnIBLbtGkT3nnnHaxduxatra1yD6eH7du34/3338eyZctQXV2NVatW4fz583IPK6xx48bh1ltvhUKhwLhx42C1WtN2rFarFbNmzYJGo8H48eOh1WrR0NAg97AiamlpwfHjx3HllVfKPZSI/uu//guzZs3CO++8g9deew2rV6+O+cymVFu0aBGMRiOWL1+OXbt2YerUqWkVHuF0Xu/weDwxrYExQCSyY8cOPPfccwAAnU4HhUKRlj9AL774Il544QWUlZWhoKAAmzZtwpAhQ+QeVlgvv/wynn76aQDA2bNn4Xa703asM2bMwJ49eyAIAs6ePYu2tjZYrVa5hxXRxx9/jKuuukruYURlNpvFDRQWiwV+vx+BQEDmUYVXVVWFGTNmoKysDHPnzkVeXp7cQ+rVJZdcgg8//BAAUFlZiZkzZ/b6Nek39+8nrr/+eqxZswbf+9734Pf78eijj0Kr1co9rIx2++23Y82aNbDb7VAoFHjqqafS8vEVAMyZMwcff/wxbr/9dgiCgHXr1qXlB4iQEydOYPTo0XIPI6o777wTjz76KEpKStDR0YGHHnoIer1e7mGFNWbMGPzqV7/C888/D5PJhCeffFLuIfVq1apVWLt2LUpLSzF+/HjMmzev16/habxERJQQPsIiIqKEMECIiCghDBAiIkoIA4SIiBLCACEiooQwQIgk8sorr2Dz5s3irysrK1FeXo7a2losWbIEAPCd73wnbYvhiHqTnpvoifqh0NlInY+PIMpkDBAiiTU0NOD+++/HokWL8OWXX8Jms/W4591338W2bdugVqsxatQo/PznP+/9KG0imfEnlEhCFy5cwIoVK7BmzZqolehvvPEG7rzzTjgcDsyaNQtutzuFoyRKDAOESEJ79uyBz+dDMBiMet+aNWvw8ccf44477sAnn3zC2QdlBP6UEklo4cKF+MUvfoHHHnsMbW1tEe8rLy/HypUr8cILLwAAdu7cmaohEiWMAUIksW9961u49dZbo7bfnTZtGn7wgx9g+fLlOH/+fEY0oCLiYYpERJQQzkCIiCghDBAiIkoIA4SIiBLCACEiooQwQIiIKCEMECIiSggDhIiIEvL/AcrdsxAMBz4+AAAAAElFTkSuQmCC\n", 141 | "text/plain": [ 142 | "
" 143 | ] 144 | }, 145 | "metadata": {}, 146 | "output_type": "display_data" 147 | } 148 | ], 149 | "source": [ 150 | "sns.regplot(kills, gold, marker = '.', line_kws={\"color\": \"black\"})" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "plt.figure(figsize=(4,4))\n", 160 | "sns.regplot(list(Xs[i]), list(Ys[i]), data= final_df, marker='.')\n", 161 | "plt.xlabel(X_labels[i])\n", 162 | "plt.ylabel(Y_labels[i])\n", 163 | "plt.tight_layout()" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 6, 169 | "metadata": { 170 | "scrolled": true 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "def standardise_features(df):\n", 175 | " scaler = StandardScaler()\n", 176 | " df_std = pd.DataFrame(scaler.fit_transform(df))\n", 177 | " df_std.columns = df.columns\n", 178 | " return df_std, scaler\n", 179 | "\n", 180 | "def fit_pca(df_std, num_comps = False):\n", 181 | " if num_comps == False:\n", 182 | " num_comps = len(df_std.columns)\n", 183 | " pca = PCA(n_components= num_comps)\n", 184 | " principalComponents = pca.fit_transform(df_std)\n", 185 | " pca_str = pca.explained_variance_ratio_\n", 186 | " return pca, pca_str\n", 187 | "\n", 188 | "def fit_umap(df_std, num_comps = False):\n", 189 | " if num_comps == False:\n", 190 | " num_comps = 2\n", 191 | " reducer = umap.UMAP(n_components = num_comps)\n", 192 | " reducer.fit_transform(df_std)\n", 193 | " return reducer, None\n", 194 | " \n", 195 | "\n", 196 | "def draw_component_str(df_std, pca_str):\n", 197 | " num_features = len(df_std.columns)\n", 198 | " # Plot the variance of each PC\n", 199 | " plt.figure(figsize=(12,8))\n", 200 | " plt.bar(np.arange(1, num_features + 1), pca_str, color=(0.2, 0.4, 0.6, 0.6),edgecolor='black')\n", 201 | " plt.xticks(np.arange(1, num_features + 1))\n", 202 | " plt.xlabel('Number of Components',fontweight='bold')\n", 203 | " plt.ylabel('Variance 
(%)',fontweight='bold')\n", 204 | " plt.title('Individual Explained Variance',fontweight='bold')\n", 205 | " plt.show()\n", 206 | " \n", 207 | "def draw_component_features(df_std, pca, feature = 1):\n", 208 | " # Create a table of all the feature strengths for each component\n", 209 | " all_comps = []\n", 210 | " for i in range(feature):\n", 211 | " all_comps.append(pca.components_[i])\n", 212 | " var_str = pd.DataFrame(all_comps, columns= df_std.columns, index=range(1, feature + 1)).abs().transpose()\n", 213 | "\n", 214 | " # Plot the varialbes in PC1\n", 215 | " plt.figure(figsize=(12,8))\n", 216 | " plt.bar(var_str.sort_values(feature, ascending=False)[1].keys(),var_str.sort_values(1,ascending=False)[1],color=(0.2, 0.4, 0.6, 0.6),edgecolor='black')\n", 217 | " plt.xticks(rotation=90)\n", 218 | " plt.xlabel('Variables',fontweight='bold')\n", 219 | " plt.ylabel('Variable Influence',fontweight='bold')\n", 220 | " plt.title('Variable Influence on Principle Component 1',fontweight='bold')\n", 221 | " \n", 222 | " \n", 223 | "def fit_kmeans(df_std, n_comps, n_clusters, drt= 'PCA'):\n", 224 | " if drt == 'PCA':\n", 225 | " reducer, pca_str = fit_pca(df_std, num_comps= n_comps)\n", 226 | " if drt == 'UMAP':\n", 227 | " reducer, pca_str = fit_umap(df_std, num_comps= n_comps)\n", 228 | " components = reducer.fit_transform(df_std)\n", 229 | " kmeans = KMeans(init=\"random\", n_clusters= n_clusters, n_init= 10, max_iter= 30, random_state= 42)\n", 230 | " kmeans.fit(components)\n", 231 | " return kmeans, components\n", 232 | "\n", 233 | "\n", 234 | "def predict_classes(kmeans, components, lane_df, champ_ids):\n", 235 | " classes = kmeans.predict(components)\n", 236 | " champs = lane_df.index\n", 237 | " lane_classified = pd.DataFrame({'Class': classes, 'Champ ID': champs})\n", 238 | " lane_classified = pd.merge(champ_ids, lane_classified, on = 'Champ ID')\n", 239 | " return lane_classified\n", 240 | "\n", 241 | "\n", 242 | "def classify_lane(lane_df, n_comps, n_clusters, 
drt= 'PCA', vis= False):\n", 243 | " df_std, scaler = standardise_features(lane_df)\n", 244 | " df_std.drop(['Result'], axis= 1, inplace = True)\n", 245 | " if vis != False:\n", 246 | " draw_component_features(df_std, pca, 1)\n", 247 | " draw_component_str(df_std, pca_str)\n", 248 | " kmeans, components = fit_kmeans(df_std, n_comps, n_clusters, drt= drt)\n", 249 | " classisfied = predict_classes(kmeans, components, lane_df, champ_ids)\n", 250 | " champ_stats_df = pd.merge(lane_df, classisfied, on = 'Champ ID')\n", 251 | " class_stats_df = champ_stats_df.groupby('Class').mean()\n", 252 | " class_stats_df.drop('Champ ID', axis = 1, inplace = True)\n", 253 | " return champ_stats_df, class_stats_df, kmeans\n", 254 | "\n", 255 | "\n", 256 | "def sort_four_lists(list_to_sort, list2, list3, list4):\n", 257 | " sorted_list, list2, list3, list4 = map(list, zip(*sorted(zip(list_to_sort, list2, list3, list4), reverse = True)))\n", 258 | " return sorted_list, list2, list3, list4\n", 259 | "\n", 260 | "\n", 261 | "# Order the stats by the difference from the class to the lane\n", 262 | "def sort_stats_by_perc_diff(class_stats_df, avg_lane_stats, lane, class_num):\n", 263 | " cols = list(class_stats_df.columns)\n", 264 | " class_avg_stats, lane_avg_stats, perc_diffs = [], [], []\n", 265 | " for col in cols:\n", 266 | " class_avg_stat = class_stats_df[col][class_num]\n", 267 | " lane_avg_stat = avg_lane_stats[col][lane]\n", 268 | " class_avg_stats.append(class_avg_stat)\n", 269 | " lane_avg_stats.append(lane_avg_stat)\n", 270 | " perc_diffs.append(class_avg_stat / lane_avg_stat)\n", 271 | " perc_diffs, cols, class_avg_stats, lane_avg_stats = sort_four_lists(perc_diffs, \n", 272 | " class_stats_df.columns, \n", 273 | " class_avg_stats, \n", 274 | " lane_avg_stats)\n", 275 | " col_dict = {}\n", 276 | " for i in range(len(cols)):\n", 277 | " col_dict[cols[i]] = round(perc_diffs[i], 2), round(class_avg_stats[i], 4), round(lane_avg_stats[i], 4)\n", 278 | " return col_dict" 279 | ] 280 
| }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": 7, 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [ 287 | "# To change lane, switch adc_df to mid_df, and so on. \n", 288 | "lane_to_classify = adc_df\n", 289 | "# Number of components to compress to during feature reduction\n", 290 | "n_comps = 5\n", 291 | "# Number of classes created\n", 292 | "n_clusters = 4\n", 293 | "# Technique used (PCA or UMAP)\n", 294 | "technique = 'UMAP'\n", 295 | "# Turn off plot output; plots can be enabled only with PCA (UMAP can't provide variable strength metrics)\n", 296 | "vis = False\n", 297 | "mega_df, stats_df, kmeans = classify_lane(lane_to_classify, n_comps, n_clusters, technique, vis)" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 25, 303 | "metadata": {}, 304 | "outputs": [ 305 | { 306 | "data": { 307 | "text/html": [ 308 | "
\n", 309 | "\n", 322 | "\n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | "
ResultkillsdeathsassistslargestKillingSpreelargestMultiKillkillingSpreeslongestTimeSpentLivingtotalDamageDealtmagicDamageDealt...neutralMinionsKilledEnemyJunglefirstBloodKillfirstBloodAssistfirstTowerKillfirstTowerAssistfirst10_xpmfirst10_gpmsoloKillsearlyGanksdrakesKilled
Class
00.5347687.2310826.1419356.6019403.1966201.6573311.710934523.961056128455.95002361338.246133...2.4158890.0943170.00.0719850.055026362.639191282.4333670.6926570.0501330.197316
10.5106976.8705525.3942686.2122233.3244031.5739701.605745563.996556138491.5406969218.265972...2.7286530.1425910.00.1242850.056086366.214734284.7738310.6864900.0431620.263578
20.5047495.6961615.2531367.9170392.8301091.4062651.343797581.407458118958.01776758119.092096...1.5059580.0898130.00.0782800.065374365.780578281.9291860.5961900.0284110.140793
30.5022365.9393985.3038017.1057832.9701481.5035341.381198577.887791145684.13173621112.003760...2.3200290.1017990.00.1398720.055545366.575414283.2317490.4956830.0340150.195676
\n", 472 | "

4 rows × 39 columns

\n", 473 | "
" 474 | ], 475 | "text/plain": [ 476 | " Result kills deaths assists largestKillingSpree \\\n", 477 | "Class \n", 478 | "0 0.534768 7.231082 6.141935 6.601940 3.196620 \n", 479 | "1 0.510697 6.870552 5.394268 6.212223 3.324403 \n", 480 | "2 0.504749 5.696161 5.253136 7.917039 2.830109 \n", 481 | "3 0.502236 5.939398 5.303801 7.105783 2.970148 \n", 482 | "\n", 483 | " largestMultiKill killingSprees longestTimeSpentLiving \\\n", 484 | "Class \n", 485 | "0 1.657331 1.710934 523.961056 \n", 486 | "1 1.573970 1.605745 563.996556 \n", 487 | "2 1.406265 1.343797 581.407458 \n", 488 | "3 1.503534 1.381198 577.887791 \n", 489 | "\n", 490 | " totalDamageDealt magicDamageDealt ... \\\n", 491 | "Class ... \n", 492 | "0 128455.950023 61338.246133 ... \n", 493 | "1 138491.540696 9218.265972 ... \n", 494 | "2 118958.017767 58119.092096 ... \n", 495 | "3 145684.131736 21112.003760 ... \n", 496 | "\n", 497 | " neutralMinionsKilledEnemyJungle firstBloodKill firstBloodAssist \\\n", 498 | "Class \n", 499 | "0 2.415889 0.094317 0.0 \n", 500 | "1 2.728653 0.142591 0.0 \n", 501 | "2 1.505958 0.089813 0.0 \n", 502 | "3 2.320029 0.101799 0.0 \n", 503 | "\n", 504 | " firstTowerKill firstTowerAssist first10_xpm first10_gpm soloKills \\\n", 505 | "Class \n", 506 | "0 0.071985 0.055026 362.639191 282.433367 0.692657 \n", 507 | "1 0.124285 0.056086 366.214734 284.773831 0.686490 \n", 508 | "2 0.078280 0.065374 365.780578 281.929186 0.596190 \n", 509 | "3 0.139872 0.055545 366.575414 283.231749 0.495683 \n", 510 | "\n", 511 | " earlyGanks drakesKilled \n", 512 | "Class \n", 513 | "0 0.050133 0.197316 \n", 514 | "1 0.043162 0.263578 \n", 515 | "2 0.028411 0.140793 \n", 516 | "3 0.034015 0.195676 \n", 517 | "\n", 518 | "[4 rows x 39 columns]" 519 | ] 520 | }, 521 | "execution_count": 25, 522 | "metadata": {}, 523 | "output_type": "execute_result" 524 | } 525 | ], 526 | "source": [ 527 | "# The average statistics for each class\n", 528 | "stats_df" 529 | ] 530 | }, 531 | { 532 | "cell_type": 
"code", 533 | "execution_count": 9, 534 | "metadata": {}, 535 | "outputs": [ 536 | { 537 | "name": "stderr", 538 | "output_type": "stream", 539 | "text": [ 540 | "C:\\Users\\Jack Wills\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:96: RuntimeWarning: invalid value encountered in double_scalars\n" 541 | ] 542 | }, 543 | { 544 | "data": { 545 | "text/plain": [ 546 | "{'neutralMinionsKilledTeamJungle': (2.75, 9.286, 3.3726),\n", 547 | " 'largestCriticalStrike': (2.44, 201.8767, 82.8721),\n", 548 | " 'magicDamageDealtToChampions': (2.04, 11468.2898, 5616.3885),\n", 549 | " 'trueDamageDealtToChampions': (1.55, 1989.5481, 1282.797),\n", 550 | " 'damageDealtToObjectives': (1.36, 10418.1406, 7660.4017),\n", 551 | " 'firstBloodAssist': (nan, 0.0, 0.0),\n", 552 | " 'drakesKilled': (2.33, 0.1973, 0.0847),\n", 553 | " 'magicDamageDealt': (1.81, 61338.2461, 33948.6284),\n", 554 | " 'neutralMinionsKilled': (1.67, 15.2795, 9.1418),\n", 555 | " 'killingSprees': (1.47, 1.7109, 1.1612),\n", 556 | " 'kills': (1.42, 7.2311, 5.0916),\n", 557 | " 'totalDamageDealtToChampions': (1.29, 21258.0022, 16516.245),\n", 558 | " 'largestMultiKill': (1.26, 1.6573, 1.315),\n", 559 | " 'largestKillingSpree': (1.25, 3.1966, 2.5641),\n", 560 | " 'trueDamageDealt': (1.21, 9114.2616, 7538.5123),\n", 561 | " 'assists': (1.2, 6.6019, 5.5171),\n", 562 | " 'deaths': (1.18, 6.1419, 5.2029),\n", 563 | " 'goldEarned': (1.08, 11693.7486, 10832.7723),\n", 564 | " 'inhibitorKills': (1.06, 0.1863, 0.176),\n", 565 | " 'Result': (1.05, 0.5348, 0.5074),\n", 566 | " 'totalTimeCrowdControlDealt': (1.04, 231.8851, 223.9104),\n", 567 | " 'totalDamageDealt': (1.04, 128455.95, 124052.5547),\n", 568 | " 'totalMinionsKilled': (1.02, 173.0637, 170.2526),\n", 569 | " 'first10_gpm': (1.0, 282.4334, 282.6026),\n", 570 | " 'first10_xpm': (1.0, 362.6392, 363.6761),\n", 571 | " 'firstTowerAssist': (1.0, 0.055, 0.0552),\n", 572 | " 'neutralMinionsKilledEnemyJungle': (0.97, 2.4159, 2.4913),\n", 573 | " 'firstBloodKill': 
(0.96, 0.0943, 0.098),\n", 574 | " 'longestTimeSpentLiving': (0.9, 523.9611, 580.9603),\n", 575 | " 'turretKills': (0.88, 1.2216, 1.3915),\n", 576 | " 'damageDealtToTurrets': (0.82, 3185.1619, 3887.3995),\n", 577 | " 'totalHeal': (0.81, 4162.844, 5160.4172),\n", 578 | " 'totalDamageTaken': (0.8, 19029.3924, 23926.664),\n", 579 | " 'timeCCingOthers': (0.77, 15.9375, 20.7292),\n", 580 | " 'physicalDamageDealt': (0.7, 58002.5764, 82564.6542),\n", 581 | " 'firstTowerKill': (0.67, 0.072, 0.1068),\n", 582 | " 'earlyGanks': (0.6, 0.0501, 0.0841),\n", 583 | " 'damageSelfMitigated': (0.41, 9816.7249, 23879.6685),\n", 584 | " 'soloKills': (0.4, 0.6927, 1.741)}" 585 | ] 586 | }, 587 | "execution_count": 9, 588 | "metadata": {}, 589 | "output_type": "execute_result" 590 | } 591 | ], 592 | "source": [ 593 | "# A list of all the statistics and how the class is different, ordered from greatest from mean to smallest from mean\n", 594 | "sort_stats_by_perc_diff(stats_df, avg_lane_stats, 'top', 0)" 595 | ] 596 | }, 597 | { 598 | "cell_type": "code", 599 | "execution_count": 10, 600 | "metadata": {}, 601 | "outputs": [ 602 | { 603 | "data": { 604 | "text/plain": [ 605 | "['Twitch', 'Karthus', 'Vayne', 'Cassiopeia', 'KogMaw']" 606 | ] 607 | }, 608 | "execution_count": 10, 609 | "metadata": {}, 610 | "output_type": "execute_result" 611 | } 612 | ], 613 | "source": [ 614 | "# To see the Champs in the Class\n", 615 | "mega_df[mega_df['Class']== 0]['Champion'].to_list()" 616 | ] 617 | }, 618 | { 619 | "cell_type": "code", 620 | "execution_count": null, 621 | "metadata": {}, 622 | "outputs": [], 623 | "source": [] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": null, 628 | "metadata": {}, 629 | "outputs": [], 630 | "source": [] 631 | } 632 | ], 633 | "metadata": { 634 | "kernelspec": { 635 | "display_name": "Python 3", 636 | "language": "python", 637 | "name": "python3" 638 | }, 639 | "language_info": { 640 | "codemirror_mode": { 641 | "name": "ipython", 642 | 
"version": 3 643 | }, 644 | "file_extension": ".py", 645 | "mimetype": "text/x-python", 646 | "name": "python", 647 | "nbconvert_exporter": "python", 648 | "pygments_lexer": "ipython3", 649 | "version": "3.7.5" 650 | } 651 | }, 652 | "nbformat": 4, 653 | "nbformat_minor": 2 654 | } 655 | -------------------------------------------------------------------------------- /Champion Classes - UMAP and KMeans/champ_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import requests\n", 10 | "import pandas as pd\n", 11 | "import numpy as np\n", 12 | "import datetime\n", 13 | "import time as t\n", 14 | "import seaborn as sns\n", 15 | "import matplotlib.pyplot as plt\n", 16 | "import matplotlib.ticker as mtick\n", 17 | "from sklearn.preprocessing import StandardScaler\n", 18 | "from sklearn.decomposition import PCA\n", 19 | "sns.set_style(\"whitegrid\", {'axes.grid' : False})\n", 20 | "import os\n", 21 | "import math\n", 22 | "import time as t" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 7, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# For any URL, return the JSON\n", 32 | "def return_json(URL, session):\n", 33 | " while True:\n", 34 | " response = session.get(URL)\n", 35 | " try:\n", 36 | " # Check for 404 error and quit if received\n", 37 | " if response.json()['status']['status_code'] == 404:\n", 38 | " return \"error - status code 404\"\n", 39 | " # Check for 429 (too many requests made), sleep if received\n", 40 | " elif response.json()['status']['status_code'] == 429:\n", 41 | " t.sleep(10)\n", 42 | " continue\n", 43 | " else:\n", 44 | " return \"error - unknown reason\"\n", 45 | " except:\n", 46 | " break\n", 47 | " return response.json()\n", 48 | "\n", 49 | "# Provide the match-id & region, receive the json of match timeline (1 minute interval 
of match data)\n", 50 | "def get_matchTimeline(matchId, region, key, session):\n", 51 | " URL = 'https://' + region + '.api.riotgames.com/lol/match/v4/timelines/by-match/' + str(\n", 52 | " matchId) + '/?api_key=' + key\n", 53 | " json = return_json(URL, session)\n", 54 | " return json\n", 55 | "\n", 56 | "\n", 57 | "# Provide the match-id & region, receive the match information (game length, participants etc..)\n", 58 | "def get_gameInfo(matchId, region, key, session):\n", 59 | " URL = 'https://' + region + '.api.riotgames.com/lol/match/v4/matches/' + str(matchId) + '/?api_key=' + key\n", 60 | " json = return_json(URL, session)\n", 61 | " return json\n", 62 | "\n", 63 | "# Decide how much data to gather in each elo\n", 64 | "def set_volume(tier):\n", 65 | " tier_list = {\n", 66 | " 'DIAMOND': 20,\n", 67 | " 'GOLD': 1,\n", 68 | " 'SILVER': 1\n", 69 | " }\n", 70 | " size = tier_list[tier]\n", 71 | " return size\n", 72 | "\n", 73 | "# Loop to get summoner IDs from given regions / tiers\n", 74 | "def get_summoners(fullRegionList, tierList, key, session):\n", 75 | " summonerIds, summonerRegions, summonerTier = [], [], []\n", 76 | " for y in fullRegionList:\n", 77 | " for z in range(len(tierList)):\n", 78 | " size = set_volume(tierList[z][0])\n", 79 | " for x in range(size):\n", 80 | " page = x + 1\n", 81 | " URL_ids = ('https://' + y + '.api.riotgames.com/lol/league-exp/v4/entries/RANKED_SOLO_5x5/' +\n", 82 | " tierList[z][0] + '/' + tierList[z][1] + '/?page=' + str(page) + '&api_key=' + key)\n", 83 | " json = return_json(URL_ids, session)\n", 84 | " for x in range(0, len(json)):\n", 85 | " summonerIds.append(json[x]['summonerId'])\n", 86 | " summonerRegions.append(y)\n", 87 | " summonerTier.append(tierList[z][0])\n", 88 | " return summonerIds, summonerRegions, summonerTier\n", 89 | "\n", 90 | "\n", 91 | "# Convert a list of names to IDs\n", 92 | "def name_to_id(selectedIds, selectedRegions, selectedTiers, key, session):\n", 93 | " accountIds, accountRegions, 
accountTiers = [], [], []\n", 94 | " for i in range(len(selectedIds)):\n", 95 | " URL = 'https://' + selectedRegions[i] + '.api.riotgames.com/lol/summoner/v4/summoners/' + selectedIds[\n", 96 | " i] + '/?api_key=' + key\n", 97 | " json = return_json(URL, session)\n", 98 | " account_id = json['accountId']\n", 99 | " accountIds.append(account_id)\n", 100 | " accountRegions.append(selectedRegions[i])\n", 101 | " accountTiers.append(selectedTiers[i])\n", 102 | " return accountIds, accountRegions, accountTiers\n", 103 | "\n", 104 | "\n", 105 | "# Python code to remove duplicate elements\n", 106 | "def remove_duplicates(list1, list2, list3):\n", 107 | " final_list1 = []\n", 108 | " final_list2 = []\n", 109 | " final_list3 = []\n", 110 | " for i in range(len(list1)):\n", 111 | " if list1[i] not in final_list1:\n", 112 | " final_list1.append(list1[i])\n", 113 | " final_list2.append(list2[i])\n", 114 | " final_list3.append(list3[i])\n", 115 | " return final_list1, final_list2, final_list3\n", 116 | "\n", 117 | "\n", 118 | "def time_conv(yyyy, mm, dd):\n", 119 | " sd = datetime.date(yyyy, mm, dd) - datetime.timedelta(7)\n", 120 | " time = t.mktime(sd.timetuple())\n", 121 | " time = str(int(time)) + \"000\"\n", 122 | " return time\n", 123 | " \n", 124 | "def get_matchIds(accountIds, accountRegions, accountTiers, key, session):\n", 125 | " matchIds, matchTiers, matchRegions = [], [], []\n", 126 | " start_time = time_conv(2020, 10, 19)\n", 127 | " for i in range(len(accountIds)):\n", 128 | " URL = 'https://' + accountRegions[i] + '.api.riotgames.com/lol/match/v4/matchlists/by-account/' + accountIds[\n", 129 | " i] + '/?beginTime=' + start_time + '&queue=420' + '&api_key=' + key\n", 130 | " try:\n", 131 | " match_json = return_json(URL, session) \n", 132 | " for match in match_json['matches'][:10]:\n", 133 | " matchIds.append(match['gameId'])\n", 134 | " matchRegions.append(accountRegions[i])\n", 135 | " matchTiers.append(accountTiers[i])\n", 136 | " except:\n", 137 | " pass\n", 
138 | " return matchIds, matchRegions, matchTiers\n", 139 | "\n", 140 | "def main(fullRegionList, tierList, key, session):\n", 141 | " summonerIds, summonerRegions, summonerTiers = get_summoners(fullRegionList, tierList, key, session)\n", 142 | " print(str(len(summonerIds)) + \" summoners found. Transforming to account IDs.\")\n", 143 | " accountIds, accountRegions, accountTiers = name_to_id(summonerIds, summonerRegions, summonerTiers, key, session)\n", 144 | " print(str(len(accountIds)) + \" account IDs successfully transformed. Getting match IDs.\")\n", 145 | " matchIds, matchRegions, matchTiers = get_matchIds(accountIds, accountRegions, accountTiers, key, session)\n", 146 | " print(str(len(matchIds)) + \" game IDs found, converting to data.\")\n", 147 | " return matchIds, matchRegions, matchTiers" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 8, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "# For all lanes, check whether the jungler is within a given range at a given time\n", 157 | "def current_lane(pos, dist):\n", 158 | " for lane in lane_locs:\n", 159 | " if lane_distance(pos, lane) < dist:\n", 160 | " return lane\n", 161 | " return False\n", 162 | " \n", 163 | "# Find the distance between jungler at a given time, and a given lane\n", 164 | "def lane_distance(pos, lane):\n", 165 | " loc_X = pos['x']\n", 166 | " loc_Y = pos['y']\n", 167 | " current_loc = [loc_X, loc_Y]\n", 168 | " shortest_dist = 99999\n", 169 | " for i in range(2):\n", 170 | " lane_loc = lane_locs[lane][i]\n", 171 | " dist = distance(current_loc, lane_loc)\n", 172 | " if dist < shortest_dist:\n", 173 | " shortest_dist = dist\n", 174 | " return shortest_dist\n", 175 | "\n", 176 | "# Calculate the distance between point 1 (x1, y1) and point 2 (x2, y2)\n", 177 | "def distance(p1, p2):\n", 178 | " return math.sqrt(((p1[0] - p2[0]) ** 2) + ((p1[1] - p2[1]) ** 2))\n", 179 | "\n", 180 | "# Create dictionary of lane co-ordinates, which can be used 
to check for ganks\n", 181 | "lane_locs = {\"top\": [[2250, 12750],\n", 182 | " [1500, 12000],\n", 183 | " [3500, 13500]],\n", 184 | " \"middle\": [[7500, 7500],\n", 185 | " [8500, 8500],\n", 186 | " [6500, 6750]],\n", 187 | " \"adc\": [[12750, 2250],\n", 188 | " [13500, 3500],\n", 189 | " [11500, 1500],\n", 190 | " [10500, 800],\n", 191 | " [14100, 4500]]}\n", 192 | "\n", 193 | "\n", 194 | "def determine_lane(lane_cs, jungle_cs, coord_2, coord_3):\n", 195 | " if jungle_cs > 5:\n", 196 | " lane = \"jungle\"\n", 197 | " elif lane_cs < 12:\n", 198 | " lane = \"support\"\n", 199 | " elif lane_cs >= 13:\n", 200 | " lane = position_lane_check(coord_2, coord_3, 1800)\n", 201 | " else:\n", 202 | " lane = False\n", 203 | " return lane\n", 204 | "\n", 205 | "\n", 206 | "def position_lane_check(coord_2, coord_3, dist):\n", 207 | " lane_check_one = current_lane(coord_2, dist)\n", 208 | " lane_check_two = current_lane(coord_3, dist)\n", 209 | "\n", 210 | " if lane_check_one == lane_check_two and lane_check_one != False:\n", 211 | " lane = lane_check_one\n", 212 | " elif lane_check_one == False and lane_check_two != False:\n", 213 | " lane = lane_check_two\n", 214 | " elif lane_check_one != False and lane_check_two == False:\n", 215 | " lane = lane_check_one\n", 216 | " else:\n", 217 | " lane = False\n", 218 | " return lane\n", 219 | "\n", 220 | "\n", 221 | "def create_id_lane_dict(match_timeline):\n", 222 | " id_lane_dict = {}\n", 223 | " for i in range(1, 11):\n", 224 | " part_id = match_timeline['frames'][0]['participantFrames'][str(i)]['participantId']\n", 225 | " lane_cs = match_timeline['frames'][4]['participantFrames'][str(i)]['minionsKilled']\n", 226 | " jungle_cs = match_timeline['frames'][4]['participantFrames'][str(i)]['jungleMinionsKilled']\n", 227 | " coord_2 = match_timeline['frames'][3]['participantFrames'][str(i)]['position']\n", 228 | " coord_3 = match_timeline['frames'][4]['participantFrames'][str(i)]['position']\n", 229 | " lane = determine_lane(lane_cs, 
jungle_cs, coord_2, coord_3)\n", 230 | " id_lane_dict[part_id] = lane\n", 231 | " return id_lane_dict\n", 232 | "\n", 233 | "\n", 234 | "def parse_event_info(match_timeline):\n", 235 | " killer, victim, assists, timestamp, pos = [], [], [], [], []\n", 236 | " all_kills = []\n", 237 | " drake_kills_list = []\n", 238 | " for i in range(len(match_timeline['frames'])):\n", 239 | " for event in match_timeline['frames'][i]['events']:\n", 240 | " if event['type'] == 'CHAMPION_KILL':\n", 241 | " kill_info = [event['killerId']]\n", 242 | " kill_info.append(event['victimId'])\n", 243 | " kill_info.append(event['assistingParticipantIds'])\n", 244 | " kill_info.append(event['timestamp'])\n", 245 | " kill_info.append(event['position'])\n", 246 | " all_kills.append(kill_info)\n", 247 | " if event['type'] == 'ELITE_MONSTER_KILL' and event['monsterType'] == 'DRAGON':\n", 248 | " drake_kills_list.append(event['killerId'])\n", 249 | " kill_df = pd.DataFrame(all_kills, columns = ['Killer', 'Victim', 'Assists', 'Timestamp','Position'])\n", 250 | " solo_kills_df = kill_df[kill_df['Assists'].map(lambda d: len(d)) == 0].groupby('Killer').count()\n", 251 | " \n", 252 | " early_kills_df = kill_df[kill_df['Timestamp'] < 60000 * 10]\n", 253 | " lanes = []\n", 254 | " for i in range(len(early_kills_df)):\n", 255 | " kill_lane = current_lane(early_kills_df.loc[i]['Position'], 1750)\n", 256 | " lanes.append(kill_lane)\n", 257 | " early_kills_df['Lanes'] = lanes\n", 258 | " \n", 259 | " return solo_kills_df, early_kills_df, drake_kills_list\n", 260 | "\n", 261 | "\n", 262 | "def get_solo_kills(solo_kills_df, part_id):\n", 263 | " if part_id in solo_kills_df.index:\n", 264 | " solo_kills = solo_kills_df.loc[part_id].Victim\n", 265 | " else:\n", 266 | " solo_kills = 0\n", 267 | " return solo_kills\n", 268 | "\n", 269 | "\n", 270 | "def get_early_ganks(early_kills_df, part_id, lane):\n", 271 | " early_ganks = early_kills_df[(early_kills_df['Lanes'] != False) &\n", 272 | " (early_kills_df['Killer'] 
== part_id) &\n", 273 | " (early_kills_df['Lanes'] != lane)].count()['Victim']\n", 274 | " return early_ganks\n", 275 | " \n", 276 | "\n", 277 | "def blue_win_check(game_info):\n", 278 | " if game_info['teams'][0]['win'] == 'Win':\n", 279 | " return 1\n", 280 | " else:\n", 281 | " return 0\n", 282 | " \n", 283 | "def get_team_data(features, matchId, matchRegion, matchTier, key, session): \n", 284 | " game_info = get_gameInfo(matchId, matchRegion, key, session)\n", 285 | " match_timeline = get_matchTimeline(matchId, matchRegion, key, session)\n", 286 | " solo_kills_df, early_kills_df, drake_kills_list = parse_event_info(match_timeline)\n", 287 | " id_lane_dict = create_id_lane_dict(match_timeline)\n", 288 | " game_duration = game_info['gameDuration']\n", 289 | " all_team_stats = []\n", 290 | " blue_result = blue_win_check(game_info)\n", 291 | " red_result = 1 - blue_result\n", 292 | " for player in game_info['participants']:\n", 293 | " part_id = player['participantId']\n", 294 | " lane = id_lane_dict[part_id]\n", 295 | " player_stats = []\n", 296 | " for stat in features:\n", 297 | " player_stats.append(player['stats'][stat])\n", 298 | " first10_xp = game_info['participants'][0]['timeline']['xpPerMinDeltas']['0-10']\n", 299 | " first10_gold = game_info['participants'][0]['timeline']['goldPerMinDeltas']['0-10']\n", 300 | " solo_kills = get_solo_kills(solo_kills_df, part_id) \n", 301 | " early_ganks = get_early_ganks(early_kills_df, part_id, lane)\n", 302 | " drake_kills = drake_kills_list.count(part_id)\n", 303 | " player_stats.append(first10_xp)\n", 304 | " player_stats.append(first10_gold)\n", 305 | " player_stats.append(solo_kills)\n", 306 | " player_stats.append(early_ganks)\n", 307 | " player_stats.append(drake_kills)\n", 308 | " if part_id <= 5:\n", 309 | " player_stats.insert(0, blue_result)\n", 310 | " else:\n", 311 | " player_stats.insert(0, red_result)\n", 312 | " player_stats.insert(0, matchTier)\n", 313 | " player_stats.insert(0, lane)\n", 314 | " 
player_stats.insert(0, player['championId'])\n", 315 | "\n", 316 | " all_team_stats.append(player_stats)\n", 317 | " return all_team_stats\n", 318 | "\n", 319 | "\n", 320 | "def gameIds_to_data(matchIds, matchRegions, matchTiers, key, session):\n", 321 | "\n", 322 | " features = ['kills', 'deaths', 'assists', 'largestKillingSpree', 'largestMultiKill', 'killingSprees', \n", 323 | " 'longestTimeSpentLiving', 'totalDamageDealt', 'magicDamageDealt', 'physicalDamageDealt', \n", 324 | " 'trueDamageDealt', 'largestCriticalStrike', 'totalDamageDealtToChampions', \n", 325 | " 'magicDamageDealtToChampions', 'trueDamageDealtToChampions', 'totalHeal', 'damageSelfMitigated',\n", 326 | " 'damageDealtToObjectives', 'damageDealtToTurrets', 'timeCCingOthers', 'totalDamageTaken', \n", 327 | " 'goldEarned', 'turretKills', 'inhibitorKills', 'totalMinionsKilled', 'totalTimeCrowdControlDealt',\n", 328 | " 'neutralMinionsKilled', 'neutralMinionsKilledTeamJungle', 'neutralMinionsKilledEnemyJungle',\n", 329 | " 'firstBloodKill', 'firstBloodAssist', 'firstTowerKill', 'firstTowerAssist']\n", 330 | " \n", 331 | " all_stats = []\n", 332 | " for i in range(len(matchIds)):\n", 333 | " if i % 1000 == 0:\n", 334 | " print(i)\n", 335 | " try:\n", 336 | " all_stats.extend(get_team_data(features, matchIds[i], matchRegions[i], matchTiers[i], key, session))\n", 337 | " except:\n", 338 | " pass\n", 339 | " col = ['Champ ID', 'Lane', 'Tier', 'Result'] + features + ['first10_xpm', 'first10_gpm', 'soloKills', 'earlyGanks', 'drakesKilled']\n", 340 | " df = pd.DataFrame(all_stats, columns = col) \n", 341 | " bool_cols = ['firstBloodKill', 'firstBloodAssist', 'firstTowerKill', 'firstTowerAssist']\n", 342 | " for col in bool_cols:\n", 343 | " df[col] = df[col].astype(int)\n", 344 | " return df\n", 345 | "\n", 346 | "def full(fullRegionList, tierList, key, session):\n", 347 | " start = t.time()\n", 348 | " matchIds, matchRegions, matchTiers = main(fullRegionList, tierList, key, session)\n", 349 | " end = 
t.time()\n", 350 | " run_time = end - start\n", 351 | " print(\"Match ID finding run time:\", run_time)\n", 352 | " start = t.time()\n", 353 | " df = gameIds_to_data(matchIds, matchRegions, matchTiers, key, session)\n", 354 | " end = t.time()\n", 355 | " run_time = end - start\n", 356 | " print(\"ID to DF run time:\", run_time)\n", 357 | " return df" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": 9, 363 | "metadata": {}, 364 | "outputs": [ 365 | { 366 | "name": "stdout", 367 | "output_type": "stream", 368 | "text": [ 369 | "12300 summoners found. Transforming to account IDs.\n", 370 | "12300 account IDs successfully transformed. Getting match IDs.\n", 371 | "68415 game IDs found, converting to data.\n", 372 | "Match ID finding run time: 4306.0135061740875\n", 373 | "0\n" 374 | ] 375 | }, 376 | { 377 | "name": "stderr", 378 | "output_type": "stream", 379 | "text": [ 380 | "C:\\Users\\Jack Wills\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:102: SettingWithCopyWarning: \n", 381 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 382 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 383 | "\n", 384 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 385 | "C:\\Users\\Jack Wills\\Anaconda3\\lib\\site-packages\\pandas\\core\\ops\\__init__.py:1115: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n", 386 | " result = method(y)\n" 387 | ] 388 | }, 389 | { 390 | "name": "stdout", 391 | "output_type": "stream", 392 | "text": [ 393 | "1000\n", 394 | "2000\n", 395 | "3000\n", 396 | "4000\n", 397 | "5000\n", 398 | "6000\n", 399 | "7000\n", 400 | "8000\n", 401 | "9000\n", 402 | "10000\n", 403 | "11000\n", 404 | "12000\n", 405 | "13000\n", 406 | "14000\n", 407 | "15000\n", 408 | "16000\n", 409 | "17000\n", 410 | "18000\n", 411 | "19000\n", 
412 | "20000\n", 413 | "21000\n", 414 | "22000\n", 415 | "23000\n", 416 | "24000\n", 417 | "25000\n", 418 | "26000\n", 419 | "27000\n", 420 | "28000\n", 421 | "29000\n", 422 | "30000\n", 423 | "31000\n", 424 | "32000\n", 425 | "33000\n", 426 | "34000\n", 427 | "35000\n", 428 | "36000\n", 429 | "37000\n", 430 | "38000\n", 431 | "39000\n", 432 | "40000\n", 433 | "41000\n", 434 | "42000\n", 435 | "43000\n", 436 | "44000\n", 437 | "45000\n", 438 | "46000\n", 439 | "47000\n", 440 | "48000\n", 441 | "49000\n", 442 | "50000\n", 443 | "51000\n", 444 | "52000\n", 445 | "53000\n", 446 | "54000\n", 447 | "55000\n", 448 | "56000\n", 449 | "57000\n", 450 | "58000\n", 451 | "59000\n", 452 | "60000\n", 453 | "61000\n", 454 | "62000\n", 455 | "63000\n", 456 | "64000\n", 457 | "65000\n", 458 | "66000\n", 459 | "67000\n", 460 | "68000\n", 461 | "ID to DF run time: 52127.05663204193\n" 462 | ] 463 | } 464 | ], 465 | "source": [ 466 | "fullRegionList = ['euw1']\n", 467 | "tierList = [['DIAMOND', 'III'], ['DIAMOND', 'II'], ['DIAMOND', 'I']]\n", 468 | "key = 'YOUR_KEY (see Riot API site for details)'\n", 469 | "session = requests.Session()\n", 470 | "\n", 471 | "df = full(fullRegionList, tierList, key, session)" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": 12, 477 | "metadata": {}, 478 | "outputs": [], 479 | "source": [ 480 | "df.to_csv('final_data2.csv')\n", 481 | "\n", 482 | "# 12,300 summoners\n", 483 | "# 65 seconds per batch, 60 batches = 1 hour 5 minutes\n", 484 | "# ~~ 92,852 games\n", 485 | "# 92,852 games * 0.685 secs per game = ~~63,418 seconds\n", 486 | "# 17 hours 36 minutes + 1 hour 5 minutes = ~~18 hours 41 minutes\n", 487 | "# 21:08 start time\n", 488 | "# 15:49 end time\n", 489 | "\n", 490 | "# gameIds were found by 22:17\n", 491 | "# 22:17 to 16:27 = 18 hours 10 minutes = 65,400 seconds\n", 492 | "# 65,400 seconds / 95,000 batch = 0.688 secs per game\n", 493 | "\n", 494 | "# 18,000 batches * 0.688 = 12,384 / 60 / 60 = 3 hours 26
minutes\n", 495 | "# 09:03 + 3 hours 26 minutes = 12:44 end time\n", 496 | "# 12,000 batches * 0.688 = 8,256 / 60 / 60 = 2 hours 17 minutes\n", 497 | "# 10:32 + 2 hours 17 minutes = 12:49 end time\n", 498 | "\n", 499 | "# 11,000 batches * 0.688 = 2 hours 6 minutes\n", 500 | "# 13:06 + 2 hours 6 minutes = 15:12 ????\n", 501 | "\n", 502 | "# 4,500 batches * 0.688 = 51 minutes\n", 503 | "\n" 504 | ] 505 | } 506 | ], 507 | "metadata": { 508 | "kernelspec": { 509 | "display_name": "Python 3", 510 | "language": "python", 511 | "name": "python3" 512 | }, 513 | "language_info": { 514 | "codemirror_mode": { 515 | "name": "ipython", 516 | "version": 3 517 | }, 518 | "file_extension": ".py", 519 | "mimetype": "text/x-python", 520 | "name": "python", 521 | "nbconvert_exporter": "python", 522 | "pygments_lexer": "ipython3", 523 | "version": "3.7.5" 524 | } 525 | }, 526 | "nbformat": 4, 527 | "nbformat_minor": 2 528 | } 529 | -------------------------------------------------------------------------------- /Classification/vertebral_column_data.txt: -------------------------------------------------------------------------------- 1 | 63.03 22.55 39.61 40.48 98.67 -0.25 AB 2 | 39.06 10.06 25.02 29 114.41 4.56 AB 3 | 68.83 22.22 50.09 46.61 105.99 -3.53 AB 4 | 69.3 24.65 44.31 44.64 101.87 11.21 AB 5 | 49.71 9.65 28.32 40.06 108.17 7.92 AB 6 | 40.25 13.92 25.12 26.33 130.33 2.23 AB 7 | 53.43 15.86 37.17 37.57 120.57 5.99 AB 8 | 45.37 10.76 29.04 34.61 117.27 -10.68 AB 9 | 43.79 13.53 42.69 30.26 125 13.29 AB 10 | 36.69 5.01 41.95 31.68 84.24 0.66 AB 11 | 49.71 13.04 31.33 36.67 108.65 -7.83 AB 12 | 31.23 17.72 15.5 13.52 120.06 0.5 AB 13 | 48.92 19.96 40.26 28.95 119.32 8.03 AB 14 | 53.57 20.46 33.1 33.11 110.97 7.04 AB 15 | 57.3 24.19 47 33.11 116.81 5.77 AB 16 | 44.32 12.54 36.1 31.78 124.12 5.42 AB 17 | 63.83 20.36 54.55 43.47 112.31 -0.62 AB 18 | 31.28 3.14 32.56 28.13 129.01 3.62 AB 19 | 38.7 13.44 31 25.25 123.16 1.43 AB 20 | 41.73 12.25 30.12 29.48 116.59 -1.24 AB 21 | 43.92 
14.18 37.83 29.74 134.46 6.45 AB 22 | 54.92 21.06 42.2 33.86 125.21 2.43 AB 23 | 63.07 24.41 54 38.66 106.42 15.78 AB 24 | 45.54 13.07 30.3 32.47 117.98 -4.99 AB 25 | 36.13 22.76 29 13.37 115.58 -3.24 AB 26 | 54.12 26.65 35.33 27.47 121.45 1.57 AB 27 | 26.15 10.76 14 15.39 125.2 -10.09 AB 28 | 43.58 16.51 47 27.07 109.27 8.99 AB 29 | 44.55 21.93 26.79 22.62 111.07 2.65 AB 30 | 66.88 24.89 49.28 41.99 113.48 -2.01 AB 31 | 50.82 15.4 42.53 35.42 112.19 10.87 AB 32 | 46.39 11.08 32.14 35.31 98.77 6.39 AB 33 | 44.94 17.44 27.78 27.49 117.98 5.57 AB 34 | 38.66 12.99 40 25.68 124.91 2.7 AB 35 | 59.6 32 46.56 27.6 119.33 1.47 AB 36 | 31.48 7.83 24.28 23.66 113.83 4.39 AB 37 | 32.09 6.99 36 25.1 132.26 6.41 AB 38 | 35.7 19.44 20.7 16.26 137.54 -0.26 AB 39 | 55.84 28.85 47.69 27 123.31 2.81 AB 40 | 52.42 19.01 35.87 33.41 116.56 1.69 AB 41 | 35.49 11.7 15.59 23.79 106.94 -3.46 AB 42 | 46.44 8.4 29.04 38.05 115.48 2.05 AB 43 | 53.85 19.23 32.78 34.62 121.67 5.33 AB 44 | 66.29 26.33 47.5 39.96 121.22 -0.8 AB 45 | 56.03 16.3 62.28 39.73 114.02 -2.33 AB 46 | 50.91 23.02 47 27.9 117.42 -2.53 AB 47 | 48.33 22.23 36.18 26.1 117.38 6.48 AB 48 | 41.35 16.58 30.71 24.78 113.27 -4.5 AB 49 | 40.56 17.98 34 22.58 121.05 -1.54 AB 50 | 41.77 17.9 20.03 23.87 118.36 2.06 AB 51 | 55.29 20.44 34 34.85 115.88 3.56 AB 52 | 74.43 41.56 27.7 32.88 107.95 5 AB 53 | 50.21 29.76 36.1 20.45 128.29 5.74 AB 54 | 30.15 11.92 34 18.23 112.68 11.46 AB 55 | 41.17 17.32 33.47 23.85 116.38 -9.57 AB 56 | 47.66 13.28 36.68 34.38 98.25 6.27 AB 57 | 43.35 7.47 28.07 35.88 112.78 5.75 AB 58 | 46.86 15.35 38 31.5 116.25 1.66 AB 59 | 43.2 19.66 35 23.54 124.85 -2.92 AB 60 | 48.11 14.93 35.56 33.18 124.06 7.95 AB 61 | 74.38 32.05 78.77 42.32 143.56 56.13 AB 62 | 89.68 32.7 83.13 56.98 129.96 92.03 AB 63 | 44.53 9.43 52 35.1 134.71 29.11 AB 64 | 77.69 21.38 64.43 56.31 114.82 26.93 AB 65 | 76.15 21.94 82.96 54.21 123.93 10.43 AB 66 | 83.93 41.29 62 42.65 115.01 26.59 AB 67 | 78.49 22.18 60 56.31 118.53 27.38 AB 68 | 
75.65 19.34 64.15 56.31 95.9 69.55 AB 69 | 72.08 18.95 51 53.13 114.21 1.01 AB 70 | 58.6 -0.26 51.5 58.86 102.04 28.06 AB 71 | 72.56 17.39 52 55.18 119.19 32.11 AB 72 | 86.9 32.93 47.79 53.97 135.08 101.72 AB 73 | 84.97 33.02 60.86 51.95 125.66 74.33 AB 74 | 55.51 20.1 44 35.42 122.65 34.55 AB 75 | 72.22 23.08 91 49.14 137.74 56.8 AB 76 | 70.22 39.82 68.12 30.4 148.53 145.38 AB 77 | 86.75 36.04 69.22 50.71 139.41 110.86 AB 78 | 58.78 7.67 53.34 51.12 98.5 51.58 AB 79 | 67.41 17.44 60.14 49.97 111.12 33.16 AB 80 | 47.74 12.09 39 35.66 117.51 21.68 AB 81 | 77.11 30.47 69.48 46.64 112.15 70.76 AB 82 | 74.01 21.12 57.38 52.88 120.21 74.56 AB 83 | 88.62 29.09 47.56 59.53 121.76 51.81 AB 84 | 81.1 24.79 77.89 56.31 151.84 65.21 AB 85 | 76.33 42.4 57.2 33.93 124.27 50.13 AB 86 | 45.44 9.91 45 35.54 163.07 20.32 AB 87 | 59.79 17.88 59.21 41.91 119.32 22.12 AB 88 | 44.91 10.22 44.63 34.7 130.08 37.36 AB 89 | 56.61 16.8 42 39.81 127.29 24.02 AB 90 | 71.19 23.9 43.7 47.29 119.86 27.28 AB 91 | 81.66 28.75 58.23 52.91 114.77 30.61 AB 92 | 70.95 20.16 62.86 50.79 116.18 32.52 AB 93 | 85.35 15.84 71.67 69.51 124.42 76.02 AB 94 | 58.1 14.84 79.65 43.26 113.59 50.24 AB 95 | 94.17 15.38 67.71 78.79 114.89 53.26 AB 96 | 57.52 33.65 50.91 23.88 140.98 148.75 AB 97 | 96.66 19.46 90.21 77.2 120.67 64.08 AB 98 | 74.72 19.76 82.74 54.96 109.36 33.31 AB 99 | 77.66 22.43 93.89 55.22 123.06 61.21 AB 100 | 58.52 13.92 41.47 44.6 115.51 30.39 AB 101 | 84.59 30.36 65.48 54.22 108.01 25.12 AB 102 | 79.94 18.77 63.31 61.16 114.79 38.54 AB 103 | 70.4 13.47 61.2 56.93 102.34 25.54 AB 104 | 49.78 6.47 53 43.32 110.86 25.34 AB 105 | 77.41 29.4 63.23 48.01 118.45 93.56 AB 106 | 65.01 27.6 50.95 37.41 116.58 7.02 AB 107 | 65.01 9.84 57.74 55.18 94.74 49.7 AB 108 | 78.43 33.43 76.28 45 138.55 77.16 AB 109 | 63.17 6.33 63 56.84 110.64 42.61 AB 110 | 68.61 15.08 63.01 53.53 123.43 39.5 AB 111 | 63.9 13.71 62.12 50.19 114.13 41.42 AB 112 | 85 29.61 83.35 55.39 126.91 71.32 AB 113 | 42.02 -6.55 67.9 48.58 
111.59 27.34 AB 114 | 69.76 19.28 48.5 50.48 96.49 51.17 AB 115 | 80.99 36.84 86.96 44.14 141.09 85.87 AB 116 | 129.83 8.4 48.38 121.43 107.69 418.54 AB 117 | 70.48 12.49 62.42 57.99 114.19 56.9 AB 118 | 86.04 38.75 47.87 47.29 122.09 61.99 AB 119 | 65.54 24.16 45.78 41.38 136.44 16.38 AB 120 | 60.75 15.75 43.2 45 113.05 31.69 AB 121 | 54.74 12.1 41 42.65 117.64 40.38 AB 122 | 83.88 23.08 87.14 60.8 124.65 80.56 AB 123 | 80.07 48.07 52.4 32.01 110.71 67.73 AB 124 | 65.67 10.54 56.49 55.12 109.16 53.93 AB 125 | 74.72 14.32 32.5 60.4 107.18 37.02 AB 126 | 48.06 5.69 57.06 42.37 95.44 32.84 AB 127 | 70.68 21.7 59.18 48.97 103.01 27.81 AB 128 | 80.43 17 66.54 63.43 116.44 57.78 AB 129 | 90.51 28.27 69.81 62.24 100.89 58.82 AB 130 | 77.24 16.74 49.78 60.5 110.69 39.79 AB 131 | 50.07 9.12 32.17 40.95 99.71 26.77 AB 132 | 69.78 13.78 58 56 118.93 17.91 AB 133 | 69.63 21.12 52.77 48.5 116.8 54.82 AB 134 | 81.75 20.12 70.56 61.63 119.43 55.51 AB 135 | 52.2 17.21 78.09 34.99 136.97 54.94 AB 136 | 77.12 30.35 77.48 46.77 110.61 82.09 AB 137 | 88.02 39.84 81.77 48.18 116.6 56.77 AB 138 | 83.4 34.31 78.42 49.09 110.47 49.67 AB 139 | 72.05 24.7 79.87 47.35 107.17 56.43 AB 140 | 85.1 21.07 91.73 64.03 109.06 38.03 AB 141 | 69.56 15.4 74.44 54.16 105.07 29.7 AB 142 | 89.5 48.9 72 40.6 134.63 118.35 AB 143 | 85.29 18.28 100.74 67.01 110.66 58.88 AB 144 | 60.63 20.6 64.54 40.03 117.23 104.86 AB 145 | 60.04 14.31 58.04 45.73 105.13 30.41 AB 146 | 85.64 42.69 78.75 42.95 105.14 42.89 AB 147 | 85.58 30.46 78.23 55.12 114.87 68.38 AB 148 | 55.08 -3.76 56 58.84 109.92 31.77 AB 149 | 65.76 9.83 50.82 55.92 104.39 39.31 AB 150 | 79.25 23.94 40.8 55.3 98.62 36.71 AB 151 | 81.11 20.69 60.69 60.42 94.02 40.51 AB 152 | 48.03 3.97 58.34 44.06 125.35 35 AB 153 | 63.4 14.12 48.14 49.29 111.92 31.78 AB 154 | 57.29 15.15 64 42.14 116.74 30.34 AB 155 | 41.19 5.79 42.87 35.39 103.35 27.66 AB 156 | 66.8 14.55 72.08 52.25 82.46 41.69 AB 157 | 79.48 26.73 70.65 52.74 118.59 61.7 AB 158 | 44.22 1.51 
46.11 42.71 108.63 42.81 AB 159 | 57.04 0.35 49.2 56.69 103.05 52.17 AB 160 | 64.27 12.51 68.7 51.77 95.25 39.41 AB 161 | 92.03 35.39 77.42 56.63 115.72 58.06 AB 162 | 67.26 7.19 51.7 60.07 97.8 42.14 AB 163 | 118.14 38.45 50.84 79.7 81.02 74.04 AB 164 | 115.92 37.52 76.8 78.41 104.7 81.2 AB 165 | 53.94 9.31 43.1 44.64 124.4 25.08 AB 166 | 83.7 20.27 77.11 63.43 125.48 69.28 AB 167 | 56.99 6.87 57.01 50.12 109.98 36.81 AB 168 | 72.34 16.42 59.87 55.92 70.08 12.07 AB 169 | 95.38 24.82 95.16 70.56 89.31 57.66 AB 170 | 44.25 1.1 38 43.15 98.27 23.91 AB 171 | 64.81 15.17 58.84 49.64 111.68 21.41 AB 172 | 78.4 14.04 79.69 64.36 104.73 12.39 AB 173 | 56.67 13.46 43.77 43.21 93.69 21.11 AB 174 | 50.83 9.06 56.3 41.76 79 23.04 AB 175 | 61.41 25.38 39.1 36.03 103.4 21.84 AB 176 | 56.56 8.96 52.58 47.6 98.78 50.7 AB 177 | 67.03 13.28 66.15 53.75 100.72 33.99 AB 178 | 80.82 19.24 61.64 61.58 89.47 44.17 AB 179 | 80.65 26.34 60.9 54.31 120.1 52.47 AB 180 | 68.72 49.43 68.06 19.29 125.02 54.69 AB 181 | 37.9 4.48 24.71 33.42 157.85 33.61 AB 182 | 64.62 15.23 67.63 49.4 90.3 31.33 AB 183 | 75.44 31.54 89.6 43.9 106.83 54.97 AB 184 | 71 37.52 84.54 33.49 125.16 67.77 AB 185 | 81.06 20.8 91.78 60.26 125.43 38.18 AB 186 | 91.47 24.51 84.62 66.96 117.31 52.62 AB 187 | 81.08 21.26 78.77 59.83 90.07 49.16 AB 188 | 60.42 5.27 59.81 55.15 109.03 30.27 AB 189 | 85.68 38.65 82.68 47.03 120.84 61.96 AB 190 | 82.41 29.28 77.05 53.13 117.04 62.77 AB 191 | 43.72 9.81 52 33.91 88.43 40.88 AB 192 | 86.47 40.3 61.14 46.17 97.4 55.75 AB 193 | 74.47 33.28 66.94 41.19 146.47 124.98 AB 194 | 70.25 10.34 76.37 59.91 119.24 32.67 AB 195 | 72.64 18.93 68 53.71 116.96 25.38 AB 196 | 71.24 5.27 86 65.97 110.7 38.26 AB 197 | 63.77 12.76 65.36 51.01 89.82 56 AB 198 | 58.83 37.58 125.74 21.25 135.63 117.31 AB 199 | 74.85 13.91 62.69 60.95 115.21 33.17 AB 200 | 75.3 16.67 61.3 58.63 118.88 31.58 AB 201 | 63.36 20.02 67.5 43.34 131 37.56 AB 202 | 67.51 33.28 96.28 34.24 145.6 88.3 AB 203 | 76.31 41.93 93.28 
34.38 132.27 101.22 AB 204 | 73.64 9.71 63 63.92 98.73 26.98 AB 205 | 56.54 14.38 44.99 42.16 101.72 25.77 AB 206 | 80.11 33.94 85.1 46.17 125.59 100.29 AB 207 | 95.48 46.55 59 48.93 96.68 77.28 AB 208 | 74.09 18.82 76.03 55.27 128.41 73.39 AB 209 | 87.68 20.37 93.82 67.31 120.94 76.73 AB 210 | 48.26 16.42 36.33 31.84 94.88 28.34 AB 211 | 38.51 16.96 35.11 21.54 127.63 7.99 NO 212 | 54.92 18.97 51.6 35.95 125.85 2 NO 213 | 44.36 8.95 46.9 35.42 129.22 4.99 NO 214 | 48.32 17.45 48 30.87 128.98 -0.91 NO 215 | 45.7 10.66 42.58 35.04 130.18 -3.39 NO 216 | 30.74 13.35 35.9 17.39 142.41 -2.01 NO 217 | 50.91 6.68 30.9 44.24 118.15 -1.06 NO 218 | 38.13 6.56 50.45 31.57 132.11 6.34 NO 219 | 51.62 15.97 35 35.66 129.39 1.01 NO 220 | 64.31 26.33 50.96 37.98 106.18 3.12 NO 221 | 44.49 21.79 31.47 22.7 113.78 -0.28 NO 222 | 54.95 5.87 53 49.09 126.97 -0.63 NO 223 | 56.1 13.11 62.64 43 116.23 31.17 NO 224 | 69.4 18.9 75.97 50.5 103.58 -0.44 NO 225 | 89.83 22.64 90.56 67.2 100.5 3.04 NO 226 | 59.73 7.72 55.34 52 125.17 3.24 NO 227 | 63.96 16.06 63.12 47.9 142.36 6.3 NO 228 | 61.54 19.68 52.89 41.86 118.69 4.82 NO 229 | 38.05 8.3 26.24 29.74 123.8 3.89 NO 230 | 43.44 10.1 36.03 33.34 137.44 -3.11 NO 231 | 65.61 23.14 62.58 42.47 124.13 -4.08 NO 232 | 53.91 12.94 39 40.97 118.19 5.07 NO 233 | 43.12 13.82 40.35 29.3 128.52 0.97 NO 234 | 40.68 9.15 31.02 31.53 139.12 -2.51 NO 235 | 37.73 9.39 42 28.35 135.74 13.68 NO 236 | 63.93 19.97 40.18 43.96 113.07 -11.06 NO 237 | 61.82 13.6 64 48.22 121.78 1.3 NO 238 | 62.14 13.96 58 48.18 133.28 4.96 NO 239 | 69 13.29 55.57 55.71 126.61 10.83 NO 240 | 56.45 19.44 43.58 37 139.19 -1.86 NO 241 | 41.65 8.84 36.03 32.81 116.56 -6.05 NO 242 | 51.53 13.52 35 38.01 126.72 13.93 NO 243 | 39.09 5.54 26.93 33.55 131.58 -0.76 NO 244 | 34.65 7.51 43 27.14 123.99 -4.08 NO 245 | 63.03 27.34 51.61 35.69 114.51 7.44 NO 246 | 47.81 10.69 54 37.12 125.39 -0.4 NO 247 | 46.64 15.85 40 30.78 119.38 9.06 NO 248 | 49.83 16.74 28 33.09 121.44 1.91 NO 249 | 47.32 8.57 
35.56 38.75 120.58 1.63 NO 250 | 50.75 20.24 37 30.52 122.34 2.29 NO 251 | 36.16 -0.81 33.63 36.97 135.94 -2.09 NO 252 | 40.75 1.84 50 38.91 139.25 0.67 NO 253 | 42.92 -5.85 58 48.76 121.61 -3.36 NO 254 | 63.79 21.35 66 42.45 119.55 12.38 NO 255 | 72.96 19.58 61.01 53.38 111.23 0.81 NO 256 | 67.54 14.66 58 52.88 123.63 25.97 NO 257 | 54.75 9.75 48 45 123.04 8.24 NO 258 | 50.16 -2.97 42 53.13 131.8 -8.29 NO 259 | 40.35 10.19 37.97 30.15 128.01 0.46 NO 260 | 63.62 16.93 49.35 46.68 117.09 -0.36 NO 261 | 54.14 11.94 43 42.21 122.21 0.15 NO 262 | 74.98 14.92 53.73 60.05 105.65 1.59 NO 263 | 42.52 14.38 25.32 28.14 128.91 0.76 NO 264 | 33.79 3.68 25.5 30.11 128.33 -1.78 NO 265 | 54.5 6.82 47 47.68 111.79 -4.41 NO 266 | 48.17 9.59 39.71 38.58 135.62 5.36 NO 267 | 46.37 10.22 42.7 36.16 121.25 -0.54 NO 268 | 52.86 9.41 46.99 43.45 123.09 1.86 NO 269 | 57.15 16.49 42.84 40.66 113.81 5.02 NO 270 | 37.14 16.48 24 20.66 125.01 7.37 NO 271 | 51.31 8.88 57 42.44 126.47 -2.14 NO 272 | 42.52 16.54 42 25.97 120.63 7.88 NO 273 | 39.36 7.01 37 32.35 117.82 1.9 NO 274 | 35.88 1.11 43.46 34.77 126.92 -1.63 NO 275 | 43.19 9.98 28.94 33.22 123.47 1.74 NO 276 | 67.29 16.72 51 50.57 137.59 4.96 NO 277 | 51.33 13.63 33.26 37.69 131.31 1.79 NO 278 | 65.76 13.21 44 52.55 129.39 -1.98 NO 279 | 40.41 -1.33 30.98 41.74 119.34 -6.17 NO 280 | 48.8 18.02 52 30.78 139.15 10.44 NO 281 | 50.09 13.43 34.46 36.66 119.13 3.09 NO 282 | 64.26 14.5 43.9 49.76 115.39 5.95 NO 283 | 53.68 13.45 41.58 40.24 113.91 2.74 NO 284 | 49 13.11 51.87 35.88 126.4 0.54 NO 285 | 59.17 14.56 43.2 44.6 121.04 2.83 NO 286 | 67.8 16.55 43.26 51.25 119.69 4.87 NO 287 | 61.73 17.11 46.9 44.62 120.92 3.09 NO 288 | 33.04 -0.32 19.07 33.37 120.39 9.35 NO 289 | 74.57 15.72 58.62 58.84 105.42 0.6 NO 290 | 44.43 14.17 32.24 30.26 131.72 -3.6 NO 291 | 36.42 13.88 20.24 22.54 126.08 0.18 NO 292 | 51.08 14.21 35.95 36.87 115.8 6.91 NO 293 | 34.76 2.63 29.5 32.12 127.14 -0.46 NO 294 | 48.9 5.59 55.5 43.32 137.11 19.85 NO 295 | 46.24 
10.06 37 36.17 128.06 -5.1 NO 296 | 46.43 6.62 48.1 39.81 130.35 2.45 NO 297 | 39.66 16.21 36.67 23.45 131.92 -4.97 NO 298 | 45.58 18.76 33.77 26.82 116.8 3.13 NO 299 | 66.51 20.9 31.73 45.61 128.9 1.52 NO 300 | 82.91 29.89 58.25 53.01 110.71 6.08 NO 301 | 50.68 6.46 35 44.22 116.59 -0.21 NO 302 | 89.01 26.08 69.02 62.94 111.48 6.06 NO 303 | 54.6 21.49 29.36 33.11 118.34 -1.47 NO 304 | 34.38 2.06 32.39 32.32 128.3 -3.37 NO 305 | 45.08 12.31 44.58 32.77 147.89 -8.94 NO 306 | 47.9 13.62 36 34.29 117.45 -4.25 NO 307 | 53.94 20.72 29.22 33.22 114.37 -0.42 NO 308 | 61.45 22.69 46.17 38.75 125.67 -2.71 NO 309 | 45.25 8.69 41.58 36.56 118.55 0.21 NO 310 | 33.84 5.07 36.64 28.77 123.95 -0.2 NO 311 | -------------------------------------------------------------------------------- /Classification/vertebral_column_metadata.txt: -------------------------------------------------------------------------------- 1 | Information from: 2 | https://archive.ics.uci.edu/ml/datasets/Vertebral+Column 3 | 4 | Data set containing values for six biomechanical features used to classify 5 | orthopaedic patients into 2 classes (normal or abnormal). 6 | 7 | Data Set Information: 8 | 9 | Biomedical data set built by Dr. Henrique da Mota during a medical residence 10 | period in the Group of Applied Research in Orthopaedics (GARO) of the Centre 11 | Medico-Chirurgical de Redaptation des Massues, Lyon, France. 12 | 13 | 14 | Attribute Information: 15 | 16 | Each patient is represented in the data set by six biomechanical attributes 17 | derived from the shape and orientation of the pelvis and lumbar spine (in this 18 | order): pelvic incidence, pelvic tilt, lumbar lordosis angle, sacral slope, 19 | pelvic radius and grade of spondylolisthesis. The following convention is used 20 | for the class labels: Normal (NO) and Abnormal (AB). 
# --------------------------------------------------------------------------------
# /Data Analysis and API - LoL Tilt/desktop.ini:
# --------------------------------------------------------------------------------
# [LocalizedFileNames]
# phase3.csv=@phase3,0
#
# --------------------------------------------------------------------------------
# /Data Creation - Jungle Stats per Champion/champ_parse.py:
# --------------------------------------------------------------------------------
# Aggregates the per-match jungler rows produced by match_data.py into one row
# per (champion, tier).
import pandas as pd
from collections import Counter
from math import floor
import numpy as np
import numpy.ma as ma
from itertools import zip_longest


def create_groupby(df):
    """Return the per-(champ_id, tier) mean of every numeric match column.

    The summoner-spell and rune-style id columns are dropped afterwards because
    the mean of an id is meaningless; they are re-added as "most popular"
    values by ``popular_all``.
    """
    # numeric_only=True: the frame also carries list-valued columns (items,
    # perks, gpm, ...) which cannot be averaged and make .mean() raise.
    champ_stats = df.groupby(['champ_id', 'tier']).mean(numeric_only=True)
    alt_mean_cols = ['spell1', 'spell2', 'perk_main', 'perk_second']
    champ_stats.drop(alt_mean_cols, axis=1, inplace=True)
    return champ_stats


def count_groupby(df, champ_stats):
    """Merge a ``count`` column (rows per champ_id/tier) into ``champ_stats``."""
    count = df.groupby(['champ_id', 'tier']).count()
    count.reset_index(inplace=True)
    # Any always-populated column works as the row counter; game_duration is used.
    count = count[['champ_id', 'tier', 'game_duration']]
    count.rename(columns={'game_duration': 'count'}, inplace=True)
    champ_stats = pd.merge(champ_stats, count, on=['champ_id', 'tier'])
    return champ_stats


def database_champs(df):
    """Return the distinct champion ids present in ``df``."""
    champs = df['champ_id'].unique()
    return champs


def average(a_list):
    """Average several per-minute series position by position.

    ``a_list`` is an iterable of sequences of possibly different lengths.
    Position ``i`` of the result is the mean of the ``i``-th element of every
    sequence long enough to have one (shorter games simply do not contribute).
    """
    averages = []
    for column in zip_longest(*a_list):
        # zip_longest pads the shorter series with None; ignore the padding.
        present = [value for value in column if value is not None]
        averages.append(np.mean(present))
    return averages


def average_list(df, champ_stats, champs, column):
    """Add ``column`` to ``champ_stats`` holding each champion's averaged series."""
    stat_dict = {}
    for champ in champs:
        champ_lists = df[df['champ_id'] == champ][column]
        stat_dict[champ] = list(average(champ_lists))
    # Map from champ_stats' own champ_id column (as add_popular and
    # wr_by_time_all do). Mapping df['champ_id'] would align on the raw match
    # row index and attach the wrong champion's series to each row.
    champ_stats[column] = champ_stats['champ_id'].map(stat_dict)
    return champ_stats


def average_all_lists(df, champ_stats, champs):
    """Attach the averaged gold / xp / camp timelines to ``champ_stats``."""
    columns = ['gpm', 'xpm', 'cpm']
    for column in columns:
        champ_stats = average_list(df, champ_stats, champs, column)
    return champ_stats


def popular(df, champ_id, perk_name):
    """Return ``(most common value, its count)`` of ``perk_name`` for a champion.

    Values may be scalars or lists (items, perks, runes). Lists are counted
    through a tuple key because lists are unhashable, and the value is returned
    in its original form.

    Raises:
        ValueError: if the champion has no non-null value in ``perk_name``.
    """
    values = df.loc[df['champ_id'] == champ_id, perk_name].dropna()
    counts = Counter()
    originals = {}
    for value in values:
        key = tuple(value) if isinstance(value, list) else value
        counts[key] += 1
        originals.setdefault(key, value)
    if not counts:
        raise ValueError('no values of ' + str(perk_name) + ' for champion ' + str(champ_id))
    top_key, pop_value = counts.most_common(1)[0]
    return originals[top_key], pop_value


def add_popular(df, champ_stats, champs, perk_name):
    """Add ``perk_name`` and ``perk_name + '_count'`` columns to ``champ_stats``."""
    id_dict = {}
    value_dict = {}
    for champ in champs:
        pop_id, pop_value = popular(df, champ, perk_name)
        id_dict[champ] = pop_id
        value_dict[champ] = pop_value
    champ_stats[perk_name] = champ_stats['champ_id'].map(id_dict)
    champ_stats[perk_name + '_count'] = champ_stats['champ_id'].map(value_dict)
    return champ_stats


def popular_all(df, champ_stats, champs):
    """Attach the most popular spells, rune styles, perks, runes and items."""
    columns = ['spell1', 'spell2', 'perk_main', 'perk_second', 'perks', 'runes', 'items']
    for column in columns:
        champ_stats = add_popular(df, champ_stats, champs, column)
    return champ_stats


def create_interval_times(min_time, max_time):
    """Build game-length buckets, in seconds, as ``(min_times, max_times)``.

    Buckets are 5 minutes wide between ``min_time`` and ``max_time`` (minutes).
    The first bucket starts at 0 and a final catch-all bucket runs from the
    last boundary up to 120 minutes. Both lists have the same length.
    """
    max_times = [minute * 60 for minute in range(min_time, max_time + 1, 5)]
    if not max_times:
        # Empty range: everything falls in the single catch-all bucket.
        return [0], [120 * 60]
    min_times = [0] + max_times[:-1]
    # The catch-all bucket needs a lower bound as well as an upper bound,
    # otherwise games longer than max_time are never evaluated.
    min_times.append(max_times[-1])
    max_times.append(120 * 60)
    return min_times, max_times


def wr_by_time(df, champ_id, interval_times):
    """Return the champion's win rate in each ``(min, max]`` duration bucket."""
    champ_games = df[df['champ_id'] == champ_id]
    wrs = []
    for lower, upper in zip(interval_times[0], interval_times[1]):
        in_bucket = champ_games[(champ_games['game_duration'] <= upper) &
                                (champ_games['game_duration'] > lower)]
        # Mean of the 0/1 result column; NaN when the bucket is empty.
        wrs.append(in_bucket['result'].mean())
    return wrs


def wr_by_time_all(df, champ_stats, champs):
    """Add a ``wr_time`` column: win rate per game-length bucket (20-40 min)."""
    wr_dict = {}
    interval_times = create_interval_times(20, 40)
    for champ in champs:
        wrs = wr_by_time(df, champ, interval_times)
        wr_dict[champ] = wrs
    champ_stats['wr_time'] = champ_stats['champ_id'].map(wr_dict)
    return champ_stats


def sec_to_min(num):
    """Format a duration in seconds as the string ``'<minutes>.<seconds>'``.

    Seconds are not zero padded (125 -> ``'2.5'``), as before.
    """
    # Integer arithmetic: the former ((num / 60) % 1) * 60 lost a second to
    # float truncation for many inputs (e.g. 61 became '1.0').
    mins, secs = divmod(int(num), 60)
    time = str(mins) + '.' + str(secs)
    return time


def pm_calc(val, secs):
    """Convert a total ``val`` accumulated over ``secs`` seconds to a per-minute rate."""
    per_sec = val / secs
    per_min = per_sec * 60
    return per_min


def extra_features(df):
    """Add the champion name and per-minute rate columns to the aggregated frame.

    Reads ``champ_ids.csv`` from the working directory.
    NOTE(review): assumes that file has ``champ_id`` and ``champion`` columns - confirm.
    """
    df['game_minutes'] = df['game_duration'].apply(sec_to_min)
    champ_ids = pd.read_csv('champ_ids.csv')
    df = pd.merge(df, champ_ids[['champ_id', 'champion']], on='champ_id')
    df['kpm'] = pm_calc(df['kills'], df['game_duration'])
    df['depm'] = pm_calc(df['deaths'], df['game_duration'])
    df['apm'] = pm_calc(df['assists'], df['game_duration'])
    df['dapm'] = pm_calc(df['damage_dealt'], df['game_duration'])
    df['vpm'] = pm_calc(df['vision_score'], df['game_duration'])
    df['gold_pm'] = pm_calc(df['gold_earnt'], df['game_duration'])
    df['enemy_jpm'] = pm_calc(df['enemy_jungle'], df['game_duration'])
    df['friendly_jpm'] = pm_calc(df['friendly_jungle'], df['game_duration'])
    df['total_jungle'] = df['enemy_jungle'] + df['friendly_jungle'] + df['scuttles_killed']
    df['total_jpm'] = pm_calc(df['total_jungle'], df['game_duration'])
    return df


def final_tweaks(df):
    """Move ``champion``/``count`` to the front, sort by count, drop rows with count <= 100."""
    # change the order columns appear
    cols = list(df)
    cols.insert(1, cols.pop(cols.index('champion')))
    cols.insert(2, cols.pop(cols.index('count')))
    df = df.loc[:, cols]
    # order by count, remove low counts
    df = df.sort_values('count', ascending=False)
    df = df[df['count'] > 100]
    return df


def champ_parse(df):
    """Run the full aggregation pipeline on the match-level frame ``df``."""
    champs = database_champs(df)
    champ_stats = create_groupby(df)
    champ_stats = count_groupby(df, champ_stats)
    champ_stats = average_all_lists(df, champ_stats, champs)
    champ_stats = wr_by_time_all(df, champ_stats, champs)
    champ_stats = popular_all(df, champ_stats, champs)
    champ_stats = extra_features(champ_stats)
    champ_stats = final_tweaks(champ_stats)
    return champ_stats


def read_files():
    """Load the pickled match-level frame written by the data-collection step."""
    # NOTE(review): pd.read_pickle unpickles arbitrary objects; only load files
    # produced by this pipeline.
    data = pd.read_pickle('match_data.pkl')
    return data


# Guarded so that `import champ_parse` (as main.py does) no longer reads and
# writes files as an import side effect.
if __name__ == '__main__':
    df = read_files()
    champ_stats = champ_parse(df)
    champ_stats.to_pickle('champ_stats2.pkl')
    champ_stats.to_csv('champ_stats2.csv')


# --------------------------------------------------------------------------------
# /Data Creation - Jungle Stats per Champion/get_match_ids.py:
# --------------------------------------------------------------------------------
import random
import time as t
import datetime
from multiprocessing import Pool


def remove_duplicates(list1, list2, list3):
    """Drop entries whose ``list1`` value was already seen, keeping the lists parallel.

    The first occurrence wins. ``list1`` values must be hashable (match ids).
    """
    seen = set()
    final_list1, final_list2, final_list3 = [], [], []
    for first, second, third in zip(list1, list2, list3):
        if first in seen:
            continue
        seen.add(first)
        final_list1.append(first)
        final_list2.append(second)
        final_list3.append(third)
    return final_list1, final_list2, final_list3


def return_json(URL, session):
    """GET ``URL`` and return the decoded JSON, retrying while the API answers 429.

    Any other payload (including a 404 error envelope) is returned as-is.
    """
    while True:
        payload = session.get(URL).json()
        status = payload.get('status') if isinstance(payload, dict) else None
        code = status.get('status_code') if isinstance(status, dict) else None
        if code == 429:
            # Rate limited: back off, then ask again.
            t.sleep(10)
            continue
        return payload


def set_volume(tier):
    """Return how many ladder pages to request for ``tier``."""
    pages_by_tier = {'CHALLENGER': 2, 'GRANDMASTER': 4, 'MASTER': 4}
    return pages_by_tier.get(tier, 10)


def get_summoners(fullRegionList, tierList, key, session):
    """Collect summoner ids from the ranked solo ladder.

    ``tierList`` holds ``[tier, division]`` pairs. Returns three parallel
    lists: summoner ids, their region and their tier.
    """
    summonerIds, summonerRegions, summonerTier = [], [], []
    for region in fullRegionList:
        for tier_entry in tierList:
            tier, division = tier_entry[0], tier_entry[1]
            for page in range(1, set_volume(tier) + 1):
                URL_ids = ('https://' + region + '.api.riotgames.com/lol/league-exp/v4/entries/RANKED_SOLO_5x5/' +
                           tier + '/' + division + '/?page=' + str(page) + '&api_key=' + key)
                entries = return_json(URL_ids, session)
                if not isinstance(entries, list):
                    # Error envelope instead of a page of entries: nothing to read.
                    continue
                for entry in entries:
                    summonerIds.append(entry['summonerId'])
                    summonerRegions.append(region)
                    summonerTier.append(tier)
    return summonerIds, summonerRegions, summonerTier


def name_to_id(selectedIds, selectedRegions, selectedTiers, key, session):
    """Resolve summoner ids to account ids, keeping region and tier alongside.

    Summoners whose lookup does not return an ``accountId`` are skipped.
    """
    accountIds, accountRegions, accountTiers = [], [], []
    for summoner_id, region, tier in zip(selectedIds, selectedRegions, selectedTiers):
        URL = 'https://' + region + '.api.riotgames.com/lol/summoner/v4/summoners/' + summoner_id + '/?api_key=' + key
        payload = return_json(URL, session)
        if not isinstance(payload, dict) or 'accountId' not in payload:
            continue
        accountIds.append(payload['accountId'])
        accountRegions.append(region)
        accountTiers.append(tier)
    return accountIds, accountRegions, accountTiers


def find_time_interval(yyyy, mm, dd):
    """Return ``(startTime, endTime)`` for the week ending on the given date.

    Both are epoch-millisecond strings built from local midnight.
    """
    # Set week period prior to given date
    ed = datetime.date(yyyy, mm, dd)
    endTime = t.mktime(ed.timetuple())
    endTime = str(int(endTime)) + "000"
    sd = datetime.date(yyyy, mm, dd) - datetime.timedelta(7)
    startTime = t.mktime(sd.timetuple())
    startTime = str(int(startTime)) + "000"
    return startTime, endTime


def id_to_match(accountIds, accountRegions, accountTiers, yyyy, mm, dd, key, session):
    """Collect queue-420 game ids from each account's match list.

    Only the first 20 matches of the week ending on the given date are
    inspected. Accounts whose lookup fails are skipped (best effort).
    """
    startTime, endTime = find_time_interval(yyyy, mm, dd)
    gameIds, regions, tiers = [], [], []
    for account_id, region, tier in zip(accountIds, accountRegions, accountTiers):
        URL = ('https://' + region + '.api.riotgames.com/lol/match/v4/matchlists/by-account/' + account_id +
               '/?endTime=' + endTime + '&beginTime=' + startTime + '&api_key=' + key)
        try:
            matches = return_json(URL, session)['matches']
            for match in matches[:20]:
                if match['queue'] == 420:
                    gameIds.append(match['gameId'])
                    regions.append(region)
                    tiers.append(tier)
        except Exception:
            # Deliberate best effort: one bad account must not stop the crawl.
            continue
    return gameIds, regions, tiers


def create_args(regions, tiers, yyyy, mm, dd, key, session):
    """Build one ``single_run`` argument list per (region, tier) combination."""
    other_vars = [yyyy, mm, dd, key, session]
    all_args = []
    for region in regions:
        for tier in tiers:
            # single_run expects lists of regions / tiers, hence the wrapping.
            all_args.append([[region], [tier]] + other_vars)
    return all_args


def single_run(regions, tiers, yyyy, mm, dd, key, session):
    """Run ladder -> account -> match-list collection for the given regions/tiers."""
    summonerIds, summonerRegions, summonerTiers = get_summoners(regions, tiers, key, session)
    accountIds, accountRegions, accountTiers = name_to_id(summonerIds, summonerRegions, summonerTiers, key, session)
    gameIds, regions, tiers = id_to_match(accountIds, accountRegions, accountTiers, yyyy, mm, dd, key, session)
    return gameIds, regions, tiers


def get_match_ids(regions, tiers, yyyy, mm, dd, key, session):
    """Collect de-duplicated match ids for every (region, tier) using 8 worker processes."""
    args = create_args(regions, tiers, yyyy, mm, dd, key, session)
    with Pool(processes=8) as p:
        results = p.starmap(single_run, args)
    gameIds, found_regions, found_tiers = [], [], []
    for run_ids, run_regions, run_tiers in results:
        gameIds.extend(run_ids)
        found_regions.extend(run_regions)
        found_tiers.extend(run_tiers)
    return remove_duplicates(gameIds, found_regions, found_tiers)


# --------------------------------------------------------------------------------
# /Data Creation - Jungle Stats per Champion/main.py:
# --------------------------------------------------------------------------------
# Imports
import time as t


def main(regions, tiers, yyyy, mm, dd, key, session):
    """Collect match ids, build match data, aggregate per champion and save the results.

    Writes ``match_data2.pkl``, ``champ_stats2.pkl`` and ``champ_stats2.csv``
    to the working directory and returns ``(data, champ_stats)``.
    """
    # Sibling pipeline modules are imported here so this module can be imported
    # without pulling them in until the pipeline actually runs.
    import get_match_ids
    import match_data
    import champ_parse

    start = t.time()
    print("Starting process")
    print("Getting match IDs..")
    match_ids, regions, tiers = get_match_ids.get_match_ids(regions, tiers, yyyy, mm, dd, key, session)
    print(str(len(match_ids)), " match IDs found")
    print("Building match data")
    data = match_data.all_runs(match_ids, regions, tiers, key, session)
    print("Match data built, saving progress")
    data.to_pickle('match_data2.pkl')
    print("Aggregating champion level information")
    champ_stats = champ_parse.champ_parse(data)
    print("Data aggregated, saving progress")
    champ_stats.to_pickle('champ_stats2.pkl')
    champ_stats.to_csv('champ_stats2.csv')
    finish = t.time()
    run_time = finish - start
    print("Process complete, total run time: " + str(int(run_time / 60)) + " minutes")
    return data, champ_stats


if __name__ == '__main__':
    import requests

    regions = ['EUW1',
               #'KR',
               #'NA1'
               ]
    tiers = [
        ['CHALLENGER', 'I'],
        ['GRANDMASTER', 'I'],
        #['MASTER', 'I'],
        #['DIAMOND', 'II'],
        #['PLATINUM', 'II'],
        #['GOLD', 'II'],
        #['SILVER', 'II'],
        #['BRONZE', 'II'],
        #['IRON', 'II']
    ]
    yyyy = 2020
    mm = 4
    dd = 19
    key = 'SECURE-KEY'
    session = requests.Session()
    main(regions, tiers, yyyy, mm, dd, key, session)


# --------------------------------------------------------------------------------
# /Data Creation - Jungle Stats per Champion/match_data.py:
# --------------------------------------------------------------------------------
# Imports
import time as t  # needed by return_json's rate-limit back-off
import pandas as pd
from multiprocessing import Pool


# For any URL, return the JSON
def return_json(URL, session):
    """GET ``URL`` and return the decoded JSON payload.

    Returns the string ``"error - status code 404"`` for a 404 envelope and
    ``"error - unknown reason"`` for any other error envelope. A 429 (too many
    requests) is retried after a 10 second sleep.
    """
    while True:
        payload = session.get(URL).json()
        try:
            status_code = payload['status']['status_code']
        except (KeyError, TypeError):
            # No error envelope: this is the data that was asked for.
            return payload
        # Check for 404 error and quit if received
        if status_code == 404:
            return "error - status code 404"
        # Check for 429 (too many requests made), sleep if received
        if status_code == 429:
            t.sleep(10)
            continue
        return "error - unknown reason"


# Provide the match-id & region, receive the json of match timeline (1 minute interval of match data)
def get_matchTimeline(matchId, region, key, session):
    """Fetch the match timeline JSON for ``matchId``."""
    URL = 'https://' + region + '.api.riotgames.com/lol/match/v4/timelines/by-match/' + str(
        matchId) + '/?api_key=' + key
    return return_json(URL, session)


# Provide the match-id & region, receive the match information (game length, participants etc..)
def get_gameInfo(matchId, region, key, session):
    """Fetch the match summary JSON for ``matchId``."""
    URL = 'https://' + region + '.api.riotgames.com/lol/match/v4/matches/' + str(matchId) + '/?api_key=' + key
    return return_json(URL, session)


# Provide the match data json and return the jungler + jungler participant number
def find_jungler(json, side):
    """Identify a side's jungler as the player with the most jungle CS in frame 4.

    Args:
        json: match timeline JSON.
        side: ``'Blue'`` (frame keys 1-5) or ``'Red'`` (frame keys 6-10).

    Returns:
        ``(frame_key, participant_id)``, or an explanatory string when nobody
        on that side has any jungle CS.

    Raises:
        ValueError: if ``side`` is neither ``'Blue'`` nor ``'Red'``.
    """
    if side == 'Blue':
        candidates = range(1, 6)
    elif side == 'Red':
        candidates = range(6, 11)
    else:
        raise ValueError("side must be 'Blue' or 'Red'")
    # The most jungle camps cleared so far, starting at none
    mostCamps = 0
    jungler = parti_jungler = None
    frame_four = json['frames'][4]['participantFrames']
    for i in candidates:
        jungle_cs = frame_four[str(i)]['jungleMinionsKilled']
        # If it's the most so far, make them the jungler and set the new record
        if jungle_cs > mostCamps:
            jungler = i
            # Find their participant ID
            parti_jungler = json['frames'][1]['participantFrames'][str(i)]['participantId']
            mostCamps = jungle_cs
    # If no one has Jungle CS, there's an error
    if mostCamps == 0:
        return "No " + side.lower() + " side jungler detected"
    return jungler, parti_jungler


def find_champion(json, parti_jungler):
    """Return the champion id played by participant ``parti_jungler`` (1-based)."""
    return json['participants'][parti_jungler - 1]['championId']


def check_team_pos(parti_jungler):
    """Return the index into ``teams``: 0 for participants 1-5, otherwise 1."""
    return 0 if parti_jungler <= 5 else 1


def check_result(result):
    """Normalise ``'Win'`` / ``'True'`` / ``True`` to 1 and anything else to 0."""
    if result == 'Win' or result == 'True' or result == True:
        return 1
    return 0


def create_var_list(variable, length, player_data):
    """Return ``[player_data[variable + '0'], ..., player_data[variable + str(length - 1)]]``."""
    return [player_data[variable + str(i)] for i in range(length)]


def check_parti(variable, player_data):
    """Return 1 if the player got the ``<variable>Kill`` or ``<variable>Assist``, else 0.

    Missing keys count as no participation.
    """
    try:
        took_part = check_result(player_data[variable + 'Kill']) or check_result(player_data[variable + 'Assist'])
    except KeyError:
        return 0
    return 1 if took_part else 0


def total_kd(game_json, parti_jungler):
    """Return ``(total_kills, total_deaths)`` of the jungler's five-player team."""
    team = game_json['participants'][0:5] if parti_jungler <= 5 else game_json['participants'][5:10]
    total_kills = sum(player['stats']['kills'] for player in team)
    total_deaths = sum(player['stats']['deaths'] for player in team)
    return total_kills, total_deaths


def game_info(game_json, parti_jungler):
    """Return the team-level values for the jungler's team.

    Order: duration, result, first baron/dragon/herald flags, then total
    barons, dragons and rift heralds.
    """
    team_pos = check_team_pos(parti_jungler)
    game_duration = game_json['gameDuration']
    team_data = game_json['teams'][team_pos]
    result = check_result(team_data['win'])
    first_baron = check_result(team_data['firstBaron'])
    first_dragon = check_result(team_data['firstDragon'])
    first_herald = check_result(team_data['firstRiftHerald'])
    total_barons = team_data['baronKills']
    total_dragons = team_data['dragonKills']
    total_rifts = team_data['riftHeraldKills']
    info = [game_duration, result, first_baron, first_dragon, first_herald, total_barons, total_dragons, total_rifts]
    return info


def player_info(game_json, parti_jungler, neutrals):
    """Return the jungler's end-of-game values in ``create_df`` column order.

    ``neutrals`` is the team's ``[barons, dragons, rift heralds]``; it is used
    to estimate how much of ``neutralMinionsKilled`` came from scuttle crabs.
    """
    spell_data = game_json['participants'][parti_jungler - 1]
    spell1 = spell_data['spell1Id']
    spell2 = spell_data['spell2Id']
    player_data = spell_data['stats']
    items = create_var_list('item', 7, player_data)
    kills = player_data['kills']
    deaths = player_data['deaths']
    assists = player_data['assists']
    total_kills, total_deaths = total_kd(game_json, parti_jungler)
    # Guard on the team totals: a jungler with 0 kills but several assists
    # still participated in kills.
    kp = (kills + assists) / total_kills if total_kills > 0 else 0
    death_perc = deaths / total_deaths if total_deaths > 0 else 0
    damage_dealt = player_data['totalDamageDealtToChampions']
    vision_score = player_data['visionScore']
    damage_taken = player_data['totalDamageTaken']
    gold_earnt = player_data['goldEarned']
    lane_minions = player_data['totalMinionsKilled']
    enemy_jungle = player_data['neutralMinionsKilledEnemyJungle']
    friendly_jungle = player_data['neutralMinionsKilledTeamJungle']
    # Heuristic kept from the original: 80% of team objectives credited to the
    # jungler, 4 units each - TODO confirm the weighting.
    neutral_jungle = (sum(neutrals) * 0.8) * 4
    scuttles_killed = round((player_data['neutralMinionsKilled'] - enemy_jungle - friendly_jungle - neutral_jungle) / 4)
    vision_wards = player_data['visionWardsBoughtInGame']
    fb_parti = check_parti('firstBlood', player_data)
    ft_parti = check_parti('firstTower', player_data)
    perks = create_var_list('perk', 6, player_data)
    perk_main = player_data['perkPrimaryStyle']
    perk_second = player_data['perkSubStyle']
    runes = create_var_list('statPerk', 3, player_data)
    info = [spell1, spell2, items, kills, deaths, assists, kp, death_perc, damage_dealt, vision_score, damage_taken,
            gold_earnt, lane_minions, enemy_jungle, friendly_jungle, scuttles_killed, vision_wards,
            fb_parti, ft_parti, perks, perk_main, perk_second, runes]
    return info


def find_frame(timeline_json, parti_jungler):
    """Return the participantFrames key (a string) whose participantId is the jungler.

    Returns None when no frame matches.
    """
    player_frames = timeline_json['frames'][0]['participantFrames']
    for i in range(1, len(player_frames) + 1):
        if player_frames[str(i)]['participantId'] == parti_jungler:
            return str(i)
    return None


def kill_check(event, parti_jungler):
    """Return ``(True, timestamp)`` if the jungler got the kill or an assist, else ``(False, 0)``."""
    if event['killerId'] == parti_jungler or parti_jungler in event['assistingParticipantIds']:
        return True, event['timestamp']
    return False, 0


def timeline_info(timeline_json, parti_jungler):
    """Extract the jungler's per-frame series and events from the timeline.

    Returns:
        ``(gpm, xpm, cpm, kp_time, total_early_kills, player_events)``: total
        gold, xp and jungle CS per frame, timestamps of kills the jungler took
        part in, how many of those came before 600000 ms, and the raw
        non-kill events belonging to the jungler.
    """
    frame = find_frame(timeline_json, parti_jungler)
    gpm, xpm, cpm = [], [], []
    player_events = []
    kp_time = []
    for frame_data in timeline_json['frames']:
        stats = frame_data['participantFrames'][frame]
        gpm.append(stats['totalGold'])
        xpm.append(stats['xp'])
        cpm.append(stats['jungleMinionsKilled'])
        for event in frame_data['events']:
            try:
                if event['type'] == 'CHAMPION_KILL':
                    jung_inv, kill_time = kill_check(event, parti_jungler)
                    if jung_inv:
                        kp_time.append(kill_time)
                elif event['participantId'] == parti_jungler:
                    player_events.append(event)
            except (KeyError, TypeError):
                # Events without the expected keys do not belong to a player.
                continue
    total_early_kills = sum(i < 600000 for i in kp_time)
    return gpm, xpm, cpm, kp_time, total_early_kills, player_events


def parse_player_events(player_events):
    """Split the jungler's events into purchases, undos and skill level-ups.

    Returns:
        ``(item_purchase, item_undo, skill_up)``. The first two are dicts
        keyed by a running counter with ``[item id, timestamp]`` values; the
        last is the list of skill slots in level-up order.
    """
    item_purchase = {}
    item_undo = {}
    skill_up = []
    for data in player_events:
        event_type = data['type']
        if event_type == 'ITEM_PURCHASED':
            item_purchase[len(item_purchase)] = [data['itemId'], data['timestamp']]
        elif event_type == 'ITEM_UNDO':
            item_undo[len(item_undo)] = [data['beforeId'], data['timestamp']]
        elif event_type == 'SKILL_LEVEL_UP':
            skill_up.append(data['skillSlot'])
    return item_purchase, item_undo, skill_up


def remove_undos(item_purchase, item_undo):
    """Return purchased item ids in order, minus the purchases that were undone.

    An undo cancels the first not-yet-cancelled purchase of the same item made
    within 30 seconds of it. ``item_purchase`` is modified in place.
    """
    to_delete = []
    for undone_item, undo_time in item_undo.values():
        for key, (bought_item, buy_time) in item_purchase.items():
            if bought_item == undone_item and abs(undo_time - buy_time) < 30000 and key not in to_delete:
                to_delete.append(key)
                break
    for key in to_delete:
        del item_purchase[key]
    return [purchase[0] for purchase in item_purchase.values()]


def individual_data(gameinfo_json, timeline_json, jungler):
    """Build the full row for one jungler: team info, player info, timeline info."""
    info = game_info(gameinfo_json, jungler)
    # info[-3:] is [total_barons, total_dragons, total_rifts]
    info.extend(player_info(gameinfo_json, jungler, info[-3:]))
    gpm, xpm, cpm, kp_time, total_early_kills, player_events = timeline_info(timeline_json, jungler)
    item_purchase, item_undo, skill_up = parse_player_events(player_events)
    item_purchase = remove_undos(item_purchase, item_undo)
    # remember this will affect the order used in diff_values!!
    info.extend([gpm, xpm, cpm, kp_time, total_early_kills, item_purchase, skill_up])
    return info


def diff_values(blue_info, red_info):
    """Return ``[cpm_diff_10, cpm_diff_15, gpm_diff_10, gpm_diff_15]`` (blue minus red).

    Relies on the tail order set in ``individual_data``: gpm is at ``[-7]``
    and cpm at ``[-5]``. A pair of differences is 0 when either series is too
    short.
    NOTE(review): the "10" and "15" values read frame indices 11 and 16 -
    confirm that offset is intended.
    """
    # this is what will change if new_info is changed!
    def _diff_at(index):
        try:
            cpm_diff = blue_info[-5][index] - red_info[-5][index]
            gpm_diff = blue_info[-7][index] - red_info[-7][index]
        except (IndexError, TypeError):
            return 0, 0
        return cpm_diff, gpm_diff

    cpm_diff_10, gpm_diff_10 = _diff_at(11)
    cpm_diff_15, gpm_diff_15 = _diff_at(16)
    return [cpm_diff_10, cpm_diff_15, gpm_diff_10, gpm_diff_15]


def match_info(gameinfo_json, timeline_json, blue_jungler, red_jungler):
    """Return both junglers' rows, each extended with the signed blue-vs-red differences."""
    blue_info = individual_data(gameinfo_json, timeline_json, blue_jungler)
    red_info = individual_data(gameinfo_json, timeline_json, red_jungler)
    for diff in diff_values(blue_info, red_info):
        blue_info.append(diff)
        red_info.append(-1 * diff)
    return blue_info, red_info


def match_run(matchId, region, tier, key, session):
    """Download and parse one match.

    Returns ``([blue_row, red_row], matchId, region)`` on success, or
    ``('Match Error', matchId, region)`` when anything about the match cannot
    be fetched or parsed.
    """
    try:
        timeline_json = get_matchTimeline(matchId, region, key, session)
        gameinfo_json = get_gameInfo(matchId, region, key, session)
        blue_found = find_jungler(timeline_json, 'Blue')
        red_found = find_jungler(timeline_json, 'Red')
        if isinstance(blue_found, str) or isinstance(red_found, str):
            # find_jungler reports "no jungler" as a message string.
            return 'Match Error', matchId, region
        blue_jungler, red_jungler = blue_found[1], red_found[1]
        blue_champ = find_champion(gameinfo_json, blue_jungler)
        red_champ = find_champion(gameinfo_json, red_jungler)
        blue_info, red_info = match_info(gameinfo_json, timeline_json, blue_jungler, red_jungler)
        blue_info.insert(0, blue_champ)
        red_info.insert(0, red_champ)
        blue_info.insert(1, tier)
        red_info.insert(1, tier)
        return [blue_info, red_info], matchId, region
    except Exception:
        # Best effort per match: a malformed match must not abort the batch.
        return 'Match Error', matchId, region


def create_df(data):
    """Wrap the list of jungler rows in a DataFrame with named columns."""
    column_names = ['champ_id', 'tier', 'game_duration', 'result', 'first_baron', 'first_dragon', 'first_herald',
                    'total_barons', 'total_dragons',
                    'total_rifts', 'spell1', 'spell2', 'items', 'kills', 'deaths', 'assists', 'kp', 'death_perc',
                    'damage_dealt',
                    'vision_score', 'damage_taken', 'gold_earnt', 'lane_minions', 'enemy_jungle', 'friendly_jungle',
                    'scuttles_killed', 'vision_wards', 'fb_parti', 'ft_parti', 'perks', 'perk_main', 'perk_second',
                    'runes',
                    'gpm', 'xpm', 'cpm', 'kp_time', 'total_early_kills', 'item_purchase', 'skill_up',
                    'cpm_diff_10', 'cpm_diff_15', 'gpm_diff_10', 'gpm_diff_15']

    df = pd.DataFrame(data, columns=column_names)
    return df


def get_args(matchIds, regions, tiers, key, session):
    """Zip the parallel lists into one ``match_run`` argument tuple per match."""
    keys = [key] * len(matchIds)
    sessions = [session] * len(matchIds)
    return zip(matchIds, regions, tiers, keys, sessions)


def all_runs(matchIds, regions, tiers, key, session):
    """Parse every match with 8 worker processes and return the combined DataFrame.

    Matches that fail are left out; their ids and regions are gathered in
    local lists only.
    """
    args = get_args(matchIds, regions, tiers, key, session)
    data, fail_id, fail_region = [], [], []
    with Pool(processes=8) as p:
        for info, matchId, region in p.starmap(match_run, args):
            if isinstance(info, str) and info == 'Match Error':
                fail_id.append(matchId)
                fail_region.append(region)
            else:
                data.extend(info)
    df = create_df(data)
    return df


# --------------------------------------------------------------------------------
# /README.md:
# --------------------------------------------------------------------------------
# # Jack J Williams Portfolio
#
# Contains publicly available projects that I have been working on. No project/data contains sensitive information.
#
# Note that some of these projects were created at the start of my Python career and will not reflect my current coding standards!
6 | 7 | A private directory is available that contains the productionised code used to create the dataset for the League of Legends Analytics site Jung.GG, but permission is granted only to those with agreeable circumstances (recruiters/colleagues/friends) given my desire to keep our methods safe from competitors! 8 | 9 | ## Current Portfolio: 10 | 11 | **CNN - Music Genre Classification**: Building a CNN model capable of classifying the genre of a song based on a 30 sec extract. First transforms the audio files into mel spectrograms (images of sound over time). Builds a base LeNet model (50% accuracy). Optimizes the model through iterative parameter tuning and architecture changes to a final model of 65% accuracy (95% for 2 of the 5 genres). 12 | 13 | **RNN - Marvel Character Generator**: An RNN model that takes the text from the Marvel Wiki entries and uses it to generate a backstory for a character. Results vary and the model still requires better tuning, but it is an interesting introduction! 14 | 15 | **MCMC - Breaking the Enigma Code**: Using an MCMC algorithm to break the Enigma code. Used for creating the Medium article: 16 | 17 | **Data Creation - Jungle Stats per Champion**: Builds a full dataset of each League of Legends Champion's average Jungle stats per ranked tier. First creates a dataset of match IDs for each tier, loops through these to fetch the match JSONs, then parses the JSON to pull & aggregate key information. All performed with parallel processing. 18 | 19 | **Principle Component Analysis**: Exploring techniques to reduce the dimensionality of the 2012 U.S. Army Anthropometric Survey (ANSUR-2). Using predominantly PCA, but a small comparison to the more complex techniques of UMAP and T-SNE is also performed. 20 | 21 | **Principle Component Analysis & Recommendation Engine**: Uses PCA to convert data about LoL Champions into 2D space to be used for a Euclidean distance "Content-based recommendation" engine.
22 | 23 | **Classification**: Using medical data to classify patients. Transformed using several dimensionality reduction techniques (UMAP/PCA/TSNE) then classified through Supervised Learning (Gaussian Naive Bayes/K-Nearest Neighbors/SVC/Decision Trees), scored on T1 errors through cross-validation to pick optimal model. Further Unsupervised Learning (K-Means / Gaussian Mixture) used to test for further improvements in classification (whether additional categories are beneficial). 24 | 25 | **Data Analysis and API**: Using the Riot API to gather data about LoL player, attempting to prove whether "tilted" players are more likely to lose their next game. Used for creating the Medium article: 26 | 27 | **Champion Classification using UMAP and KMeans**: Using Champion statistics alongside UMAP dimensionality reduction to classify LoL champions into four per lane. Used for article and future work (reference to follow) -------------------------------------------------------------------------------- /RNN - Marvel Character Generator/RNN_Marvel_Generator.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Marvel_Generator.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "machine_shape": "hm" 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "code", 20 | "metadata": { 21 | "id": "ggUwY8AgaHqE", 22 | "colab_type": "code", 23 | "outputId": "5ff35c03-3c85-405f-bfbe-814b5ee89ad6", 24 | "colab": { 25 | "base_uri": "https://localhost:8080/", 26 | "height": 34 27 | } 28 | }, 29 | "source": [ 30 | "from google.colab import drive\n", 31 | "drive.mount('/content/drive')\n", 32 | "import os\n", 33 | "os.chdir('drive/My Drive')" 34 | ], 35 | "execution_count": 0, 36 | "outputs": [ 37 | { 38 | "output_type": "stream", 39 | 
"text": [ 40 | "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" 41 | ], 42 | "name": "stdout" 43 | } 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "metadata": { 49 | "id": "gLfWWhWpatZf", 50 | "colab_type": "code", 51 | "colab": {} 52 | }, 53 | "source": [ 54 | "from __future__ import print_function\n", 55 | "from keras.callbacks import LambdaCallback\n", 56 | "from keras.utils import to_categorical\n", 57 | "from keras.models import Sequential\n", 58 | "from keras.layers import Dense, Activation\n", 59 | "from keras.optimizers import RMSprop\n", 60 | "from keras.preprocessing.text import Tokenizer\n", 61 | "from keras.layers import LSTM\n", 62 | "from keras.layers import Embedding\n", 63 | "from collections import Counter\n", 64 | "from keras.preprocessing.sequence import pad_sequences\n", 65 | "\n", 66 | "import numpy as np\n", 67 | "import random\n", 68 | "import sys\n", 69 | "import io\n", 70 | "import string" 71 | ], 72 | "execution_count": 0, 73 | "outputs": [] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "metadata": { 78 | "id": "CIrhuYIaazUx", 79 | "colab_type": "code", 80 | "outputId": "7bffd353-61a5-4a9e-aa10-a54099e42435", 81 | "colab": { 82 | "base_uri": "https://localhost:8080/", 83 | "height": 34 84 | } 85 | }, 86 | "source": [ 87 | "# Read data and check length\n", 88 | "text = open('marvel_data.txt', encoding=\"utf8\").read().lower()\n", 89 | "print('text length', len(text))" 90 | ], 91 | "execution_count": 0, 92 | "outputs": [ 93 | { 94 | "output_type": "stream", 95 | "text": [ 96 | "text length 28213380\n" 97 | ], 98 | "name": "stdout" 99 | } 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "metadata": { 105 | "id": "Z7H6YIGeFscJ", 106 | "colab_type": "code", 107 | "colab": {} 108 | }, 109 | "source": [ 110 | "# Reduce (stop memory crashes - may be worth looking into memory reduction techniques instead)\n", 111 | "reduced_text = text[:1000000]" 
112 | ], 113 | "execution_count": 0, 114 | "outputs": [] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "metadata": { 119 | "id": "sqTbcbasa7Is", 120 | "colab_type": "code", 121 | "outputId": "2e345eff-679a-4664-cf57-b435611f9051", 122 | "colab": { 123 | "base_uri": "https://localhost:8080/", 124 | "height": 54 125 | } 126 | }, 127 | "source": [ 128 | "# Random line\n", 129 | "print(text[20086:20480])" 130 | ], 131 | "execution_count": 0, 132 | "outputs": [ 133 | { 134 | "output_type": "stream", 135 | "text": [ 136 | " she then tied him up until the police arrived on the scene and showed them images of chord and the dealings of the ten rings which led to his arrest. midnight's fire has very subtle abilities, due to the energies of the well of all things. his physical attributes are at the peak of human possibility. he has enhanced speed, strength, agility, and sensory perception on par with black panther.\n" 137 | ], 138 | "name": "stdout" 139 | } 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "metadata": { 145 | "id": "pdjB_ACI79rv", 146 | "colab_type": "code", 147 | "colab": {} 148 | }, 149 | "source": [ 150 | "# Required cleans\n", 151 | "def clean_doc(doc):\n", 152 | "\t# replace '--' with a space ' '\n", 153 | "\tdoc = doc.replace('--', ' ')\n", 154 | "\t# split into tokens by white space\n", 155 | "\ttokens = doc.split()\n", 156 | "\t# remove punctuation from each token\n", 157 | "\ttable = str.maketrans('', '', string.punctuation)\n", 158 | "\ttokens = [w.translate(table) for w in tokens]\n", 159 | "\t# remove remaining tokens that are not alphabetic\n", 160 | "\ttokens = [word for word in tokens if word.isalpha()]\n", 161 | "\t# make lower case\n", 162 | "\ttokens = [word.lower() for word in tokens]\n", 163 | "\treturn tokens\n", 164 | "\n", 165 | "\t\n", 166 | "tokens = clean_doc(reduced_text)" 167 | ], 168 | "execution_count": 0, 169 | "outputs": [] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "metadata": { 174 | "id": "Wqu3YMYA9YzC", 
175 | "colab_type": "code", 176 | "outputId": "f0931034-340f-4277-9794-4f2d14a2c9c6", 177 | "colab": { 178 | "base_uri": "https://localhost:8080/", 179 | "height": 88 180 | } 181 | }, 182 | "source": [ 183 | "# Example of tokens\n", 184 | "print(tokens[:200])\n", 185 | "print('Total Tokens: %d' % len(tokens))\n", 186 | "print('Unique Tokens: %d' % len(set(tokens)))" 187 | ], 188 | "execution_count": 0, 189 | "outputs": [ 190 | { 191 | "output_type": "stream", 192 | "text": [ 193 | "['unnamed', 'cousin', 'due', 'to', 'false', 'accusation', 'spread', 'by', 'afari', 'against', 'him', 'and', 'the', 'black', 'spears', 'commander', 'amboola', 'they', 'were', 'both', 'suspected', 'by', 'queen', 'tananda', 'of', 'plotting', 'against', 'her', 'and', 'were', 'arrested', 'amboola', 'was', 'killed', 'by', 'a', 'demon', 'released', 'by', 'the', 'sorcerer', 'muru', 'agent', 'of', 'the', 'actual', 'plotter', 'tuthmes', 'but', 'aahmes', 'fate', 'remained', 'unknown', 'aaidan', 'blomfield', 'was', 'the', 'third', 'man', 'to', 'call', 'himself', 'the', 'unicorn', 'he', 'was', 'an', 'agent', 'of', 'stockpile', 'attempting', 'to', 'raid', 'stark', 'enterprises', 'for', 'morgan', 'stark', 'after', 'the', 'apparent', 'death', 'of', 'tony', 'stark', 'he', 'claimed', 'to', 'be', 'an', 'old', 'foe', 'of', 'iron', 'mans', 'but', 'it', 'is', 'unknown', 'if', 'he', 'really', 'was', 'or', 'was', 'just', 'riding', 'on', 'the', 'reputation', 'of', 'the', 'original', 'unicorn', 'he', 'displayed', 'superhuman', 'strength', 'and', 'toughness', 'enough', 'to', 'resist', 'iron', 'mans', 'repulsors', 'and', 'war', 'blasts', 'aala', 'was', 'the', 'sea', 'goddess', 'of', 'balsagoth', 'kyrie', 'was', 'worshiped', 'as', 'aala', 'and', 'learn', 'to', 'use', 'that', 'belief', 'until', 'high', 'priest', 'gothan', 'had', 'her', 'exiled', 'and', 'those', 'faithful', 'to', 'her', 'slaughtered', 'unborn', 'child', 'aala', 'was', 'the', 'wife', 'of', 'galan', 'in', 'the', 'sixth', 'iteration', 'of', 'she', 'was', 
'pregnant', 'at', 'the', 'time', 'of', 'the', 'end', 'of', 'the', 'universe', 'aalbort', 'was', 'accountant', 'for', 'the', 'starship', 'principle', 'of', 'reasonable', 'interest', 'and', 'had', 'been', 'working', 'for']\n", 194 | "Total Tokens: 167600\n", 195 | "Unique Tokens: 15115\n" 196 | ], 197 | "name": "stdout" 198 | } 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "metadata": { 204 | "id": "NW7Fl7QD_08F", 205 | "colab_type": "code", 206 | "outputId": "66966bd5-480e-45c6-f070-652a90d5e304", 207 | "colab": { 208 | "base_uri": "https://localhost:8080/", 209 | "height": 187 210 | } 211 | }, 212 | "source": [ 213 | "# Count the tokens\n", 214 | "Counter(tokens).most_common(10)" 215 | ], 216 | "execution_count": 0, 217 | "outputs": [ 218 | { 219 | "output_type": "execute_result", 220 | "data": { 221 | "text/plain": [ 222 | "[('the', 11274),\n", 223 | " ('to', 6773),\n", 224 | " ('and', 5352),\n", 225 | " ('of', 4666),\n", 226 | " ('a', 3580),\n", 227 | " ('was', 3255),\n", 228 | " ('his', 2787),\n", 229 | " ('in', 2429),\n", 230 | " ('he', 2349),\n", 231 | " ('by', 1586)]" 232 | ] 233 | }, 234 | "metadata": { 235 | "tags": [] 236 | }, 237 | "execution_count": 9 238 | } 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "metadata": { 244 | "id": "lX7R2Vs391nT", 245 | "colab_type": "code", 246 | "outputId": "c2cb95dd-7d8b-49ca-9ed7-ef229ccb0f5f", 247 | "colab": { 248 | "base_uri": "https://localhost:8080/", 249 | "height": 34 250 | } 251 | }, 252 | "source": [ 253 | "# Organise into sequences\n", 254 | "length = 41\n", 255 | "sequences = list()\n", 256 | "for i in range(length, len(tokens)):\n", 257 | "\t# select sequence of tokens\n", 258 | "\tseq = tokens[i-length:i]\n", 259 | "\t# convert into a line\n", 260 | "\tline = ' '.join(seq)\n", 261 | "\t# store\n", 262 | "\tsequences.append(line)\n", 263 | "print('Total Sequences: %d' % len(sequences))" 264 | ], 265 | "execution_count": 0, 266 | "outputs": [ 267 | { 268 | "output_type": "stream", 269 
| "text": [ 270 | "Total Sequences: 167559\n" 271 | ], 272 | "name": "stdout" 273 | } 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "metadata": { 279 | "id": "znUAJGSWedzm", 280 | "colab_type": "code", 281 | "colab": {} 282 | }, 283 | "source": [ 284 | "# Encode the sequences\n", 285 | "tokenizer = Tokenizer()\n", 286 | "tokenizer.fit_on_texts(sequences)\n", 287 | "sequences = tokenizer.texts_to_sequences(sequences)" 288 | ], 289 | "execution_count": 0, 290 | "outputs": [] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "metadata": { 295 | "id": "hg3ueeQOAmGe", 296 | "colab_type": "code", 297 | "colab": {} 298 | }, 299 | "source": [ 300 | "# Check the size of the vocab\n", 301 | "vocab_size = len(tokenizer.word_index) + 1\n", 302 | "print(vocab_size)" 303 | ], 304 | "execution_count": 0, 305 | "outputs": [] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "metadata": { 310 | "id": "_DW0e5k8bKsI", 311 | "colab_type": "code", 312 | "colab": {} 313 | }, 314 | "source": [ 315 | "# Split into the input text and the output text\n", 316 | "sequences = np.array(sequences)\n", 317 | "X, y = sequences[:,:-1], sequences[:,-1]\n", 318 | "y = to_categorical(y, num_classes=vocab_size)\n", 319 | "seq_length = X.shape[1]" 320 | ], 321 | "execution_count": 0, 322 | "outputs": [] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "metadata": { 327 | "id": "rJ8hWf5pHeZK", 328 | "colab_type": "code", 329 | "outputId": "b249ded9-6f84-4351-84cf-d61bfb66eaa6", 330 | "colab": { 331 | "base_uri": "https://localhost:8080/", 332 | "height": 408 333 | } 334 | }, 335 | "source": [ 336 | "# Create an RNN model\n", 337 | "model = Sequential()\n", 338 | "model.add(Embedding(vocab_size, 50, input_length=seq_length))\n", 339 | "model.add(LSTM(100, return_sequences=True))\n", 340 | "model.add(LSTM(100))\n", 341 | "model.add(Dense(100, activation='relu'))\n", 342 | "model.add(Dense(vocab_size, activation='softmax'))\n", 343 | "print(model.summary())\n" 344 | ], 345 | "execution_count": 
0, 346 | "outputs": [ 347 | { 348 | "output_type": "stream", 349 | "text": [ 350 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:541: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.\n", 351 | "\n", 352 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4432: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.\n", 353 | "\n", 354 | "Model: \"sequential_2\"\n", 355 | "_________________________________________________________________\n", 356 | "Layer (type) Output Shape Param # \n", 357 | "=================================================================\n", 358 | "embedding_1 (Embedding) (None, 40, 50) 755800 \n", 359 | "_________________________________________________________________\n", 360 | "lstm_1 (LSTM) (None, 40, 100) 60400 \n", 361 | "_________________________________________________________________\n", 362 | "lstm_2 (LSTM) (None, 100) 80400 \n", 363 | "_________________________________________________________________\n", 364 | "dense_1 (Dense) (None, 100) 10100 \n", 365 | "_________________________________________________________________\n", 366 | "dense_2 (Dense) (None, 15116) 1526716 \n", 367 | "=================================================================\n", 368 | "Total params: 2,433,416\n", 369 | "Trainable params: 2,433,416\n", 370 | "Non-trainable params: 0\n", 371 | "_________________________________________________________________\n", 372 | "None\n" 373 | ], 374 | "name": "stdout" 375 | } 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "metadata": { 381 | "id": "Tyqtm-MyHk6w", 382 | "colab_type": "code", 383 | "outputId": "d4269baf-87c2-45a3-d868-d48d8e7e0dc0", 384 | "colab": { 385 | "base_uri": "https://localhost:8080/", 386 | "height": 1000 387 | } 388 | }, 389 | "source": [ 390 | "# Compile & Fit\n", 391 | "model.compile(loss='categorical_crossentropy', 
optimizer='adam', metrics=['accuracy'])\n", 392 | "model.fit(X, y, batch_size=128, epochs=100)" 393 | ], 394 | "execution_count": 0, 395 | "outputs": [ 396 | { 397 | "output_type": "stream", 398 | "text": [ 399 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:793: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.\n", 400 | "\n", 401 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3576: The name tf.log is deprecated. Please use tf.math.log instead.\n", 402 | "\n", 403 | "WARNING:tensorflow:From /tensorflow-1.15.0/python3.6/tensorflow_core/python/ops/math_grad.py:1424: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n", 404 | "Instructions for updating:\n", 405 | "Use tf.where in 2.0, which has the same broadcast rule as np.where\n", 406 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1033: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.\n", 407 | "\n", 408 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1020: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.\n", 409 | "\n", 410 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3005: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.\n", 411 | "\n", 412 | "Epoch 1/100\n", 413 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:190: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.\n", 414 | "\n", 415 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:197: The name tf.ConfigProto is deprecated. 
Please use tf.compat.v1.ConfigProto instead.\n", 416 | "\n", 417 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:207: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.\n", 418 | "\n", 419 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:216: The name tf.is_variable_initialized is deprecated. Please use tf.compat.v1.is_variable_initialized instead.\n", 420 | "\n", 421 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:223: The name tf.variables_initializer is deprecated. Please use tf.compat.v1.variables_initializer instead.\n", 422 | "\n", 423 | "167559/167559 [==============================] - 215s 1ms/step - loss: 7.1368 - acc: 0.0711\n", 424 | "Epoch 2/100\n", 425 | "167559/167559 [==============================] - 206s 1ms/step - loss: 6.6924 - acc: 0.0886\n", 426 | "Epoch 3/100\n", 427 | "167559/167559 [==============================] - 207s 1ms/step - loss: 6.4756 - acc: 0.1114\n", 428 | "Epoch 4/100\n", 429 | "167559/167559 [==============================] - 206s 1ms/step - loss: 6.2979 - acc: 0.1272\n", 430 | "Epoch 5/100\n", 431 | "167559/167559 [==============================] - 206s 1ms/step - loss: 6.1424 - acc: 0.1353\n", 432 | "Epoch 6/100\n", 433 | "167559/167559 [==============================] - 205s 1ms/step - loss: 6.0633 - acc: 0.1382\n", 434 | "Epoch 7/100\n", 435 | "167559/167559 [==============================] - 203s 1ms/step - loss: 5.9131 - acc: 0.1457\n", 436 | "Epoch 8/100\n", 437 | "167559/167559 [==============================] - 204s 1ms/step - loss: 5.7953 - acc: 0.1525\n", 438 | "Epoch 9/100\n", 439 | "167559/167559 [==============================] - 204s 1ms/step - loss: 5.6751 - acc: 0.1578\n", 440 | "Epoch 10/100\n", 441 | "167559/167559 [==============================] - 203s 1ms/step - loss: 5.5536 - acc: 0.1646\n", 442 | "Epoch 
11/100\n", 443 | "167559/167559 [==============================] - 205s 1ms/step - loss: 5.6142 - acc: 0.1557\n", 444 | "Epoch 12/100\n", 445 | "167559/167559 [==============================] - 204s 1ms/step - loss: 5.4753 - acc: 0.1643\n", 446 | "Epoch 13/100\n", 447 | "167559/167559 [==============================] - 204s 1ms/step - loss: 5.3795 - acc: 0.1707\n", 448 | "Epoch 14/100\n", 449 | "167559/167559 [==============================] - 206s 1ms/step - loss: 5.2967 - acc: 0.1750\n", 450 | "Epoch 15/100\n", 451 | "167559/167559 [==============================] - 207s 1ms/step - loss: 5.2149 - acc: 0.1783\n", 452 | "Epoch 16/100\n", 453 | "167559/167559 [==============================] - 205s 1ms/step - loss: 5.1418 - acc: 0.1816\n", 454 | "Epoch 17/100\n", 455 | "167559/167559 [==============================] - 205s 1ms/step - loss: 5.0752 - acc: 0.1849\n", 456 | "Epoch 18/100\n", 457 | "167559/167559 [==============================] - 206s 1ms/step - loss: 5.0045 - acc: 0.1884\n", 458 | "Epoch 19/100\n", 459 | "167559/167559 [==============================] - 204s 1ms/step - loss: 4.9423 - acc: 0.1915\n", 460 | "Epoch 20/100\n", 461 | "167559/167559 [==============================] - 204s 1ms/step - loss: 4.8819 - acc: 0.1944\n", 462 | "Epoch 21/100\n", 463 | "167559/167559 [==============================] - 202s 1ms/step - loss: 4.8225 - acc: 0.1974\n", 464 | "Epoch 22/100\n", 465 | "167559/167559 [==============================] - 203s 1ms/step - loss: 4.7677 - acc: 0.1999\n", 466 | "Epoch 23/100\n", 467 | "167559/167559 [==============================] - 204s 1ms/step - loss: 4.7145 - acc: 0.2037\n", 468 | "Epoch 24/100\n", 469 | "167559/167559 [==============================] - 205s 1ms/step - loss: 4.6622 - acc: 0.2070\n", 470 | "Epoch 25/100\n", 471 | "167559/167559 [==============================] - 203s 1ms/step - loss: 4.6136 - acc: 0.2103\n", 472 | "Epoch 26/100\n", 473 | "167559/167559 [==============================] - 205s 1ms/step - loss: 
4.5589 - acc: 0.2145\n", 474 | "Epoch 27/100\n", 475 | "167559/167559 [==============================] - 202s 1ms/step - loss: 4.5107 - acc: 0.2183\n", 476 | "Epoch 28/100\n", 477 | "167559/167559 [==============================] - 203s 1ms/step - loss: 4.4760 - acc: 0.2213\n", 478 | "Epoch 29/100\n", 479 | "167559/167559 [==============================] - 203s 1ms/step - loss: 4.4323 - acc: 0.2242\n", 480 | "Epoch 30/100\n", 481 | "167559/167559 [==============================] - 204s 1ms/step - loss: 4.3693 - acc: 0.2294\n", 482 | "Epoch 31/100\n", 483 | "167559/167559 [==============================] - 207s 1ms/step - loss: 4.3243 - acc: 0.2338\n", 484 | "Epoch 32/100\n", 485 | "167559/167559 [==============================] - 205s 1ms/step - loss: 4.2787 - acc: 0.2367\n", 486 | "Epoch 33/100\n", 487 | "167559/167559 [==============================] - 202s 1ms/step - loss: 4.2361 - acc: 0.2409\n", 488 | "Epoch 34/100\n", 489 | "167559/167559 [==============================] - 203s 1ms/step - loss: 4.1923 - acc: 0.2452\n", 490 | "Epoch 35/100\n", 491 | "167559/167559 [==============================] - 205s 1ms/step - loss: 4.1540 - acc: 0.2497\n", 492 | "Epoch 36/100\n", 493 | "167559/167559 [==============================] - 204s 1ms/step - loss: 4.1097 - acc: 0.2530\n", 494 | "Epoch 37/100\n", 495 | "167559/167559 [==============================] - 204s 1ms/step - loss: 4.0662 - acc: 0.2577\n", 496 | "Epoch 38/100\n", 497 | "167559/167559 [==============================] - 206s 1ms/step - loss: 4.0269 - acc: 0.2614\n", 498 | "Epoch 39/100\n", 499 | "167559/167559 [==============================] - 207s 1ms/step - loss: 3.9873 - acc: 0.2666\n", 500 | "Epoch 40/100\n", 501 | "167559/167559 [==============================] - 205s 1ms/step - loss: 3.9478 - acc: 0.2698\n", 502 | "Epoch 41/100\n", 503 | "167559/167559 [==============================] - 205s 1ms/step - loss: 3.9111 - acc: 0.2736\n", 504 | "Epoch 42/100\n", 505 | "167559/167559 
[==============================] - 202s 1ms/step - loss: 3.8727 - acc: 0.2778\n", 506 | "Epoch 43/100\n", 507 | "167559/167559 [==============================] - 203s 1ms/step - loss: 3.8360 - acc: 0.2814\n", 508 | "Epoch 44/100\n", 509 | "167559/167559 [==============================] - 203s 1ms/step - loss: 3.7987 - acc: 0.2858\n", 510 | "Epoch 45/100\n", 511 | "167559/167559 [==============================] - 204s 1ms/step - loss: 3.7646 - acc: 0.2903\n", 512 | "Epoch 46/100\n", 513 | "167559/167559 [==============================] - 202s 1ms/step - loss: 3.7287 - acc: 0.2940\n", 514 | "Epoch 47/100\n", 515 | "167559/167559 [==============================] - 202s 1ms/step - loss: 3.6968 - acc: 0.2981\n", 516 | "Epoch 48/100\n", 517 | "167559/167559 [==============================] - 204s 1ms/step - loss: 3.6597 - acc: 0.3019\n", 518 | "Epoch 49/100\n", 519 | "167559/167559 [==============================] - 205s 1ms/step - loss: 3.6270 - acc: 0.3057\n", 520 | "Epoch 50/100\n", 521 | "167559/167559 [==============================] - 205s 1ms/step - loss: 3.5942 - acc: 0.3099\n", 522 | "Epoch 51/100\n", 523 | "167559/167559 [==============================] - 205s 1ms/step - loss: 3.5606 - acc: 0.3133\n", 524 | "Epoch 52/100\n", 525 | "167559/167559 [==============================] - 203s 1ms/step - loss: 3.5287 - acc: 0.3182\n", 526 | "Epoch 53/100\n", 527 | "167559/167559 [==============================] - 202s 1ms/step - loss: 3.4990 - acc: 0.3214\n", 528 | "Epoch 54/100\n", 529 | "167559/167559 [==============================] - 203s 1ms/step - loss: 3.4684 - acc: 0.3265\n", 530 | "Epoch 55/100\n", 531 | "167559/167559 [==============================] - 199s 1ms/step - loss: 3.4368 - acc: 0.3304\n", 532 | "Epoch 56/100\n", 533 | "167559/167559 [==============================] - 201s 1ms/step - loss: 3.4096 - acc: 0.3340\n", 534 | "Epoch 57/100\n", 535 | "167559/167559 [==============================] - 201s 1ms/step - loss: 3.3778 - acc: 0.3384\n", 536 | "Epoch 
58/100\n", 537 | "167559/167559 [==============================] - 202s 1ms/step - loss: 3.3517 - acc: 0.3415\n", 538 | "Epoch 59/100\n", 539 | "167559/167559 [==============================] - 202s 1ms/step - loss: 3.3187 - acc: 0.3451\n", 540 | "Epoch 60/100\n", 541 | "167559/167559 [==============================] - 202s 1ms/step - loss: 3.2917 - acc: 0.3506\n", 542 | "Epoch 61/100\n", 543 | "167559/167559 [==============================] - 202s 1ms/step - loss: 3.2603 - acc: 0.3540\n", 544 | "Epoch 62/100\n", 545 | "167559/167559 [==============================] - 202s 1ms/step - loss: 3.2377 - acc: 0.3573\n", 546 | "Epoch 63/100\n", 547 | "167559/167559 [==============================] - 202s 1ms/step - loss: 3.2076 - acc: 0.3611\n", 548 | "Epoch 64/100\n", 549 | "167559/167559 [==============================] - 203s 1ms/step - loss: 3.1829 - acc: 0.3649\n", 550 | "Epoch 65/100\n", 551 | "167559/167559 [==============================] - 205s 1ms/step - loss: 3.1530 - acc: 0.3687\n", 552 | "Epoch 66/100\n", 553 | "167559/167559 [==============================] - 205s 1ms/step - loss: 3.1248 - acc: 0.3732\n", 554 | "Epoch 67/100\n", 555 | "167559/167559 [==============================] - 205s 1ms/step - loss: 3.0994 - acc: 0.3762\n", 556 | "Epoch 68/100\n", 557 | "167559/167559 [==============================] - 204s 1ms/step - loss: 3.0788 - acc: 0.3789\n", 558 | "Epoch 69/100\n", 559 | "167559/167559 [==============================] - 206s 1ms/step - loss: 3.0460 - acc: 0.3842\n", 560 | "Epoch 70/100\n", 561 | "167559/167559 [==============================] - 204s 1ms/step - loss: 3.0242 - acc: 0.3869\n", 562 | "Epoch 71/100\n", 563 | "167559/167559 [==============================] - 201s 1ms/step - loss: 3.0044 - acc: 0.3893\n", 564 | "Epoch 72/100\n", 565 | "167559/167559 [==============================] - 203s 1ms/step - loss: 2.9720 - acc: 0.3943\n", 566 | "Epoch 73/100\n", 567 | "167559/167559 [==============================] - 203s 1ms/step - loss: 
2.9492 - acc: 0.3987\n", 568 | "Epoch 74/100\n", 569 | "167559/167559 [==============================] - 204s 1ms/step - loss: 2.9266 - acc: 0.4012\n", 570 | "Epoch 75/100\n", 571 | "167559/167559 [==============================] - 205s 1ms/step - loss: 2.9071 - acc: 0.4049\n", 572 | "Epoch 76/100\n", 573 | "167559/167559 [==============================] - 205s 1ms/step - loss: 2.8845 - acc: 0.4083\n", 574 | "Epoch 77/100\n", 575 | "167559/167559 [==============================] - 207s 1ms/step - loss: 2.8655 - acc: 0.4113\n", 576 | "Epoch 78/100\n", 577 | "167559/167559 [==============================] - 206s 1ms/step - loss: 2.8333 - acc: 0.4165\n", 578 | "Epoch 79/100\n", 579 | "167559/167559 [==============================] - 206s 1ms/step - loss: 2.8169 - acc: 0.4183\n", 580 | "Epoch 80/100\n", 581 | "167559/167559 [==============================] - 207s 1ms/step - loss: 2.7922 - acc: 0.4223\n", 582 | "Epoch 81/100\n", 583 | "167559/167559 [==============================] - 207s 1ms/step - loss: 2.7705 - acc: 0.4260\n", 584 | "Epoch 82/100\n", 585 | "167559/167559 [==============================] - 206s 1ms/step - loss: 2.7572 - acc: 0.4271\n", 586 | "Epoch 83/100\n", 587 | "167559/167559 [==============================] - 206s 1ms/step - loss: 2.7332 - acc: 0.4323\n", 588 | "Epoch 84/100\n", 589 | "167559/167559 [==============================] - 207s 1ms/step - loss: 2.7124 - acc: 0.4355\n", 590 | "Epoch 85/100\n", 591 | "167559/167559 [==============================] - 207s 1ms/step - loss: 2.6912 - acc: 0.4375\n", 592 | "Epoch 86/100\n", 593 | "167559/167559 [==============================] - 207s 1ms/step - loss: 2.6701 - acc: 0.4412\n", 594 | "Epoch 87/100\n", 595 | "167559/167559 [==============================] - 208s 1ms/step - loss: 2.6540 - acc: 0.4431\n", 596 | "Epoch 88/100\n", 597 | "167559/167559 [==============================] - 208s 1ms/step - loss: 2.6384 - acc: 0.4449\n", 598 | "Epoch 89/100\n", 599 | "167559/167559 
[==============================] - 208s 1ms/step - loss: 2.6155 - acc: 0.4498\n", 600 | "Epoch 90/100\n", 601 | "167559/167559 [==============================] - 208s 1ms/step - loss: 2.5999 - acc: 0.4524\n", 602 | "Epoch 91/100\n", 603 | "167559/167559 [==============================] - 207s 1ms/step - loss: 2.5810 - acc: 0.4551\n", 604 | "Epoch 92/100\n", 605 | "167559/167559 [==============================] - 212s 1ms/step - loss: 2.5627 - acc: 0.4588\n", 606 | "Epoch 93/100\n", 607 | "167559/167559 [==============================] - 211s 1ms/step - loss: 2.5443 - acc: 0.4624\n", 608 | "Epoch 94/100\n", 609 | "167559/167559 [==============================] - 209s 1ms/step - loss: 2.5269 - acc: 0.4650\n", 610 | "Epoch 95/100\n", 611 | "167559/167559 [==============================] - 207s 1ms/step - loss: 2.5104 - acc: 0.4673\n", 612 | "Epoch 96/100\n", 613 | "167559/167559 [==============================] - 208s 1ms/step - loss: 2.4977 - acc: 0.4709\n", 614 | "Epoch 97/100\n", 615 | "167559/167559 [==============================] - 209s 1ms/step - loss: 2.4763 - acc: 0.4743\n", 616 | "Epoch 98/100\n", 617 | "167559/167559 [==============================] - 210s 1ms/step - loss: 2.4588 - acc: 0.4768\n", 618 | "Epoch 99/100\n", 619 | "167559/167559 [==============================] - 210s 1ms/step - loss: 2.4403 - acc: 0.4798\n", 620 | "Epoch 100/100\n", 621 | "167559/167559 [==============================] - 211s 1ms/step - loss: 2.4310 - acc: 0.4802\n" 622 | ], 623 | "name": "stdout" 624 | }, 625 | { 626 | "output_type": "execute_result", 627 | "data": { 628 | "text/plain": [ 629 | "" 630 | ] 631 | }, 632 | "metadata": { 633 | "tags": [] 634 | }, 635 | "execution_count": 22 636 | } 637 | ] 638 | }, 639 | { 640 | "cell_type": "code", 641 | "metadata": { 642 | "id": "JuMik4xeH268", 643 | "colab_type": "code", 644 | "colab": {} 645 | }, 646 | "source": [ 647 | "# Save model output\n", 648 | "from pickle import dump\n", 649 | "model.save('marvel_model1.h5')\n", 650 | 
"dump(tokenizer, open('tokenizer.pkl', 'wb'))" 651 | ], 652 | "execution_count": 0, 653 | "outputs": [] 654 | }, 655 | { 656 | "cell_type": "code", 657 | "metadata": { 658 | "id": "dXhkiQ8UZeT5", 659 | "colab_type": "code", 660 | "colab": {} 661 | }, 662 | "source": [ 663 | "# Ask Martyn to write me a 40 word opening sentence\n", 664 | "seed_text = 'Born in 1993, bullied in school for having small feet Martyn grew up on a local wheat farm. Lifting bails made him a strong man, far stronger than the average human. One day during a solar eclipse something odd happened'\n", 665 | "encoded = tokenizer.texts_to_sequences([seed_text])[0]\n", 666 | "encoded_array = np.array([encoded])\n", 667 | "encoded_array.shape" 668 | ], 669 | "execution_count": 0, 670 | "outputs": [] 671 | }, 672 | { 673 | "cell_type": "code", 674 | "metadata": { 675 | "id": "AfZKpKSqaN3S", 676 | "colab_type": "code", 677 | "outputId": "d33ef5a2-17c0-47ad-acb2-0a3e7797b014", 678 | "colab": { 679 | "base_uri": "https://localhost:8080/", 680 | "height": 306 681 | } 682 | }, 683 | "source": [ 684 | "# Predict the output\n", 685 | "predicted = model.predict_classes(encoded_array, verbose=0)" 686 | ], 687 | "execution_count": 0, 688 | "outputs": [ 689 | { 690 | "output_type": "error", 691 | "ename": "ValueError", 692 | "evalue": "ignored", 693 | "traceback": [ 694 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 695 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 696 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpredicted\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_classes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mencoded_array\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 697 | 
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras/engine/sequential.py\u001b[0m in \u001b[0;36mpredict_classes\u001b[0;34m(self, x, batch_size, verbose)\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0mA\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0marray\u001b[0m \u001b[0mof\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0mpredictions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 267\u001b[0m \"\"\"\n\u001b[0;32m--> 268\u001b[0;31m \u001b[0mproba\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 269\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mproba\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 270\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mproba\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 698 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, x, batch_size, verbose, steps, callbacks, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m 1378\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1379\u001b[0m \u001b[0;31m# Case 2: Symbolic tensors or Numpy 
array-like.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1380\u001b[0;31m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_standardize_user_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1381\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstateful\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1382\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 699 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras/engine/training.py\u001b[0m in \u001b[0;36m_standardize_user_data\u001b[0;34m(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)\u001b[0m\n\u001b[1;32m 755\u001b[0m \u001b[0mfeed_input_shapes\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 756\u001b[0m \u001b[0mcheck_batch_axis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;31m# Don't enforce the batch size.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 757\u001b[0;31m 
exception_prefix='input')\n\u001b[0m\u001b[1;32m 758\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 759\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0my\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 700 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras/engine/training_utils.py\u001b[0m in \u001b[0;36mstandardize_input_data\u001b[0;34m(data, names, shapes, check_batch_axis, exception_prefix)\u001b[0m\n\u001b[1;32m 139\u001b[0m \u001b[0;34m': expected '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' to have shape '\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' but got array with shape '\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 141\u001b[0;31m str(data_shape))\n\u001b[0m\u001b[1;32m 142\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 701 | "\u001b[0;31mValueError\u001b[0m: Error when checking input: expected embedding_1_input to have shape (40,) but got array with shape (36,)" 702 | ] 703 | } 704 | ] 705 | }, 706 | { 707 | "cell_type": "code", 708 | "metadata": { 709 | "id": "jQ2igHzKaSo7", 710 | "colab_type": "code", 711 | "colab": {} 712 | }, 713 | "source": [ 714 | "# Generate a sequence\n", 715 | "def generate_seq(model, tokenizer, seq_length, seed_text, n_words):\n", 716 | "\tresult = list()\n", 717 | "\tin_text = seed_text\n", 718 | "\t# generate a fixed number of words\n", 719 | "\tfor _ in range(n_words):\n", 720 | "\t\t# encode the text as integer\n", 721 | 
"\t\tencoded = tokenizer.texts_to_sequences([in_text])[0]\n", 722 | "\t\t# truncate sequences to a fixed length\n", 723 | "\t\tencoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')\n", 724 | "\t\t# predict probabilities for each word\n", 725 | "\t\tyhat = model.predict_classes(encoded, verbose=0)\n", 726 | "\t\t# map predicted word index to word\n", 727 | "\t\tout_word = ''\n", 728 | "\t\tfor word, index in tokenizer.word_index.items():\n", 729 | "\t\t\tif index == yhat:\n", 730 | "\t\t\t\tout_word = word\n", 731 | "\t\t\t\tbreak\n", 732 | "\t\t# append to input\n", 733 | "\t\tin_text += ' ' + out_word\n", 734 | "\t\tresult.append(out_word)\n", 735 | "\treturn ' '.join(result)\n" 736 | ], 737 | "execution_count": 0, 738 | "outputs": [] 739 | }, 740 | { 741 | "cell_type": "code", 742 | "metadata": { 743 | "id": "E5SMunCrj5Ri", 744 | "colab_type": "code", 745 | "outputId": "e588bbc8-0726-49b0-b399-b09e1f0e9213", 746 | "colab": { 747 | "base_uri": "https://localhost:8080/", 748 | "height": 54 749 | } 750 | }, 751 | "source": [ 752 | "generated = generate_seq(model, tokenizer, seq_length, seed_text, 50)\n", 753 | "print(generated)" 754 | ], 755 | "execution_count": 0, 756 | "outputs": [ 757 | { 758 | "output_type": "stream", 759 | "text": [ 760 | "to the rigellians and nick fury will triggered his colorblindness and exchanging their japanese timeline grumlin was to combat fields hurani is a urgent detector radiate the wicked arno stark were being controlled by terror in the surface cap fell together facing shiv as part owner anubis fought for leaking\n" 761 | ], 762 | "name": "stdout" 763 | } 764 | ] 765 | }, 766 | { 767 | "cell_type": "code", 768 | "metadata": { 769 | "id": "4CCh3-QYj9BS", 770 | "colab_type": "code", 771 | "colab": {} 772 | }, 773 | "source": [ 774 | "# Full sentence, with a few improvements (capital letters / commas)\n", 775 | "\n", 776 | "\"[HUMAN MADE:] Born in 1993, bullied in school for having small feet Martyn grew up on a 
local wheat farm. Lifting bails made him a strong man, far stronger than the average human. \\n\n", 777 | "One day during a solar eclipse something odd happened [MODEL MADE:] to the Rigellians and Nick Fury's will triggered his colorblindness and exchanging their Japanese timeline, \\n\n", 778 | "Grumlin was to combat fields. Hurani is an urgent detector, radiate the wicked Arno Stark were being controlled by terror in the surface. Cap fell together facing Shiv as part owner Anubis fought for leaking\"" 779 | ], 780 | "execution_count": 0, 781 | "outputs": [] 782 | }, 783 | { 784 | "cell_type": "code", 785 | "metadata": { 786 | "id": "NxdNEIpBjYjV", 787 | "colab_type": "code", 788 | "colab": {} 789 | }, 790 | "source": [ 791 | "# We're not there yet!" 792 | ], 793 | "execution_count": 0, 794 | "outputs": [] 795 | } 796 | ] 797 | } --------------------------------------------------------------------------------