├── README.md ├── metrics.py ├── plots.py ├── neural_net.py ├── models.py ├── get_data.py ├── LICENSE.md └── create_datasets.py /README.md: -------------------------------------------------------------------------------- 1 | # fantasy-football-prediction 2 | Accurate predictions of Fantasy Football scores can make a difference in a Fantasy Football league. 3 | The provided files were used to create a dataset and evaluate Machine Learning techniques, 4 | namely Support Vector Regression and Neural Networks, on 2014 NFL game data. 5 | 6 | To get the NFL data, I used the nflgame package from github which is downloadable with the following commands: 7 | 8 | sudo apt-get install python-pip 9 | 10 | sudo pip install nflgame 11 | 12 | sudo nflgame-update-players 13 | -------------------------------------------------------------------------------- /metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Roman Lutz. All rights reserved. 2 | # The use and distribution terms for this software are covered by the 3 | # Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 4 | # which can be found in the file LICENSE.md at the root of this distribution. 5 | # By using this software in any fashion, you are agreeing to be bound by 6 | # the terms of this license. 7 | # You must not remove this notice, or any other, from this software. 8 | 9 | import numpy as np 10 | 11 | def mean_relative_error(y, prediction): 12 | return reduce(lambda x, y: x + y, map(lambda (y, p): np.fabs(p-y)/p, zip(y, prediction))) / len(y) 13 | -------------------------------------------------------------------------------- /plots.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Roman Lutz. All rights reserved. 2 | # The use and distribution terms for this software are covered by the 3 | # Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 4 | # which can be found in the file LICENSE.md at the root of this distribution. 5 | # By using this software in any fashion, you are agreeing to be bound by 6 | # the terms of this license. 7 | # You must not remove this notice, or any other, from this software. 8 | 9 | import matplotlib.pyplot as plt 10 | import pylab as P 11 | import numpy as np 12 | 13 | def histogram(y_vals, pred): 14 | P.hist(map(lambda (y, p): np.fabs(y-p), zip(y_vals,pred)), bins=range(35), rwidth=1.0, histtype='bar') 15 | P.xlabel('Absolute Error') 16 | P.ylabel('number of data cases') 17 | #P.xlim([0, 1]) 18 | #P.xticks([0.25, 0.75], ['female', 'male']) 19 | P.title('Absolute Error Distribution') 20 | P.savefig('absolute_error_distribution.pdf') 21 | # P.show() 22 | P.close('all') 23 | -------------------------------------------------------------------------------- /neural_net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Roman Lutz. All rights reserved. 2 | # The use and distribution terms for this software are covered by the 3 | # Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 4 | # which can be found in the file LICENSE.md at the root of this distribution. 5 | # By using this software in any fashion, you are agreeing to be bound by 6 | # the terms of this license. 7 | # You must not remove this notice, or any other, from this software. 8 | 9 | 10 | from pybrain.datasets import SupervisedDataSet 11 | from pybrain.structure import SigmoidLayer, LinearLayer 12 | from pybrain.tools.shortcuts import buildNetwork 13 | from pybrain.supervised.trainers import BackpropTrainer 14 | from pybrain.structure import TanhLayer 15 | from sklearn.metrics import mean_squared_error, mean_absolute_error 16 | from metrics import mean_relative_error 17 | import numpy as np 18 | from get_data import test_players 19 | 20 | # load data 21 | # indices are 22 | # 0: QB id 23 | # 1: QB name 24 | # 2: QB age 25 | # 3: QB years pro 26 | # 4-15: last game QB stats 27 | # 16-27: last 10 games QB stats 28 | # 28-31: last game defense stats 29 | # 32-35: last 10 games defense stats 30 | # 36-43: actual fantasy score = target 31 | train = np.load('train.npy') 32 | test = np.load('test.npy') 33 | 34 | train_x = train[:, 2:36].astype(np.float) 35 | train_y = train[:, 36].astype(np.float) # 1 column 36 | test_x = test[:, 2:36].astype(np.float) 37 | test_y = test[:, 36].astype(np.float) 38 | 39 | # determine indices in test set of 24 best players 40 | indices = [] 41 | for index in range(len(test_x)): 42 | if test[index, 0] in test_players.keys(): 43 | indices.append(index) 44 | 45 | number_of_features = train_x.shape[1] 46 | 47 | ds = SupervisedDataSet(number_of_features, 1) 48 | 49 | ds.setField('input', train_x) 50 | ds.setField('target', train_y.reshape((len(train_y), 1))) 51 | 52 | file = open("neural_net_output.txt", "w") 53 | file.write('RMSE(all), RMSE(24), MAE(all), MAE(24), MRE(all), MRE(24):\n') 54 | 55 | for number_of_epochs in [10, 50, 100, 1000]: 56 | for number_of_hidden_units in [10, 25, 50, 100]: 57 | for hidden_class in [SigmoidLayer, TanhLayer]: 58 | if hidden_class == TanhLayer: 59 | hidden_class_name = 'Tanh Layer' 60 | elif hidden_class == LinearLayer: 61 | hidden_class_name = 'Linear Layer' 62 | else: 63 | hidden_class_name = 'Sigmoid Layer' 64 | 65 | #Build Neural Network 66 | net = buildNetwork(number_of_features, # input 67 | number_of_hidden_units, # number of hidden units 68 | 1, # output neurons 69 | bias = True, 70 | hiddenclass = hidden_class, 71 | outclass = LinearLayer 72 | ) 73 | 74 | trainer = BackpropTrainer(net, ds, learningrate=0.01, lrdecay=1.0, momentum=0.0, weightdecay=0.0, verbose=True) 75 | 76 | trainer.trainUntilConvergence(maxEpochs=number_of_epochs) 77 | 78 | predictions = [] 79 | for x in test_x: 80 | predictions.append(net.activate(x)[0]) 81 | 82 | predictions = np.array(predictions) 83 | 84 | # format output for LaTeX 85 | file.write('%d %d %s %f & %f & %f & %f & %f & %f \\\\ \n' % (number_of_epochs, 86 | number_of_hidden_units, 87 | hidden_class_name, 88 | mean_squared_error(test_y, predictions)**0.5, 89 | mean_absolute_error(test_y, predictions), 90 | mean_relative_error(test_y, predictions), 91 | mean_squared_error(test_y[indices], predictions[indices])**0.5, 92 | mean_absolute_error(test_y[indices], predictions[indices]), 93 | mean_relative_error(test_y[indices], predictions[indices]))) 94 | print zip(test_y[indices], predictions[indices]) 95 | 96 | file.close() -------------------------------------------------------------------------------- /models.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Roman Lutz. All rights reserved. 2 | # The use and distribution terms for this software are covered by the 3 | # Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 4 | # which can be found in the file LICENSE.md at the root of this distribution. 5 | # By using this software in any fashion, you are agreeing to be bound by 6 | # the terms of this license. 7 | # You must not remove this notice, or any other, from this software. 8 | 9 | import numpy as np 10 | from sklearn.svm import SVR 11 | from sklearn import preprocessing 12 | from sklearn import cross_validation 13 | from sklearn import feature_selection 14 | from sklearn.metrics import mean_squared_error, mean_absolute_error 15 | from get_data import test_players 16 | import time 17 | from metrics import mean_relative_error 18 | from plots import histogram 19 | 20 | def hyperparameter_selection(regressors, x, y, folds): 21 | k_fold = cross_validation.StratifiedKFold(y, n_folds=folds) 22 | # get the index of the regressor with minimal average error over all folds 23 | MAE_averages = [0] * len(regressors) 24 | for index, reg in enumerate(regressors): 25 | print 'started next regressor', reg.kernel 26 | MAE_averages[index] = np.average( 27 | [mean_absolute_error(y[val], reg.fit(x[train], y[train]).predict(x[val])) for train, val in k_fold]) 28 | print MAE_averages[index] 29 | return np.argmin(MAE_averages) 30 | 31 | 32 | # Only one of the feature selection methods can be chosen 33 | FEATURE_SELECTION = False 34 | MANUAL_FEATURE_SELECTION = False 35 | FEATURE_NORMALIZATION = True 36 | HYPERPARAMETER_SELECTION = False 37 | HISTOGRAM = True 38 | 39 | # load data 40 | # indices are 41 | # 0: QB id 42 | # 1: QB name 43 | # 2: QB age 44 | # 3: QB years pro 45 | # 4-15: last game QB stats 46 | # 16-27: last 10 games QB stats 47 | # 28-31: last game defense stats 48 | # 32-35: last 10 games defense stats 49 | # 36: actual fantasy score = target 50 | train = np.load('train.npy') 51 | test = np.load('test.npy') 52 | 53 | train_x = train[:, 2:36].astype(np.float) 54 | train_y = train[:, 36].astype(np.float) 55 | test_x = test[:, 2:36].astype(np.float) 56 | test_y = test[:, 36].astype(np.float) 57 | kernels = ['rbf', 'linear', 'sigmoid', 'poly'] 58 | degrees = [2, 3] 59 | gamma_values = [0.05*k for k in range(0,4)] 60 | C_values = [0.25*k for k in range(1, 5)] 61 | epsilon_values = [0.05*k for k in range(1, 6)] 62 | 63 | # Feature Normalization 64 | if FEATURE_NORMALIZATION: 65 | print 'started feature normalization', time.time() 66 | x = np.concatenate((train_x, test_x), axis=0) 67 | x = preprocessing.scale(x) 68 | train_x = x[:len(train_x)] 69 | test_x = x[len(train_x):] 70 | 71 | 72 | # Recursive Feature Elimination with cross-validation (RFECV) 73 | if FEATURE_SELECTION: 74 | print 'started feature selection', time.time() 75 | selector = feature_selection.RFECV(estimator=SVR(kernel='linear'), step=3, cv=5) 76 | selector.fit(train_x, train_y) 77 | train_x = selector.transform(train_x) 78 | test_x = selector.transform(test_x) 79 | print selector.ranking_ 80 | elif MANUAL_FEATURE_SELECTION: # leave out the two point attempts 81 | manual_indices = [0, 1, 2, 3, 4, 5, 8, 9, 10, 13, 14, 15, 16, 17, 20, 21, 22, 25, 26, 27, 28, 29, 30, 31, 32, 33] 82 | train_x = train_x[:, manual_indices] 83 | test_x = test_x[:, manual_indices] 84 | 85 | 86 | # hyperparameter selection 87 | if HYPERPARAMETER_SELECTION: 88 | regressors = [] 89 | for C in C_values: 90 | for epsilon in epsilon_values: 91 | for kernel in kernels: 92 | if kernel == 'poly': 93 | for gamma in gamma_values: 94 | for degree in degrees: 95 | regressors.append(SVR(C=C, epsilon=epsilon, kernel='poly', degree=degree, gamma=gamma)) 96 | elif kernel in ['rbf', 'sigmoid']: 97 | for gamma in gamma_values: 98 | regressors.append(SVR(C=C, epsilon=epsilon, kernel=kernel, gamma=gamma)) 99 | else: 100 | regressors.append(SVR(C=C, epsilon=epsilon, kernel=kernel)) 101 | 102 | print 'start hyperparameter selection', time.time() 103 | best_regressor = regressors[hyperparameter_selection(regressors, train_x, train_y, 5)] 104 | print best_regressor.C, best_regressor.epsilon, best_regressor.kernel, best_regressor.degree, best_regressor.gamma 105 | 106 | else: 107 | best_regressor = SVR(C=0.25, epsilon=0.25, kernel='linear') 108 | 109 | best_regressor.fit(train_x, train_y) 110 | prediction = best_regressor.predict(test_x) 111 | 112 | np.save('prediction.npy', prediction) 113 | 114 | print 'RMSE, MAE, MRE (all):', mean_squared_error(test_y, prediction)**0.5, \ 115 | mean_absolute_error(test_y, prediction), \ 116 | mean_relative_error(test_y, prediction) 117 | 118 | # determine error if only best 24 players are selected 119 | indices = [] 120 | for index in range(len(test_x)): 121 | if test[index, 0] in test_players.keys(): 122 | indices.append(index) 123 | print 'RMSE, MAE, MRE (24 best):', mean_squared_error(test_y[indices], prediction[indices])**0.5, \ 124 | mean_absolute_error(test_y[indices], prediction[indices]), \ 125 | mean_relative_error(test_y[indices], prediction[indices]) 126 | print zip(test_y[indices], prediction[indices]) 127 | 128 | if HISTOGRAM: 129 | histogram(test_y, prediction) 130 | 131 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /get_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Roman Lutz. All rights reserved. 2 | # The use and distribution terms for this software are covered by the 3 | # Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 4 | # which can be found in the file LICENSE.md at the root of this distribution. 5 | # By using this software in any fashion, you are agreeing to be bound by 6 | # the terms of this license. 7 | # You must not remove this notice, or any other, from this software. 8 | 9 | import nflgame 10 | 11 | """ create dictionary consisting of all games in the used 12 | year. All these games have a single attribute 'played' 13 | set to False. 14 | """ 15 | def create_empty_entry(): 16 | dict = {} 17 | for year in range(2009, 2015): 18 | dict[str(year)] = {} 19 | for week in range(1, 18): 20 | dict[str(year)][str(week)] = {'played': False} 21 | return dict 22 | 23 | """ Returns a dictionary with the name, birthdate 24 | and the number of years the player has spent as a 25 | professional player. 26 | """ 27 | def get_static_data(id): 28 | player = nflgame.players[id] 29 | return {'name': player.full_name, 30 | 'birthdate': player.birthdate, 31 | 'years_pro': player.years_pro} 32 | 33 | 34 | """ Checks if player had a single team in one season and 35 | return team, if multiple teams: return None 36 | """ 37 | def determine_team(year_data): 38 | teams = {} 39 | games = 0 40 | for week in year_data.keys(): 41 | if year_data[week]['played']: 42 | games += 1 43 | if year_data[week]['home'] in teams.keys(): 44 | teams[year_data[week]['home']] += 1 45 | else: 46 | teams[year_data[week]['home']] = 1 47 | if year_data[week]['away'] in teams.keys(): 48 | teams[year_data[week]['away']] += 1 49 | else: 50 | teams[year_data[week]['away']] = 1 51 | for team in teams.keys(): 52 | # if one team occurs in every game, return game 53 | if teams[team] == games: 54 | return team 55 | # no team occurs in every game 56 | return None 57 | 58 | """ Gets all QB statistics in a single dictionary. 59 | The keys are the player names, the value for each player 60 | is a dictionary with all his game statistics. 61 | """ 62 | def fetch_qb_stats(): 63 | # statistics is a dictionary of all player stats 64 | # the keys are player names, the values are lists 65 | # each list contains dictionaries that contain single game stats 66 | statistics = {} 67 | teams = map(lambda x: x[0], nflgame.teams) 68 | for year in range(2009, 2015): 69 | for week in range(1, 18): 70 | games = nflgame.games(year=year, week=week) 71 | for index, game in enumerate(games): 72 | players = nflgame.combine([games[index]]) 73 | # every player with at least 5 passing attempts 74 | # less than five is not taken into account 75 | for player in filter(lambda player: player.passing_att >= 5, players.passing()): 76 | # if player has not been saved before create entry 77 | if not(player.playerid in statistics.keys()): 78 | statistics[player.playerid] = create_empty_entry() 79 | statistics[player.playerid].update(get_static_data(id = player.playerid)) 80 | # save data in dictionary 81 | statistics[player.playerid][str(year)][str(week)]= { 82 | 'home': game.home, 83 | 'away': game.away, 84 | 'passing_attempts': player.passing_att, 85 | 'passing_yards': player.passing_yds, 86 | 'passing_touchdowns': player.passing_tds, 87 | 'passing_interceptions': player.passing_ints, 88 | 'passing_two_point_attempts': player.passing_twopta, 89 | 'passing_two_point_made': player.passing_twoptm, 90 | 'rushing_attempts': player.rushing_att, 91 | 'rushing_yards': player.rushing_yds, 92 | 'rushing_touchdowns': player.rushing_tds, 93 | 'rushing_two_point_attempts': player.rushing_twopta, 94 | 'rushing_two_point_made': player.rushing_twoptm, 95 | 'fumbles': player.fumbles_tot, 96 | 'played': True 97 | } 98 | return statistics 99 | 100 | # the test players were selected based on their stats 101 | test_players = { 102 | '00-0029263': 'Russell Wilson', 103 | '00-0023459': 'Aaron Rodgers', 104 | '00-0026143': 'Matt Ryan', 105 | '00-0020531': 'Drew Brees', 106 | '00-0026158': 'Joe Flacco', 107 | '00-0027973': 'Andy Dalton', 108 | '00-0024226': 'Jay Cutler', 109 | '00-0023436': 'Alex Smith', 110 | '00-0029701': 'Ryan Tannehill', 111 | '00-0019596': 'Tom Brady', 112 | '00-0031280': 'Derek Carr', 113 | '00-0022924': 'Ben Roethlisberger', 114 | '00-0026625': 'Brian Hoyer', 115 | '00-0021678': 'Tony Romo', 116 | '00-0027974': 'Colin Kaepernick', 117 | '00-0010346': 'Peyton Manning', 118 | '00-0029668': 'Andrew Luck', 119 | '00-0026498': 'Matthew Stafford', 120 | '00-0022803': 'Eli Manning', 121 | '00-0022942': 'Philip Rivers', 122 | '00-0027939': 'Cam Newton', 123 | '00-0031237': 'Teddy Bridgewater', 124 | '00-0031407': 'Blake Bortles', 125 | '00-0023541': 'Kyle Orton' 126 | } 127 | 128 | """ Get the game statistics of all 32 defenses of all 129 | games in the observed time. The dictionary is indexed 130 | by the team's abbreviation, e.g. ATL for Atlanta. 131 | """ 132 | def fetch_defense_stats(): 133 | # team defense statistics 134 | statistics = {} 135 | for team in map(lambda x: x[0], nflgame.teams): 136 | statistics[team] = create_empty_entry() 137 | 138 | for year in range(2009, 2015): 139 | for week in range(1, 18): 140 | for game in nflgame.games(year=year, week=week): 141 | home = game.home 142 | away = game.away 143 | statistics[home][str(year)][str(week)] = { 144 | 'home': home, 145 | 'away': away, 146 | 'points_allowed': game.score_away, 147 | 'passing_yards_allowed': game.stats_away[2], 148 | 'rushing_yards_allowed': game.stats_away[3], 149 | 'turnovers': game.stats_away[6], 150 | 'played': True 151 | 152 | } 153 | statistics[away][str(year)][str(week)] = { 154 | 'home': home, 155 | 'away': away, 156 | 'points_allowed': game.score_home, 157 | 'passing_yards_allowed': game.stats_home[2], 158 | 'rushing_yards_allowed': game.stats_home[3], 159 | 'turnovers': game.stats_home[6], 160 | 'played': True 161 | } 162 | 163 | return statistics 164 | 165 | 166 | 167 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Eclipse Public License - v 1.0 2 | 3 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 4 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 5 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 6 | 7 | 1. DEFINITIONS 8 | 9 | "Contribution" means: 10 | 11 | a) in the case of the initial Contributor, the initial code and documentation 12 | distributed under this Agreement, and 13 | b) in the case of each subsequent Contributor: 14 | i) changes to the Program, and 15 | ii) additions to the Program; 16 | 17 | where such changes and/or additions to the Program originate from and are 18 | distributed by that particular Contributor. A Contribution 'originates' 19 | from a Contributor if it was added to the Program by such Contributor 20 | itself or anyone acting on such Contributor's behalf. Contributions do not 21 | include additions to the Program which: (i) are separate modules of 22 | software distributed in conjunction with the Program under their own 23 | license agreement, and (ii) are not derivative works of the Program. 24 | 25 | "Contributor" means any person or entity that distributes the Program. 26 | 27 | "Licensed Patents" mean patent claims licensable by a Contributor which are 28 | necessarily infringed by the use or sale of its Contribution alone or when 29 | combined with the Program. 30 | 31 | "Program" means the Contributions distributed in accordance with this 32 | Agreement. 33 | 34 | "Recipient" means anyone who receives the Program under this Agreement, 35 | including all Contributors. 36 | 37 | 2. GRANT OF RIGHTS 38 | a) Subject to the terms of this Agreement, each Contributor hereby grants 39 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 40 | reproduce, prepare derivative works of, publicly display, publicly 41 | perform, distribute and sublicense the Contribution of such Contributor, 42 | if any, and such derivative works, in source code and object code form. 43 | b) Subject to the terms of this Agreement, each Contributor hereby grants 44 | Recipient a non-exclusive, worldwide, royalty-free patent license under 45 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 46 | transfer the Contribution of such Contributor, if any, in source code and 47 | object code form. This patent license shall apply to the combination of 48 | the Contribution and the Program if, at the time the Contribution is 49 | added by the Contributor, such addition of the Contribution causes such 50 | combination to be covered by the Licensed Patents. The patent license 51 | shall not apply to any other combinations which include the Contribution. 52 | No hardware per se is licensed hereunder. 53 | c) Recipient understands that although each Contributor grants the licenses 54 | to its Contributions set forth herein, no assurances are provided by any 55 | Contributor that the Program does not infringe the patent or other 56 | intellectual property rights of any other entity. Each Contributor 57 | disclaims any liability to Recipient for claims brought by any other 58 | entity based on infringement of intellectual property rights or 59 | otherwise. As a condition to exercising the rights and licenses granted 60 | hereunder, each Recipient hereby assumes sole responsibility to secure 61 | any other intellectual property rights needed, if any. For example, if a 62 | third party patent license is required to allow Recipient to distribute 63 | the Program, it is Recipient's responsibility to acquire that license 64 | before distributing the Program. 65 | d) Each Contributor represents that to its knowledge it has sufficient 66 | copyright rights in its Contribution, if any, to grant the copyright 67 | license set forth in this Agreement. 68 | 69 | 3. REQUIREMENTS 70 | 71 | A Contributor may choose to distribute the Program in object code form under 72 | its own license agreement, provided that: 73 | 74 | a) it complies with the terms and conditions of this Agreement; and 75 | b) its license agreement: 76 | i) effectively disclaims on behalf of all Contributors all warranties 77 | and conditions, express and implied, including warranties or 78 | conditions of title and non-infringement, and implied warranties or 79 | conditions of merchantability and fitness for a particular purpose; 80 | ii) effectively excludes on behalf of all Contributors all liability for 81 | damages, including direct, indirect, special, incidental and 82 | consequential damages, such as lost profits; 83 | iii) states that any provisions which differ from this Agreement are 84 | offered by that Contributor alone and not by any other party; and 85 | iv) states that source code for the Program is available from such 86 | Contributor, and informs licensees how to obtain it in a reasonable 87 | manner on or through a medium customarily used for software exchange. 88 | 89 | When the Program is made available in source code form: 90 | 91 | a) it must be made available under this Agreement; and 92 | b) a copy of this Agreement must be included with each copy of the Program. 93 | Contributors may not remove or alter any copyright notices contained 94 | within the Program. 95 | 96 | Each Contributor must identify itself as the originator of its Contribution, 97 | if 98 | any, in a manner that reasonably allows subsequent Recipients to identify the 99 | originator of the Contribution. 100 | 101 | 4. COMMERCIAL DISTRIBUTION 102 | 103 | Commercial distributors of software may accept certain responsibilities with 104 | respect to end users, business partners and the like. While this license is 105 | intended to facilitate the commercial use of the Program, the Contributor who 106 | includes the Program in a commercial product offering should do so in a manner 107 | which does not create potential liability for other Contributors. Therefore, 108 | if a Contributor includes the Program in a commercial product offering, such 109 | Contributor ("Commercial Contributor") hereby agrees to defend and indemnify 110 | every other Contributor ("Indemnified Contributor") against any losses, 111 | damages and costs (collectively "Losses") arising from claims, lawsuits and 112 | other legal actions brought by a third party against the Indemnified 113 | Contributor to the extent caused by the acts or omissions of such Commercial 114 | Contributor in connection with its distribution of the Program in a commercial 115 | product offering. The obligations in this section do not apply to any claims 116 | or Losses relating to any actual or alleged intellectual property 117 | infringement. In order to qualify, an Indemnified Contributor must: 118 | a) promptly notify the Commercial Contributor in writing of such claim, and 119 | b) allow the Commercial Contributor to control, and cooperate with the 120 | Commercial Contributor in, the defense and any related settlement 121 | negotiations. The Indemnified Contributor may participate in any such claim at 122 | its own expense. 123 | 124 | For example, a Contributor might include the Program in a commercial product 125 | offering, Product X. That Contributor is then a Commercial Contributor. If 126 | that Commercial Contributor then makes performance claims, or offers 127 | warranties related to Product X, those performance claims and warranties are 128 | such Commercial Contributor's responsibility alone. Under this section, the 129 | Commercial Contributor would have to defend claims against the other 130 | Contributors related to those performance claims and warranties, and if a 131 | court requires any other Contributor to pay any damages as a result, the 132 | Commercial Contributor must pay those damages. 133 | 134 | 5. NO WARRANTY 135 | 136 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN 137 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR 138 | IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, 139 | NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each 140 | Recipient is solely responsible for determining the appropriateness of using 141 | and distributing the Program and assumes all risks associated with its 142 | exercise of rights under this Agreement , including but not limited to the 143 | risks and costs of program errors, compliance with applicable laws, damage to 144 | or loss of data, programs or equipment, and unavailability or interruption of 145 | operations. 146 | 147 | 6. DISCLAIMER OF LIABILITY 148 | 149 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 150 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 151 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 152 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 153 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 154 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 155 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 156 | OF SUCH DAMAGES. 157 | 158 | 7. GENERAL 159 | 160 | If any provision of this Agreement is invalid or unenforceable under 161 | applicable law, it shall not affect the validity or enforceability of the 162 | remainder of the terms of this Agreement, and without further action by the 163 | parties hereto, such provision shall be reformed to the minimum extent 164 | necessary to make such provision valid and enforceable. 165 | 166 | If Recipient institutes patent litigation against any entity (including a 167 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 168 | (excluding combinations of the Program with other software or hardware) 169 | infringes such Recipient's patent(s), then such Recipient's rights granted 170 | under Section 2(b) shall terminate as of the date such litigation is filed. 171 | 172 | All Recipient's rights under this Agreement shall terminate if it fails to 173 | comply with any of the material terms or conditions of this Agreement and does 174 | not cure such failure in a reasonable period of time after becoming aware of 175 | such noncompliance. If all Recipient's rights under this Agreement terminate, 176 | Recipient agrees to cease use and distribution of the Program as soon as 177 | reasonably practicable. However, Recipient's obligations under this Agreement 178 | and any licenses granted by Recipient relating to the Program shall continue 179 | and survive. 180 | 181 | Everyone is permitted to copy and distribute copies of this Agreement, but in 182 | order to avoid inconsistency the Agreement is copyrighted and may only be 183 | modified in the following manner. The Agreement Steward reserves the right to 184 | publish new versions (including revisions) of this Agreement from time to 185 | time. No one other than the Agreement Steward has the right to modify this 186 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The 187 | Eclipse Foundation may assign the responsibility to serve as the Agreement 188 | Steward to a suitable separate entity. Each new version of the Agreement will 189 | be given a distinguishing version number. The Program (including 190 | Contributions) may always be distributed subject to the version of the 191 | Agreement under which it was received. In addition, after a new version of the 192 | Agreement is published, Contributor may elect to distribute the Program 193 | (including its Contributions) under the new version. Except as expressly 194 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 195 | licenses to the intellectual property of any Contributor under this Agreement, 196 | whether expressly, by implication, estoppel or otherwise. All rights in the 197 | Program not expressly granted under this Agreement are reserved. 198 | 199 | This Agreement is governed by the laws of the State of New York and the 200 | intellectual property laws of the United States of America. No party to this 201 | Agreement will bring a legal action under this Agreement more than one year 202 | after the cause of action arose. Each party waives its rights to a jury trial in 203 | any resulting litigation. 204 | -------------------------------------------------------------------------------- /create_datasets.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Roman Lutz. All rights reserved. 2 | # The use and distribution terms for this software are covered by the 3 | # Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 4 | # which can be found in the file LICENSE.md at the root of this distribution. 5 | # By using this software in any fashion, you are agreeing to be bound by 6 | # the terms of this license. 7 | # You must not remove this notice, or any other, from this software. 8 | 9 | from get_data import fetch_defense_stats, fetch_qb_stats, test_players, determine_team 10 | import numpy as np 11 | 12 | """ Returns stats of the player or team corresponding to the id 13 | for the last game before the given week in the given year 14 | """ 15 | def last_game(statistics, id, year, week): 16 | # if the week was the first, go back by one week 17 | week -= 1 18 | if week == 0: 19 | week = 17 20 | year -= 1 21 | # check if there are previous years 22 | if year < 2009 or year > 2014: 23 | return None, None, None 24 | 25 | # check if the team/player played in the given week 26 | # if not played, recursively call previous week to check 27 | if not(statistics[id][str(year)][str(week)]['played']): 28 | return last_game(statistics, id, year, week) 29 | # team/player played in the given week, return stats 30 | else: 31 | return statistics[id][str(year)][str(week)], year, week 32 | 33 | """ Returns the statistics of the last k games for the given 34 | player or team corresponding to the id. If there are less than 35 | k games, only the existing ones are returned. 36 | """ 37 | def last_k_games(k, statistics, id, year, week): 38 | stats, year, week = last_game(statistics, id, year, week) 39 | last_k = [stats] 40 | k -= 1 41 | # case 1: no prior games 42 | if stats == None: 43 | return [] 44 | # case 2: only one game requested 45 | if k == 0: 46 | return last_k 47 | 48 | # case 3: multiple games requested 49 | # repeatedly check if there are further games 50 | while last_k[-1] != None and k > 0: 51 | stats, year, week = last_game(statistics, id, year, week) 52 | last_k.append(stats) 53 | k -= 1 54 | # if None appears in the list, remove it 55 | return last_k[:-1] if last_k[-1] == None else last_k 56 | 57 | """ Calculates the average stats of a defense over the games 58 | handed over to the function. 59 | """ 60 | def average_defense_stats(games): 61 | points = 0 62 | passing_yards = 0 63 | rushing_yards = 0 64 | turnovers = 0 65 | 66 | n_games = len(games) 67 | 68 | if n_games == 0: 69 | return None 70 | 71 | for game in games: 72 | points += game['points_allowed'] 73 | passing_yards += game['passing_yards_allowed'] 74 | rushing_yards += game['rushing_yards_allowed'] 75 | turnovers += game['turnovers'] 76 | 77 | return { 78 | 'points_allowed': float(points)/float(n_games), 79 | 'passing_yards_allowed': float(passing_yards)/float(n_games), 80 | 'rushing_yards_allowed': float(rushing_yards)/float(n_games), 81 | 'turnovers': float(turnovers)/float(n_games), 82 | } 83 | 84 | """ Calculates the average QB stats over the games 85 | handed over to the function. 86 | """ 87 | def average_qb_stats(games): 88 | passing_attempts = 0 89 | passing_yards = 0 90 | passing_touchdowns = 0 91 | passing_interceptions = 0 92 | passing_two_point_attempts = 0 93 | passing_two_point_made = 0 94 | rushing_attempts = 0 95 | rushing_yards = 0 96 | rushing_touchdowns = 0 97 | rushing_two_point_attempts = 0 98 | rushing_two_point_made = 0 99 | fumbles = 0 100 | 101 | n_games = len(games) 102 | 103 | if n_games == 0: 104 | return None 105 | 106 | for game in games: 107 | passing_attempts += game['passing_attempts'] 108 | passing_yards += game['passing_yards'] 109 | passing_touchdowns += game['passing_touchdowns'] 110 | passing_interceptions += game ['passing_interceptions'] 111 | passing_two_point_attempts += game['passing_two_point_attempts'] 112 | passing_two_point_made += game['passing_two_point_made'] 113 | rushing_attempts += game['rushing_attempts'] 114 | rushing_yards += game['rushing_yards'] 115 | rushing_touchdowns += game['rushing_touchdowns'] 116 | rushing_two_point_attempts += game['rushing_two_point_attempts'] 117 | rushing_two_point_made += game['rushing_two_point_made'] 118 | fumbles += game['fumbles'] 119 | 120 | return { 121 | 'passing_attempts': float(passing_attempts)/float(n_games), 122 | 'passing_yards': float(passing_yards)/float(n_games), 123 | 'passing_touchdowns': float(passing_touchdowns)/float(n_games), 124 | 'passing_interceptions': float(passing_interceptions)/float(n_games), 125 | 'passing_two_point_attempts': float(passing_two_point_attempts)/float(n_games), 126 | 'passing_two_point_made': float(passing_two_point_made)/float(n_games), 127 | 'rushing_attempts': float(rushing_attempts)/float(n_games), 128 | 'rushing_yards': float(rushing_yards)/float(n_games), 129 | 'rushing_touchdowns': float(rushing_touchdowns)/float(n_games), 130 | 'rushing_two_point_attempts': float(rushing_two_point_attempts)/float(n_games), 131 | 'rushing_two_point_made': float(rushing_two_point_made)/float(n_games), 132 | 'fumbles': float(fumbles)/float(n_games) 133 | } 134 | 135 | """ Calculates the age of a player for a given game. 136 | """ 137 | def calculate_age(birthdate, game_week, game_year): 138 | if birthdate[1] == '/': 139 | birthdate = '0' + birthdate 140 | birth_month = int(birthdate[0:2]) 141 | if birthdate[4] == '/': 142 | birthdate = birthdate[:3] + '0' + birthdate[3:] 143 | birth_day = int(birthdate[3:5]) 144 | birth_year = int(birthdate[6:10]) 145 | total_days = 1 + (game_week - 1) * 7 146 | game_month = 9 + int(total_days / 31) 147 | game_day = total_days % 31 148 | age = game_year - birth_year 149 | age += float(game_month - birth_month) / 12 150 | age += float(game_day - birth_day) / 365 151 | return age 152 | 153 | """ Creates a row for the dataset. It is assumed 154 | that the player with the given ID actually played 155 | in the given week and year. 156 | """ 157 | def create_row(qb_statistics, defense_statistics, rookie_statistics, id, year, week): 158 | age = calculate_age(qb_statistics[id]['birthdate'], week, year) 159 | years_pro = qb_statistics[id]['years_pro'] - (2015 - year) 160 | last_game_qb_stats = average_qb_stats(last_k_games(1, qb_statistics, id, year, week)) 161 | if last_game_qb_stats == None: 162 | # replace last_game_stats with rookie stats 163 | last_game_qb_stats = rookie_statistics 164 | 165 | last_10_games_qb_stats = average_qb_stats(last_k_games(10, qb_statistics, id, year, week)) 166 | if last_10_games_qb_stats == None: 167 | # replace last_10_games with rookie stats 168 | last_10_games_qb_stats = rookie_statistics 169 | 170 | # find out the opposing team by determining which team the QB plays for 171 | # the API does only allow to query the current team (as of 2015) 172 | # therefore this has to be done differently 173 | qb_team = determine_team(qb_statistics[id][str(year)]) 174 | # if QB had multiple teams in the given year, don't include QB stats 175 | if qb_team == None: 176 | return None 177 | 178 | home_team = qb_statistics[id][str(year)][str(week)]['home'] 179 | away_team = qb_statistics[id][str(year)][str(week)]['away'] 180 | # take other team as opponent 181 | opponent = home_team if away_team == qb_team else away_team 182 | 183 | # the defense stats should only be used based on some data 184 | # it cannot be substituted by 'rookie' stats 185 | last_game_defense_stats = average_defense_stats(last_k_games(1, defense_statistics, opponent, year, week)) 186 | last_10_games_defense_stats = average_defense_stats(last_k_games(10, defense_statistics, opponent, year, week)) 187 | 188 | # row consists of 189 | # 0: QB id 190 | # 1: QB name 191 | # 2: QB age 192 | # 3: QB years pro 193 | # 4-15: last game QB stats 194 | # 16-27: last 10 games QB stats 195 | # 28-31: last game defense stats 196 | # 32-35: last 10 games defense stats 197 | # 36: actual fantasy score = target 198 | return [id, 199 | qb_statistics[id]['name'], 200 | age, 201 | years_pro, 202 | last_game_qb_stats['passing_attempts'], 203 | last_game_qb_stats['passing_yards'], 204 | last_game_qb_stats['passing_touchdowns'], 205 | last_game_qb_stats['passing_interceptions'], 206 | last_game_qb_stats['passing_two_point_attempts'], 207 | last_game_qb_stats['passing_two_point_made'], 208 | last_game_qb_stats['rushing_attempts'], 209 | last_game_qb_stats['rushing_yards'], 210 | last_game_qb_stats['rushing_touchdowns'], 211 | last_game_qb_stats['rushing_two_point_attempts'], 212 | last_game_qb_stats['rushing_two_point_made'], 213 | last_game_qb_stats['fumbles'], 214 | last_10_games_qb_stats['passing_attempts'], 215 | last_10_games_qb_stats['passing_yards'], 216 | last_10_games_qb_stats['passing_touchdowns'], 217 | last_10_games_qb_stats['passing_interceptions'], 218 | last_10_games_qb_stats['passing_two_point_attempts'], 219 | last_10_games_qb_stats['passing_two_point_made'], 220 | last_10_games_qb_stats['rushing_attempts'], 221 | last_10_games_qb_stats['rushing_yards'], 222 | last_10_games_qb_stats['rushing_touchdowns'], 223 | last_10_games_qb_stats['rushing_two_point_attempts'], 224 | last_10_games_qb_stats['rushing_two_point_made'], 225 | last_10_games_qb_stats['fumbles'], 226 | last_game_defense_stats['points_allowed'], 227 | last_game_defense_stats['passing_yards_allowed'], 228 | last_game_defense_stats['rushing_yards_allowed'], 229 | last_game_defense_stats['turnovers'], 230 | last_10_games_defense_stats['points_allowed'], 231 | last_10_games_defense_stats['passing_yards_allowed'], 232 | last_10_games_defense_stats['rushing_yards_allowed'], 233 | last_10_games_defense_stats['turnovers'], 234 | fantasy_score(qb_statistics[id][str(year)][str(week)]['passing_yards'], 235 | qb_statistics[id][str(year)][str(week)]['passing_touchdowns'], 236 | qb_statistics[id][str(year)][str(week)]['passing_interceptions'], 237 | qb_statistics[id][str(year)][str(week)]['rushing_yards'], 238 | qb_statistics[id][str(year)][str(week)]['rushing_touchdowns'], 239 | qb_statistics[id][str(year)][str(week)]['fumbles'], 240 | qb_statistics[id][str(year)][str(week)]['rushing_two_point_made'] + qb_statistics[id][str(year)][str(week)]['passing_two_point_made']) 241 | ] 242 | 243 | """ Calculate the fantasy score based on NFL standard rules. 244 | """ 245 | def fantasy_score(passing_yards, passing_touchdowns, interceptions, rushing_yards, rushing_touchdowns, fumbles, two_point): 246 | return float(passing_yards) / 25 + passing_touchdowns * 4.0 - interceptions * 2.0 + float(rushing_yards) / 10 + rushing_touchdowns * 6.0 - fumbles * 2.0 + two_point * 2 247 | 248 | """ Calculate the average stats of all Rookie QBs in the 249 | observed years. 250 | """ 251 | def rookie_qb_average(qb_statistics): 252 | games = [] 253 | for qb in qb_statistics.keys(): 254 | rookie_year = 2015 - qb_statistics[qb]['years_pro'] + 1 255 | if rookie_year >= 2009: 256 | # some wrong NFL data has 2014 rookies labelled as 2015 rookies 257 | if rookie_year > 2014: 258 | rookie_year = 2014 259 | # data on rookie_year is available 260 | for week in range(1, 18): 261 | if qb_statistics[qb][str(rookie_year)][str(week)]['played']: 262 | games.append(qb_statistics[qb][str(rookie_year)][str(week)]) 263 | return average_qb_stats(games) 264 | 265 | """ Create the dataset by determining all rows 266 | """ 267 | def create_all_rows(qb_statistics, defense_statistics, start_year, end_year): 268 | rows = [] 269 | rookie_qb_stats = rookie_qb_average(qb_statistics) 270 | # year 2009 should be left such that some previous defense data is available 271 | for year in range(start_year, end_year): 272 | for week in range(1, 18): 273 | for qb in qb_statistics.keys(): 274 | if qb_statistics[qb][str(year)][str(week)]['played']: 275 | row = create_row(qb_statistics, defense_statistics, rookie_qb_stats, qb, year, week) 276 | # if stats are inconclusive, don't use them 277 | # for more information see create_row documentation 278 | if row != None: 279 | rows.append(row) 280 | return rows 281 | 282 | # save data sets to files 283 | np.save('train.npy', np.array(create_all_rows(fetch_qb_stats(), fetch_defense_stats(), 2010, 2014))) 284 | np.save('test.npy', np.array(create_all_rows(fetch_qb_stats(), fetch_defense_stats(), 2014, 2015))) 285 | 286 | 287 | 288 | --------------------------------------------------------------------------------