├── Unit 3 ├── Setup.py ├── helperLogic_Unit1.py └── dataframes_Exercise.py ├── Unit 6 ├── epa_exercise.py └── efficiency_exercise.py ├── Unit 2 ├── Python_OOP_Lesson.py └── Python_Modules_Lesson.py ├── Unit 4 ├── teamData_Exercise.py ├── helperFunction_Unit2.py └── playerData_Exercise.py ├── _AI_Unit ├── AI_Unit_Lesson.py ├── AI_Unit_Exercises.py └── AI_Unit_Vocabulary.md ├── Unit 1 ├── Python_Review_Exercises.py ├── Python_Review_Lesson.py └── General_Python_Vocabulary.md ├── Unit 5 ├── dataVisualization_barCharts.py ├── dataVisualization_lineChart.py └── dataVisualization_scatterPlot.py ├── .gitignore ├── Resources └── Student Helper function Sheet.png ├── washed ├── __pycache__ │ └── helperFunctions.cpython-312.pyc ├── washed.py └── helperFunctions.py ├── Analytics Helper Functions ├── review.py ├── __pycache__ │ └── helperFunctions.cpython-312.pyc ├── visualiization.py └── helperFunctions.py └── readme.md /Unit 3/Setup.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Unit 6/epa_exercise.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Unit 2/Python_OOP_Lesson.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Unit 3/helperLogic_Unit1.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Unit 4/teamData_Exercise.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /_AI_Unit/AI_Unit_Lesson.py: -------------------------------------------------------------------------------- 1 
| -------------------------------------------------------------------------------- /Unit 1/Python_Review_Exercises.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Unit 1/Python_Review_Lesson.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Unit 2/Python_Modules_Lesson.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Unit 3/dataframes_Exercise.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Unit 4/helperFunction_Unit2.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Unit 4/playerData_Exercise.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Unit 6/efficiency_exercise.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /_AI_Unit/AI_Unit_Exercises.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Unit 1/General_Python_Vocabulary.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Unit 5/dataVisualization_barCharts.py: -------------------------------------------------------------------------------- 
1 | -------------------------------------------------------------------------------- /Unit 5/dataVisualization_lineChart.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Unit 5/dataVisualization_scatterPlot.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Instructor_Notes 2 | Assessments 3 | 4 | requirements.txt 5 | -------------------------------------------------------------------------------- /Resources/Student Helper function Sheet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Iankimble/AP-Coding/HEAD/Resources/Student Helper function Sheet.png -------------------------------------------------------------------------------- /washed/__pycache__/helperFunctions.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Iankimble/AP-Coding/HEAD/washed/__pycache__/helperFunctions.cpython-312.pyc -------------------------------------------------------------------------------- /Analytics Helper Functions/review.py: -------------------------------------------------------------------------------- 1 | from helperFunctions import get_season_totals_by_position 2 | 3 | rbStats = get_season_totals_by_position(2024,'RB') 4 | print(rbStats) -------------------------------------------------------------------------------- /Analytics Helper Functions/__pycache__/helperFunctions.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Iankimble/AP-Coding/HEAD/Analytics Helper 
from helperFunctions import get_season_totals_by_position, plot_position_stat_bar, plot_player_stat_by_week, get_player_stats

# 1. What 4 stats make a player washed?
#    games played  - indicates the player is healthy or game-day ready
#    attempts      - indicates they are able to get the ball out
#    completions   - they are getting the ball to their receivers
#    passing yards - they are getting significant yardage


# Get season totals for every QB in 2024
qbStats = get_season_totals_by_position(2024, 'QB')
print(qbStats)

# 2. Why is this player washed?
#    Based on the league average for rushing yards at the QB position, Russell Wilson...

# Weekly rows for Russell Wilson in 2024 (kept for inspection alongside the chart)
RussStats = get_player_stats(2024, 'Russell', 'Wilson')

# Line graph of Russell Wilson's passing yards for 2024.
# The output file is named after the player actually plotted; it was previously
# saved under a Jalen Hurts file name copied from another exercise.
plot_player_stat_by_week(2024, 'Russell', 'Wilson', "passing_yards", save_path='Russell Wilson Passing Yards 2024')
Overview and Purpose** 5 | --- 6 | Lorem ipsum dolor sit amet consectetur adipiscing elit. Quisque faucibus ex sapien vitae pellentesque sem placerat. In id cursus mi pretium tellus duis convallis. Tempus leo eu aenean sed diam urna tempor. Pulvinar vivamus fringilla lacus nec metus bibendum egestas. Iaculis massa nisl malesuada lacinia integer nunc posuere. Ut hendrerit semper vel class aptent taciti sociosqu. Ad litora torquent per conubia nostra inceptos himenaeos. 7 | 8 | **2. What You'll Learn** 9 | --- 10 | Lorem ipsum dolor sit amet consectetur adipiscing elit. Quisque faucibus ex sapien vitae pellentesque sem placerat. In id cursus mi pretium tellus duis convallis. Tempus leo eu aenean sed diam urna tempor. Pulvinar vivamus fringilla lacus nec metus bibendum egestas. Iaculis massa nisl malesuada lacinia integer nunc posuere. Ut hendrerit semper vel class aptent taciti sociosqu. Ad litora torquent per conubia nostra inceptos himenaeos. 11 | 12 | **3. Getting Started** 13 | 14 | **4. Folder Structure** 15 | --- 16 | - Lessons 17 | What is analytics? 18 | 19 | 1. Finding the mean, median, mode of NFL teams by score 20 | 21 | 2. Find the point differential 22 | 23 | 3. Find the best individual player mean, median, and mode 24 | 25 | 4. Find the standard deviation 26 | 27 | 5. Creating graphs and charts 28 | 29 | - Setup Guide 30 | 1. Installation and Setup 31 | 2. Running helperFunctions() 32 | 33 | **5. How to use this Repo** 34 | --- 35 | Lorem ipsum dolor sit amet consectetur adipiscing elit. Quisque faucibus ex sapien vitae pellentesque sem placerat. In id cursus mi pretium tellus duis convallis. Tempus leo eu aenean sed diam urna tempor. Pulvinar vivamus fringilla lacus nec metus bibendum egestas. Iaculis massa nisl malesuada lacinia integer nunc posuere. Ut hendrerit semper vel class aptent taciti sociosqu. Ad litora torquent per conubia nostra inceptos himenaeos. 
36 | 37 | - **For Students** 38 | 39 | Lorem ipsum dolor sit amet consectetur adipiscing elit. Quisque faucibus ex sapien vitae pellentesque sem placerat. In id cursus mi pretium tellus duis convallis. Tempus leo eu aenean sed diam urna tempor. Pulvinar vivamus fringilla lacus nec metus bibendum egestas. Iaculis massa nisl malesuada lacinia integer nunc posuere. Ut hendrerit semper vel class aptent taciti sociosqu. Ad litora torquent per conubia nostra inceptos himenaeos. 40 | 41 | - **For Teachers** 42 | 43 | Lorem ipsum dolor sit amet consectetur adipiscing elit. Quisque faucibus ex sapien vitae pellentesque sem placerat. In id cursus mi pretium tellus duis convallis. Tempus leo eu aenean sed diam urna tempor. Pulvinar vivamus fringilla lacus nec metus bibendum egestas. Iaculis massa nisl malesuada lacinia integer nunc posuere. Ut hendrerit semper vel class aptent taciti sociosqu. Ad litora torquent per conubia nostra inceptos himenaeos. 44 | 45 | - **For those who are interested in learning Sports Analytics** 46 | 47 | Lorem ipsum dolor sit amet consectetur adipiscing elit. Quisque faucibus ex sapien vitae pellentesque sem placerat. In id cursus mi pretium tellus duis convallis. Tempus leo eu aenean sed diam urna tempor. Pulvinar vivamus fringilla lacus nec metus bibendum egestas. Iaculis massa nisl malesuada lacinia integer nunc posuere. Ut hendrerit semper vel class aptent taciti sociosqu. Ad litora torquent per conubia nostra inceptos himenaeos. 48 | 49 | **6. Licenses and Acknowledgments** 50 | --- 51 | Lorem ipsum dolor sit amet consectetur adipiscing elit. Quisque faucibus ex sapien vitae pellentesque sem placerat. In id cursus mi pretium tellus duis convallis. Tempus leo eu aenean sed diam urna tempor. Pulvinar vivamus fringilla lacus nec metus bibendum egestas. Iaculis massa nisl malesuada lacinia integer nunc posuere. Ut hendrerit semper vel class aptent taciti sociosqu. Ad litora torquent per conubia nostra inceptos himenaeos. 52 | 53 | **7. 
def get_season_totals_by_position(year: int, position: str) -> pd.DataFrame:
    """
    Return trimmed full-season stats for all players at a given position.

    Weekly rows are summed into one row per player. Players are grouped by
    identity (display name / id / position) only, so a player who changed
    teams mid-season is reported once, labelled with the team from his
    latest week, instead of once per team.

    Args:
        year (int): NFL season (e.g., 2024)
        position (str): Player position ('QB', 'RB', 'WR', 'TE', etc.).
            Case-insensitive.

    Returns:
        pandas.DataFrame: One row per player. The identifying columns come
        first, followed by the core season totals for the position. QB, RB,
        WR and TE are sorted by their headline yardage stat (descending);
        any other position only gets 'games_played'.

    Raises:
        ValueError: If the weekly data has no 'position' column, or has no
            rows for the requested position.
    """
    # ---- Position-specific core columns ----
    core_stats_by_pos = {
        "QB": [
            "games_played",
            "attempts",
            "completions",
            "passing_yards",
            "passing_tds",
            "interceptions",
            "rushing_yards",
            "rushing_tds",
        ],
        "RB": [
            "games_played",
            "rushing_attempts",
            "rushing_yards",
            "rushing_tds",
            "targets",
            "receptions",
            "receiving_yards",
            "receiving_tds",
        ],
        "WR": [
            "games_played",
            "targets",
            "receptions",
            "receiving_yards",
            "receiving_tds",
        ],
        "TE": [
            "games_played",
            "targets",
            "receptions",
            "receiving_yards",
            "receiving_tds",
        ],
    }
    # Stat used to rank the output for each supported position
    sort_stat_by_pos = {
        "QB": "passing_yards",
        "RB": "rushing_yards",
        "WR": "receiving_yards",
        "TE": "receiving_yards",
    }

    # Load weekly stats for the season
    weekly = nfl.import_weekly_data([year])

    pos = position.upper()

    if "position" not in weekly.columns:
        raise ValueError("Column 'position' not found in weekly data.")

    # Filter to the requested position
    pos_df = weekly[weekly["position"] == pos].copy()
    if pos_df.empty:
        raise ValueError(f"No data found for position '{pos}' in season {year}.")

    # The RB list asks for 'rushing_attempts'; the nflverse weekly data is
    # expected to call that column 'carries' (TODO confirm against the
    # installed nfl_data_py version). Alias it so the stat is not dropped.
    if "rushing_attempts" not in pos_df.columns and "carries" in pos_df.columns:
        pos_df["rushing_attempts"] = pos_df["carries"]

    # Identifying columns shown in front of the stats
    group_cols = ["player_display_name", "player_id", "position", "recent_team"]
    group_cols = [c for c in group_cols if c in pos_df.columns]

    # Group on player identity only. Including 'recent_team' in the key would
    # split a traded player into one row per team.
    key_cols = [c for c in group_cols if c != "recent_team"] or group_cols
    by_player = pos_df.groupby(key_cols)

    # Games played = distinct weeks with a row (row count if 'week' is missing)
    if "week" in pos_df.columns:
        games = by_player["week"].nunique()
    else:
        games = by_player.size()
    season_totals = games.reset_index(name="games_played")

    # Only the columns that can end up in the output need to be summed
    pos_core = core_stats_by_pos.get(pos, ["games_played"])
    numeric_cols = set(pos_df.select_dtypes(include="number").columns)
    sum_cols = [
        c for c in pos_core
        if c in numeric_cols and c not in key_cols and c != "games_played"
    ]
    if sum_cols:
        totals = by_player[sum_cols].sum().reset_index()
        season_totals = season_totals.merge(totals, on=key_cols, how="left")

    # Label each player with the team from his latest week
    if "recent_team" in group_cols and "recent_team" not in key_cols:
        if "week" in pos_df.columns:
            ordered = pos_df.sort_values("week", kind="stable")
        else:
            ordered = pos_df
        latest_team = ordered.groupby(key_cols)["recent_team"].last().reset_index()
        season_totals = season_totals.merge(latest_team, on=key_cols, how="left")

    # Trim to identifying columns + the core stats that are available
    keep_cols = group_cols + [c for c in pos_core if c in season_totals.columns]
    season_totals = season_totals[keep_cols]

    # Rank by the headline stat for the position, when there is one
    sort_stat = sort_stat_by_pos.get(pos)
    if sort_stat in season_totals.columns:
        season_totals = season_totals.sort_values(sort_stat, ascending=False)

    return season_totals

# Example:
# qb_2024 = get_season_totals_by_position(2024, "QB")
# print(qb_2024.head())
def plot_position_stat_bar(year: int,
                           position: str,
                           stat_col: str,
                           top_n: int = 20,
                           save_path: str | None = None) -> None:
    """
    Plot a bar chart of one stat for the top players at a position,
    and optionally save it as a PNG file.

    Args:
        year (int): NFL season (e.g., 2024)
        position (str): Position (e.g., 'QB', 'RB', 'WR', 'TE')
        stat_col (str): Stat column to plot (e.g., 'passing_yards')
        top_n (int): Show at most the top N players (default 20)
        save_path (str | None): Optional. File path to save the PNG
            (e.g., 'qb_passing_2024.png')

    Returns:
        None

    Raises:
        ValueError: If top_n is smaller than 1, or stat_col is not one of the
            columns returned by get_season_totals_by_position.
    """
    if top_n < 1:
        raise ValueError(f"top_n must be at least 1, got {top_n}.")

    df = get_season_totals_by_position(year, position)

    if stat_col not in df.columns:
        raise ValueError(
            f"Column '{stat_col}' not found. Available columns: {list(df.columns)}"
        )

    df_sorted = df.sort_values(stat_col, ascending=False).head(top_n)

    fig = plt.figure(figsize=(12, 6))
    try:
        plt.bar(df_sorted["player_display_name"], df_sorted[stat_col])

        pretty_stat = stat_col.replace("_", " ").title()
        # Use the number of bars actually drawn; the position may have fewer
        # than top_n players.
        plt.title(f"Top {len(df_sorted)} {position.upper()} by {pretty_stat} in {year}")
        plt.xlabel("Player")
        plt.ylabel(pretty_stat)
        plt.xticks(rotation=45, ha="right")

        plt.tight_layout()

        # Save before show(): some backends clear the figure once it is shown
        if save_path:
            plt.savefig(save_path, dpi=300)  # dpi=300 gives high quality images
            print(f"Chart saved as: {save_path}")

        plt.show()
    finally:
        # Release the figure so repeated calls do not pile up open figures
        plt.close(fig)

# plot_position_stat_bar(2024, "QB", "passing_yards", save_path="qb_passing_2024.png", top_n=20)
# plot_position_stat_bar(2024, "RB", "rushing_yards", save_path="rb_rushing_2024.png", top_n=20)


def get_player_stats(year: int, first_name: str, last_name: str) -> pd.DataFrame:
    """
    Get all weekly stats for a single NFL player for a given season.

    Matching is case-insensitive and ignores surrounding whitespace. A row
    matches when the first word of 'player_display_name' equals first_name
    and the last word equals last_name. Generational suffixes
    ("Jr.", "Sr.", "II", "III", "IV", "V") are ignored, so ("Michael",
    "Pittman") finds "Michael Pittman Jr.". A full-name comparison also lets
    multi-word surnames such as ("Amon-Ra", "St. Brown") match.

    Args:
        year (int): NFL season year (e.g., 2024)
        first_name (str): Player's first name (e.g., "Jalen")
        last_name (str): Player's last name (e.g., "Hurts")

    Returns:
        pandas.DataFrame: All weekly stats for that player in that season,
        sorted by week. If several players share the name, rows for all of
        them are returned.

    Raises:
        ValueError: If no row matches the given name.
    """
    suffixes = {"jr", "sr", "ii", "iii", "iv", "v"}

    def _strip_suffix(tokens: list) -> list:
        # Drop trailing generational suffixes but never empty the name
        trimmed = list(tokens)
        while len(trimmed) > 1 and trimmed[-1].strip(".") in suffixes:
            trimmed.pop()
        return trimmed

    # Load weekly data for the season
    weekly = nfl.import_weekly_data([year])

    # Normalize inputs
    first = first_name.lower().strip()
    last = last_name.lower().strip()
    wanted_full = " ".join(f"{first} {last}".split())

    # Normalize player names without adding helper columns to the data
    tokens = (
        weekly["player_display_name"].fillna("").astype(str).str.lower().str.split()
    )
    core = pd.Series(
        [_strip_suffix(t) for t in tokens], index=tokens.index, dtype=object
    )

    first_tok = core.str[0]
    raw_last = tokens.str[-1]    # last word as written (original behavior)
    core_last = core.str[-1]     # last word once suffixes are removed
    full_name = core.str.join(" ")

    matches = (
        (first_tok == first) & ((raw_last == last) | (core_last == last))
    ) | (full_name == wanted_full)

    player_df = weekly[matches].copy()

    if player_df.empty:
        raise ValueError(
            f"No data found for player '{first_name} {last_name}' in season {year}."
        )

    # Sort by week for clean output
    return player_df.sort_values("week")

# playerData = get_player_stats(2024, 'Lamar', 'Jackson')
# print(playerData)


def _save_table_png(df, png_path, figsize, fontsize, row_scale, title=None):
    """Render df as a Matplotlib table, save it to png_path, close the figure."""
    if df.empty:
        raise ValueError("Cannot render an empty DataFrame as a PNG table.")

    fig, ax = plt.subplots(figsize=figsize)
    try:
        ax.axis("off")  # hide axes

        table = ax.table(
            cellText=df.values,
            colLabels=df.columns,
            loc="center",
            cellLoc="center",
        )
        table.auto_set_font_size(False)
        table.set_fontsize(fontsize)
        table.scale(1, row_scale)  # increase row height

        if title is not None:
            ax.set_title(title, pad=20)

        # Save through the figure itself so the right figure is written even
        # if another one is current.
        fig.savefig(png_path, bbox_inches="tight", dpi=300)
    finally:
        plt.close(fig)


def dataframe_to_png(df, png_path="dataframe.png", fontsize=10, col_width=2.0):
    """
    Save a pandas DataFrame as a PNG image using Matplotlib.

    Args:
        df (pd.DataFrame): The DataFrame to export
        png_path (str): File path to save the PNG
        fontsize (int): Font size in the table
        col_width (float): Width of each column in the image

    Returns:
        None (saves PNG file)

    Raises:
        ValueError: If df has no rows or no columns.
    """
    # Figure size grows with the number of rows and columns
    n_rows, n_cols = df.shape
    figsize = (col_width * n_cols, 0.4 * n_rows)

    _save_table_png(df, png_path, figsize, fontsize, row_scale=1.5)

    print(f"DataFrame saved as PNG: {png_path}")

# qb_totals = get_season_totals_by_position(2024, "QB")
# dataframe_to_png(qb_totals, "qb_totals_2024.png")


def export_player_season_png(
    year: int,
    first_name: str,
    last_name: str,
    png_path: str | None = None,
    columns: list[str] | None = None,
    fontsize: int = 10,
) -> str:
    """
    Get a player's weekly stats for a season and export them as a PNG table.

    Args:
        year (int): NFL season (e.g., 2024)
        first_name (str): Player's first name (e.g., "Jalen")
        last_name (str): Player's last name (e.g., "Hurts")
        png_path (str | None): Optional file path for the PNG.
            If None, a name is generated automatically.
        columns (list[str] | None): Optional list of columns to include.
            Names that are not in the data are skipped.
            If None, all columns are used.
        fontsize (int): Font size for the table text.

    Returns:
        str: The path to the saved PNG file.

    Raises:
        ValueError: If the player is not found, or none of the requested
            columns exist.
    """
    # 1. Get the player's DataFrame (one row per week)
    df = get_player_stats(year, first_name, last_name)

    # 2. Keep only selected columns if provided
    if columns is not None:
        cols_to_use = [c for c in columns if c in df.columns]
        if not cols_to_use:
            raise ValueError("None of the specified columns exist in the DataFrame.")
        df = df[cols_to_use]

    # 3. Auto-generate a file name if not provided
    if png_path is None:
        safe_first = first_name.lower().replace(" ", "_")
        safe_last = last_name.lower().replace(" ", "_")
        png_path = f"{safe_first}_{safe_last}_{year}_stats.png"

    # 4. Size the figure for a single player season (usually <= 18 games)
    n_rows, n_cols = df.shape
    figsize = (max(8, n_cols * 1.2), max(2, n_rows * 0.6))

    # 5. Render, title and save the table
    full_name = f"{first_name} {last_name}"
    _save_table_png(
        df,
        png_path,
        figsize,
        fontsize,
        row_scale=1.4,
        title=f"{full_name} – {year} Season Stats (Weekly)",
    )

    print(f"Saved player stats table as: {png_path}")
    return png_path

# export_player_season_png(2024, "Jalen", "Hurts")


def plot_player_stat_by_week(
    year: int,
    first_name: str,
    last_name: str,
    stat_col: str,
    save_path: str | None = None
) -> None:
    """
    Plot a line graph for a specific player's stat by week for a given season.

    Args:
        year (int): NFL season year (e.g., 2024)
        first_name (str): Player's first name (e.g., "Jalen")
        last_name (str): Player's last name (e.g., "Hurts")
        stat_col (str): Column name of the stat to plot
            (e.g., "passing_yards", "rushing_yards", "receiving_yards")
        save_path (str | None): Optional path to save the plot as a PNG.
            If None, the plot is just shown.

    Returns:
        None

    Raises:
        ValueError: If the player is not found, or the data lacks stat_col
            or the 'week' column.
    """
    # Get the player's weekly stats DataFrame
    df = get_player_stats(year, first_name, last_name)

    # Make sure the stat column exists
    if stat_col not in df.columns:
        raise ValueError(
            f"Column '{stat_col}' not found in player data. "
            f"Available columns include: {list(df.columns)}"
        )

    # Ensure data is sorted by week
    if "week" not in df.columns:
        raise ValueError("Column 'week' not found in player data.")
    df = df.sort_values("week")

    weeks = df["week"]
    # Non-numeric or missing values are plotted as 0
    values = pd.to_numeric(df[stat_col], errors="coerce").fillna(0)

    fig = plt.figure(figsize=(10, 5))
    try:
        plt.plot(weeks, values, marker="o")

        # Labels and title
        pretty_stat = stat_col.replace("_", " ").title()
        full_name = f"{first_name} {last_name}"

        plt.title(f"{full_name} – {pretty_stat} by Week ({year} Season)")
        plt.xlabel("Week")
        plt.ylabel(pretty_stat)
        plt.xticks(weeks)  # show actual week numbers on x-axis
        plt.grid(True, linestyle="--", alpha=0.5)

        plt.tight_layout()

        # Optionally save as PNG (before show(), which may clear the figure)
        if save_path is not None:
            plt.savefig(save_path, dpi=300, bbox_inches="tight")
            print(f"Saved line chart as: {save_path}")

        plt.show()
    finally:
        # Release the figure so repeated calls do not pile up open figures
        plt.close(fig)
plot_player_stat_by_week( 405 | # 2004, 406 | # "Brian", 407 | # "Westbrook", 408 | # "rushing_yards", 409 | # save_path="brian_westbrook_2004_rushing_yards_by_week.png" 410 | # ) 411 | 412 | def get_team_season_data(year: int, include_team_meta: bool = True) -> pd.DataFrame: 413 | """ 414 | Get clean team-level season stats for all teams for a given NFL season. 415 | 416 | Removes ALL metadata fields related to: 417 | - logos 418 | - colors 419 | - wordmarks 420 | - nicknames 421 | - divisions 422 | 423 | Adds: 424 | - points_for 425 | - points_against 426 | - point_diff 427 | - ppg_for 428 | - ppg_against 429 | 430 | Compatible with nfl_data_py 0.3.3. 431 | """ 432 | 433 | # 1. Load schedule data 434 | games = nfl.import_schedules([year]) 435 | 436 | if "season_type" in games.columns: 437 | games = games[games["season_type"] == "REG"] 438 | 439 | games = games.dropna(subset=["home_score", "away_score"]) 440 | 441 | # 2. Build home and away rows 442 | home = games[["home_team", "home_score", "away_score"]].rename( 443 | columns={"home_team": "team", "home_score": "points_for", "away_score": "points_against"} 444 | ) 445 | away = games[["away_team", "away_score", "home_score"]].rename( 446 | columns={"away_team": "team", "away_score": "points_for", "home_score": "points_against"} 447 | ) 448 | 449 | # Outcomes 450 | for df in (home, away): 451 | df["win"] = (df["points_for"] > df["points_against"]).astype(int) 452 | df["loss"] = (df["points_for"] < df["points_against"]).astype(int) 453 | df["tie"] = (df["points_for"] == df["points_against"]).astype(int) 454 | 455 | # Combine 456 | team_games = pd.concat([home, away], ignore_index=True) 457 | 458 | # 3. 
Aggregate season totals 459 | team_stats = team_games.groupby("team").agg( 460 | games_played=("win", "size"), 461 | wins=("win", "sum"), 462 | losses=("loss", "sum"), 463 | ties=("tie", "sum"), 464 | points_for=("points_for", "sum"), 465 | points_against=("points_against", "sum") 466 | ).reset_index() 467 | 468 | # Derived stats 469 | team_stats["point_diff"] = team_stats["points_for"] - team_stats["points_against"] 470 | team_stats["ppg_for"] = team_stats["points_for"] / team_stats["games_played"] 471 | team_stats["ppg_against"] = team_stats["points_against"] / team_stats["games_played"] 472 | 473 | # 4. Optional metadata merge + cleaning 474 | if include_team_meta: 475 | try: 476 | meta = nfl.import_team_desc() 477 | 478 | # Merge on appropriate key 479 | if "team" in meta.columns: 480 | team_stats = team_stats.merge(meta, on="team", how="left") 481 | elif "team_abbr" in meta.columns: 482 | team_stats = team_stats.merge(meta, left_on="team", right_on="team_abbr", how="left") 483 | 484 | # Fields to remove 485 | remove_cols = [ 486 | c for c in team_stats.columns 487 | if any(keyword in c.lower() for keyword in [ 488 | "logo", "wordmark", "color", "nick", "division" 489 | ]) 490 | ] 491 | 492 | team_stats = team_stats.drop(columns=remove_cols, errors="ignore") 493 | 494 | except Exception: 495 | pass 496 | 497 | # Add season 498 | team_stats["season"] = year 499 | 500 | # Reorder 501 | cols = [ 502 | "season", "team", 503 | "games_played", "wins", "losses", "ties", 504 | "points_for", "points_against", "point_diff", 505 | "ppg_for", "ppg_against" 506 | ] 507 | other_cols = [c for c in team_stats.columns if c not in cols] 508 | 509 | return team_stats[cols + other_cols] 510 | 511 | 512 | #df = get_team_season_data(2024) 513 | #print(df.head) 514 | 515 | def get_all_team_game_stats(year: int) -> pd.DataFrame: 516 | """ 517 | Return game-by-game stats for every team in a given NFL season. 
518 | One row per *team-game* (so each actual game appears twice: once per team). 519 | 520 | Columns include: 521 | - season, week, game_id, gameday 522 | - team, opponent, is_home 523 | - points_for, points_against, point_diff 524 | - result ('W', 'L', 'T') 525 | Compatible with nfl_data_py 0.3.3. 526 | """ 527 | 528 | # 1. Load schedule data 529 | games = nfl.import_schedules([year]) 530 | 531 | # Filter to regular season if column exists 532 | if "season_type" in games.columns: 533 | games = games[games["season_type"] == "REG"] 534 | 535 | # Drop games without final scores 536 | games = games.dropna(subset=["home_score", "away_score"]) 537 | 538 | # 2. Build home team rows 539 | home = games.copy() 540 | home["team"] = home["home_team"] 541 | home["opponent"] = home["away_team"] 542 | home["is_home"] = True 543 | home["points_for"] = home["home_score"] 544 | home["points_against"] = home["away_score"] 545 | 546 | # 3. Build away team rows 547 | away = games.copy() 548 | away["team"] = away["away_team"] 549 | away["opponent"] = away["home_team"] 550 | away["is_home"] = False 551 | away["points_for"] = away["away_score"] 552 | away["points_against"] = away["home_score"] 553 | 554 | # 4. Combine into team-game logs 555 | team_games = pd.concat([home, away], ignore_index=True) 556 | 557 | # 5. Derived stats 558 | team_games["point_diff"] = team_games["points_for"] - team_games["points_against"] 559 | team_games["result"] = np.where( 560 | team_games["points_for"] > team_games["points_against"], "W", 561 | np.where(team_games["points_for"] < team_games["points_against"], "L", "T") 562 | ) 563 | 564 | # 6. 
Keep / rename main columns (and keep extras if present) 565 | base_cols = [ 566 | "season" if "season" in team_games.columns else None, 567 | "week" if "week" in team_games.columns else None, 568 | "gameday" if "gameday" in team_games.columns else None, 569 | "game_id" if "game_id" in team_games.columns else None, 570 | "team", "opponent", "is_home", 571 | "points_for", "points_against", "point_diff", "result", 572 | ] 573 | base_cols = [c for c in base_cols if c is not None] 574 | 575 | # Put base columns first, then everything else 576 | other_cols = [c for c in team_games.columns if c not in base_cols] 577 | team_games = team_games[base_cols + other_cols] 578 | 579 | # Sort by team + week if week exists 580 | if "week" in team_games.columns: 581 | team_games = team_games.sort_values(["team", "week"]).reset_index(drop=True) 582 | 583 | return team_games 584 | 585 | def get_team_game_stats(year: int, team: str) -> pd.DataFrame: 586 | """ 587 | Get game-by-game stats for a single team in a given season. 588 | 589 | Args: 590 | year (int): NFL season (e.g., 2024) 591 | team (str): Team abbreviation, e.g. 'PHI', 'DAL', 'KC' 592 | 593 | Returns: 594 | pandas.DataFrame: one row per game for that team. 595 | """ 596 | team = team.upper() 597 | all_games = get_all_team_game_stats(year) 598 | return all_games[all_games["team"] == team].reset_index(drop=True) 599 | 600 | phi_2024 = get_team_game_stats(2024, "PHI") 601 | print(phi_2024[["week", "team", "opponent", "is_home", "points_for", "points_against", "result"]]) 602 | 603 | 604 | def get_team_touchdown_stats(year: int) -> pd.DataFrame: 605 | """ 606 | Build touchdown stats for each team using play-by-play data. 607 | Compatible with nfl_data_py 0.3.3. 
def get_team_touchdown_stats(year: int) -> pd.DataFrame:
    """
    Build touchdown stats for each team using play-by-play data.
    Compatible with nfl_data_py 0.3.3.

    Args:
        year (int): NFL season (e.g., 2024)

    Returns:
        pandas.DataFrame: One row per team with the columns
        team, total_td, rush_td, pass_td, def_td, special_td.
        A touchdown-type column that is missing from the play-by-play
        data is reported as 0 for every team.
    """
    pbp = nfl.import_pbp_data([year])

    # Collect the helper columns in a separate frame so the play-by-play
    # table itself is left untouched.
    plays = pd.DataFrame(index=pbp.index)

    # Possession team first, defense only when there is no possession team.
    # On its own this never credits a defensive or return score to the team
    # that scored, so prefer the explicit scoring-team column when the data
    # provides one.
    fallback_team = pbp["posteam"].fillna(pbp["defteam"])
    if "td_team" in pbp.columns:
        plays["team"] = pbp["td_team"].fillna(fallback_team)
    else:
        plays["team"] = fallback_team

    # Output column -> play-by-play indicator column it is read from.
    indicator_sources = {
        "rush_td": "rush_touchdown",
        "pass_td": "pass_touchdown",
        "def_td": "defensive_touchdown",
        "special_td": "special_teams_touchdown",
    }
    for out_col, src_col in indicator_sources.items():
        if src_col in pbp.columns:
            # A missing indicator counts as "no touchdown" so it cannot turn
            # the per-play total into NaN.
            plays[out_col] = pd.to_numeric(pbp[src_col], errors="coerce").fillna(0)
        else:
            plays[out_col] = 0

    plays["total_td"] = plays[list(indicator_sources)].sum(axis=1)

    td_stats = plays.groupby("team").agg(
        total_td=("total_td", "sum"),
        rush_td=("rush_td", "sum"),
        pass_td=("pass_td", "sum"),
        def_td=("def_td", "sum"),
        special_td=("special_td", "sum"),
    ).reset_index()

    return td_stats


def get_team_season_with_tds(year: int) -> pd.DataFrame:
    """
    Combine the team season table with per-team touchdown counts.

    Args:
        year (int): NFL season (e.g., 2024)

    Returns:
        pandas.DataFrame: get_team_season_data(year) left-joined on "team"
        with get_team_touchdown_stats(year); teams without touchdown rows
        get 0, and all touchdown columns are integers.
    """
    season_table = get_team_season_data(year)
    touchdowns = get_team_touchdown_stats(year)

    combined = season_table.merge(touchdowns, on="team", how="left")

    for td_col in ("total_td", "rush_td", "pass_td", "def_td", "special_td"):
        combined[td_col] = combined[td_col].fillna(0).astype(int)

    return combined
" 662 | "Did you mean one of: points_for, rush_td, pass_td, total_td?") 663 | 664 | df = df.sort_values(stat_col, ascending=False) 665 | 666 | teams = df["team"] 667 | values = df[stat_col] 668 | 669 | plt.figure(figsize=(12, 6)) 670 | plt.bar(teams, values) 671 | plt.title(f"{year} - Team Comparison by {stat_col.replace('_',' ').title()}") 672 | plt.xticks(rotation=45) 673 | plt.xlabel("Team") 674 | plt.ylabel(stat_col.replace("_"," ").title()) 675 | plt.tight_layout() 676 | plt.show() 677 | 678 | # plot_team_stat_bar(2024, "total_td") 679 | 680 | # plot_team_stat_bar(2024, "total_td") 681 | # plot_team_stat_bar(2022, "rush_td") 682 | -------------------------------------------------------------------------------- /Analytics Helper Functions/helperFunctions.py: -------------------------------------------------------------------------------- 1 | import nfl_data_py as nfl 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | def get_season_totals_by_position(year: int, position: str) -> pd.DataFrame: 7 | """ 8 | Return trimmed full-season stats for all players at a given position. 9 | 10 | Args: 11 | year (int): NFL season (e.g., 2024) 12 | position (str): Player position ('QB', 'RB', 'WR', 'TE', etc.) 13 | 14 | Returns: 15 | pandas.DataFrame: One row per player with core season-total stats. 
def get_season_totals_by_position(year: int, position: str) -> pd.DataFrame:
    """
    Return trimmed full-season stats for all players at a given position.

    Weekly rows are summed per player identity (display name, id, position
    and team, whichever of those columns exist). Because the team is part
    of the identity, a player listed under two teams in one season gets
    one row per team.

    Args:
        year (int): NFL season (e.g., 2024)
        position (str): Player position ('QB', 'RB', 'WR', 'TE', etc.),
            case-insensitive. Positions without a core-stat list keep only
            the identity columns and games_played.

    Returns:
        pandas.DataFrame: Identity columns plus the position's core season
        totals, sorted by the position's headline stat (QB passing yards,
        RB rushing yards, WR/TE receiving yards) when that column exists.

    Raises:
        ValueError: If the weekly data has no 'position' column, or has no
            rows for the requested position.
    """
    # Load weekly stats for the season
    weekly = nfl.import_weekly_data([year])

    pos = position.upper()

    if "position" not in weekly.columns:
        raise ValueError("Column 'position' not found in weekly data.")

    # Filter to the requested position
    pos_df = weekly[weekly["position"] == pos].copy()
    if pos_df.empty:
        raise ValueError(f"No data found for position '{pos}' in season {year}.")

    # Columns that identify a player row
    identity_cols = ["player_display_name", "player_id", "position", "recent_team"]
    group_cols = [c for c in identity_cols if c in pos_df.columns]
    per_player = pos_df.groupby(group_cols)

    # Games played: distinct weeks, or plain row count if week is missing
    if "week" in pos_df.columns:
        games_played = per_player["week"].nunique().reset_index(name="games_played")
    else:
        games_played = per_player.size().reset_index(name="games_played")

    # Sum every numeric column except the fantasy-related ones
    numeric_cols = [
        c
        for c in pos_df.select_dtypes(include="number").columns
        if "fantasy" not in c.lower()
    ]
    season_totals = (
        pos_df[group_cols + numeric_cols]
        .groupby(group_cols, as_index=False)[numeric_cols]
        .sum()
    )
    season_totals = season_totals.merge(games_played, on=group_cols, how="left")

    # ---- Position-specific core columns ----
    receiving_core = ["targets", "receptions", "receiving_yards", "receiving_tds"]
    core_stats_by_pos = {
        "QB": [
            "games_played",
            "attempts",
            "completions",
            "passing_yards",
            "passing_tds",
            "interceptions",
            "rushing_yards",
            "rushing_tds",
        ],
        "RB": [
            "games_played",
            # NOTE(review): the weekly data appears to name rushing attempts
            # "carries" rather than "rushing_attempts" - confirm against the
            # data dictionary. Both spellings are listed; names that do not
            # exist are filtered out below, so this is safe either way.
            "carries",
            "rushing_attempts",
            "rushing_yards",
            "rushing_tds",
            *receiving_core,
        ],
        "WR": ["games_played", *receiving_core],
        "TE": ["games_played", *receiving_core],
    }

    # Keep identity columns plus whichever core stats actually exist
    pos_core = core_stats_by_pos.get(pos, ["games_played"])
    keep_cols = group_cols + [c for c in pos_core if c in season_totals.columns]
    season_totals = season_totals[keep_cols]

    # Sort by the headline stat of the position, if there is one
    headline_stat = {
        "QB": "passing_yards",
        "RB": "rushing_yards",
        "WR": "receiving_yards",
        "TE": "receiving_yards",
    }.get(pos)
    if headline_stat is not None and headline_stat in season_totals.columns:
        season_totals = season_totals.sort_values(headline_stat, ascending=False)

    return season_totals

# qb_2024_totals_top5 = get_season_totals_by_position(2024, "QB")
# print(qb_2024_totals_top5.head())
# qb_2024 = get_season_totals_by_position(2024, "QB")
# print(qb_2024)
def plot_position_stat_bar(year: int,
                           position: str,
                           stat_col: str,
                           top_n: int = 20,
                           save_path: str = None) -> None:
    """
    Draw a bar chart of one season-total stat for the leading players at a
    position, optionally writing the chart to a PNG file.

    Args:
        year (int): NFL season (e.g., 2024)
        position (str): Position (e.g., 'QB', 'RB', 'WR', 'TE')
        stat_col (str): Stat column to plot (e.g., 'passing_yards')
        top_n (int): Number of highest-ranked players to show (default 20)
        save_path (str): Optional file path for the PNG
            (e.g., 'qb_passing_2024.png'); nothing is saved when falsy.

    Returns:
        None

    Raises:
        ValueError: If stat_col is not a column of the season totals.
    """
    totals = get_season_totals_by_position(year, position)

    if stat_col not in totals.columns:
        raise ValueError(
            f"Column '{stat_col}' not found. Available columns: {list(totals.columns)}"
        )

    # Highest values first, then keep the first top_n rows
    leaders = totals.sort_values(stat_col, ascending=False).iloc[:top_n]
    stat_label = stat_col.replace("_", " ").title()

    plt.figure(figsize=(12, 6))
    plt.bar(leaders["player_display_name"], leaders[stat_col])
    plt.title(f"Top {top_n} {position.upper()} by {stat_label} in {year}")
    plt.xlabel("Player")
    plt.ylabel(stat_label)
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()

    # Save before showing the chart
    if save_path:
        plt.savefig(save_path, dpi=300)  # dpi=300 gives high quality images
        print(f"Chart saved as: {save_path}")

    plt.show()

# plot_position_stat_bar(2024, "QB", "passing_yards", save_path="qb_passing_2024.png", top_n=20)
# plot_position_stat_bar(2024, "RB", "rushing_yards", save_path="rb_rushing_2024.png", top_n=20)
def get_player_stats(year: int, first_name: str, last_name: str) -> pd.DataFrame:
    """
    Get all weekly stats for a single NFL player for a given season.

    Matching is case-insensitive on the player's display name. The first
    word of the display name must equal first_name, and last_name must
    equal either
      * the final word of the display name, or
      * everything after the first word, ignoring a trailing generational
        suffix (Jr., Sr., II, III, IV, V).
    The second rule lets "Pittman" find "Michael Pittman Jr." and
    "St. Brown" find "Amon-Ra St. Brown".

    Args:
        year (int): NFL season year (e.g., 2024)
        first_name (str): Player's first name (e.g., "Jalen")
        last_name (str): Player's last name (e.g., "Hurts")

    Returns:
        pandas.DataFrame: All weekly rows for the matching name in that
        season, sorted by week. Players who share the same name are all
        included.

    Raises:
        ValueError: If no row matches the requested name.
    """
    # Load weekly data for the season
    weekly = nfl.import_weekly_data([year])

    # Normalize inputs
    first = first_name.lower().strip()
    last = last_name.lower().strip()
    last_words = " ".join(last.split())

    suffixes = {"jr", "jr.", "sr", "sr.", "ii", "iii", "iv", "v"}

    def _is_requested_player(display_name) -> bool:
        # Missing names (NaN) can never match.
        if not isinstance(display_name, str):
            return False
        words = display_name.lower().split()
        if not words or words[0] != first:
            return False
        # First word + final word of the display name.
        if words[-1] == last:
            return True
        # Drop a trailing suffix, then compare the whole remaining surname.
        if len(words) > 2 and words[-1] in suffixes:
            words = words[:-1]
        return " ".join(words[1:]) == last_words

    is_match = weekly["player_display_name"].map(_is_requested_player).astype(bool)
    player_df = weekly[is_match].copy()

    if player_df.empty:
        raise ValueError(
            f"No data found for player '{first_name} {last_name}' in season {year}."
        )

    # Sort by week for clean output
    return player_df.sort_values("week")

# playerData = get_player_stats(2024, 'Lamar', 'Jackson')
# print(playerData)
def dataframe_to_png(df, png_path="dataframe.png", fontsize=10, col_width=2.0):
    """
    Save a pandas DataFrame as a PNG image using Matplotlib.

    Args:
        df (pd.DataFrame): The DataFrame to export; must have at least one
            row and one column
        png_path (str): File path to save the PNG
        fontsize (int): Font size in the table
        col_width (float): Width of each column in the image

    Returns:
        None (saves PNG file)

    Raises:
        ValueError: If df has no rows or no columns.
    """
    n_rows, n_cols = df.shape

    # Fail early with a clear message instead of an obscure error from the
    # plotting layer when there is nothing to draw.
    if n_rows == 0 or n_cols == 0:
        raise ValueError(
            "Cannot render an empty DataFrame as a PNG table "
            f"(shape: {n_rows} rows x {n_cols} columns)."
        )

    # Figure size grows with the number of rows and columns
    figsize = (col_width * n_cols, 0.4 * n_rows)

    fig, ax = plt.subplots(figsize=figsize)
    try:
        ax.axis("off")  # hide axes

        table = ax.table(
            cellText=df.values,
            colLabels=df.columns,
            loc="center",
            cellLoc="center",
        )

        table.auto_set_font_size(False)
        table.set_fontsize(fontsize)
        table.scale(1, 1.5)  # increase row height

        fig.savefig(png_path, bbox_inches="tight", dpi=300)
    finally:
        # Close the figure even when building or saving the table fails
        plt.close(fig)

    print(f"DataFrame saved as PNG: {png_path}")

# qb_totals = get_season_totals_by_position(2024, "QB")
# dataframe_to_png(qb_totals, "qb_totals_2024.png")
def export_player_season_png(
    year: int,
    first_name: str,
    last_name: str,
    png_path: str | None = None,
    columns: list[str] | None = None,
    fontsize: int = 10,
) -> str:
    """
    Render a player's weekly stats for one season as a titled PNG table.

    Args:
        year (int): NFL season (e.g., 2024)
        first_name (str): Player's first name (e.g., "Jalen")
        last_name (str): Player's last name (e.g., "Hurts")
        png_path (str | None): Where to write the PNG. When None, the name
            "<first>_<last>_<year>_stats.png" is used (lower-cased, spaces
            replaced by underscores).
        columns (list[str] | None): Columns to show. Names that are not in
            the data are ignored; None shows every column.
        fontsize (int): Font size for the table text.

    Returns:
        str: The path to the saved PNG file.

    Raises:
        ValueError: If columns is given but none of its names exist.
    """
    stats = get_player_stats(year, first_name, last_name)

    # Restrict to the requested columns that are actually available
    if columns is not None:
        available = [name for name in columns if name in stats.columns]
        if not available:
            raise ValueError("None of the specified columns exist in the DataFrame.")
        stats = stats[available]

    # Derive a file name from the player and season when none was given
    if png_path is None:
        slug = "_".join(
            part.lower().replace(" ", "_") for part in (first_name, last_name)
        )
        png_path = f"{slug}_{year}_stats.png"

    # Size the figure from the table shape, with a minimum of 8 x 2 inches
    row_count, col_count = stats.shape
    width = max(8, col_count * 1.2)
    height = max(2, row_count * 0.6)

    fig, ax = plt.subplots(figsize=(width, height))
    ax.axis("off")

    table = ax.table(
        cellText=stats.values,
        colLabels=stats.columns,
        loc="center",
        cellLoc="center",
    )
    table.auto_set_font_size(False)
    table.set_fontsize(fontsize)
    table.scale(1, 1.4)  # slightly taller rows

    ax.set_title(f"{first_name} {last_name} – {year} Season Stats (Weekly)", pad=20)

    plt.savefig(png_path, bbox_inches="tight", dpi=300)
    plt.close(fig)

    print(f"Saved player stats table as: {png_path}")
    return png_path

# export_player_season_png(2024, "Jalen", "Hurts")
def plot_player_stat_by_week(
    year: int,
    first_name: str,
    last_name: str,
    stat_col: str,
    save_path: str | None = None
) -> None:
    """
    Draw a week-by-week line chart of one stat for one player and season.

    Args:
        year (int): NFL season year (e.g., 2024)
        first_name (str): Player's first name (e.g., "Jalen")
        last_name (str): Player's last name (e.g., "Hurts")
        stat_col (str): Column name of the stat to plot
                        (e.g., "passing_yards", "rushing_yards", "receiving_yards")
        save_path (str | None): Optional path to save the plot as a PNG.
                                If None, the plot is just shown.

    Returns:
        None

    Raises:
        ValueError: If stat_col or 'week' is not a column of the player data.
    """
    player_weeks = get_player_stats(year, first_name, last_name)

    # Both the stat and the week column are required
    if stat_col not in player_weeks.columns:
        raise ValueError(
            f"Column '{stat_col}' not found in player data. "
            f"Available columns include: {list(player_weeks.columns)}"
        )
    if "week" not in player_weeks.columns:
        raise ValueError("Column 'week' not found in player data.")

    player_weeks = player_weeks.sort_values("week")

    # Non-numeric or missing values are plotted as 0
    player_weeks[stat_col] = pd.to_numeric(
        player_weeks[stat_col], errors="coerce"
    ).fillna(0)

    week_numbers = player_weeks["week"]
    stat_label = stat_col.replace("_", " ").title()

    plt.figure(figsize=(10, 5))
    plt.plot(week_numbers, player_weeks[stat_col], marker="o")

    plt.title(f"{first_name} {last_name} – {stat_label} by Week ({year} Season)")
    plt.xlabel("Week")
    plt.ylabel(stat_label)
    plt.xticks(week_numbers)  # one tick per week that has data
    plt.grid(True, linestyle="--", alpha=0.5)

    plt.tight_layout()

    if save_path is not None:
        plt.savefig(save_path, dpi=300, bbox_inches="tight")
        print(f"Saved line chart as: {save_path}")

    plt.show()
# plot_player_stat_by_week(
#     2004,
#     "Brian",
#     "Westbrook",
#     "rushing_yards",
#     save_path="brian_westbrook_2004_rushing_yards_by_week.png"
# )

def get_team_season_data(year: int, include_team_meta: bool = True) -> pd.DataFrame:
    """
    Get clean team-level season stats for all teams for a given NFL season.

    Only games with a final score are counted, restricted to the regular
    season when the schedule has a 'season_type' column.

    Removes ALL metadata fields whose name contains:
      - logo
      - color
      - wordmark
      - nick
      - division

    Adds:
      - points_for
      - points_against
      - point_diff
      - ppg_for
      - ppg_against

    Compatible with nfl_data_py 0.3.3.

    Args:
        year (int): NFL season (e.g., 2024)
        include_team_meta (bool): Also merge the team description table.
            This step is best-effort: if it fails, a RuntimeWarning is
            issued and the stats are returned without metadata.

    Returns:
        pandas.DataFrame: One row per team; season, team, games_played,
        wins, losses, ties, points_for, points_against, point_diff,
        ppg_for and ppg_against come first, followed by any metadata.
    """
    import warnings

    # 1. Load schedule data
    games = nfl.import_schedules([year])

    if "season_type" in games.columns:
        games = games[games["season_type"] == "REG"]

    games = games.dropna(subset=["home_score", "away_score"])

    # 2. One row per team per game: the home view and the away view
    home = games[["home_team", "home_score", "away_score"]].rename(
        columns={"home_team": "team", "home_score": "points_for", "away_score": "points_against"}
    )
    away = games[["away_team", "away_score", "home_score"]].rename(
        columns={"away_team": "team", "away_score": "points_for", "home_score": "points_against"}
    )
    team_games = pd.concat([home, away], ignore_index=True)

    # Outcome flags, so that summing them gives the season record
    scored = team_games["points_for"]
    allowed = team_games["points_against"]
    team_games["win"] = (scored > allowed).astype(int)
    team_games["loss"] = (scored < allowed).astype(int)
    team_games["tie"] = (scored == allowed).astype(int)

    # 3. Aggregate season totals
    team_stats = team_games.groupby("team").agg(
        games_played=("win", "size"),
        wins=("win", "sum"),
        losses=("loss", "sum"),
        ties=("tie", "sum"),
        points_for=("points_for", "sum"),
        points_against=("points_against", "sum")
    ).reset_index()

    # Derived stats
    team_stats["point_diff"] = team_stats["points_for"] - team_stats["points_against"]
    team_stats["ppg_for"] = team_stats["points_for"] / team_stats["games_played"]
    team_stats["ppg_against"] = team_stats["points_against"] / team_stats["games_played"]

    # 4. Optional metadata merge + cleaning
    if include_team_meta:
        try:
            meta = nfl.import_team_desc()

            # Merge on whichever abbreviation column the table provides
            if "team" in meta.columns:
                enriched = team_stats.merge(meta, on="team", how="left")
            elif "team_abbr" in meta.columns:
                enriched = team_stats.merge(
                    meta, left_on="team", right_on="team_abbr", how="left"
                )
            else:
                enriched = team_stats

            unwanted_keywords = ("logo", "wordmark", "color", "nick", "division")
            remove_cols = [
                c for c in enriched.columns
                if any(keyword in c.lower() for keyword in unwanted_keywords)
            ]
            team_stats = enriched.drop(columns=remove_cols, errors="ignore")

        except Exception as exc:
            # Metadata is optional, so keep going, but tell the caller why
            # the extra columns are missing instead of failing silently.
            warnings.warn(
                f"Team metadata could not be added for {year}: {exc!r}. "
                "Returning season stats without metadata.",
                RuntimeWarning,
                stacklevel=2,
            )

    # Add season
    team_stats["season"] = year

    # Reorder: core columns first, then everything else
    cols = [
        "season", "team",
        "games_played", "wins", "losses", "ties",
        "points_for", "points_against", "point_diff",
        "ppg_for", "ppg_against"
    ]
    other_cols = [c for c in team_stats.columns if c not in cols]

    return team_stats[cols + other_cols]


# df = get_team_season_data(2024)
# print(df.head())
def get_all_team_game_stats(year: int) -> pd.DataFrame:
    """
    Build a game log for every team in one NFL season.

    Each played game yields two rows, one from each team's point of view.
    Games without a final score are left out, and only regular-season
    games are kept when the schedule has a 'season_type' column.

    Columns include:
      - season, week, gameday, game_id (those present in the schedule)
      - team, opponent, is_home
      - points_for, points_against, point_diff
      - result ('W', 'L', 'T')
    followed by every other schedule column.
    Compatible with nfl_data_py 0.3.3.

    Args:
        year (int): NFL season (e.g., 2024)

    Returns:
        pandas.DataFrame: One row per team-game, sorted by team and week
        when a 'week' column exists.
    """
    schedule = nfl.import_schedules([year])

    if "season_type" in schedule.columns:
        schedule = schedule[schedule["season_type"] == "REG"]

    # Only games that have been played have both scores
    schedule = schedule.dropna(subset=["home_score", "away_score"])

    # The same games seen from the home side and from the away side
    home_view = schedule.assign(
        team=schedule["home_team"],
        opponent=schedule["away_team"],
        is_home=True,
        points_for=schedule["home_score"],
        points_against=schedule["away_score"],
    )
    away_view = schedule.assign(
        team=schedule["away_team"],
        opponent=schedule["home_team"],
        is_home=False,
        points_for=schedule["away_score"],
        points_against=schedule["home_score"],
    )
    log = pd.concat([home_view, away_view], ignore_index=True)

    # Margin and outcome from the team's point of view
    scored = log["points_for"]
    allowed = log["points_against"]
    log["point_diff"] = scored - allowed
    log["result"] = np.select([scored > allowed, scored < allowed], ["W", "L"], default="T")

    # Leading columns: optional schedule fields first, then the derived ones
    optional_front = ["season", "week", "gameday", "game_id"]
    front = [name for name in optional_front if name in log.columns]
    front += [
        "team", "opponent", "is_home",
        "points_for", "points_against", "point_diff", "result",
    ]
    remaining = [name for name in log.columns if name not in front]
    log = log[front + remaining]

    if "week" in log.columns:
        log = log.sort_values(["team", "week"]).reset_index(drop=True)

    return log
def get_team_game_stats(year: int, team: str) -> pd.DataFrame:
    """
    Return the game log of a single team for one season.

    Args:
        year (int): NFL season (e.g., 2024)
        team (str): Team abbreviation in any letter case, e.g. 'PHI', 'dal'

    Returns:
        pandas.DataFrame: The rows of get_all_team_game_stats(year) whose
        "team" value equals the upper-cased abbreviation, re-indexed
        from 0. An abbreviation that matches nothing gives an empty frame.
    """
    abbreviation = team.upper()
    season_log = get_all_team_game_stats(year)
    is_requested_team = season_log["team"] == abbreviation
    return season_log.loc[is_requested_team].reset_index(drop=True)


# NOTE: this demo runs every time the module is imported or executed: it
# loads the 2024 schedule and prints the PHI game log.
phi_2024 = get_team_game_stats(2024, "PHI")
print(
    phi_2024[
        [
            "week",
            "team",
            "opponent",
            "is_home",
            "points_for",
            "points_against",
            "result",
        ]
    ]
)
def get_team_touchdown_stats(year: int) -> pd.DataFrame:
    """
    Build touchdown stats for each team using play-by-play data.
    Compatible with nfl_data_py 0.3.3.

    Args:
        year (int): NFL season (e.g., 2024)

    Returns:
        pandas.DataFrame: One row per team with the columns
        team, total_td, rush_td, pass_td, def_td, special_td.
        A touchdown-type column that is missing from the play-by-play
        data is reported as 0 for every team.
    """
    pbp = nfl.import_pbp_data([year])

    # Collect the helper columns in a separate frame so the play-by-play
    # table itself is left untouched.
    plays = pd.DataFrame(index=pbp.index)

    # Possession team first, defense only when there is no possession team.
    # On its own this never credits a defensive or return score to the team
    # that scored, so prefer the explicit scoring-team column when the data
    # provides one.
    fallback_team = pbp["posteam"].fillna(pbp["defteam"])
    if "td_team" in pbp.columns:
        plays["team"] = pbp["td_team"].fillna(fallback_team)
    else:
        plays["team"] = fallback_team

    # Output column -> play-by-play indicator column it is read from.
    indicator_sources = {
        "rush_td": "rush_touchdown",
        "pass_td": "pass_touchdown",
        "def_td": "defensive_touchdown",
        "special_td": "special_teams_touchdown",
    }
    for out_col, src_col in indicator_sources.items():
        if src_col in pbp.columns:
            # A missing indicator counts as "no touchdown" so it cannot turn
            # the per-play total into NaN.
            plays[out_col] = pd.to_numeric(pbp[src_col], errors="coerce").fillna(0)
        else:
            plays[out_col] = 0

    plays["total_td"] = plays[list(indicator_sources)].sum(axis=1)

    td_stats = plays.groupby("team").agg(
        total_td=("total_td", "sum"),
        rush_td=("rush_td", "sum"),
        pass_td=("pass_td", "sum"),
        def_td=("def_td", "sum"),
        special_td=("special_td", "sum"),
    ).reset_index()

    return td_stats


def get_team_season_with_tds(year: int) -> pd.DataFrame:
    """
    Combine the team season table with per-team touchdown counts.

    Args:
        year (int): NFL season (e.g., 2024)

    Returns:
        pandas.DataFrame: get_team_season_data(year) left-joined on "team"
        with get_team_touchdown_stats(year); teams without touchdown rows
        get 0, and all touchdown columns are integers.
    """
    season_table = get_team_season_data(year)
    touchdowns = get_team_touchdown_stats(year)

    combined = season_table.merge(touchdowns, on="team", how="left")

    for td_col in ("total_td", "rush_td", "pass_td", "def_td", "special_td"):
        combined[td_col] = combined[td_col].fillna(0).astype(int)

    return combined


def plot_team_stat_bar(year: int, stat_col: str):
    """
    Show a bar chart comparing all teams on one season stat.

    Args:
        year (int): NFL season (e.g., 2024)
        stat_col (str): Column of get_team_season_with_tds(year) to plot
            (e.g., 'points_for', 'total_td')

    Raises:
        ValueError: If stat_col is not a column of the team table.
    """
    team_table = get_team_season_with_tds(year)

    if stat_col not in team_table.columns:
        raise ValueError(f"Column '{stat_col}' not found. "
                         "Did you mean one of: points_for, rush_td, pass_td, total_td?")

    # Highest value on the left
    ranked = team_table.sort_values(stat_col, ascending=False)

    plt.figure(figsize=(12, 6))
    plt.bar(ranked["team"], ranked[stat_col])
    plt.title(f"{year} - Team Comparison by {stat_col.replace('_',' ').title()}")
    plt.xticks(rotation=45)
    plt.xlabel("Team")
    plt.ylabel(stat_col.replace("_"," ").title())
    plt.tight_layout()
    plt.show()

# plot_team_stat_bar(2024, "total_td")
# plot_team_stat_bar(2022, "rush_td")