├── Unit 3
    ├── Setup.py
    ├── helperLogic_Unit1.py
    └── dataframes_Exercise.py
├── Unit 6
    ├── epa_exercise.py
    └── efficiency_exercise.py
├── Unit 2
    ├── Python_OOP_Lesson.py
    └── Python_Modules_Lesson.py
├── Unit 4
    ├── teamData_Exercise.py
    ├── helperFunction_Unit2.py
    └── playerData_Exercise.py
├── _AI_Unit
    ├── AI_Unit_Lesson.py
    ├── AI_Unit_Exercises.py
    └── AI_Unit_Vocabulary.md
├── Unit 1
    ├── Python_Review_Exercises.py
    ├── Python_Review_Lesson.py
    └── General_Python_Vocabulary.md
├── Unit 5
    ├── dataVisualization_barCharts.py
    ├── dataVisualization_lineChart.py
    └── dataVisualization_scatterPlot.py
├── .gitignore
├── Resources
    └── Student Helper function Sheet.png
├── washed
    ├── __pycache__
    │   └── helperFunctions.cpython-312.pyc
    ├── washed.py
    └── helperFunctions.py
├── Analytics Helper Functions
    ├── review.py
    ├── __pycache__
    │   └── helperFunctions.cpython-312.pyc
    ├── visualiization.py
    └── helperFunctions.py
└── readme.md


/Unit 3/Setup.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Unit 6/epa_exercise.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Unit 2/Python_OOP_Lesson.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Unit 3/helperLogic_Unit1.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Unit 4/teamData_Exercise.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/_AI_Unit/AI_Unit_Lesson.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Unit 1/Python_Review_Exercises.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Unit 1/Python_Review_Lesson.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Unit 2/Python_Modules_Lesson.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Unit 3/dataframes_Exercise.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Unit 4/helperFunction_Unit2.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Unit 4/playerData_Exercise.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Unit 6/efficiency_exercise.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/_AI_Unit/AI_Unit_Exercises.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Unit 1/General_Python_Vocabulary.md:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Unit 5/dataVisualization_barCharts.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Unit 5/dataVisualization_lineChart.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Unit 5/dataVisualization_scatterPlot.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | Instructor_Notes
2 | Assessments
3 | 
4 | requirements.txt
5 | 


--------------------------------------------------------------------------------
/Resources/Student Helper function Sheet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Iankimble/AP-Coding/HEAD/Resources/Student Helper function Sheet.png


--------------------------------------------------------------------------------
/washed/__pycache__/helperFunctions.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Iankimble/AP-Coding/HEAD/washed/__pycache__/helperFunctions.cpython-312.pyc


--------------------------------------------------------------------------------
/Analytics Helper Functions/review.py:
--------------------------------------------------------------------------------
1 | from helperFunctions import get_season_totals_by_position
2 | 
3 | rbStats = get_season_totals_by_position(2024,'RB')
4 | print(rbStats)


--------------------------------------------------------------------------------
/Analytics Helper Functions/__pycache__/helperFunctions.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Iankimble/AP-Coding/HEAD/Analytics Helper Functions/__pycache__/helperFunctions.cpython-312.pyc


--------------------------------------------------------------------------------
/_AI_Unit/AI_Unit_Vocabulary.md:
--------------------------------------------------------------------------------
 1 | 1. Artificial Intelligence (AI)
 2 | 
 3 | 2. Artificial General Intelligence (AGI)
 4 | 
 5 | 3. Generative AI 
 6 | 
 7 | 4. LLM
 8 | 
 9 | 5. Prompting
10 | 
11 | 6. Hallucination
12 | 
13 | 7. Model
14 | 
15 | 8. Token
16 | 
17 | 9. Algorithm
18 | 
19 | 10. Training Data
20 | 
21 | 11. Agent
22 | 
23 | 


--------------------------------------------------------------------------------
/washed/washed.py:
--------------------------------------------------------------------------------
 1 | from helperFunctions import get_season_totals_by_position, plot_position_stat_bar, plot_player_stat_by_week, get_player_stats
 2 | 
 3 | # 1. What 4 stats makes a player washed? 
 4 | # games played - indicates the player is healhy or game day ready
 5 | # attempts - indicates they are able to get the ball out
 6 | # completions -  they are getting the ball to their recievers
 7 | # passing yards - they are getting significant yardage
 8 | 
 9 | 
10 | # Get all player stats from 2024
11 | qbStats= get_season_totals_by_position(2024,'QB')
12 | print(qbStats)
13 | 
14 | # 2. Why is this player washed? 
15 | # Based on the league average for rushing yard for the qb position, russell wilson...
16 | 
17 | RussStats = get_player_stats(2024,'Russell','Wilson')
18 | 
19 | #Line graph of Russ stats for 2024
20 | plot_player_stat_by_week(2024, 'Russell','Wilson',"passing_yards", save_path='Jalen Hurts Passing Yards 2024')
21 | 
22 | 


--------------------------------------------------------------------------------
/Analytics Helper Functions/visualiization.py:
--------------------------------------------------------------------------------
 1 | from helperFunctions import get_season_totals_by_position, plot_position_stat_bar, plot_player_stat_by_week, get_player_stats
 2 | 
 3 | # rb for '24
 4 | #plot_position_stat_bar(2024,'RB',"rushing_yards",save_path='rb rushing yards 2024')
 5 | #  rb for '23
 6 | #plot_position_stat_bar(2023,'RB',"rushing_yards",save_path='rb rushing yards 2023')
 7 | # rb for '22
 8 | #plot_position_stat_bar(2022,'RB',"rushing_yards",save_path='rb rushing yards 2022')
 9 | 
10 | bestRb24= get_season_totals_by_position(2024, 'RB')
11 | bestRb23= get_season_totals_by_position(2023, 'RB')
12 | bestRb22= get_season_totals_by_position(2022, 'RB')
13 | 
14 | #print(bestRb22)
15 | #print(bestRb23)
16 | #print(bestRb24)
17 | 
18 | answer=  'derick henry has the most rushing yards over the course of 2022-2024'
19 | 
20 | #plot_player_stat_by_week(2024, 'Jalen','Hurts',"passing_yards", save_path='Jalen Hurts Passing Yards 2024')
21 | #plot_player_stat_by_week(2023, 'Jalen','Hurts',"passing_yards", save_path='Jalen Hurts Passing Yards 2023')
22 | #plot_player_stat_by_week(2022, 'Jalen','Hurts',"passing_yards", save_path='Jalen Hurts Passing Yards 2022')
23 | 
24 | Jalen24 = get_player_stats(2024,'Jalen','Hurts')
25 | # print(Jalen24)
26 | qbNumbers= get_season_totals_by_position(2024,'QB')
27 | print(qbNumbers)


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
 1 | # Beginner NFL Analytics with Python 
 2 | ### Black Tech Philly
 3 | 
 4 | **1. Overview and Purpose**
 5 | ---
 6 | Lorem ipsum dolor sit amet consectetur adipiscing elit. Quisque faucibus ex sapien vitae pellentesque sem placerat. In id cursus mi pretium tellus duis convallis. Tempus leo eu aenean sed diam urna tempor. Pulvinar vivamus fringilla lacus nec metus bibendum egestas. Iaculis massa nisl malesuada lacinia integer nunc posuere. Ut hendrerit semper vel class aptent taciti sociosqu. Ad litora torquent per conubia nostra inceptos himenaeos.
 7 | 
 8 | **2. What You'll Learn**
 9 | ---
10 | Lorem ipsum dolor sit amet consectetur adipiscing elit. Quisque faucibus ex sapien vitae pellentesque sem placerat. In id cursus mi pretium tellus duis convallis. Tempus leo eu aenean sed diam urna tempor. Pulvinar vivamus fringilla lacus nec metus bibendum egestas. Iaculis massa nisl malesuada lacinia integer nunc posuere. Ut hendrerit semper vel class aptent taciti sociosqu. Ad litora torquent per conubia nostra inceptos himenaeos.
11 | 
12 | **3. Getting Started**
13 | 
14 | **4. Folder Structure**
15 | ---
16 | - Lessons
17 | What is analytics?
18 | 
19 | 1. Finding the mean, median, mode of NFL teams by score
20 | 
21 | 2. Find the point differential
22 | 
23 | 3. Find the best individual player mean, median, and mode
24 | 
25 | 4. Find the standard deviation
26 | 
27 | 5. Creating graphs and charts
28 | 
29 | - Setup Guide
30 | 1. Installation and Setup
31 | 2. Running helperFunctions() 
32 | 
33 | **5. How to use this Repo**
34 | ---
35 | Lorem ipsum dolor sit amet consectetur adipiscing elit. Quisque faucibus ex sapien vitae pellentesque sem placerat. In id cursus mi pretium tellus duis convallis. Tempus leo eu aenean sed diam urna tempor. Pulvinar vivamus fringilla lacus nec metus bibendum egestas. Iaculis massa nisl malesuada lacinia integer nunc posuere. Ut hendrerit semper vel class aptent taciti sociosqu. Ad litora torquent per conubia nostra inceptos himenaeos.
36 | 
37 | - **For Students**
38 | 
39 | Lorem ipsum dolor sit amet consectetur adipiscing elit. Quisque faucibus ex sapien vitae pellentesque sem placerat. In id cursus mi pretium tellus duis convallis. Tempus leo eu aenean sed diam urna tempor. Pulvinar vivamus fringilla lacus nec metus bibendum egestas. Iaculis massa nisl malesuada lacinia integer nunc posuere. Ut hendrerit semper vel class aptent taciti sociosqu. Ad litora torquent per conubia nostra inceptos himenaeos.
40 | 
41 | - **For Teachers**
42 | 
43 | Lorem ipsum dolor sit amet consectetur adipiscing elit. Quisque faucibus ex sapien vitae pellentesque sem placerat. In id cursus mi pretium tellus duis convallis. Tempus leo eu aenean sed diam urna tempor. Pulvinar vivamus fringilla lacus nec metus bibendum egestas. Iaculis massa nisl malesuada lacinia integer nunc posuere. Ut hendrerit semper vel class aptent taciti sociosqu. Ad litora torquent per conubia nostra inceptos himenaeos.
44 | 
45 | - **For those who are interested in learning Sports Analytics**
46 | 
47 | Lorem ipsum dolor sit amet consectetur adipiscing elit. Quisque faucibus ex sapien vitae pellentesque sem placerat. In id cursus mi pretium tellus duis convallis. Tempus leo eu aenean sed diam urna tempor. Pulvinar vivamus fringilla lacus nec metus bibendum egestas. Iaculis massa nisl malesuada lacinia integer nunc posuere. Ut hendrerit semper vel class aptent taciti sociosqu. Ad litora torquent per conubia nostra inceptos himenaeos.
48 | 
49 | **6. Licenses and Acknowledgments**
50 | ---
51 | Lorem ipsum dolor sit amet consectetur adipiscing elit. Quisque faucibus ex sapien vitae pellentesque sem placerat. In id cursus mi pretium tellus duis convallis. Tempus leo eu aenean sed diam urna tempor. Pulvinar vivamus fringilla lacus nec metus bibendum egestas. Iaculis massa nisl malesuada lacinia integer nunc posuere. Ut hendrerit semper vel class aptent taciti sociosqu. Ad litora torquent per conubia nostra inceptos himenaeos.
52 | 
53 | **7. Contact**
54 | ---
55 | 
56 | 


--------------------------------------------------------------------------------
/washed/helperFunctions.py:
--------------------------------------------------------------------------------
  1 | import nfl_data_py as nfl
  2 | import pandas as pd
  3 | import matplotlib.pyplot as plt
  4 | import numpy as np
  5 | 
  6 | def get_season_totals_by_position(year: int, position: str) -> pd.DataFrame:
  7 |     """
  8 |     Return trimmed full-season stats for all players at a given position.
  9 | 
 10 |     Args:
 11 |         year (int): NFL season (e.g., 2024)
 12 |         position (str): Player position ('QB', 'RB', 'WR', 'TE', etc.)
 13 | 
 14 |     Returns:
 15 |         pandas.DataFrame: One row per player with core season-total stats.
 16 |     """
 17 |     # Load weekly stats for the season
 18 |     weekly = nfl.import_weekly_data([year])
 19 | 
 20 |     pos = position.upper()
 21 | 
 22 |     if "position" not in weekly.columns:
 23 |         raise ValueError("Column 'position' not found in weekly data.")
 24 | 
 25 |     # Filter to the requested position
 26 |     pos_df = weekly[weekly["position"] == pos].copy()
 27 |     if pos_df.empty:
 28 |         raise ValueError(f"No data found for position '{pos}' in season {year}.")
 29 | 
 30 |     # Identify player columns
 31 |     group_cols = ["player_display_name", "player_id", "position", "recent_team"]
 32 |     group_cols = [c for c in group_cols if c in pos_df.columns]
 33 | 
 34 |     # Calculate games played per player
 35 |     if "week" in pos_df.columns:
 36 |         games_played = (
 37 |             pos_df.groupby(group_cols)["week"]
 38 |             .nunique()
 39 |             .reset_index(name="games_played")
 40 |         )
 41 |     else:
 42 |         # Fallback if week isn't available
 43 |         games_played = (
 44 |             pos_df.groupby(group_cols)
 45 |             .size()
 46 |             .reset_index(name="games_played")
 47 |         )
 48 | 
 49 |     # Numeric columns to sum
 50 |     numeric_cols = pos_df.select_dtypes(include="number").columns.tolist()
 51 |     # Remove fantasy-related columns
 52 |     fantasy_cols = [c for c in numeric_cols if "fantasy" in c.lower()]
 53 |     numeric_cols = [c for c in numeric_cols if c not in fantasy_cols]
 54 | 
 55 |     # Group by player and sum
 56 |     season_totals = (
 57 |         pos_df[group_cols + numeric_cols]
 58 |         .groupby(group_cols, as_index=False)[numeric_cols]
 59 |         .sum()
 60 |     )
 61 | 
 62 |     # Merge games_played
 63 |     season_totals = season_totals.merge(games_played, on=group_cols, how="left")
 64 | 
 65 |     # ---- Position-specific core columns ----
 66 |     core_stats_by_pos = {
 67 |         "QB": [
 68 |             "games_played",
 69 |             "attempts",
 70 |             "completions",
 71 |             "passing_yards",
 72 |             "passing_tds",
 73 |             "interceptions",
 74 |             "rushing_yards",
 75 |             "rushing_tds",
 76 |         ],
 77 |         "RB": [
 78 |             "games_played",
 79 |             "rushing_attempts",
 80 |             "rushing_yards",
 81 |             "rushing_tds",
 82 |             "targets",
 83 |             "receptions",
 84 |             "receiving_yards",
 85 |             "receiving_tds",
 86 |         ],
 87 |         "WR": [
 88 |             "games_played",
 89 |             "targets",
 90 |             "receptions",
 91 |             "receiving_yards",
 92 |             "receiving_tds",
 93 |         ],
 94 |         "TE": [
 95 |             "games_played",
 96 |             "targets",
 97 |             "receptions",
 98 |             "receiving_yards",
 99 |             "receiving_tds",
100 |         ],
101 |     }
102 | 
103 |     # Choose which columns to keep
104 |     base_cols = group_cols.copy()
105 |     pos_core = core_stats_by_pos.get(pos, ["games_played"])
106 |     keep_cols = base_cols + [c for c in pos_core if c in season_totals.columns]
107 | 
108 |     # Filter to trimmed set of columns
109 |     season_totals = season_totals[keep_cols]
110 | 
111 |     # Sort by a key stat depending on position
112 |     if pos == "QB" and "passing_yards" in season_totals.columns:
113 |         season_totals = season_totals.sort_values("passing_yards", ascending=False)
114 |     elif pos == "RB" and "rushing_yards" in season_totals.columns:
115 |         season_totals = season_totals.sort_values("rushing_yards", ascending=False)
116 |     elif pos in ("WR", "TE") and "receiving_yards" in season_totals.columns:
117 |         season_totals = season_totals.sort_values("receiving_yards", ascending=False)
118 | 
119 |     return season_totals
120 | 
121 | #qb_2024_totals_top5 = get_season_totals_by_position(2024, "QB")
122 | #print(qb_2024_totals_top5.head())
123 | #qb_2024 = get_season_totals_by_position(2024, "QB")
124 | #print(qb_2024)
125 | 
126 | def plot_position_stat_bar(year: int,
127 |                            position: str,
128 |                            stat_col: str,
129 |                            top_n: int = 20,
130 |                            save_path: str = None) -> None:
131 |     """
132 |     Plot a bar chart for a given stat column for all players at a position,
133 |     and optionally save it as a PNG file.
134 | 
135 |     Args:
136 |         year (int): NFL season (e.g., 2024)
137 |         position (str): Position (e.g., 'QB', 'RB', 'WR', 'TE')
138 |         stat_col (str): Stat column to plot (e.g., 'passing_yards')
139 |         top_n (int): Show top N players (default 20)
140 |         save_path (str): Optional. File path to save PNG (e.g., 'qb_passing_2024.png')
141 | 
142 |     Returns:
143 |         None
144 |     """
145 |     df = get_season_totals_by_position(year, position)
146 | 
147 |     if stat_col not in df.columns:
148 |         raise ValueError(
149 |             f"Column '{stat_col}' not found. Available columns: {list(df.columns)}"
150 |         )
151 | 
152 |     df_sorted = df.sort_values(stat_col, ascending=False).head(top_n)
153 | 
154 |     plt.figure(figsize=(12, 6))
155 |     plt.bar(df_sorted["player_display_name"], df_sorted[stat_col])
156 | 
157 |     pretty_stat = stat_col.replace("_", " ").title()
158 |     plt.title(f"Top {top_n} {position.upper()} by {pretty_stat} in {year}")
159 |     plt.xlabel("Player")
160 |     plt.ylabel(pretty_stat)
161 |     plt.xticks(rotation=45, ha="right")
162 | 
163 |     plt.tight_layout()
164 | 
165 |     # --- 🔥 Save chart if save_path is given ---
166 |     if save_path:
167 |         plt.savefig(save_path, dpi=300)  # dpi=300 gives high quality images
168 |         print(f"Chart saved as: {save_path}")
169 | 
170 |     plt.show()
171 | 
172 | # plot_position_stat_bar(2024, "QB", "passing_yards", save_path="qb_passing_2024.png", top_n=20)
173 | # plot_position_stat_bar(2024, "RB", "rushing_yards", save_path="rb_rushing_2024.png", top_n=20)
174 | 
175 | def get_player_stats(year: int, first_name: str, last_name: str) -> pd.DataFrame:
176 |     """
177 |     Get all weekly stats for a single NFL player for a given season.
178 |     Requires exact match on first and last name.
179 | 
180 |     Args:
181 |         year (int): NFL season year (e.g., 2024)
182 |         first_name (str): Player's first name (e.g., "Jalen")
183 |         last_name (str): Player's last name (e.g., "Hurts")
184 | 
185 |     Returns:
186 |         pandas.DataFrame: All weekly stats for that player in that season.
187 |     """
188 | 
189 |     # Load weekly data for the season
190 |     weekly = nfl.import_weekly_data([year])
191 | 
192 |     # Normalize inputs
193 |     first = first_name.lower().strip()
194 |     last = last_name.lower().strip()
195 | 
196 |     # Normalize player names in the dataset
197 |     weekly["first"] = weekly["player_display_name"].str.split().str[0].str.lower()
198 |     weekly["last"] = weekly["player_display_name"].str.split().str[-1].str.lower()
199 | 
200 |     # Exact match on first + last
201 |     player_df = weekly[(weekly["first"] == first) & (weekly["last"] == last)].copy()
202 | 
203 |     if player_df.empty:
204 |         raise ValueError(
205 |             f"No data found for player '{first_name} {last_name}' in season {year}."
206 |         )
207 | 
208 |     # Sort by week for clean output
209 |     player_df = player_df.sort_values("week")
210 | 
211 |     # Remove temporary helper columns
212 |     player_df = player_df.drop(columns=["first", "last"], errors="ignore")
213 | 
214 |     return player_df
215 | 
216 | # playerData= get_player_stats(2024, 'Lamar','Jackson')
217 | # print(playerData)
218 | 
219 | def dataframe_to_png(df, png_path="dataframe.png", fontsize=10, col_width=2.0):
220 |     """
221 |     Save a pandas DataFrame as a PNG image using Matplotlib.
222 | 
223 |     Args:
224 |         df (pd.DataFrame): The DataFrame to export
225 |         png_path (str): File path to save the PNG
226 |         fontsize (int): Font size in the table
227 |         col_width (float): Width of each column in the image
228 | 
229 |     Returns:
230 |         None (saves PNG file)
231 |     """
232 | 
233 |     # Calculate figure size based on rows and columns
234 |     n_rows, n_cols = df.shape
235 |     figsize = (col_width * n_cols, 0.4 * n_rows)
236 | 
237 |     fig, ax = plt.subplots(figsize=figsize)
238 |     ax.axis("off")  # hide axes
239 | 
240 |     # Create table
241 |     table = ax.table(
242 |         cellText=df.values,
243 |         colLabels=df.columns,
244 |         loc="center",
245 |         cellLoc="center",
246 |     )
247 | 
248 |     table.auto_set_font_size(False)
249 |     table.set_fontsize(fontsize)
250 |     table.scale(1, 1.5)  # increase row height
251 | 
252 |     # Save image
253 |     plt.savefig(png_path, bbox_inches="tight", dpi=300)
254 |     plt.close()
255 | 
256 |     print(f"DataFrame saved as PNG: {png_path}")
257 | 
258 | # qb_totals = get_season_totals_by_position(2024, "QB")
259 | 
260 | # dataframe_to_png(qb_totals, "qb_totals_2024.png")
261 | 
262 | def export_player_season_png(
263 |     year: int,
264 |     first_name: str,
265 |     last_name: str,
266 |     png_path: str | None = None,
267 |     columns: list[str] | None = None,
268 |     fontsize: int = 10,
269 | ) -> str:
270 |     """
271 |     Get a player's weekly stats for a season and export them as a PNG table.
272 | 
273 |     Args:
274 |         year (int): NFL season (e.g., 2024)
275 |         first_name (str): Player's first name (e.g., "Jalen")
276 |         last_name (str): Player's last name (e.g., "Hurts")
277 |         png_path (str | None): Optional file path for the PNG.
278 |                                If None, a name is generated automatically.
279 |         columns (list[str] | None): Optional list of columns to include.
280 |                                     If None, all columns are used.
281 |         fontsize (int): Font size for the table text.
282 | 
283 |     Returns:
284 |         str: The path to the saved PNG file.
285 |     """
286 | 
287 |     # 1. Get the player's DataFrame (one row per week)
288 |     df = get_player_stats(year, first_name, last_name)
289 | 
290 |     # 2. Keep only selected columns if provided
291 |     if columns is not None:
292 |         # Only keep columns that exist in df
293 |         cols_to_use = [c for c in columns if c in df.columns]
294 |         if not cols_to_use:
295 |             raise ValueError("None of the specified columns exist in the DataFrame.")
296 |         df = df[cols_to_use]
297 | 
298 |     # 3. Auto-generate a file name if not provided
299 |     if png_path is None:
300 |         safe_first = first_name.lower().replace(" ", "_")
301 |         safe_last = last_name.lower().replace(" ", "_")
302 |         png_path = f"{safe_first}_{safe_last}_{year}_stats.png"
303 | 
304 |     # 4. Build the table figure
305 |     n_rows, n_cols = df.shape
306 |     # Reasonable sizing for a single player season (usually <= 18 games)
307 |     figsize = (max(8, n_cols * 1.2), max(2, n_rows * 0.6))
308 | 
309 |     fig, ax = plt.subplots(figsize=figsize)
310 |     ax.axis("off")
311 | 
312 |     table = ax.table(
313 |         cellText=df.values,
314 |         colLabels=df.columns,
315 |         loc="center",
316 |         cellLoc="center",
317 |     )
318 | 
319 |     table.auto_set_font_size(False)
320 |     table.set_fontsize(fontsize)
321 |     table.scale(1, 1.4)  # increase row height a bit
322 | 
323 |     # 5. Add a title
324 |     full_name = f"{first_name} {last_name}"
325 |     ax.set_title(f"{full_name} – {year} Season Stats (Weekly)", pad=20)
326 | 
327 |     # 6. Save as PNG
328 |     plt.savefig(png_path, bbox_inches="tight", dpi=300)
329 |     plt.close(fig)
330 | 
331 |     print(f"Saved player stats table as: {png_path}")
332 |     return png_path
333 | 
334 | # export_player_season_png(2024, "Jalen", "Hurts")
335 | 
336 | def plot_player_stat_by_week(
337 |     year: int,
338 |     first_name: str,
339 |     last_name: str,
340 |     stat_col: str,
341 |     save_path: str | None = None
342 | ) -> None:
343 |     """
344 |     Plot a line graph for a specific player's stat by week for a given season.
345 | 
346 |     Args:
347 |         year (int): NFL season year (e.g., 2024)
348 |         first_name (str): Player's first name (e.g., "Jalen")
349 |         last_name (str): Player's last name (e.g., "Hurts")
350 |         stat_col (str): Column name of the stat to plot
351 |                         (e.g., "passing_yards", "rushing_yards", "receiving_yards")
352 |         save_path (str | None): Optional path to save the plot as a PNG.
353 |                                 If None, the plot is just shown.
354 | 
355 |     Returns:
356 |         None
357 |     """
358 | 
359 |     # Get the player's weekly stats DataFrame (using the helper we wrote earlier)
360 |     df = get_player_stats(year, first_name, last_name)
361 | 
362 |     # Make sure the stat column exists
363 |     if stat_col not in df.columns:
364 |         raise ValueError(
365 |             f"Column '{stat_col}' not found in player data. "
366 |             f"Available columns include: {list(df.columns)}"
367 |         )
368 | 
369 |     # Ensure data is sorted by week
370 |     if "week" not in df.columns:
371 |         raise ValueError("Column 'week' not found in player data.")
372 |     df = df.sort_values("week")
373 | 
374 |     # Convert the stat column to numeric (just in case) and fill NaN with 0
375 |     df[stat_col] = pd.to_numeric(df[stat_col], errors="coerce").fillna(0)
376 | 
377 |     weeks = df["week"]
378 |     values = df[stat_col]
379 | 
380 |     # Create the line plot
381 |     plt.figure(figsize=(10, 5))
382 |     plt.plot(weeks, values, marker="o")
383 | 
384 |     # Labels and title
385 |     pretty_stat = stat_col.replace("_", " ").title()
386 |     full_name = f"{first_name} {last_name}"
387 | 
388 |     plt.title(f"{full_name} – {pretty_stat} by Week ({year} Season)")
389 |     plt.xlabel("Week")
390 |     plt.ylabel(pretty_stat)
391 |     plt.xticks(weeks)  # show actual week numbers on x-axis
392 |     plt.grid(True, linestyle="--", alpha=0.5)
393 | 
394 |     plt.tight_layout()
395 | 
396 |     # Optionally save as PNG
397 |     if save_path is not None:
398 |         plt.savefig(save_path, dpi=300, bbox_inches="tight")
399 |         print(f"Saved line chart as: {save_path}")
400 | 
401 |     # Show the plot
402 |     plt.show()
403 | 
404 | # plot_player_stat_by_week(
405 | #    2004,
406 | #    "Brian",
407 | #    "Westbrook",
408 | #    "rushing_yards",
409 | #    save_path="brian_westbrook_2004_rushing_yards_by_week.png"
410 | # ) 
411 | 
412 | def get_team_season_data(year: int, include_team_meta: bool = True) -> pd.DataFrame:
413 |     """
414 |     Get clean team-level season stats for all teams for a given NFL season.
415 |     
416 |     Removes ALL metadata fields related to:
417 |         - logos
418 |         - colors
419 |         - wordmarks
420 |         - nicknames
421 |         - divisions
422 | 
423 |     Adds:
424 |         - points_for
425 |         - points_against
426 |         - point_diff
427 |         - ppg_for
428 |         - ppg_against
429 | 
430 |     Compatible with nfl_data_py 0.3.3.
431 |     """
432 | 
433 |     # 1. Load schedule data
434 |     games = nfl.import_schedules([year])
435 | 
436 |     if "season_type" in games.columns:
437 |         games = games[games["season_type"] == "REG"]
438 | 
439 |     games = games.dropna(subset=["home_score", "away_score"])
440 | 
441 |     # 2. Build home and away rows
442 |     home = games[["home_team", "home_score", "away_score"]].rename(
443 |         columns={"home_team": "team", "home_score": "points_for", "away_score": "points_against"}
444 |     )
445 |     away = games[["away_team", "away_score", "home_score"]].rename(
446 |         columns={"away_team": "team", "away_score": "points_for", "home_score": "points_against"}
447 |     )
448 | 
449 |     # Outcomes
450 |     for df in (home, away):
451 |         df["win"] = (df["points_for"] > df["points_against"]).astype(int)
452 |         df["loss"] = (df["points_for"] < df["points_against"]).astype(int)
453 |         df["tie"]  = (df["points_for"] == df["points_against"]).astype(int)
454 | 
455 |     # Combine
456 |     team_games = pd.concat([home, away], ignore_index=True)
457 | 
458 |     # 3. Aggregate season totals
459 |     team_stats = team_games.groupby("team").agg(
460 |         games_played=("win", "size"),
461 |         wins=("win", "sum"),
462 |         losses=("loss", "sum"),
463 |         ties=("tie", "sum"),
464 |         points_for=("points_for", "sum"),
465 |         points_against=("points_against", "sum")
466 |     ).reset_index()
467 | 
468 |     # Derived stats
469 |     team_stats["point_diff"] = team_stats["points_for"] - team_stats["points_against"]
470 |     team_stats["ppg_for"] = team_stats["points_for"] / team_stats["games_played"]
471 |     team_stats["ppg_against"] = team_stats["points_against"] / team_stats["games_played"]
472 | 
473 |     # 4. Optional metadata merge + cleaning
474 |     if include_team_meta:
475 |         try:
476 |             meta = nfl.import_team_desc()
477 | 
478 |             # Merge on appropriate key
479 |             if "team" in meta.columns:
480 |                 team_stats = team_stats.merge(meta, on="team", how="left")
481 |             elif "team_abbr" in meta.columns:
482 |                 team_stats = team_stats.merge(meta, left_on="team", right_on="team_abbr", how="left")
483 | 
484 |             # Fields to remove
485 |             remove_cols = [
486 |                 c for c in team_stats.columns
487 |                 if any(keyword in c.lower() for keyword in [
488 |                     "logo", "wordmark", "color", "nick", "division"
489 |                 ])
490 |             ]
491 | 
492 |             team_stats = team_stats.drop(columns=remove_cols, errors="ignore")
493 | 
494 |         except Exception:
495 |             pass
496 | 
497 |     # Add season
498 |     team_stats["season"] = year
499 | 
500 |     # Reorder
501 |     cols = [
502 |         "season", "team",
503 |         "games_played", "wins", "losses", "ties",
504 |         "points_for", "points_against", "point_diff",
505 |         "ppg_for", "ppg_against"
506 |     ]
507 |     other_cols = [c for c in team_stats.columns if c not in cols]
508 | 
509 |     return team_stats[cols + other_cols]
510 | 
511 | 
512 | #df = get_team_season_data(2024)
#print(df.head())
514 | 
def get_all_team_game_stats(year: int) -> pd.DataFrame:
    """
    Return game-by-game stats for every team in a given NFL season.
    One row per *team-game* (so each actual game appears twice: once per team).

    Only regular-season games that have both scores are included.

    Args:
        year (int): NFL season (e.g., 2024)

    Returns:
        pandas.DataFrame: Team-game log whose leading columns are
            - season, week, gameday, game_id (each only if present)
            - team, opponent, is_home
            - points_for, points_against, point_diff
            - result ('W', 'L', 'T')
        followed by every remaining schedule column. Rows are sorted by
        team and week when a 'week' column exists.

    Compatible with nfl_data_py 0.3.3.
    """
    # 1. Load schedule data
    games = nfl.import_schedules([year])

    # Keep regular-season games only. The season phase can be stored under
    # either column name, so filter on whichever exists; checking only
    # "season_type" silently lets playoff games through when the schedule
    # labels the phase "game_type".
    for phase_col in ("season_type", "game_type"):
        if phase_col in games.columns:
            games = games[games[phase_col] == "REG"]

    # Drop games without final scores
    games = games.dropna(subset=["home_score", "away_score"])

    # 2. Build the schedule from one side's point of view
    def _perspective(side: str, other: str, at_home: bool) -> pd.DataFrame:
        rows = games.copy()
        rows["team"] = rows[f"{side}_team"]
        rows["opponent"] = rows[f"{other}_team"]
        rows["is_home"] = at_home
        rows["points_for"] = rows[f"{side}_score"]
        rows["points_against"] = rows[f"{other}_score"]
        return rows

    # 3. Combine home and away perspectives into team-game logs
    team_games = pd.concat(
        [_perspective("home", "away", True), _perspective("away", "home", False)],
        ignore_index=True,
    )

    # 4. Derived stats (this replaces any 'result' column already present
    #    in the schedule data with the W/L/T label)
    team_games["point_diff"] = team_games["points_for"] - team_games["points_against"]
    team_games["result"] = np.where(
        team_games["points_for"] > team_games["points_against"], "W",
        np.where(team_games["points_for"] < team_games["points_against"], "L", "T")
    )

    # 5. Put the main columns first (optional ones only if present), then everything else
    optional_cols = [
        c for c in ("season", "week", "gameday", "game_id") if c in team_games.columns
    ]
    base_cols = optional_cols + [
        "team", "opponent", "is_home",
        "points_for", "points_against", "point_diff", "result",
    ]
    other_cols = [c for c in team_games.columns if c not in base_cols]
    team_games = team_games[base_cols + other_cols]

    # Sort by team + week if week exists
    if "week" in team_games.columns:
        team_games = team_games.sort_values(["team", "week"]).reset_index(drop=True)

    return team_games
584 | 
def get_team_game_stats(year: int, team: str) -> pd.DataFrame:
    """
    Return the game log of one team for one season.

    Args:
        year (int): NFL season (e.g., 2024)
        team (str): Team abbreviation, e.g. 'PHI', 'DAL', 'KC'
                    (upper-cased before matching)

    Returns:
        pandas.DataFrame: one row per game for that team, re-indexed from 0.
    """
    abbreviation = team.upper()
    season_log = get_all_team_game_stats(year)

    # Boolean mask selecting only the requested team's rows
    is_requested_team = season_log["team"] == abbreviation
    return season_log[is_requested_team].reset_index(drop=True)
599 | 
# Example usage: fetch the 2024 game log for "PHI" and print a compact view of it.
# NOTE(review): these statements sit at module level with no
# `if __name__ == "__main__":` guard, so they execute every time this module
# is imported (including the call to get_team_game_stats) -- confirm intended.
phi_2024 = get_team_game_stats(2024, "PHI")
print(phi_2024[["week", "team", "opponent", "is_home", "points_for", "points_against", "result"]])
602 | 
603 | 
def get_team_touchdown_stats(year: int) -> pd.DataFrame:
    """
    Build touchdown stats for each team using play-by-play data.

    Args:
        year (int): NFL season (e.g., 2024)

    Returns:
        pandas.DataFrame: One row per team with the columns
        team, total_td, rush_td, pass_td, def_td, special_td.

    Compatible with nfl_data_py 0.3.3.
    """
    pbp = nfl.import_pbp_data([year])

    # Determine the team to credit for each play. Falling back from posteam
    # to defteam only helps when posteam is missing; it would credit a
    # defensive or return touchdown to the offense. When the data names the
    # scoring team directly ("td_team"), prefer that and use the old rule
    # only where it is missing (i.e. on plays without a touchdown).
    scoring_team = pbp["posteam"].fillna(pbp["defteam"])
    if "td_team" in pbp.columns:
        scoring_team = pbp["td_team"].fillna(scoring_team)

    def _indicator(column: str) -> pd.Series:
        """Return the 0/1 indicator column, or all zeros if it is absent."""
        if column not in pbp.columns:
            return pd.Series(0, index=pbp.index)
        # Missing values mean "no touchdown of this kind on this play"
        return pd.to_numeric(pbp[column], errors="coerce").fillna(0)

    # NOTE(review): "defensive_touchdown" and "special_teams_touchdown" may not
    # exist in the play-by-play data (return scores may be recorded under
    # "return_touchdown" instead) -- verify; absent columns count as 0 here.
    plays = pd.DataFrame({
        "team": scoring_team,
        "rush_td": _indicator("rush_touchdown"),
        "pass_td": _indicator("pass_touchdown"),
        "def_td": _indicator("defensive_touchdown"),
        "special_td": _indicator("special_teams_touchdown"),
    })

    # Total touchdowns per play across all categories
    plays["total_td"] = plays[["rush_td", "pass_td", "def_td", "special_td"]].sum(axis=1)

    # Group by team
    td_stats = plays.groupby("team").agg(
        total_td=("total_td", "sum"),
        rush_td=("rush_td", "sum"),
        pass_td=("pass_td", "sum"),
        def_td=("def_td", "sum"),
        special_td=("special_td", "sum"),
    ).reset_index()

    return td_stats
643 | 
def get_team_season_with_tds(year: int) -> pd.DataFrame:
    """
    Combine the team season table with per-team touchdown counts.

    Args:
        year (int): NFL season (e.g., 2024)

    Returns:
        pandas.DataFrame: The team season table left-joined on 'team' with
        total_td, rush_td, pass_td, def_td and special_td. Teams without a
        touchdown row get 0, and all touchdown columns are integers.
    """
    season_table = get_team_season_data(year)
    touchdowns = get_team_touchdown_stats(year)

    combined = season_table.merge(touchdowns, on="team", how="left")

    # Unmatched teams come back as NaN from the left join: zero-fill, then cast
    td_columns = ("total_td", "rush_td", "pass_td", "def_td", "special_td")
    cleaned = {name: combined[name].fillna(0).astype(int) for name in td_columns}

    return combined.assign(**cleaned)
655 | 
656 | 
def plot_team_stat_bar(year: int, stat_col: str, save_path: str | None = None) -> None:
    """
    Plot a bar chart comparing all teams on one season-level stat,
    and optionally save it as a PNG file.

    Args:
        year (int): NFL season (e.g., 2024)
        stat_col (str): Column of the team season table to plot
                        (e.g., 'points_for', 'total_td')
        save_path (str | None): Optional. File path to save the chart as a PNG.
                                If None, the chart is only shown.

    Returns:
        None

    Raises:
        ValueError: If stat_col is not a column of the team season table.
    """
    df = get_team_season_with_tds(year)

    if stat_col not in df.columns:
        raise ValueError(f"Column '{stat_col}' not found. "
                         "Did you mean one of: points_for, rush_td, pass_td, total_td?")

    # Highest value first so the chart reads as a ranking
    df = df.sort_values(stat_col, ascending=False)
    pretty_stat = stat_col.replace("_", " ").title()

    plt.figure(figsize=(12, 6))
    plt.bar(df["team"], df[stat_col])
    plt.title(f"{year} - Team Comparison by {pretty_stat}")
    plt.xticks(rotation=45)
    plt.xlabel("Team")
    plt.ylabel(pretty_stat)
    plt.tight_layout()

    # Save before show(): the figure may be cleared once the window closes
    if save_path is not None:
        plt.savefig(save_path, dpi=300, bbox_inches="tight")
        print(f"Chart saved as: {save_path}")

    plt.show()
677 | 
678 | # plot_team_stat_bar(2024, "total_td")
679 | 
680 | # plot_team_stat_bar(2024, "total_td")
681 | # plot_team_stat_bar(2022, "rush_td")
682 | 


--------------------------------------------------------------------------------
/Analytics Helper Functions/helperFunctions.py:
--------------------------------------------------------------------------------
  1 | import nfl_data_py as nfl
  2 | import pandas as pd
  3 | import matplotlib.pyplot as plt
  4 | import numpy as np
  5 | 
  6 | def get_season_totals_by_position(year: int, position: str) -> pd.DataFrame:
  7 |     """
  8 |     Return trimmed full-season stats for all players at a given position.
  9 | 
 10 |     Args:
 11 |         year (int): NFL season (e.g., 2024)
 12 |         position (str): Player position ('QB', 'RB', 'WR', 'TE', etc.)
 13 | 
 14 |     Returns:
 15 |         pandas.DataFrame: One row per player with core season-total stats.
 16 |     """
 17 |     # Load weekly stats for the season
 18 |     weekly = nfl.import_weekly_data([year])
 19 | 
 20 |     pos = position.upper()
 21 | 
 22 |     if "position" not in weekly.columns:
 23 |         raise ValueError("Column 'position' not found in weekly data.")
 24 | 
 25 |     # Filter to the requested position
 26 |     pos_df = weekly[weekly["position"] == pos].copy()
 27 |     if pos_df.empty:
 28 |         raise ValueError(f"No data found for position '{pos}' in season {year}.")
 29 | 
 30 |     # Identify player columns
 31 |     group_cols = ["player_display_name", "player_id", "position", "recent_team"]
 32 |     group_cols = [c for c in group_cols if c in pos_df.columns]
 33 | 
 34 |     # Calculate games played per player
 35 |     if "week" in pos_df.columns:
 36 |         games_played = (
 37 |             pos_df.groupby(group_cols)["week"]
 38 |             .nunique()
 39 |             .reset_index(name="games_played")
 40 |         )
 41 |     else:
 42 |         # Fallback if week isn't available
 43 |         games_played = (
 44 |             pos_df.groupby(group_cols)
 45 |             .size()
 46 |             .reset_index(name="games_played")
 47 |         )
 48 | 
 49 |     # Numeric columns to sum
 50 |     numeric_cols = pos_df.select_dtypes(include="number").columns.tolist()
 51 |     # Remove fantasy-related columns
 52 |     fantasy_cols = [c for c in numeric_cols if "fantasy" in c.lower()]
 53 |     numeric_cols = [c for c in numeric_cols if c not in fantasy_cols]
 54 | 
 55 |     # Group by player and sum
 56 |     season_totals = (
 57 |         pos_df[group_cols + numeric_cols]
 58 |         .groupby(group_cols, as_index=False)[numeric_cols]
 59 |         .sum()
 60 |     )
 61 | 
 62 |     # Merge games_played
 63 |     season_totals = season_totals.merge(games_played, on=group_cols, how="left")
 64 | 
 65 |     # ---- Position-specific core columns ----
 66 |     core_stats_by_pos = {
 67 |         "QB": [
 68 |             "games_played",
 69 |             "attempts",
 70 |             "completions",
 71 |             "passing_yards",
 72 |             "passing_tds",
 73 |             "interceptions",
 74 |             "rushing_yards",
 75 |             "rushing_tds",
 76 |         ],
 77 |         "RB": [
 78 |             "games_played",
 79 |             "rushing_attempts",
 80 |             "rushing_yards",
 81 |             "rushing_tds",
 82 |             "targets",
 83 |             "receptions",
 84 |             "receiving_yards",
 85 |             "receiving_tds",
 86 |         ],
 87 |         "WR": [
 88 |             "games_played",
 89 |             "targets",
 90 |             "receptions",
 91 |             "receiving_yards",
 92 |             "receiving_tds",
 93 |         ],
 94 |         "TE": [
 95 |             "games_played",
 96 |             "targets",
 97 |             "receptions",
 98 |             "receiving_yards",
 99 |             "receiving_tds",
100 |         ],
101 |     }
102 | 
103 |     # Choose which columns to keep
104 |     base_cols = group_cols.copy()
105 |     pos_core = core_stats_by_pos.get(pos, ["games_played"])
106 |     keep_cols = base_cols + [c for c in pos_core if c in season_totals.columns]
107 | 
108 |     # Filter to trimmed set of columns
109 |     season_totals = season_totals[keep_cols]
110 | 
111 |     # Sort by a key stat depending on position
112 |     if pos == "QB" and "passing_yards" in season_totals.columns:
113 |         season_totals = season_totals.sort_values("passing_yards", ascending=False)
114 |     elif pos == "RB" and "rushing_yards" in season_totals.columns:
115 |         season_totals = season_totals.sort_values("rushing_yards", ascending=False)
116 |     elif pos in ("WR", "TE") and "receiving_yards" in season_totals.columns:
117 |         season_totals = season_totals.sort_values("receiving_yards", ascending=False)
118 | 
119 |     return season_totals
120 | 
121 | #qb_2024_totals_top5 = get_season_totals_by_position(2024, "QB")
122 | #print(qb_2024_totals_top5.head())
123 | #qb_2024 = get_season_totals_by_position(2024, "QB")
124 | #print(qb_2024)
125 | 
def plot_position_stat_bar(year: int,
                           position: str,
                           stat_col: str,
                           top_n: int = 20,
                           save_path: str = None) -> None:
    """
    Draw a bar chart of the leading players at a position for one stat,
    optionally writing the chart to a PNG file before showing it.

    Args:
        year (int): NFL season (e.g., 2024)
        position (str): Position (e.g., 'QB', 'RB', 'WR', 'TE')
        stat_col (str): Stat column to plot (e.g., 'passing_yards')
        top_n (int): Show top N players (default 20)
        save_path (str): Optional. File path to save PNG (e.g., 'qb_passing_2024.png')

    Returns:
        None

    Raises:
        ValueError: If stat_col is not a column of the season totals.
    """
    totals = get_season_totals_by_position(year, position)

    if stat_col not in totals.columns:
        raise ValueError(
            f"Column '{stat_col}' not found. Available columns: {list(totals.columns)}"
        )

    # Rank by the requested stat and keep the leaders
    leaders = totals.sort_values(stat_col, ascending=False).head(top_n)
    stat_label = stat_col.replace("_", " ").title()
    chart_title = f"Top {top_n} {position.upper()} by {stat_label} in {year}"

    plt.figure(figsize=(12, 6))
    plt.bar(leaders["player_display_name"], leaders[stat_col])
    plt.title(chart_title)
    plt.xlabel("Player")
    plt.ylabel(stat_label)
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()

    # Write the PNG first (dpi=300 for a high quality image), then display
    if save_path:
        plt.savefig(save_path, dpi=300)
        print(f"Chart saved as: {save_path}")

    plt.show()
171 | 
172 | # plot_position_stat_bar(2024, "QB", "passing_yards", save_path="qb_passing_2024.png", top_n=20)
173 | # plot_position_stat_bar(2024, "RB", "rushing_yards", save_path="rb_rushing_2024.png", top_n=20)
174 | 
def get_player_stats(year: int, first_name: str, last_name: str) -> pd.DataFrame:
    """
    Get all weekly stats for a single NFL player for a given season.

    Matching is case-insensitive and ignores surrounding whitespace. A row
    matches when any of these hold for its 'player_display_name':
        - its first word equals first_name and its last word equals last_name
        - the whole name equals "first_name last_name"
          (covers multi-word last names such as "St. Brown")
        - the name without a trailing generational suffix (Jr., Sr., II,
          III, IV, V) equals "first_name last_name"

    Args:
        year (int): NFL season year (e.g., 2024)
        first_name (str): Player's first name (e.g., "Jalen")
        last_name (str): Player's last name (e.g., "Hurts")

    Returns:
        pandas.DataFrame: All weekly stats for that player in that season,
        sorted by week.

    Raises:
        ValueError: If no row matches the given name.
    """
    # Load weekly data for the season
    weekly = nfl.import_weekly_data([year])

    # Normalize inputs
    first = first_name.lower().strip()
    last = last_name.lower().strip()
    target = f"{first} {last}"

    # Normalize player names in the dataset (kept in local Series so the
    # loaded frame does not get temporary helper columns)
    names = weekly["player_display_name"].str.lower().str.strip()
    words = names.str.split()

    # Original rule: first word + last word
    word_match = (words.str[0] == first) & (words.str[-1] == last)

    # Comparing only the last word fails for "Odell Beckham Jr." (last word
    # is the suffix) and for multi-word last names, so also compare the
    # full name with and without a generational suffix.
    without_suffix = names.str.replace(
        r"\s+(?:jr\.?|sr\.?|ii|iii|iv|v)$", "", regex=True
    )
    full_match = (names == target) | (without_suffix == target)

    # Missing names can yield missing comparison results; treat as no match
    mask = (word_match | full_match).fillna(False)
    player_df = weekly[mask].copy()

    if player_df.empty:
        raise ValueError(
            f"No data found for player '{first_name} {last_name}' in season {year}."
        )

    # Sort by week for clean output
    return player_df.sort_values("week")
215 | 
216 | # playerData= get_player_stats(2024, 'Lamar','Jackson')
217 | # print(playerData)
218 | 
def dataframe_to_png(df, png_path="dataframe.png", fontsize=10, col_width=2.0):
    """
    Save a pandas DataFrame as a PNG image using Matplotlib.

    Args:
        df (pd.DataFrame): The DataFrame to export
        png_path (str): File path to save the PNG
        fontsize (int): Font size in the table
        col_width (float): Width of each column in the image

    Returns:
        None (saves PNG file)

    Raises:
        ValueError: If df has no rows or no columns.
    """
    n_rows, n_cols = df.shape

    # A frame without rows or columns would give a zero-sized figure and an
    # empty table; fail early with a clear message instead.
    if n_rows == 0 or n_cols == 0:
        raise ValueError(
            "Cannot export an empty DataFrame to PNG (it has no rows or no columns)."
        )

    # Figure size grows with the number of rows and columns
    fig, ax = plt.subplots(figsize=(col_width * n_cols, 0.4 * n_rows))
    try:
        ax.axis("off")  # hide axes

        # Create table
        table = ax.table(
            cellText=df.values,
            colLabels=df.columns,
            loc="center",
            cellLoc="center",
        )

        table.auto_set_font_size(False)
        table.set_fontsize(fontsize)
        table.scale(1, 1.5)  # increase row height

        # Save this figure explicitly rather than whichever is "current"
        fig.savefig(png_path, bbox_inches="tight", dpi=300)
    finally:
        # Always release the figure, even if building or saving failed
        plt.close(fig)

    print(f"DataFrame saved as PNG: {png_path}")
257 | 
258 | # qb_totals = get_season_totals_by_position(2024, "QB")
259 | 
260 | # dataframe_to_png(qb_totals, "qb_totals_2024.png")
261 | 
def export_player_season_png(
    year: int,
    first_name: str,
    last_name: str,
    png_path: str | None = None,
    columns: list[str] | None = None,
    fontsize: int = 10,
) -> str:
    """
    Get a player's weekly stats for a season and export them as a PNG table.

    Args:
        year (int): NFL season (e.g., 2024)
        first_name (str): Player's first name (e.g., "Jalen")
        last_name (str): Player's last name (e.g., "Hurts")
        png_path (str | None): Optional file path for the PNG.
                               If None, a name is generated automatically.
        columns (list[str] | None): Optional list of columns to include.
                                    Names not present in the data are ignored.
                                    If None, all columns are used.
        fontsize (int): Font size for the table text.

    Returns:
        str: The path to the saved PNG file.

    Raises:
        ValueError: If columns is given but none of its names exist in the
            player's data.
    """
    # 1. Get the player's DataFrame (one row per week)
    df = get_player_stats(year, first_name, last_name)

    # 2. Keep only selected columns if provided
    if columns is not None:
        # Only keep columns that exist in df
        cols_to_use = [c for c in columns if c in df.columns]
        if not cols_to_use:
            raise ValueError("None of the specified columns exist in the DataFrame.")
        df = df[cols_to_use]

    # 3. Auto-generate a file name if not provided
    if png_path is None:
        safe_first = first_name.lower().replace(" ", "_")
        safe_last = last_name.lower().replace(" ", "_")
        png_path = f"{safe_first}_{safe_last}_{year}_stats.png"

    # 4. Build the table figure
    n_rows, n_cols = df.shape
    # Reasonable sizing for a single player season (usually <= 18 games)
    figsize = (max(8, n_cols * 1.2), max(2, n_rows * 0.6))

    fig, ax = plt.subplots(figsize=figsize)
    try:
        ax.axis("off")

        table = ax.table(
            cellText=df.values,
            colLabels=df.columns,
            loc="center",
            cellLoc="center",
        )

        table.auto_set_font_size(False)
        table.set_fontsize(fontsize)
        table.scale(1, 1.4)  # increase row height a bit

        # 5. Add a title
        full_name = f"{first_name} {last_name}"
        ax.set_title(f"{full_name} – {year} Season Stats (Weekly)", pad=20)

        # 6. Save as PNG (this figure explicitly, not whichever is "current")
        fig.savefig(png_path, bbox_inches="tight", dpi=300)
    finally:
        # Always release the figure, even if building or saving failed
        plt.close(fig)

    print(f"Saved player stats table as: {png_path}")
    return png_path
333 | 
334 | # export_player_season_png(2024, "Jalen", "Hurts")
335 | 
def plot_player_stat_by_week(
    year: int,
    first_name: str,
    last_name: str,
    stat_col: str,
    save_path: str | None = None
) -> None:
    """
    Draw a week-by-week line chart of one stat for one player.

    Args:
        year (int): NFL season year (e.g., 2024)
        first_name (str): Player's first name (e.g., "Jalen")
        last_name (str): Player's last name (e.g., "Hurts")
        stat_col (str): Column name of the stat to plot
                        (e.g., "passing_yards", "rushing_yards", "receiving_yards")
        save_path (str | None): Optional path to save the plot as a PNG.
                                If None, the plot is just shown.

    Returns:
        None

    Raises:
        ValueError: If stat_col or 'week' is not a column of the player data.
    """
    player_weeks = get_player_stats(year, first_name, last_name)
    available = player_weeks.columns

    # Validate the requested stat first, then the week column
    if stat_col not in available:
        raise ValueError(
            f"Column '{stat_col}' not found in player data. "
            f"Available columns include: {list(available)}"
        )
    if "week" not in available:
        raise ValueError("Column 'week' not found in player data.")

    player_weeks = player_weeks.sort_values("week")

    # Coerce the stat to numbers; anything unparseable or missing becomes 0
    numeric_stat = pd.to_numeric(player_weeks[stat_col], errors="coerce")
    player_weeks[stat_col] = numeric_stat.fillna(0)

    x_weeks = player_weeks["week"]
    y_values = player_weeks[stat_col]

    stat_label = stat_col.replace("_", " ").title()
    player_name = f"{first_name} {last_name}"

    plt.figure(figsize=(10, 5))
    plt.plot(x_weeks, y_values, marker="o")
    plt.title(f"{player_name} – {stat_label} by Week ({year} Season)")
    plt.xlabel("Week")
    plt.ylabel(stat_label)
    plt.xticks(x_weeks)  # one tick per week that has data
    plt.grid(True, linestyle="--", alpha=0.5)
    plt.tight_layout()

    # Write the PNG (when requested) before displaying the chart
    if save_path is not None:
        plt.savefig(save_path, dpi=300, bbox_inches="tight")
        print(f"Saved line chart as: {save_path}")

    plt.show()
403 | 
404 | # plot_player_stat_by_week(
405 | #    2004,
406 | #    "Brian",
407 | #    "Westbrook",
408 | #    "rushing_yards",
409 | #    save_path="brian_westbrook_2004_rushing_yards_by_week.png"
410 | # ) 
411 | 
def get_team_season_data(year: int, include_team_meta: bool = True) -> pd.DataFrame:
    """
    Get clean team-level season stats for all teams for a given NFL season.

    Only regular-season games that have both scores are counted.

    Removes ALL metadata fields related to:
        - logos
        - colors
        - wordmarks
        - nicknames
        - divisions

    Adds:
        - points_for
        - points_against
        - point_diff
        - ppg_for
        - ppg_against

    Args:
        year (int): NFL season (e.g., 2024)
        include_team_meta (bool): If True, try to merge team description
            data onto the stats. If that step fails, a RuntimeWarning is
            issued and the stats are returned without metadata.

    Returns:
        pandas.DataFrame: One row per team. Leading columns are season, team,
        games_played, wins, losses, ties, points_for, points_against,
        point_diff, ppg_for, ppg_against, followed by any metadata columns.

    Compatible with nfl_data_py 0.3.3.
    """
    import warnings  # local import so the block does not depend on file-level imports

    # 1. Load schedule data
    games = nfl.import_schedules([year])

    # Keep regular-season games only. The season phase can be stored under
    # either column name, so filter on whichever exists; checking only
    # "season_type" silently lets playoff games through when the schedule
    # labels the phase "game_type".
    for phase_col in ("season_type", "game_type"):
        if phase_col in games.columns:
            games = games[games[phase_col] == "REG"]

    games = games.dropna(subset=["home_score", "away_score"])

    # 2. Build one row per team-game from each side's point of view
    def _team_rows(side: str, other: str) -> pd.DataFrame:
        rows = games[[f"{side}_team", f"{side}_score", f"{other}_score"]].rename(
            columns={
                f"{side}_team": "team",
                f"{side}_score": "points_for",
                f"{other}_score": "points_against",
            }
        )
        # Outcomes as 0/1 flags so they can simply be summed per team
        rows["win"] = (rows["points_for"] > rows["points_against"]).astype(int)
        rows["loss"] = (rows["points_for"] < rows["points_against"]).astype(int)
        rows["tie"] = (rows["points_for"] == rows["points_against"]).astype(int)
        return rows

    team_games = pd.concat(
        [_team_rows("home", "away"), _team_rows("away", "home")],
        ignore_index=True,
    )

    # 3. Aggregate season totals
    team_stats = team_games.groupby("team").agg(
        games_played=("win", "size"),
        wins=("win", "sum"),
        losses=("loss", "sum"),
        ties=("tie", "sum"),
        points_for=("points_for", "sum"),
        points_against=("points_against", "sum")
    ).reset_index()

    # Derived stats
    team_stats["point_diff"] = team_stats["points_for"] - team_stats["points_against"]
    team_stats["ppg_for"] = team_stats["points_for"] / team_stats["games_played"]
    team_stats["ppg_against"] = team_stats["points_against"] / team_stats["games_played"]

    # 4. Optional metadata merge + cleaning (best effort)
    if include_team_meta:
        try:
            meta = nfl.import_team_desc()

            # Merge on whichever key the metadata provides
            merged = team_stats
            if "team" in meta.columns:
                merged = team_stats.merge(meta, on="team", how="left")
            elif "team_abbr" in meta.columns:
                merged = team_stats.merge(
                    meta, left_on="team", right_on="team_abbr", how="left"
                )

            # Drop every column whose name mentions an unwanted keyword
            unwanted = ("logo", "wordmark", "color", "nick", "division")
            remove_cols = [
                c for c in merged.columns
                if any(keyword in c.lower() for keyword in unwanted)
            ]
            team_stats = merged.drop(columns=remove_cols, errors="ignore")
        except Exception as exc:
            # Metadata is optional: keep the stats, but say why it is missing
            # instead of failing silently.
            warnings.warn(
                f"Could not attach team metadata ({exc!r}); "
                "returning team stats without it.",
                RuntimeWarning,
                stacklevel=2,
            )

    # Add season
    team_stats["season"] = year

    # Reorder: core columns first, then whatever else is present
    cols = [
        "season", "team",
        "games_played", "wins", "losses", "ties",
        "points_for", "points_against", "point_diff",
        "ppg_for", "ppg_against"
    ]
    other_cols = [c for c in team_stats.columns if c not in cols]

    return team_stats[cols + other_cols]
510 | 
511 | 
512 | #df = get_team_season_data(2024)
#print(df.head())
514 | 
def get_all_team_game_stats(year: int) -> pd.DataFrame:
    """
    Return game-by-game stats for every team in a given NFL season.
    One row per *team-game* (so each actual game appears twice: once per team).

    Only regular-season games that have both scores are included.

    Args:
        year (int): NFL season (e.g., 2024)

    Returns:
        pandas.DataFrame: Team-game log whose leading columns are
            - season, week, gameday, game_id (each only if present)
            - team, opponent, is_home
            - points_for, points_against, point_diff
            - result ('W', 'L', 'T')
        followed by every remaining schedule column. Rows are sorted by
        team and week when a 'week' column exists.

    Compatible with nfl_data_py 0.3.3.
    """
    # 1. Load schedule data
    games = nfl.import_schedules([year])

    # Keep regular-season games only. The season phase can be stored under
    # either column name, so filter on whichever exists; checking only
    # "season_type" silently lets playoff games through when the schedule
    # labels the phase "game_type".
    for phase_col in ("season_type", "game_type"):
        if phase_col in games.columns:
            games = games[games[phase_col] == "REG"]

    # Drop games without final scores
    games = games.dropna(subset=["home_score", "away_score"])

    # 2. Build the schedule from one side's point of view
    def _perspective(side: str, other: str, at_home: bool) -> pd.DataFrame:
        rows = games.copy()
        rows["team"] = rows[f"{side}_team"]
        rows["opponent"] = rows[f"{other}_team"]
        rows["is_home"] = at_home
        rows["points_for"] = rows[f"{side}_score"]
        rows["points_against"] = rows[f"{other}_score"]
        return rows

    # 3. Combine home and away perspectives into team-game logs
    team_games = pd.concat(
        [_perspective("home", "away", True), _perspective("away", "home", False)],
        ignore_index=True,
    )

    # 4. Derived stats (this replaces any 'result' column already present
    #    in the schedule data with the W/L/T label)
    team_games["point_diff"] = team_games["points_for"] - team_games["points_against"]
    team_games["result"] = np.where(
        team_games["points_for"] > team_games["points_against"], "W",
        np.where(team_games["points_for"] < team_games["points_against"], "L", "T")
    )

    # 5. Put the main columns first (optional ones only if present), then everything else
    optional_cols = [
        c for c in ("season", "week", "gameday", "game_id") if c in team_games.columns
    ]
    base_cols = optional_cols + [
        "team", "opponent", "is_home",
        "points_for", "points_against", "point_diff", "result",
    ]
    other_cols = [c for c in team_games.columns if c not in base_cols]
    team_games = team_games[base_cols + other_cols]

    # Sort by team + week if week exists
    if "week" in team_games.columns:
        team_games = team_games.sort_values(["team", "week"]).reset_index(drop=True)

    return team_games
584 | 
def get_team_game_stats(year: int, team: str) -> pd.DataFrame:
    """
    Return the game log of one team for one season.

    Args:
        year (int): NFL season (e.g., 2024)
        team (str): Team abbreviation, e.g. 'PHI', 'DAL', 'KC'
                    (upper-cased before matching)

    Returns:
        pandas.DataFrame: one row per game for that team, re-indexed from 0.
    """
    abbreviation = team.upper()
    season_log = get_all_team_game_stats(year)

    # Boolean mask selecting only the requested team's rows
    is_requested_team = season_log["team"] == abbreviation
    return season_log[is_requested_team].reset_index(drop=True)
599 | 
# Example usage: fetch the 2024 game log for "PHI" and print a compact view of it.
# NOTE(review): these statements sit at module level with no
# `if __name__ == "__main__":` guard, so they execute every time this module
# is imported (including the call to get_team_game_stats) -- confirm intended.
phi_2024 = get_team_game_stats(2024, "PHI")
print(phi_2024[["week", "team", "opponent", "is_home", "points_for", "points_against", "result"]])
602 | 
603 | 
def get_team_touchdown_stats(year: int) -> pd.DataFrame:
    """
    Build touchdown stats for each team using play-by-play data.

    Args:
        year (int): NFL season (e.g., 2024)

    Returns:
        pandas.DataFrame: One row per team with the columns
        team, total_td, rush_td, pass_td, def_td, special_td.

    Compatible with nfl_data_py 0.3.3.
    """
    pbp = nfl.import_pbp_data([year])

    # Determine the team to credit for each play. Falling back from posteam
    # to defteam only helps when posteam is missing; it would credit a
    # defensive or return touchdown to the offense. When the data names the
    # scoring team directly ("td_team"), prefer that and use the old rule
    # only where it is missing (i.e. on plays without a touchdown).
    scoring_team = pbp["posteam"].fillna(pbp["defteam"])
    if "td_team" in pbp.columns:
        scoring_team = pbp["td_team"].fillna(scoring_team)

    def _indicator(column: str) -> pd.Series:
        """Return the 0/1 indicator column, or all zeros if it is absent."""
        if column not in pbp.columns:
            return pd.Series(0, index=pbp.index)
        # Missing values mean "no touchdown of this kind on this play"
        return pd.to_numeric(pbp[column], errors="coerce").fillna(0)

    # NOTE(review): "defensive_touchdown" and "special_teams_touchdown" may not
    # exist in the play-by-play data (return scores may be recorded under
    # "return_touchdown" instead) -- verify; absent columns count as 0 here.
    plays = pd.DataFrame({
        "team": scoring_team,
        "rush_td": _indicator("rush_touchdown"),
        "pass_td": _indicator("pass_touchdown"),
        "def_td": _indicator("defensive_touchdown"),
        "special_td": _indicator("special_teams_touchdown"),
    })

    # Total touchdowns per play across all categories
    plays["total_td"] = plays[["rush_td", "pass_td", "def_td", "special_td"]].sum(axis=1)

    # Group by team
    td_stats = plays.groupby("team").agg(
        total_td=("total_td", "sum"),
        rush_td=("rush_td", "sum"),
        pass_td=("pass_td", "sum"),
        def_td=("def_td", "sum"),
        special_td=("special_td", "sum"),
    ).reset_index()

    return td_stats
643 | 
def get_team_season_with_tds(year: int) -> pd.DataFrame:
    """
    Combine the team season table with per-team touchdown counts.

    Args:
        year (int): NFL season (e.g., 2024)

    Returns:
        pandas.DataFrame: The team season table left-joined on 'team' with
        total_td, rush_td, pass_td, def_td and special_td. Teams without a
        touchdown row get 0, and all touchdown columns are integers.
    """
    season_table = get_team_season_data(year)
    touchdowns = get_team_touchdown_stats(year)

    combined = season_table.merge(touchdowns, on="team", how="left")

    # Unmatched teams come back as NaN from the left join: zero-fill, then cast
    td_columns = ("total_td", "rush_td", "pass_td", "def_td", "special_td")
    cleaned = {name: combined[name].fillna(0).astype(int) for name in td_columns}

    return combined.assign(**cleaned)
655 | 
656 | 
def plot_team_stat_bar(year: int, stat_col: str, save_path: str | None = None) -> None:
    """
    Plot a bar chart comparing all teams on one season-level stat,
    and optionally save it as a PNG file.

    Args:
        year (int): NFL season (e.g., 2024)
        stat_col (str): Column of the team season table to plot
                        (e.g., 'points_for', 'total_td')
        save_path (str | None): Optional. File path to save the chart as a PNG.
                                If None, the chart is only shown.

    Returns:
        None

    Raises:
        ValueError: If stat_col is not a column of the team season table.
    """
    df = get_team_season_with_tds(year)

    if stat_col not in df.columns:
        raise ValueError(f"Column '{stat_col}' not found. "
                         "Did you mean one of: points_for, rush_td, pass_td, total_td?")

    # Highest value first so the chart reads as a ranking
    df = df.sort_values(stat_col, ascending=False)
    pretty_stat = stat_col.replace("_", " ").title()

    plt.figure(figsize=(12, 6))
    plt.bar(df["team"], df[stat_col])
    plt.title(f"{year} - Team Comparison by {pretty_stat}")
    plt.xticks(rotation=45)
    plt.xlabel("Team")
    plt.ylabel(pretty_stat)
    plt.tight_layout()

    # Save before show(): the figure may be cleared once the window closes
    if save_path is not None:
        plt.savefig(save_path, dpi=300, bbox_inches="tight")
        print(f"Chart saved as: {save_path}")

    plt.show()
677 | 
678 | # plot_team_stat_bar(2024, "total_td")
679 | 
680 | # plot_team_stat_bar(2024, "total_td")
681 | # plot_team_stat_bar(2022, "rush_td")
682 | 


--------------------------------------------------------------------------------