├── .Rbuildignore ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── R ├── cleaning_functions.R ├── ngscleanR-package.R ├── plotting_functions.R ├── silence_tidy_eval_notes.R └── sysdata.rda ├── README.Rmd ├── README.md ├── README_files └── figure-gfm │ ├── gif-1.gif │ └── plots-1.png ├── data-raw ├── coverage_labels.rds ├── coverages_week1.rds ├── create_package_data.R ├── final_predict_stage.R ├── generate_silence_eval_notes.R ├── model_stuff │ ├── coverage_classifier.R │ ├── coverage_classifier_functions.R │ ├── coverage_classifier_make_tensors.R │ ├── coverage_classifier_with_time.R │ └── plot_random_plays.R ├── nflfastr_plays.rds ├── sample_bdb_2019.rds ├── sample_bdb_2020.rds ├── sample_bdb_2021.rds └── sample_ngs.rds ├── man ├── clean_and_rotate.Rd ├── compute_o_diff.Rd ├── cut_plays.Rd ├── ngscleanR-package.Rd ├── plot_play.Rd └── prepare_bdb_week.Rd ├── ngscleanR.Rproj └── vignettes ├── .gitignore └── getting-started.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^ngscleanR\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^data-raw$ 4 | ^LICENSE\.md$ 5 | ^vignettes$ 6 | ^vignettes/getting-started.Rmd 7 | ^README_files$ 8 | ^README\.Rmd$ 9 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: ngscleanR 2 | Title: Helper Functions for Cleaning Player Tracking Data 3 | Version: 0.0.0.9002 4 | Authors@R: 5 | person(given = "First", 6 | family = "Last", 7 | role = c("aut", "cre"), 8 | email = "first.last@example.com", 9 | comment = c(ORCID = "YOUR-ORCID-ID")) 10 | Description: What the package does (one paragraph). 11 | License: MIT + file LICENSE 12 | Encoding: UTF-8 13 | LazyData: true 14 | Roxygen: list(markdown = TRUE) 15 | RoxygenNote: 7.1.1 16 | Depends: 17 | R (>= 3.5) 18 | Imports: 19 | dplyr, 20 | gganimate, 21 | ggplot2, 22 | glue, 23 | janitor, 24 | magrittr, 25 | readr, 26 | rlang, 27 | sportyR, 28 | tibble (>= 3.0), 29 | tidyr (>= 1.0.0), 30 | tidyselect (>= 1.1.0), 31 | usethis (>= 1.6.0) 32 | Suggests: 33 | knitr, 34 | gt 35 | VignetteBuilder: knitr 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2021 2 | COPYRIGHT HOLDER: ngscleanR authors 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2021 ngscleanR authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(clean_and_rotate) 4 | export(compute_o_diff) 5 | export(cut_plays) 6 | export(plot_play) 7 | export(prepare_bdb_week) 8 | import(dplyr) 9 | import(ggplot2) 10 | importFrom(glue,glue) 11 | importFrom(magrittr,"%>%") 12 | importFrom(rlang,":=") 13 | importFrom(tidyr,pivot_wider) 14 | importFrom(tidyselect,any_of) 15 | -------------------------------------------------------------------------------- /R/cleaning_functions.R: -------------------------------------------------------------------------------- 1 | #' Standardize direction and add play information 2 | #' 3 | #' @description Standardize direction and add play information. 4 | #' @param df A dataframe of player tracking data obtained from a Big Data Bowl 5 | #' or NGS highlights 6 | #' @return The original data with the columns below appended. Note that all returned columns will have 7 | #' cleaned names, including the original columns in `df` (e.g. play_id rather than playId), to end the tyranny of weird Big Data Bowl 8 | #' column names. 9 | #' \describe{ 10 | #' \item{team_name}{Values of home team (eg "SEA"), away team (eg "GB"), or "football"} 11 | #' \item{defense}{Whether player is on defense (football has 0 here)} 12 | #' \item{play}{Unique play identifier in format "gameid_playid" with gameid old GSIS format. Ex: "2018091000_1101".} 13 | #' \item{nflfastr_game_id}{Game ID in nflfastR format. Ex: "2018_01_ATL_PHI"} 14 | #' \item{week}{Week of season} 15 | #' \item{posteam}{Possession team} 16 | #' \item{home_team}{Home team (e.g. "PHI")} 17 | #' \item{away_team}{Away team (e.g. "ATL")} 18 | #' \item{down}{Down} 19 | #' \item{ydstogo}{Yards to go} 20 | #' \item{yardline_100}{Distance from opponent end zone} 21 | #' \item{qtr}{Quarter} 22 | #' \item{epa}{Expected Points Added gained on play from nflfastR} 23 | #' \item{yards_gained}{Yards gained on play} 24 | #' \item{air_yards}{Air yards (when applicable)} 25 | #' \item{desc}{Play description} 26 | #' \item{pass}{Was it a dropback? From nflfastR} 27 | #' \item{rush}{Was it a designed rush attempt? From nflfastR} 28 | #' \item{play_type_nfl}{Play type from NFL data. E.g. "PASS", "PENALTY", "RUSH", "SACK", "PUNT", etc.} 29 | #' \item{team_color}{Primary team color. Useful for play animations} 30 | #' \item{team_color2}{Secondary team color. Useful for play animations} 31 | #' \item{team_logo_espn}{URL of team logo} 32 | #' \item{los_x}{x location of line of scrimmage (e.g. 20 means own 10 yard line)} 33 | #' \item{dist_from_los}{Distance of player from line of scirmmage in x direction} 34 | #' \item{o_x}{Orientation of player in x direction} 35 | #' \item{o_y}{Orientation of player in y direction} 36 | #' \item{dir_x}{Direction of player in x direction} 37 | #' \item{dir_y}{Direction of player in y direction} 38 | #' \item{s_x}{Speed of player in x direction} 39 | #' \item{s_y}{Speed of player in y direction} 40 | #' \item{a_x}{Acceleration of player in x direction} 41 | #' \item{a_y}{Acceleration of player in y direction} 42 | #' } 43 | #' @export 44 | clean_and_rotate <- function(df) { 45 | 46 | original_cols <- df %>% 47 | janitor::clean_names() %>% 48 | names() 49 | 50 | added_cols <- c( 51 | "team_name", 52 | "defense", 53 | "play", 54 | "nflfastr_game_id", 55 | "week", 56 | "posteam", 57 | "home_team", 58 | "away_team", 59 | "down", "ydstogo", "yardline_100", "qtr", "epa", "yards_gained", 60 | "air_yards", "desc", "pass", "rush", "play_type_nfl", 61 | "team_color", "team_color2", "team_logo_espn", 62 | "los_x", "dist_from_los", "o_x", "o_y", "dir_x", "dir_y", 63 | "s_x", "s_y", "a_x", "a_y" 64 | ) 65 | df %>% 66 | add_info() %>% 67 | rotate_to_ltr() %>% 68 | dplyr::select(tidyselect::any_of(c(original_cols, added_cols))) 69 | } 70 | 71 | # creates team_name, defense, and adds some play info from nflfastr 72 | add_info <- function(df) { 73 | 74 | # make column names look reasonable 75 | df <- df %>% 76 | janitor::clean_names() 77 | 78 | # NGS highlights use "frame" instead of "frame_id" so make frame_id for these 79 | if (!"frame_id" %in% names(df) & "frame" %in% names(df)) { 80 | df <- df %>% 81 | dplyr::rename(frame_id = frame) 82 | } 83 | 84 | # NGS highlights have home_team_flag instead of team 85 | if (!"team" %in% names(df)) { 86 | 87 | df <- df %>% 88 | mutate( 89 | team = case_when( 90 | home_team_flag == 1 ~ "home", 91 | home_team_flag == 0 ~ "away", 92 | is.na(home_team_flag) ~ "football" 93 | ) 94 | ) 95 | 96 | } 97 | 98 | # 2020 bdb used "orientation" instead of "o" 99 | if ("orientation" %in% names(df)) { 100 | df <- df %>% 101 | dplyr::rename(o = orientation) 102 | } 103 | 104 | # 2020 big data bowl has weird play IDs where game_id is pre-pended 105 | if (max(nchar(df$play_id)) > 10) { 106 | 107 | df <- df %>% 108 | mutate( 109 | play_id = substr(play_id, 11, 14) %>% as.integer(), 110 | game_id = as.integer(game_id), 111 | 112 | # since bdb only has handoffs and doesn't have event, put in the event 113 | event = "handoff" 114 | ) 115 | 116 | } 117 | 118 | 119 | df %>% 120 | # get rid of the columns we're joining so no join duplicates 121 | select(-tidyselect::any_of(c( 122 | "posteam", "home_team", "away_team", "week", 123 | "down", "ydstogo", "qtr", "yardline_100", "epa", 124 | "yards_gained", "air_yards", "desc", "pass", "rush", "play_type_nfl" 125 | ))) %>% 126 | left_join(pbp, by = c("game_id", "play_id")) %>% 127 | mutate( 128 | team_name = case_when( 129 | team == "home" ~ home_team, 130 | team == "away" ~ away_team, 131 | # for the football ("football") 132 | TRUE ~ "football", 133 | ), 134 | defense = case_when( 135 | posteam == home_team & team == "away" ~ 1, 136 | posteam == away_team & team == "home" ~ 1, 137 | TRUE ~ 0 138 | ) 139 | ) %>% 140 | left_join(colors, by = "team_name") %>% 141 | mutate( 142 | team_color = ifelse(team_name == "football", "#663300", team_color), 143 | play = paste0(game_id, "_", play_id) 144 | ) %>% 145 | return() 146 | 147 | } 148 | 149 | # rotate field so all plays are left to right 150 | # affects x, y, o, dir 151 | rotate_to_ltr <- function(df) { 152 | 153 | if (!"play_direction" %in% names(df)) { 154 | message("Can't find play direction. Inferring from offense & defense locations at snap") 155 | 156 | df <- df %>% 157 | filter(event == "ball_snap", team != "football") %>% 158 | group_by(game_id, play_id, defense) %>% 159 | summarize(mean_x = mean(x, na.rm = T)) %>% 160 | pivot_wider(names_from = defense, values_from = mean_x, names_prefix = "x_") %>% 161 | ungroup() %>% 162 | mutate( 163 | play_direction = 164 | ifelse( 165 | # if defense has bigger x than offense, it's left to right 166 | x_1 > x_0, 167 | "right", 168 | "left" 169 | ) 170 | ) %>% 171 | select(game_id, play_id, play_direction) %>% 172 | inner_join(df, by = c("game_id", "play_id")) 173 | 174 | } 175 | 176 | # now we're ready to flip everything on left-moving plays 177 | df <- df %>% 178 | mutate( 179 | # standardize all plays so they are left to right 180 | to_left = ifelse(play_direction == "left", 1, 0), 181 | 182 | # reflect x & y 183 | x = ifelse(to_left == 1, 120 - x, x), 184 | y = ifelse(to_left == 1, 160/3 - y, y), 185 | 186 | # get x value of line of scrimmage 187 | los_x = 110 - yardline_100, 188 | dist_from_los = x - los_x 189 | ) 190 | 191 | # if orientation is in df, standardize it 192 | if ("o" %in% names(df)) { 193 | df <- df %>% 194 | mutate( 195 | # rotate 180 degrees for the angles 196 | o = ifelse(to_left == 1, o + 180, o), 197 | 198 | # make sure measured 0 to 360 199 | o = ifelse(o > 360, o - 360, o), 200 | 201 | # convert to radians 202 | o_rad = pi * (o / 180), 203 | 204 | # get orientation and direction in x and y direction 205 | # NA checks are for the ball 206 | o_x = ifelse(is.na(o), NA_real_, sin(o_rad)), 207 | o_y = ifelse(is.na(o), NA_real_, cos(o_rad)) 208 | ) 209 | } 210 | 211 | # if dir is in df, standardize it 212 | if ("dir" %in% names(df)) { 213 | df <- df %>% 214 | mutate( 215 | # rotate 180 degrees for the angles 216 | dir = ifelse(to_left == 1, dir + 180, dir), 217 | 218 | # make sure measured 0 to 360 219 | dir = ifelse(dir > 360, dir - 360, dir), 220 | 221 | # convert to radians 222 | dir_rad = pi * (dir / 180), 223 | 224 | # get orientation and direction in x and y direction 225 | # NA checks are for the ball 226 | dir_x = ifelse(is.na(dir), NA_real_, sin(dir_rad)), 227 | dir_y = ifelse(is.na(dir), NA_real_, cos(dir_rad)), 228 | 229 | s_x = dir_x * s, 230 | s_y = dir_y * s, 231 | 232 | a_x = dir_x * a, 233 | a_y = dir_y * a 234 | ) 235 | } 236 | 237 | return(df) 238 | 239 | } 240 | 241 | 242 | 243 | #' Compute orientation difference 244 | #' 245 | #' @description Compute difference in orientation between direction player is currently facing and 246 | #' orientation if player were facing towards a given x and y location. 247 | #' @param df A dataframe containing x, y, o, "prefix"_x, and "prefix"_y 248 | #' @param prefix (default = "qb"). Columns prefix_x and prefix_y must be contained in `df`. These columns 249 | #' contain the x and y locations that will be used to calculate orientation difference. 250 | #' @return Original dataframe with o_to_"prefix" added, which is the difference in orientation 251 | #' in degrees between the way the player is facing and where the "prefix" player is (0 is facing 252 | #' directly at the "prefix" player, 180 is directly away). 253 | #' @export 254 | #' @examples 255 | #' df <- tibble::tibble("x" = 20, "y" = 30, "o" = 270, "qb_x" = 10, "qb_y" = 25) 256 | #' df <- compute_o_diff(df) 257 | #' str(df) 258 | compute_o_diff <- function(df, prefix = "qb") { 259 | 260 | name_x <- sym(paste0(prefix, "_x")) 261 | name_y <- sym(paste0(prefix, "_y")) 262 | 263 | new_column <- paste0("o_to_", prefix) 264 | 265 | df <- df %>% 266 | mutate( 267 | # compute distances 268 | dis_x = {{name_x}} - x, 269 | dis_y = {{name_y}} - y, 270 | 271 | # get atan2 in degrees 272 | tmp = atan2(dis_y, dis_x) * (180 / pi), 273 | 274 | # set clockwise (360 - tmp) with 0 on top instead of east (+ 90) 275 | # https://math.stackexchange.com/questions/707673/find-angle-in-degrees-from-one-point-to-another-in-2d-space 276 | tmp = (360 - tmp) + 90, 277 | 278 | # make sure 0 to 360 279 | tmp = case_when(tmp < 0 ~ tmp + 360, 280 | tmp > 360 ~ tmp - 360 , 281 | TRUE ~ tmp), 282 | 283 | # difference in angles 284 | diff = abs(o - tmp), 285 | 286 | # angle to qb 287 | !!new_column := pmin(360 - diff, diff) 288 | ) %>% 289 | select(-diff, -tmp) 290 | 291 | return(df) 292 | 293 | } 294 | 295 | #' Trim plays based on events 296 | #' 297 | #' @description Trim frames for a play and/or remove plays based on how quickly provided events happen in the play. 298 | #' @param df A dataframe containing player tracking data with `event`, `frame_id`, and `play` with the latter uniquely identifying plays. 299 | #' @param end_events Events designated as play end events. Defaults are when a pass is thrown or QB's involvement ends in some 300 | #' other way (sack, strip sack, shovel pass, etc). 301 | #' @param time_after_event Number of frames to keep after the `end_events` (default: 0). 302 | #' Note that there are 10 frames in each second so providing 10 would keep one additional second after a pass was thrown 303 | #' when using the default end events. 304 | #' @param throw_frame If not NULL, for plays when one of the `end_events` happens before this frame, 305 | #' these plays will be removed from the returned df (default: 25, ie 1.5 seconds 306 | #' into the play). To not employ play dropping, provide throw_frame = NULL and all of the plays provided in original 307 | #' `df` will be returned. 308 | #' @return The original df with trimmed frames (and if throw_frame not NULL, the shorter plays removed). 309 | #' @export 310 | cut_plays <- function(df, 311 | 312 | # cut off anything that happens after this event 313 | end_events = c("pass_forward", "qb_sack", "qb_strip_sack", "qb_spike", "tackle", "pass_shovel"), 314 | # keep this many frames after the end event 315 | time_after_event = 0, 316 | # remove plays with throws before this frame 317 | throw_frame = 25) { 318 | 319 | # default truncates data at pass 320 | if (!is.null(end_events)) { 321 | 322 | mins <- df %>% 323 | arrange(play, frame_id) %>% 324 | group_by(play) %>% 325 | mutate( 326 | end_event = cumsum(event %in% end_events) 327 | ) %>% 328 | filter(end_event > 0) %>% 329 | dplyr::slice(1) %>% 330 | ungroup() %>% 331 | # if throw happens on frame 36 and user wants 5 frames, keep 36 - 40 332 | mutate(end_frame = frame_id + time_after_event - 1) %>% 333 | select(play, end_frame) 334 | 335 | df <- df %>% 336 | left_join(mins, by = c("play")) %>% 337 | filter(frame_id <= end_frame) 338 | 339 | 340 | } 341 | 342 | # if the play ends before throw_frame, throw out the play 343 | # frame 25 is 1.5 seconds into play 344 | if (!is.null(throw_frame)) { 345 | 346 | df <- df %>% 347 | arrange(play, frame_id) %>% 348 | group_by(play) %>% 349 | mutate(max_frame = max(frame_id)) %>% 350 | filter(max_frame >= throw_frame) %>% 351 | ungroup() 352 | 353 | } 354 | 355 | return(df) 356 | 357 | } 358 | 359 | 360 | 361 | #' Prepare a week of data from the 2021 Big Data Bowl 362 | #' 363 | #' @description Prepare a week of data from the 2021 Big Data Bowl (data from 2018 season). To use this, you'll need to have 364 | #' the BDB data saved and unzipped somewhere in a directory on your computer. 365 | #' @param week Get and prepare this week of data (1-17) 366 | #' @param dir Location of directory where BDB data lives. Default is unzipped to adjacent directory 367 | #' (default = "../nfl-big-data-bowl-2021/input") 368 | #' @param trim_frame If a throw, sack, etc happens before this frame, drop the play (default = 25; i.e. before 369 | #' 1.5 seconds into the play). 370 | #' @param frames_after_throw If a frame happened more than this many frames after throw, drop the frame. 371 | #' @param keep_frames Keep these frames. Default: NULL (ie keep all frames). 372 | #' @param drop_positions Drop these positions from the returned data (default = "QB"). 373 | #' @details Loads raw .csvs from 2021 BDB, cleans, rotates, applies frame trimming, calculates orientation to QB, 374 | #' drops plays without at least 3 offensive and defensive players. 375 | #' @export 376 | prepare_bdb_week <- function( 377 | week, 378 | dir = "../nfl-big-data-bowl-2021/input", 379 | trim_frame = 25, 380 | frames_after_throw = 10, 381 | keep_frames = NULL, 382 | drop_positions = c("QB") 383 | ) { 384 | df <- readr::read_csv(glue::glue("{dir}/week{week}.csv")) %>% 385 | 386 | # do all the cleaning 387 | clean_and_rotate() %>% 388 | # stop plays at pass forward 389 | # and remove short (< 1 seconds) plays 390 | cut_plays( 391 | # if throw happens before this frame (1.5 seconds after snap), discard 392 | throw_frame = trim_frame, 393 | # keep this many frames after the throw 394 | time_after_event = frames_after_throw 395 | ) 396 | 397 | # get qb location 398 | # first, throw out plays with 2 qbs 399 | n_qbs <- df %>% 400 | filter(position == "QB") %>% 401 | group_by(game_id, play_id, frame_id) %>% 402 | summarize(qbs = n()) %>% 403 | group_by(game_id, play_id) %>% 404 | summarise(qbs = max(qbs)) %>% 405 | filter(qbs == 1) %>% 406 | ungroup() 407 | 408 | # now get the location of the QB 409 | qbs <- df %>% 410 | filter(position == "QB") %>% 411 | dplyr::select( 412 | game_id, 413 | play_id, 414 | frame_id, 415 | qb_x = x, 416 | qb_y = y 417 | ) %>% 418 | inner_join(n_qbs, by = c("game_id", "play_id")) %>% 419 | select(-qbs) 420 | 421 | # add qb location 422 | df <- df %>% 423 | # inner_join(labels, by = c("game_id", "play_id")) %>% 424 | left_join(qbs, by = c("game_id", "play_id", "frame_id")) %>% 425 | compute_o_diff("qb") %>% 426 | # scale 0 to 1 427 | mutate(o_to_qb = o_to_qb / 180) %>% 428 | dplyr::filter( 429 | position != drop_positions, 430 | !is.na(position), 431 | !is.na(o_to_qb) 432 | ) %>% 433 | dplyr::select( 434 | game_id, 435 | play_id, 436 | week, 437 | frame_id, 438 | nfl_id, 439 | play, 440 | defense, 441 | # coverage, 442 | x, 443 | y, 444 | s_x, 445 | s_y, 446 | a_x, 447 | a_y, 448 | o, 449 | o_to_qb, 450 | los_x, 451 | dist_from_los 452 | ) %>% 453 | # this slice is probably not necessary anymore after getting rid of plays 454 | # with 2 qbs 455 | group_by(game_id, play_id, frame_id, nfl_id) %>% 456 | dplyr::slice(1) %>% 457 | # for getting rid of plays without any defense or offense players 458 | group_by(game_id, play_id, frame_id) %>% 459 | mutate( 460 | n_defenders = sum(defense), 461 | n_offense = sum(1 - defense) 462 | ) %>% 463 | ungroup() %>% 464 | filter( 465 | n_defenders > 2 & n_offense > 2, 466 | n_defenders <= 11 & n_offense <= 11 467 | ) 468 | 469 | if (!is.null(keep_frames)) { 470 | df <- df %>% 471 | filter( 472 | frame_id %in% keep_frames 473 | ) 474 | } 475 | 476 | df %>% 477 | select(-game_id, -play_id, -n_defenders, -n_offense) %>% 478 | arrange( 479 | play, frame_id, defense, nfl_id 480 | ) 481 | 482 | } 483 | 484 | 485 | -------------------------------------------------------------------------------- /R/ngscleanR-package.R: -------------------------------------------------------------------------------- 1 | #' @keywords internal 2 | "_PACKAGE" 3 | 4 | # The following block is used by usethis to automatically manage 5 | # roxygen namespace tags. Modify with care! 6 | ## usethis namespace: start 7 | #' @import dplyr 8 | #' @import ggplot2 9 | #' @importFrom glue glue 10 | #' @importFrom magrittr %>% 11 | #' @importFrom rlang := 12 | #' @importFrom tidyr pivot_wider 13 | #' @importFrom tidyselect any_of 14 | ## usethis namespace: end 15 | NULL 16 | -------------------------------------------------------------------------------- /R/plotting_functions.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' Plot a play 4 | #' 5 | #' @description Plot or animate a play. 6 | #' @param df_track A df of tracking data from one play. 7 | #' @param orientation Show lines representing where player is facing (default = T). 8 | #' @param dot_size Size of player dots (default = 6). 9 | #' @param segment_length Length of orientation segment lines (default = 2.5). 10 | #' @param segment_size Width of orientation segment lines (default = 1.5). 11 | #' @param numbers Show player jersey numbers (default = T). 12 | #' @param animated Whether play is animated, rather than a still frame (default = T). 13 | #' @param animated_h If animated, height of animated image (default = 4). 14 | #' @param animated_w If animated, width of animated image (default = 8). 15 | #' @param animated_res If animated, resolution of animated image (default = 200). 16 | #' @param frame frame_id to plot (default = NULL, ie plot all provided frames). 17 | #' @export 18 | plot_play <- function( 19 | df_track, 20 | orientation = TRUE, 21 | dot_size = 6, 22 | segment_length = 2.5, 23 | segment_size = 1.5, 24 | numbers = TRUE, 25 | animated = TRUE, 26 | animated_h = 4, 27 | animated_w = 8, 28 | animated_res = 200, 29 | frame = NULL 30 | ) { 31 | 32 | caption <- glue::glue("{df_track$nflfastr_game_id[1]} {df_track$down[1]}&{df_track$ydstogo[1]}: Q{df_track$qtr[1]} {df_track$desc[1]}") 33 | 34 | if (!is.null(frame)) { 35 | 36 | df_track <- df_track %>% filter(frame_id == frame) 37 | 38 | } 39 | 40 | fig <- nfl_field + 41 | # dots 42 | geom_point(data = df_track, aes(x, y), 43 | color = df_track$team_color, 44 | shape = ifelse( 45 | df_track$team_name == "football" | df_track$defense == 1, 46 | 19, 1 47 | ), 48 | size = dot_size 49 | ) + 50 | labs( 51 | caption = caption 52 | ) + 53 | theme( 54 | plot.title = element_blank(), 55 | plot.margin = margin(.1, 0, .5, 0, "cm"), 56 | plot.caption = element_text(size = 8) 57 | ) 58 | 59 | if (orientation == TRUE & "o" %in% names(df_track)) { 60 | 61 | fig <- fig + 62 | # orientation lines 63 | geom_segment( 64 | data = df_track, 65 | aes(x, y, xend = x + segment_length * o_x, yend = y + segment_length * o_y), 66 | color = df_track$team_color, size = segment_size 67 | ) 68 | 69 | } 70 | 71 | if (numbers) { 72 | 73 | fig <- fig + 74 | geom_text( 75 | data = df_track, 76 | mapping = aes(x = x, y = y, label = jersey_number), 77 | colour = ifelse(df_track$defense == 1, df_track$team_color2, "white"), 78 | size = 2 79 | ) 80 | 81 | } 82 | 83 | if (animated) { 84 | 85 | # if (animated_output == "mp4") { 86 | # renderer <- gganimate::gifski_renderer() 87 | # } else { 88 | # renderer <- gganimate::av_renderer() 89 | # } 90 | # 91 | fig <- fig + 92 | gganimate::transition_time(df_track$frame_id) 93 | 94 | fig <- gganimate::animate( 95 | fig, 96 | # renderer = renderer, 97 | height = animated_h, width = animated_w, units = "in", 98 | res = animated_res, 99 | nframes = n_distinct(df_track$frame_id), 100 | start_pause = 6, 101 | end_pause = 4 102 | ) 103 | 104 | } 105 | 106 | return(fig) 107 | 108 | } 109 | 110 | 111 | # helper function to not make every table have so many lines of code 112 | make_table <- function(df) { 113 | df %>% 114 | gt::gt() %>% 115 | gt::tab_style( 116 | style = gt::cell_text(color = "black", weight = "bold"), 117 | locations = list( 118 | gt::cells_column_labels(dplyr::everything()) 119 | ) 120 | ) %>% 121 | gt::tab_options( 122 | row_group.border.top.width = gt::px(3), 123 | row_group.border.top.color = "black", 124 | row_group.border.bottom.color = "black", 125 | table_body.hlines.color = "white", 126 | table.border.top.color = "black", 127 | table.border.top.width = gt::px(1), 128 | table.border.bottom.color = "white", 129 | table.border.bottom.width = gt::px(1), 130 | column_labels.border.bottom.color = "black", 131 | column_labels.border.bottom.width = gt::px(2), 132 | row.striping.background_color = '#FFFFFF', 133 | row.striping.include_table_body = TRUE, 134 | table.background.color = '#F2F2F2', 135 | data_row.padding = gt::px(2), 136 | table.font.size = gt::px(16L) 137 | ) %>% 138 | return() 139 | } 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /R/silence_tidy_eval_notes.R: -------------------------------------------------------------------------------- 1 | a <- 2 | a_x <- 3 | a_y <- 4 | defense <- 5 | dir_rad <- 6 | dir_x <- 7 | dir_y <- 8 | dis_x <- 9 | dis_y <- 10 | dist_from_los <- 11 | end_event <- 12 | end_frame <- 13 | event <- 14 | frame <- 15 | frame_id <- 16 | game_id <- 17 | jersey_number <- 18 | los_x <- 19 | max_frame <- 20 | mean_x <- 21 | n_defenders <- 22 | n_offense <- 23 | nfl_id <- 24 | o <- 25 | o_rad <- 26 | o_to_qb <- 27 | o_x <- 28 | o_y <- 29 | orientation <- 30 | play <- 31 | play_direction <- 32 | play_id <- 33 | position <- 34 | s <- 35 | s_x <- 36 | s_y <- 37 | team <- 38 | team_color <- 39 | team_name <- 40 | tmp <- 41 | to_left <- 42 | x <- 43 | x_0 <- 44 | x_1 <- 45 | y <- 46 | yardline_100 <- NULL -------------------------------------------------------------------------------- /R/sysdata.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guga31bb/ngscleanR/9cdb48368d904ec64c8abf52ec942b541b2037aa/R/sysdata.rda -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "ngscleanR" 3 | output: github_document 4 | --- 5 | 6 | ```{r, include = FALSE} 7 | knitr::opts_chunk$set( 8 | collapse = TRUE, 9 | comment = "#>", 10 | fig.width=9, 11 | fig.height=5, 12 | tidy = 'styler' 13 | ) 14 | ``` 15 | 16 | `ngscleanR` is a set of functions to clean up and standardize NFL player tracking data. The package handles some of the necessary, but boring, parts of dealing with player tracking data. The included functions: 17 | 18 | * **`clean_and_rotate()`**: Makes all plays go from left to right, append some play information from `nflfastR` (yard line, play description, play type, etc), and add some post-standardized information about where the player is moving and facing (e.g., `s_x`, `s_y`, `o_x`, `o_y`, etc) 19 | * **`compute_o_diff()`**: Computes difference in orientation between direction player is currently facing and 20 | #' orientation if player were facing towards a given x and y location. For example, this could be used to determine the extent to which a player is facing towards the quarterback on a given frame. 21 | * **`cut_plays()`** Trim frames for a play and/or remove plays based on how quickly provided events happen in the play. For example, this could be used to remove frames after a pass was thrown or discard plays where a pass is thrown very quickly. 22 | * **`prepare_bdb_week()`**: A wrapper around the above three functions that cleans the raw data from the 2021 Big Data Bowl (2018 season). 23 | * **`plot_play()`**: A wrapper around `ggplot` and `gganimate` for plotting a play. 24 | 25 | ## Installation 26 | 27 | Install from github using: 28 | 29 | ```{r, eval = FALSE} 30 | if (!require("remotes")) install.packages("remotes") 31 | remotes::install_github("guga31bb/ngscleanR") 32 | ``` 33 | 34 | 35 | ## Usage 36 | 37 | First we load the necessary packages (`patchwork` is for the plot at the end). 38 | 39 | ```{r setup, message=FALSE} 40 | library(ngscleanR) 41 | library(tidyverse) 42 | library(patchwork) 43 | ``` 44 | 45 | ## Load sample week 46 | 47 | To demonstrate the package features, we start by loading some small sample data stored in the package github repo that come from 2021 Big Data Bowl: 48 | 49 | ```{r} 50 | tracking <- readRDS("data-raw/sample_bdb_2021.rds") 51 | names(tracking) 52 | ``` 53 | 54 | ### The main function 55 | 56 | This will clean up the data, attach some information associated with the play, and make everything face from left to right. 57 | 58 | ```{r} 59 | cleaned <- tracking %>% 60 | clean_and_rotate() 61 | 62 | names(cleaned) 63 | ``` 64 | 65 | ### Play cutting function 66 | 67 | This discards any plays where the throw happens before frame 25 (i.e. 1.5 seconds into the play). In addition, it removes any frames that took place more than 10 frames after a pass was thrown or some other play ending event (sack, fumble, etc). 68 | 69 | ```{r} 70 | cleaned <- cleaned %>% 71 | cut_plays( 72 | # get rid of plays with throw before this frame 73 | throw_frame = 25, 74 | # get rid of frames that happen after this many frames after pass released 75 | time_after_event = 10 76 | ) 77 | 78 | names(cleaned) 79 | ``` 80 | 81 | ### Plot some sample plays 82 | 83 | Here is a demonstration of the `plot_play` function on some still frames: 84 | 85 | ```{r plots, warning = FALSE, message = FALSE, results = 'hide', fig.keep = 'all', dpi = 400, layout="l-body-outset"} 86 | ex <- sample(cleaned$play, 4) 87 | 88 | plots <- map(ex, ~{ 89 | plot <- cleaned %>% 90 | filter(play == .x) %>% 91 | plot_play( 92 | # show still frame, not animation 93 | animated = FALSE, 94 | # just plot this frame_id 95 | frame = 28, 96 | segment_length = 6, 97 | segment_size = 3, 98 | dot_size = 4 99 | 100 | ) 101 | 102 | plot + 103 | theme(plot.title = element_blank(), 104 | plot.caption = element_blank(), 105 | plot.margin = unit(c(0, 0, 0, 0), "cm") 106 | ) 107 | }) 108 | 109 | (plots[[1]] + plots[[2]]) / (plots[[3]] + plots[[4]]) 110 | ``` 111 | 112 | Or we can animate a play: 113 | 114 | ```{r gif, warning = FALSE, message = FALSE, fig.keep = 'all', dpi = 400, layout="l-body-outset"} 115 | ex <- sample(cleaned$play, 1) 116 | 117 | plot <- cleaned %>% 118 | filter(play == ex) %>% 119 | plot_play( 120 | # show still frame, not animation 121 | animated = TRUE, 122 | # just plot this frame_id 123 | segment_length = 6, 124 | segment_size = 3, 125 | dot_size = 4, 126 | animated_h = 4, 127 | animated_w = 8, 128 | animated_res = 150 129 | ) 130 | 131 | 132 | plot 133 | ``` 134 | 135 | ### The big cleaning function 136 | 137 | And the wrapper that can be used to prepare raw 2021 Big Data Bowl data. See [this Open Source Football post](https://www.opensourcefootball.com/posts/2021-05-31-computer-vision-in-r-using-torch/) for how it might be useful. 138 | 139 | ```{r} 140 | prepare_bdb_week( 141 | week = 1, 142 | dir = "../nfl-big-data-bowl-2021/input", 143 | trim_frame = 25, 144 | frames_after_throw = 10, 145 | keep_frames = c(30), 146 | drop_positions = c("QB") 147 | ) %>% 148 | str() 149 | ``` 150 | 151 | 152 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ngscleanR 2 | ================ 3 | 4 | `ngscleanR` is a set of functions to clean up and standardize NFL player 5 | tracking data. The package handles some of the necessary, but boring, 6 | parts of dealing with player tracking data. The included functions: 7 | 8 | - **`clean_and_rotate()`**: Makes all plays go from left to right, 9 | append some play information from `nflfastR` (yard line, play 10 | description, play type, etc), and add some post-standardized 11 | information about where the player is moving and facing (e.g., 12 | `s_x`, `s_y`, `o_x`, `o_y`, etc) 13 | - **`compute_o_diff()`**: Computes difference in orientation between 14 | direction player is currently facing and \#’ orientation if player 15 | were facing towards a given x and y location. For example, this 16 | could be used to determine the extent to which a player is facing 17 | towards the quarterback on a given frame. 18 | - **`cut_plays()`** Trim frames for a play and/or remove plays based 19 | on how quickly provided events happen in the play. For example, this 20 | could be used to remove frames after a pass was thrown or discard 21 | plays where a pass is thrown very quickly. 22 | - **`prepare_bdb_week()`**: A wrapper around the above three functions 23 | that cleans the raw data from the 2021 Big Data Bowl (2018 season). 24 | - **`plot_play()`**: A wrapper around `ggplot` and `gganimate` for 25 | plotting a play. 26 | 27 | ## Installation 28 | 29 | Install from github using: 30 | 31 | ``` r 32 | if (!require("remotes")) install.packages("remotes") 33 | remotes::install_github("guga31bb/ngscleanR") 34 | ``` 35 | 36 | ## Usage 37 | 38 | First we load the necessary packages (`patchwork` is for the plot at the 39 | end). 40 | 41 | ``` r 42 | library(ngscleanR) 43 | library(tidyverse) 44 | library(patchwork) 45 | ``` 46 | 47 | ## Load sample week 48 | 49 | To demonstrate the package features, we start by loading some small 50 | sample data stored in the package github repo that come from 2021 Big 51 | Data Bowl: 52 | 53 | ``` r 54 | tracking <- readRDS("data-raw/sample_bdb_2021.rds") 55 | names(tracking) 56 | #> [1] "time" "x" "y" "s" 57 | #> [5] "a" "dis" "o" "dir" 58 | #> [9] "event" "nflId" "displayName" "jerseyNumber" 59 | #> [13] "position" "frameId" "team" "gameId" 60 | #> [17] "playId" "playDirection" "route" 61 | ``` 62 | 63 | ### The main function 64 | 65 | This will clean up the data, attach some information associated with the 66 | play, and make everything face from left to right. 67 | 68 | ``` r 69 | cleaned <- tracking %>% 70 | clean_and_rotate() 71 | 72 | names(cleaned) 73 | #> [1] "time" "x" "y" "s" 74 | #> [5] "a" "dis" "o" "dir" 75 | #> [9] "event" "nfl_id" "display_name" "jersey_number" 76 | #> [13] "position" "frame_id" "team" "game_id" 77 | #> [17] "play_id" "play_direction" "route" "team_name" 78 | #> [21] "defense" "play" "nflfastr_game_id" "week" 79 | #> [25] "posteam" "home_team" "away_team" "down" 80 | #> [29] "ydstogo" "yardline_100" "qtr" "epa" 81 | #> [33] "yards_gained" "air_yards" "desc" "pass" 82 | #> [37] "rush" "play_type_nfl" "team_color" "team_color2" 83 | #> [41] "team_logo_espn" "los_x" "dist_from_los" "o_x" 84 | #> [45] "o_y" "dir_x" "dir_y" "s_x" 85 | #> [49] "s_y" "a_x" "a_y" 86 | ``` 87 | 88 | ### Play cutting function 89 | 90 | This discards any plays where the throw happens before frame 25 91 | (i.e. 1.5 seconds into the play). In addition, it removes any frames 92 | that took place more than 10 frames after a pass was thrown or some 93 | other play ending event (sack, fumble, etc). 94 | 95 | ``` r 96 | cleaned <- cleaned %>% 97 | cut_plays( 98 | # get rid of plays with throw before this frame 99 | throw_frame = 25, 100 | # get rid of frames that happen after this many frames after pass released 101 | time_after_event = 10 102 | ) 103 | 104 | names(cleaned) 105 | #> [1] "time" "x" "y" "s" 106 | #> [5] "a" "dis" "o" "dir" 107 | #> [9] "event" "nfl_id" "display_name" "jersey_number" 108 | #> [13] "position" "frame_id" "team" "game_id" 109 | #> [17] "play_id" "play_direction" "route" "team_name" 110 | #> [21] "defense" "play" "nflfastr_game_id" "week" 111 | #> [25] "posteam" "home_team" "away_team" "down" 112 | #> [29] "ydstogo" "yardline_100" "qtr" "epa" 113 | #> [33] "yards_gained" "air_yards" "desc" "pass" 114 | #> [37] "rush" "play_type_nfl" "team_color" "team_color2" 115 | #> [41] "team_logo_espn" "los_x" "dist_from_los" "o_x" 116 | #> [45] "o_y" "dir_x" "dir_y" "s_x" 117 | #> [49] "s_y" "a_x" "a_y" "end_frame" 118 | #> [53] "max_frame" 119 | ``` 120 | 121 | ### Plot some sample plays 122 | 123 | Here is a demonstration of the `plot_play` function on some still 124 | frames: 125 | 126 | ``` r 127 | ex <- sample(cleaned$play, 4) 128 | 129 | plots <- map(ex, ~ { 130 | plot <- cleaned %>% 131 | filter(play == .x) %>% 132 | plot_play( 133 | # show still frame, not animation 134 | animated = FALSE, 135 | # just plot this frame_id 136 | frame = 28, 137 | segment_length = 6, 138 | segment_size = 3, 139 | dot_size = 4 140 | ) 141 | 142 | plot + 143 | theme( 144 | plot.title = element_blank(), 145 | plot.caption = element_blank(), 146 | plot.margin = unit(c(0, 0, 0, 0), "cm") 147 | ) 148 | }) 149 | 150 | (plots[[1]] + plots[[2]]) / (plots[[3]] + plots[[4]]) 151 | ``` 152 | 153 | ![](README_files/figure-gfm/plots-1.png) 154 | 155 | Or we can animate a play: 156 | 157 | ``` r 158 | ex <- sample(cleaned$play, 1) 159 | 160 | plot <- cleaned %>% 161 | filter(play == ex) %>% 162 | plot_play( 163 | # show still frame, not animation 164 | animated = TRUE, 165 | # just plot this frame_id 166 | segment_length = 6, 167 | segment_size = 3, 168 | dot_size = 4, 169 | animated_h = 4, 170 | animated_w = 8, 171 | animated_res = 150 172 | ) 173 | 174 | 175 | plot 176 | ``` 177 | 178 | ![](README_files/figure-gfm/gif-1.gif) 179 | 180 | ### The big cleaning function 181 | 182 | And the wrapper that can be used to prepare raw 2021 Big Data Bowl data. 183 | See [this Open Source Football 184 | post](https://www.opensourcefootball.com/posts/2021-05-31-computer-vision-in-r-using-torch/) 185 | for how it might be useful. 186 | 187 | ``` r 188 | prepare_bdb_week( 189 | week = 1, 190 | dir = "../nfl-big-data-bowl-2021/input", 191 | trim_frame = 25, 192 | frames_after_throw = 10, 193 | keep_frames = c(30), 194 | drop_positions = c("QB") 195 | ) %>% 196 | str() 197 | #> 198 | #> -- Column specification -------------------------------------------------------- 199 | #> cols( 200 | #> time = col_datetime(format = ""), 201 | #> x = col_double(), 202 | #> y = col_double(), 203 | #> s = col_double(), 204 | #> a = col_double(), 205 | #> dis = col_double(), 206 | #> o = col_double(), 207 | #> dir = col_double(), 208 | #> event = col_character(), 209 | #> nflId = col_double(), 210 | #> displayName = col_character(), 211 | #> jerseyNumber = col_double(), 212 | #> position = col_character(), 213 | #> frameId = col_double(), 214 | #> team = col_character(), 215 | #> gameId = col_double(), 216 | #> playId = col_double(), 217 | #> playDirection = col_character(), 218 | #> route = col_character() 219 | #> ) 220 | #> tibble [13,057 x 15] (S3: tbl_df/tbl/data.frame) 221 | #> $ week : int [1:13057] 1 1 1 1 1 1 1 1 1 1 ... 222 | #> $ frame_id : num [1:13057] 30 30 30 30 30 30 30 30 30 30 ... 223 | #> $ nfl_id : num [1:13057] 2507763 2540158 2552582 2552600 2553502 ... 224 | #> $ play : chr [1:13057] "2018090600_1037" "2018090600_1037" "2018090600_1037" "2018090600_1037" ... 225 | #> $ defense : num [1:13057] 0 0 0 0 0 1 1 1 1 1 ... 226 | #> $ x : num [1:13057] 56.1 55.7 48.4 57.4 50.8 ... 227 | #> $ y : num [1:13057] 14.4 32.3 31.4 37.8 22.5 ... 228 | #> $ s_x : num [1:13057] 5.595 4.438 -0.228 5.443 0.514 ... 229 | #> $ s_y : num [1:13057] -3.847 -3.978 -0.693 -1.757 -4.611 ... 230 | #> $ a_x : num [1:13057] 2.719 1.191 -0.741 2.493 0.505 ... 231 | #> $ a_y : num [1:13057] -1.87 -1.068 -2.251 -0.805 -4.532 ... 232 | #> $ o : num [1:13057] 120.2 102.1 126 80.9 180.9 ... 233 | #> $ o_to_qb : num [1:13057] 0.883 0.864 0.688 0.88 0.75 ... 234 | #> $ los_x : num [1:13057] 53 53 53 53 53 53 53 53 53 53 ... 235 | #> $ dist_from_los: num [1:13057] 3.09 2.67 -4.63 4.4 -2.2 ... 236 | ``` 237 | -------------------------------------------------------------------------------- /README_files/figure-gfm/gif-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guga31bb/ngscleanR/9cdb48368d904ec64c8abf52ec942b541b2037aa/README_files/figure-gfm/gif-1.gif -------------------------------------------------------------------------------- /README_files/figure-gfm/plots-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guga31bb/ngscleanR/9cdb48368d904ec64c8abf52ec942b541b2037aa/README_files/figure-gfm/plots-1.png -------------------------------------------------------------------------------- /data-raw/coverage_labels.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guga31bb/ngscleanR/9cdb48368d904ec64c8abf52ec942b541b2037aa/data-raw/coverage_labels.rds -------------------------------------------------------------------------------- /data-raw/coverages_week1.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guga31bb/ngscleanR/9cdb48368d904ec64c8abf52ec942b541b2037aa/data-raw/coverages_week1.rds -------------------------------------------------------------------------------- /data-raw/create_package_data.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | require(data.table) 3 | 4 | # ngs highlights 5 | # https://github.com/asonty/ngs_highlights 6 | df_ngs <- as.data.frame(fread("data-raw/2018_ARI_2018092311_141.tsv.txt")) %>% 7 | as_tibble() 8 | 9 | saveRDS(df_ngs, "data-raw/sample_ngs.rds") 10 | 11 | # 2019 bdb 12 | bdb19 <- read_csv(url("https://github.com/nfl-football-ops/Big-Data-Bowl/blob/master/Data/tracking_gameId_2017090700.csv?raw=true")) 13 | 14 | bdb19 %>% 15 | saveRDS("data-raw/sample_bdb_2019.rds") 16 | 17 | # 2020 bdb 18 | bdb20 <- read_csv("data-raw/bdb_2020.csv") 19 | 20 | bdb20 %>% 21 | filter(GameId == 2019112411) %>% 22 | saveRDS("data-raw/sample_bdb_2020.rds") 23 | 24 | # 2021 bdb 25 | bdb21 <- suppressMessages(readr::read_csv(glue::glue("../nfl-big-data-bowl-2021/input/week{week}.csv"))) 26 | 27 | bdb21 %>% 28 | # filter(gameId == 2018120600) %>% 29 | saveRDS("data-raw/sample_bdb_2021.rds") 30 | 31 | # # # 32 | # create old nflfastR data 33 | 34 | load_nflfastr <- function(y) { 35 | 36 | .url <- glue::glue("https://github.com/nflverse/nflfastR-data/blob/master/data/play_by_play_{y}.rds?raw=true") 37 | con <- url(.url) 38 | pbp <- readRDS(con) 39 | close(con) 40 | return(pbp) 41 | 42 | } 43 | 44 | message(glue::glue("Getting nflfastR data")) 45 | pbp <- map_df(2017:2020, load_nflfastr) %>% 46 | dplyr::rename(nflfastr_game_id = game_id, game_id = old_game_id) %>% 47 | dplyr::select( 48 | nflfastr_game_id, 49 | game_id, 50 | play_id, 51 | week, 52 | posteam, 53 | home_team, 54 | away_team, 55 | down, 56 | ydstogo, 57 | yardline_100, 58 | qtr, 59 | epa, 60 | yards_gained, 61 | air_yards, 62 | desc, 63 | pass, 64 | rush, 65 | play_type_nfl 66 | ) %>% 67 | dplyr::mutate(game_id = as.integer(game_id)) 68 | 69 | # put in use_data instead 70 | # saveRDS(pbp, "data-raw/nflfastr_plays.rds") 71 | 72 | 73 | # # # 74 | # create coverage labels 75 | labels <- read_csv("../nfl-big-data-bowl-2021/input/coverages_2018.csv") %>% 76 | mutate(coverage = case_when( 77 | coverage == "3 Seam" ~ "Cover 3 Zone", 78 | coverage == "Cover 1 Double" ~ "Cover 1 Man", 79 | coverage %in% c("Red Zone", "Goal Line") ~ "Red zone / goal line", 80 | coverage == "Mis" | is.na(coverage) ~ "Other / misc", 81 | TRUE ~ coverage 82 | )) %>% 83 | filter(coverage %in% c( 84 | "Cover 0 Man", 85 | "Cover 1 Man", 86 | "Cover 2 Man", 87 | "Cover 2 Zone", 88 | "Cover 3 Zone", 89 | "Cover 4 Zone", 90 | "Cover 6 Zone", 91 | "Bracket", 92 | "Prevent" 93 | )) 94 | 95 | labels %>% 96 | saveRDS("data-raw/coverage_labels.rds") 97 | 98 | # from telemetry 99 | readr::read_csv("../nfl-big-data-bowl-2021/input/coverages_week1.csv") %>% 100 | saveRDS("data-raw/coverages_week1.rds") 101 | 102 | 103 | # get team colors and logo for joining 104 | colors <- nflfastR::teams_colors_logos %>% 105 | select(team_name = team_abbr, team_color, team_color2, team_logo_espn) 106 | 107 | nfl_field <- sportyR::geom_football( 108 | 'nfl', 109 | # the CC at the end gives the field lower alpha 110 | grass_color = "#196f0cCC" 111 | ) 112 | 113 | usethis::use_data( 114 | pbp, colors, nfl_field, 115 | internal = TRUE, overwrite = TRUE 116 | ) 117 | 118 | 119 | -------------------------------------------------------------------------------- /data-raw/final_predict_stage.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(torch) 3 | 4 | test_weeks <- c(1) 5 | n_kfold <- 10 6 | 7 | augment_data <- function(df, 8 | # stuff that will be multiplied by -1 (eg Sy) 9 | flip_indices = c(4, 6, 9, 11, 13), 10 | # raw y location 11 | subtract_indices = c(2)) { 12 | 13 | 14 | # indices of the elements that need to be flipped 15 | t <- torch_ones_like(df) 16 | t[ , , flip_indices, , ] <- -1 17 | 18 | # first fix: multiply by -1 where needed (stuff like speed in Y direction) 19 | flipped <- df * t 20 | 21 | # for flipping Y itself, need to do 160/3 - y 22 | t <- torch_zeros_like(df) 23 | t[ , , subtract_indices, , ] <- 160 / 3 24 | 25 | # second fix: flip around y 26 | flipped[ , , subtract_indices, , ] <- t[ , , subtract_indices, , ] - flipped[ , , subtract_indices, , ] 27 | 28 | return(flipped) 29 | } 30 | 31 | # have to download these from kaggle after training there (or wherever they're trained) 32 | models <- map(1:n_kfold, ~ { 33 | torch_load(glue::glue("data-raw/model_stuff/data/models/best_model_{.x}.pt")) 34 | }) 35 | 36 | train_x <- torch_load("data-raw/model_stuff/data/train_x.pt") 37 | train_y <- torch_load("data-raw/model_stuff/data/train_y.pt") 38 | 39 | # created in coverage_classifier_make_tensors.R 40 | frame_lengths <- readRDS("data-raw/model_stuff/data/valid_plays.rds") %>% 41 | group_by(i) %>% summarize(n_frames = n()) %>% ungroup() %>% 42 | pull(n_frames) 43 | 44 | # created in coverage_classifier_make_tensors.R 45 | test_idx <- readRDS("data-raw/model_stuff/data/valid_plays.rds") %>% 46 | filter(week %in% test_weeks) %>% 47 | select(i) %>% distinct() %>% pull(i) 48 | 49 | test_x <- train_x[test_idx, ..] 50 | test_y <- train_y[test_idx] 51 | test_dims <- torch_tensor(frame_lengths[test_idx]) 52 | 53 | n_class <- n_distinct(as.matrix(train_y)) 54 | 55 | rm(train_x) 56 | rm(train_y) 57 | gc() 58 | 59 | message(glue::glue("--------------- TEST STAGE ------------.")) 60 | 61 | labels <- test_y %>% 62 | as.matrix() %>% 63 | as_tibble() %>% 64 | set_names("label") 65 | 66 | test_data_augmented <- augment_data(test_x) 67 | 68 | # folds x obs x class 69 | output <- torch_zeros(n_kfold, dim(test_x)[1], n_class) 70 | 71 | # get augmented prediction for each fold 72 | walk(1:n_kfold, ~ { 73 | message(glue::glue("Doing number {.x}")) 74 | output[.x, ..] <- (models[[.x]](test_x, test_dims) + models[[.x]](test_data_augmented, test_dims)) / 2 75 | }) 76 | 77 | # average prediction over folds 78 | predictions <- (1 / n_kfold) * torch_sum(output, 1) 79 | predictions <- as.matrix(predictions) 80 | 81 | predictions <- predictions %>% 82 | as_tibble() %>% 83 | transform(prediction = max.col(predictions)) %>% 84 | bind_cols(labels) %>% 85 | mutate(correct = ifelse(prediction == label, 1, 0)) %>% 86 | as_tibble() %>% 87 | mutate(high = pmax(V1, V2, V3, V4, V5, V6, V7, V8, V9)) 88 | 89 | message(glue::glue("Week 1 test: {round(100*mean(predictions$correct), 1)}% correct")) 90 | 91 | # 86.8 92 | 93 | 94 | -------------------------------------------------------------------------------- /data-raw/generate_silence_eval_notes.R: -------------------------------------------------------------------------------- 1 | # thank you 2 | # https://stackoverflow.com/questions/58026637/no-visible-global-function-definition-for 3 | get_missing_global_variables <- function(wd = getwd()) { 4 | 5 | # Run devtools::check() and reprex the results 6 | check_output <- reprex::reprex(input = sprintf("devtools::check(pkg = '%s', vignettes = FALSE)\n", wd), 7 | comment = "") 8 | 9 | # Get the lines which are notes about missing global variables, extract the variables and 10 | # construct a vector as a string 11 | missing_global_vars <- check_output %>% 12 | stringr::str_squish() %>% 13 | paste(collapse = " ") %>% 14 | stringr::str_extract_all("no visible binding for global variable '[^']+'") %>% 15 | `[[`(1) %>% 16 | stringr::str_extract("'.+'$") %>% 17 | stringr::str_remove("^'") %>% 18 | stringr::str_remove("'$") %>% 19 | unique() %>% 20 | sort() 21 | 22 | # Get a vector to paste into `globalVariables()` 23 | to_print <- if (length(missing_global_vars) == 0) { 24 | "None" 25 | } else { 26 | missing_global_vars %>% 27 | paste0('"', ., '"', collapse = ", \n ") %>% 28 | paste0("c(", ., ")") 29 | } 30 | 31 | # Put the global variables in the console 32 | cat("Missing global variables:\n", to_print) 33 | 34 | # Return the results of the check 35 | invisible(missing_global_vars) 36 | 37 | } 38 | 39 | # get the names 40 | v <- get_missing_global_variables() 41 | 42 | # write to file 43 | sink("R/silence_tidy_eval_notes.R") 44 | 45 | for (i in 1:length(v)) { 46 | message(glue::glue("i {i} length {length(v)}")) 47 | 48 | if (i != length(v)) { 49 | cat(glue::glue("{v[i]} <-")) 50 | cat("\n") 51 | } else { 52 | cat(glue::glue("{v[i]} <- NULL")) 53 | } 54 | 55 | 56 | } 57 | 58 | sink() 59 | 60 | -------------------------------------------------------------------------------- /data-raw/model_stuff/coverage_classifier.R: -------------------------------------------------------------------------------- 1 | # scheduler 2 | # https://torchvision.mlverse.org/articles/examples/tinyimagenet-alexnet.html 3 | 4 | source("R/coverage_classifier_functions.R") 5 | 6 | # for deciding whether to augment data and saying which features need it 7 | augment = TRUE 8 | device <- if(cuda_is_available()) "cuda" else "cpu" 9 | 10 | # get tensors 11 | train_x <- torch_load("data/train_x_one_frame.pt") 12 | train_y <- torch_load("data/train_y.pt") 13 | 14 | # get pre-saved lengths 15 | lengths <- readRDS("data/data_sizes_one_frame.rds") 16 | 17 | test_length <- lengths$test_length 18 | plays <- lengths$plays 19 | 20 | input_channels <- dim(train_x)[2] 21 | 22 | test_length 23 | plays 24 | 25 | input_channels 26 | 27 | # right now we have tensors for train_x and train_y that also include test data (week 1) 28 | dim(train_x) 29 | dim(train_y) 30 | 31 | # split into test and train 32 | test_x <- train_x[1:test_length, , ] 33 | train_x <- train_x[(test_length + 1) : plays, , ] 34 | 35 | test_y <- train_y[1:test_length] 36 | train_y <- train_y[(test_length + 1) : plays] 37 | 38 | # make plays the length of train data 39 | plays <- dim(train_y) 40 | 41 | # split into train and validation 42 | train_id <- sample(1:plays, ceiling(0.80 * plays)) 43 | valid_id <- setdiff(1:plays, train_id) 44 | 45 | train_data <- train_x[train_id, , , ] 46 | train_label <- train_y[train_id] 47 | 48 | # if you want to augment with flipped data 49 | if (augment) { 50 | 51 | dim(train_data) 52 | train_data_augmented <- augment_data(train_data, c(4, 6, 9, 11, 13), c(2)) 53 | 54 | train_data <- torch_cat(list(train_data, train_data_augmented)) 55 | train_label <- torch_cat(list(train_label, train_label)) 56 | 57 | dim(train_data) 58 | dim(train_label) 59 | } 60 | 61 | 62 | # define dataloader 63 | tracking_dataset <- dataset( 64 | name = "tracking_dataset", 65 | 66 | initialize = function(x_tensor, y_tensor) { 67 | 68 | self$data_x <- x_tensor 69 | self$data_y <- y_tensor 70 | 71 | }, 72 | 73 | .getitem = function(i) { 74 | list("x" = self$data_x[i,], "y" = self$data_y[i]) 75 | }, 76 | 77 | .length = function() { 78 | self$data_y$size()[[1]] 79 | } 80 | ) 81 | 82 | # use dataloaders for train and validation 83 | train_ds <- tracking_dataset(train_data, train_label) 84 | valid_ds <- tracking_dataset(train_x[valid_id, , , ], train_y[valid_id]) 85 | 86 | # Dataloaders 87 | train_dl <- train_ds %>% 88 | dataloader(batch_size = 64, shuffle = TRUE) 89 | 90 | valid_dl <- valid_ds %>% 91 | dataloader(batch_size = 64, shuffle = FALSE) 92 | 93 | model <- net() 94 | 95 | # to test something passing through model 96 | # b <- enumerate(train_dl)[[1]][[1]] 97 | # model(b) 98 | 99 | # For fitting, we use Adam optimizer with a one cycle scheduler over a total of 50 epochs for each fit 100 | # with lower lr being 0.0005 and upper lr being 0.001 and 64 batch size 101 | 102 | # if we need to load (currently broken in torch) 103 | # model <- torch_load("data/model.pt") 104 | 105 | optimizer <- optim_adam(model$parameters, lr = 0.001) 106 | 107 | # decay by about 50% after 15 epochs 108 | scheduler <- lr_step(optimizer, step_size = 1, 0.95) 109 | 110 | # 80% at 8 epochs and 10 111 | # XX at 10 112 | 113 | # cb <- luz_callback_lr_scheduler(torch::lr_step, step_size = 1) 114 | # 115 | # # luz 116 | # fitted <- net %>% 117 | # setup( 118 | # loss = nnf_cross_entropy, 119 | # optimizer = optim_adam 120 | # ) %>% 121 | # fit(train_dl, epochs = 1, valid_data = valid_dl) 122 | 123 | epochs <- 1 124 | for (epoch in 1:epochs) { 125 | 126 | train_losses <- c() 127 | valid_losses <- c() 128 | valid_accuracies <- c() 129 | 130 | # train step 131 | model$train() 132 | for (b in enumerate(train_dl)) { 133 | optimizer$zero_grad() 134 | loss <- nnf_cross_entropy(model(b[[1]]), b[[2]]) 135 | loss$backward() 136 | optimizer$step() 137 | train_losses <- c(train_losses, loss$item()) 138 | } 139 | 140 | # validation step 141 | model$eval() 142 | for (b in enumerate(valid_dl)) { 143 | 144 | output <- model(b[[1]]$to(device = device)) 145 | y <- b[[2]]$to(device = device) 146 | 147 | valid_losses <- c(valid_losses, nnf_cross_entropy(output, y)$item()) 148 | 149 | pred <- torch_max(output, dim = 2)[[2]] 150 | correct <- (pred == y)$sum()$item() 151 | valid_accuracies <- c(valid_accuracies, correct/length(y)) 152 | } 153 | 154 | scheduler$step() 155 | cat(sprintf("\nLoss at epoch %d: training: %1.4f, validation: %1.4f, validation accuracy %1.4f", epoch, mean(train_losses), mean(valid_losses), mean(valid_accuracies))) 156 | 157 | # who knows if this does anything 158 | gc() 159 | } 160 | 161 | # for loading model 162 | # model <- torch_load("data/model.pt") 163 | 164 | # evaluate on test set 165 | model$eval() 166 | 167 | labels <- test_y %>% 168 | as.matrix() %>% 169 | as_tibble() %>% 170 | set_names("label") 171 | 172 | output <- model(test_x) 173 | 174 | 175 | # testing only 176 | # output_augmented <- model(train_data_augmented) 177 | # predictions <- as.matrix(output_augmented) 178 | # end test 179 | 180 | predictions <- as.matrix(output) 181 | 182 | predictions <- predictions %>% 183 | as_tibble() %>% 184 | transform(prediction = max.col(predictions)) %>% 185 | bind_cols(labels) %>% 186 | mutate(correct = ifelse(prediction == label, 1, 0)) %>% 187 | as_tibble() 188 | 189 | message(glue::glue("Week 1 test: {round(100*mean(predictions$correct), 0)}% correct")) 190 | 191 | # augmented preds 192 | test_data_augmented <- augment_data(test_x, c(4, 6, 9, 11, 13), c(2)) 193 | 194 | output_augmented <- model(test_data_augmented) 195 | output <- (output + output_augmented) / 2 196 | 197 | predictions <- as.matrix(output$to(device = "cpu")) 198 | 199 | predictions <- predictions %>% 200 | as_tibble() %>% 201 | transform(prediction = max.col(predictions)) %>% 202 | bind_cols(labels) %>% 203 | mutate(correct = ifelse(prediction == label, 1, 0)) %>% 204 | as_tibble() %>% 205 | mutate( 206 | label = as.factor(label), 207 | prediction = as.factor(prediction) 208 | ) 209 | 210 | message(glue::glue("Week 1 augmented test: {round(100*mean(predictions$correct), 0)}% correct")) 211 | 212 | # confusion matrix 213 | # tab <- predictions %>% 214 | # filter( 215 | # !label %in% c(1, 9), 216 | # !prediction %in% c(1, 9) 217 | # ) %>% 218 | # mutate( 219 | # label = as.factor(label), 220 | # prediction = as.factor(prediction) 221 | # ) 222 | # 223 | # levels(tab$label) <- 224 | # c("C0m", "C1m", "C2m", "C2z", "C3z", "C4z", "C6z") 225 | # levels(tab$prediction) <- 226 | # c("C0m", "C1m", "C2m", "C2z", "C3z", "C4z", "C6z") 227 | # 228 | # conf_mat <- caret::confusionMatrix(tab$prediction, tab$label) 229 | # conf_mat$table %>% 230 | # broom::tidy() %>% 231 | # dplyr::rename( 232 | # Target = Reference, 233 | # N = n 234 | # ) %>% 235 | # cvms::plot_confusion_matrix( 236 | # add_sums = TRUE, place_x_axis_above = FALSE, 237 | # add_normalized = FALSE) 238 | 239 | # predictions %>% head(20) 240 | 241 | 242 | 243 | # coverage n 244 | # 245 | # 1 Bracket 94 246 | # 2 Cover 0 Man 459 247 | # 3 Cover 1 Man 5870 248 | # 4 Cover 2 Man 612 249 | # 5 Cover 2 Zone 2490 250 | # 6 Cover 3 Zone 7312 251 | # 7 Cover 4 Zone 2079 252 | # 8 Cover 6 Zone 1458 253 | # 9 Prevent 110 254 | -------------------------------------------------------------------------------- /data-raw/model_stuff/coverage_classifier_functions.R: -------------------------------------------------------------------------------- 1 | # number of features that only depend on def player 2 | # dist_from_los, y, s_x, s_y, a_x, a_y, o_to_qb 3 | # . subtract . -1 . -1 . 4 | 5 | # number of features that depend on defense and offense player 6 | # rel x, rel y, rel sx, rel sy, rel ax, rel ay 7 | # . -1 . -1 . -1 8 | 9 | # dist_from_los 10 | # y 11 | # s_x 12 | # s_y 13 | # a_x 14 | # a_y 15 | # o_to_qb 16 | # diff_x 17 | # diff_y 18 | # diff_s_x 19 | # diff_s_y 20 | # diff_a_x 21 | # diff_a_y 22 | 23 | augment_data <- function(df, 24 | # stuff that will be multiplied by -1 (eg Sy) 25 | flip_indices = c(4, 6, 9, 11, 13), 26 | # raw y location 27 | subtract_indices = c(2)) { 28 | 29 | # testing 30 | # df <- train_data 31 | 32 | # x, y, sx, sy, o_qb, los_dist, xdiff, ydiff, sxdiff, sydiff 33 | # 1, XX, 1, -1, 1, 1, 1, -1, 1, -1 34 | 35 | # indices of the elements that need to be flipped 36 | 37 | t <- torch_ones_like(df) 38 | t[, flip_indices, , ] <- -1 39 | 40 | # first fix: multiply by -1 where needed 41 | flipped <- df * t 42 | 43 | # now flip y coordinates: 2nd feature dimension 44 | t <- torch_zeros_like(df) 45 | t[, subtract_indices, , ] <- 160/3 46 | 47 | # flip around y 48 | flipped[, subtract_indices, , ] <- t[, subtract_indices, , ] - flipped[, subtract_indices, , ] 49 | 50 | return(flipped) 51 | 52 | } 53 | 54 | 55 | # define dataloader 56 | tracking_dataset <- dataset( 57 | name = "tracking_dataset", 58 | 59 | initialize = function(x_tensor, y_tensor) { 60 | 61 | self$data_x <- x_tensor 62 | self$data_y <- y_tensor 63 | 64 | }, 65 | 66 | .getitem = function(i) { 67 | list("x" = self$data_x[i,], "y" = self$data_y[i]) 68 | }, 69 | 70 | .length = function() { 71 | self$data_y$size()[[1]] 72 | } 73 | ) 74 | 75 | # define dataloader 76 | tracking_dataset_t <- dataset( 77 | name = "tracking_dataset", 78 | 79 | initialize = function(x_tensor, y_tensor, dims) { 80 | 81 | self$data_x <- x_tensor 82 | self$data_y <- y_tensor 83 | self$dims <- dims 84 | 85 | }, 86 | 87 | .getitem = function(i) { 88 | list("x" = self$data_x[i,], "y" = self$data_y[i], "t" = self$dims[i]) 89 | }, 90 | 91 | .length = function() { 92 | self$data_y$size()[[1]] 93 | } 94 | ) 95 | 96 | 97 | 98 | net <- nn_module( 99 | "Net", 100 | 101 | initialize = function() { 102 | 103 | self$conv_block_1 <- nn_sequential( 104 | nn_conv2d( 105 | in_channels = input_channels, 106 | out_channels = 128, 107 | kernel_size = 1 108 | ), 109 | nn_relu(inplace = TRUE), 110 | nn_conv2d( 111 | in_channels = 128, 112 | out_channels = 160, 113 | kernel_size = 1 114 | ), 115 | nn_relu(inplace = TRUE), 116 | nn_conv2d( 117 | in_channels = 160, 118 | out_channels = 128, 119 | kernel_size = 1 120 | ), 121 | nn_relu(inplace = TRUE), 122 | ) 123 | 124 | self$conv_block_2 <- nn_sequential( 125 | nn_batch_norm1d(128), 126 | nn_conv1d( 127 | in_channels = 128, 128 | out_channels = 160, 129 | kernel_size = 1 130 | ), 131 | nn_relu(inplace = TRUE), 132 | nn_batch_norm1d(160), 133 | nn_conv1d( 134 | in_channels = 160, 135 | out_channels = 96, 136 | kernel_size = 1 137 | ), 138 | nn_relu(inplace = TRUE), 139 | nn_batch_norm1d(96), 140 | nn_conv1d( 141 | in_channels = 96, 142 | out_channels = 96, 143 | kernel_size = 1 144 | ), 145 | nn_relu(inplace = TRUE), 146 | nn_batch_norm1d(96) 147 | ) 148 | 149 | self$linear_block <- nn_sequential( 150 | nn_linear(96, 96), 151 | nn_relu(inplace = TRUE), 152 | nn_batch_norm1d(96), 153 | 154 | nn_linear(96, 256), 155 | nn_relu(inplace = TRUE), 156 | 157 | # breaks on current kaggle version 158 | # nn_batch_norm1d(256), 159 | 160 | nn_layer_norm(256), 161 | nn_dropout(p = 0.3), 162 | 163 | nn_linear(256, 9) 164 | 165 | ) 166 | 167 | }, 168 | 169 | forward = function(x) { 170 | 171 | # first conv layer 172 | x <- self$conv_block_1(x) 173 | 174 | # first pool layer 175 | avg <- nn_avg_pool2d(kernel_size = c(1, 5))(x) %>% 176 | torch_squeeze(-1) 177 | max <- nn_max_pool2d(kernel_size = c(1, 5))(x) %>% 178 | torch_squeeze(-1) 179 | 180 | x <- 0.7 * avg + 0.3 * max 181 | 182 | # second conv layer 183 | x <- self$conv_block_2(x) 184 | 185 | # second pool layer 186 | avg <- nn_avg_pool1d(kernel_size = 11)(x) %>% 187 | torch_squeeze(-1) 188 | max <- nn_max_pool1d(kernel_size = 11)(x) %>% 189 | torch_squeeze(-1) 190 | 191 | x <- 0.7 * avg + 0.3 * max 192 | 193 | x <- self$linear_block(x) 194 | 195 | x 196 | 197 | } 198 | ) 199 | 200 | 201 | fill_row <- function(df, row) { 202 | 203 | # indices for putting in tensor 204 | i = row$i 205 | f = row$f 206 | 207 | # play info for extracting from df 208 | playid = row$play 209 | frameid = row$frame_id 210 | 211 | play_df <- df %>% 212 | filter(play == playid, frame_id == frameid) %>% 213 | select(-play, -frame_id) 214 | 215 | defenders <- n_distinct(play_df$nfl_id) 216 | n_offense <- nrow(play_df) / defenders 217 | 218 | play_df <- play_df %>% select(-nfl_id) 219 | 220 | train_x[i, f, , 1:defenders, 1:n_offense] <- 221 | torch_tensor(t(play_df))$view(c(-1, defenders, n_offense)) 222 | 223 | } 224 | 225 | 226 | 227 | -------------------------------------------------------------------------------- /data-raw/model_stuff/coverage_classifier_make_tensors.R: -------------------------------------------------------------------------------- 1 | 2 | # need to run this if not installed 3 | # devtools::install_github("guga31bb/ngscleanR") 4 | 5 | library(tidyverse) 6 | library(torch) 7 | library(ngscleanR) 8 | set.seed(2013) 9 | 10 | # number of features that only depend on def player 11 | # dist_from_los, y, s_x, s_y, a_x, a_y, o_to_qb 12 | def_only_features <- 7 13 | # number of features that depend on defense and offense player 14 | # rel x, rel y, rel sx, rel sy, rel ax, rel ay, rel o 15 | off_def_features <- 7 16 | n_features <- def_only_features + off_def_features 17 | 18 | # pull week 1 through this week: 19 | final_week <- 17 20 | 21 | # get labels 22 | labels <- readRDS("data-raw/coverage_labels.rds") %>% 23 | mutate( 24 | play = paste0(game_id, "_", play_id) 25 | ) %>% 26 | filter(!is.na(coverage)) %>% 27 | select(play, coverage) 28 | 29 | # make sure ngscleanR installed (see top) 30 | df <- map_df(1:final_week, ~{ 31 | ngscleanR::prepare_bdb_week( 32 | week = .x, 33 | # where is your big data bowl data saved? 34 | dir = "../nfl-big-data-bowl-2021/input", 35 | # any throw that happens before 1.5 seconds after snap is thrown away 36 | trim_frame = 25, 37 | # all frames coming more than 1 second after pass released are thrown away 38 | frames_after_throw = 10, 39 | # let's keep these frames for fun (every 3 frames starting at snap for 12 frames) 40 | keep_frames = seq(11, 45, by = 3) 41 | ) 42 | }) %>% 43 | inner_join(labels, by = "play") 44 | 45 | df 46 | 47 | # put together the df of defense players relative to offense players 48 | offense_df <- df %>% 49 | filter(defense == 0) %>% 50 | select(play, frame_id, o_x = x, o_y = y, o_s_x = s_x, o_s_y = s_y, o_a_x = a_x, o_a_y = a_y) 51 | 52 | defense_df <- df %>% 53 | filter(defense == 1) %>% 54 | select(play, frame_id, nfl_id, o, x, y, s_x, s_y, a_x, a_y, o_to_qb, dist_from_los) 55 | 56 | rel_df <- defense_df %>% 57 | left_join(offense_df, by = c("play", "frame_id")) %>% 58 | mutate(diff_x = o_x - x, diff_y = o_y - y, diff_s_x = o_s_x - s_x, diff_s_y = o_s_y - s_y, diff_a_x = o_a_x - a_x, diff_a_y = o_a_y - a_y) %>% 59 | compute_o_diff("o") %>% 60 | mutate(o_to_o = o_to_o / 180) %>% 61 | select(play, frame_id, nfl_id, dist_from_los, y, s_x, s_y, a_x, a_y, o_to_qb, starts_with("diff_"), o_to_o) 62 | 63 | rel_df 64 | 65 | 66 | object.size(df) %>% format("MB") 67 | object.size(rel_df) %>% format("MB") 68 | 69 | # offense: 4-5 players 70 | # defense: 5-11 players 71 | # input shape (time steps (t) * n_features (14) * n_defenders (11) * n_non-qb-offense (5)) 72 | 73 | play_indices <- df %>% 74 | select(play, frame_id, play, week, coverage) %>% 75 | unique() %>% 76 | # get play index for 1 : n_plays 77 | mutate( 78 | i = as.integer(as.factor(play)) 79 | ) %>% 80 | # get time step indices 81 | group_by(play) %>% 82 | mutate(f = 1 : n()) %>% 83 | ungroup() 84 | 85 | play_indices 86 | 87 | n_frames <- n_distinct(play_indices$f) 88 | plays <- n_distinct(df$play) 89 | plays 90 | n_frames 91 | 92 | # i, f, features, def, off, 93 | train_x = torch_empty(plays, n_frames, n_features, 11, 5) 94 | 95 | # row <- play_indices %>% dplyr::slice(19684) 96 | 97 | fill_row <- function(row) { 98 | 99 | # indices for putting in tensor 100 | i = row$i 101 | f = row$f 102 | 103 | # play info for extracting from df 104 | playid = row$play 105 | frameid = row$frame_id 106 | 107 | play_df <- rel_df %>% 108 | filter(play == playid, frame_id == frameid) %>% 109 | select(-play, -frame_id) 110 | 111 | defenders <- n_distinct(play_df$nfl_id) 112 | n_offense <- nrow(play_df) / defenders 113 | 114 | play_df <- play_df %>% select(-nfl_id) 115 | 116 | train_x[i, f, , 1:defenders, 1:n_offense] <- 117 | torch_tensor(t(play_df))$view(c(-1, defenders, n_offense)) 118 | 119 | } 120 | 121 | # build the tensor for train and test data 122 | walk(1 : nrow(play_indices), ~{ 123 | if(.x %% 250 == 0) { 124 | message(glue::glue("{.x} of {nrow(play_indices)}")) 125 | } 126 | fill_row(play_indices %>% dplyr::slice(.x)) 127 | }) 128 | 129 | 130 | 131 | train_y <- torch_zeros(plays, dtype = torch_long()) 132 | 133 | train_y[1:plays] <- df %>% 134 | mutate(coverage = as.factor(coverage) %>% as.integer()) %>% 135 | group_by(play) %>% 136 | dplyr::slice(1) %>% 137 | ungroup() %>% 138 | pull(coverage) 139 | 140 | dim(train_x) 141 | dim(train_y) 142 | 143 | # save everything 144 | saveRDS(play_indices, "data-raw/model_stuff/data/valid_plays.rds") 145 | torch_save(train_y, "data-raw/model_stuff/data/train_y.pt") 146 | torch_save(train_x, "data-raw/model_stuff/data/train_x.pt") 147 | 148 | 149 | 150 | -------------------------------------------------------------------------------- /data-raw/model_stuff/coverage_classifier_with_time.R: -------------------------------------------------------------------------------- 1 | # 2 | # https://gist.github.com/HarshTrivedi/f4e7293e941b17d19058f6fb90ab0fec 3 | 4 | # scheduler 5 | # https://torchvision.mlverse.org/articles/examples/tinyimagenet-alexnet.html 6 | 7 | source("R/coverage_classifier_functions.R") 8 | 9 | # for deciding whether to augment data and saying which features need it 10 | augment = FALSE 11 | 12 | packageVersion("torch") 13 | device <- if(cuda_is_available()) "cuda" else "cpu" 14 | device 15 | 16 | # get tensors 17 | train_x <- torch_load("data/train_x.pt") 18 | train_y <- torch_load("data/train_y.pt") 19 | 20 | # get pre-saved lengths 21 | lengths <- readRDS("data/data_sizes.rds") 22 | frame_lengths <- readRDS("data/valid_plays.rds") %>% 23 | group_by(i) %>% summarize(n_frames = n()) %>% ungroup() %>% 24 | pull(n_frames) 25 | 26 | test_length <- lengths$test_length 27 | plays <- lengths$plays 28 | 29 | input_channels <- dim(train_x)[3] 30 | 31 | test_length 32 | plays 33 | 34 | input_channels 35 | 36 | # right now we have tensors for train_x and train_y that also include test data (week 1) 37 | dim(train_x) 38 | dim(train_y) 39 | 40 | # split into test and train 41 | test_x <- train_x[1:test_length, ..] 42 | test_y <- train_y[1:test_length] 43 | test_dims <- torch_tensor(frame_lengths[1:test_length]) 44 | 45 | train_x <- train_x[(test_length + 1) : plays, ..] 46 | train_y <- train_y[(test_length + 1) : plays] 47 | train_dims <- frame_lengths[(test_length + 1) : plays] 48 | 49 | # make plays the length of train data 50 | plays <- dim(train_y) 51 | 52 | # split into train and validation 53 | train_id <- sample(1:plays, ceiling(0.80 * plays)) 54 | valid_id <- setdiff(1:plays, train_id) 55 | 56 | # get all the data read 57 | valid_data <- train_x[valid_id, ..] 58 | valid_label <- train_y[valid_id] 59 | valid_dims <- torch_tensor(train_dims[valid_id]) 60 | 61 | train_data <- train_x[train_id, ..] 62 | train_label <- train_y[train_id] 63 | train_dims <- torch_tensor(train_dims[train_id]) 64 | 65 | # do the function 66 | 67 | # use dataloaders for train and validation 68 | train_ds <- tracking_dataset_t(train_data, train_label, train_dims) 69 | valid_ds <- tracking_dataset_t(valid_data, valid_label, valid_dims) 70 | 71 | # Dataloaders 72 | train_dl <- train_ds %>% 73 | dataloader(batch_size = 64, shuffle = TRUE) 74 | 75 | valid_dl <- valid_ds %>% 76 | dataloader(batch_size = 64, shuffle = FALSE) 77 | 78 | model <- full_model() 79 | model$to(device = device) 80 | 81 | # to test something passing through model 82 | # b <- enumerate(train_dl)[[1]][[1]] 83 | # model(b) 84 | 85 | # For fitting, we use Adam optimizer with a one cycle scheduler over a total of 50 epochs for each fit 86 | # with lower lr being 0.0005 and upper lr being 0.001 and 64 batch size 87 | 88 | # if we need to load (currently broken in torch) 89 | # model <- torch_load("data/model.pt") 90 | 91 | optimizer <- optim_adam(model$parameters, lr = 0.001) 92 | 93 | # decay by about 50% after 15 epochs 94 | scheduler <- lr_step(optimizer, step_size = 1, 0.975) 95 | 96 | # 80% at 8 epochs and 10 97 | # XX at 10 98 | 99 | epochs <- 1 100 | for (epoch in 1:epochs) { 101 | 102 | train_losses <- c() 103 | valid_losses <- c() 104 | 105 | # train step 106 | model$train() 107 | for (b in enumerate(train_dl)) { 108 | optimizer$zero_grad() 109 | 110 | # message("trying loss") 111 | loss <- nnf_cross_entropy(model(b[[1]]$to(device = device), b[[3]]$to(device = device)), b[[2]]$to(device = device)) 112 | 113 | # message("trying backward") 114 | loss$backward() 115 | 116 | # message("trying step") 117 | optimizer$step() 118 | train_losses <- c(train_losses, loss$item()) 119 | 120 | } 121 | 122 | # message("got train losses") 123 | 124 | # validation step 125 | model$eval() 126 | for (b in enumerate(valid_dl)) { 127 | 128 | # message("trying valid losses") 129 | loss <- nnf_cross_entropy(model(b[[1]]$to(device = device), b[[3]]$to(device = device)), b[[2]]$to(device = device)) 130 | valid_losses <- c(valid_losses, loss$item()) 131 | } 132 | 133 | scheduler$step() 134 | cat(sprintf("\nLoss at epoch %d: training: %1.4f, validation: %1.4f", epoch, mean(train_losses), mean(valid_losses))) 135 | 136 | # who knows if this does anything 137 | gc() 138 | } 139 | 140 | # move to cpu for saving 141 | model$to(device = "cpu") 142 | torch_save(model, "model.pt") 143 | 144 | # put back 145 | model$to(device = device) 146 | 147 | 148 | # for loading model 149 | # model <- torch_load("data/model.pt") 150 | 151 | # evaluate on test set 152 | model$eval() 153 | 154 | labels <- test_y %>% 155 | as.matrix() %>% 156 | as_tibble() %>% 157 | set_names("label") 158 | 159 | output <- model(test_x$to(device = device), test_dims$to(device = device)) 160 | 161 | # testing only 162 | # output_augmented <- model(train_data_augmented) 163 | # predictions <- as.matrix(output_augmented) 164 | # end test 165 | 166 | predictions <- as.matrix(output$to(device = "cpu")) 167 | 168 | predictions <- predictions %>% 169 | as_tibble() %>% 170 | transform(prediction = max.col(predictions)) %>% 171 | bind_cols(labels) %>% 172 | mutate(correct = ifelse(prediction == label, 1, 0)) %>% 173 | as_tibble() 174 | 175 | message(glue::glue("Week 1 test: {round(100*mean(predictions$correct), 0)}% correct")) 176 | 177 | 178 | # confusion matrix 179 | # tab <- predictions %>% 180 | # filter( 181 | # !label %in% c(1, 9), 182 | # !prediction %in% c(1, 9) 183 | # ) %>% 184 | # mutate( 185 | # label = as.factor(label), 186 | # prediction = as.factor(prediction) 187 | # ) 188 | # 189 | # levels(tab$label) <- 190 | # c("C0m", "C1m", "C2m", "C2z", "C3z", "C4z", "C6z") 191 | # levels(tab$prediction) <- 192 | # c("C0m", "C1m", "C2m", "C2z", "C3z", "C4z", "C6z") 193 | # 194 | # conf_mat <- caret::confusionMatrix(tab$prediction, tab$label) 195 | # conf_mat$table %>% 196 | # broom::tidy() %>% 197 | # dplyr::rename( 198 | # Target = Reference, 199 | # N = n 200 | # ) %>% 201 | # cvms::plot_confusion_matrix( 202 | # add_sums = TRUE, place_x_axis_above = FALSE, 203 | # add_normalized = FALSE) 204 | 205 | # predictions %>% head(20) 206 | 207 | 208 | 209 | # coverage n 210 | # 211 | # 1 Bracket 94 212 | # 2 Cover 0 Man 459 213 | # 3 Cover 1 Man 5870 214 | # 4 Cover 2 Man 612 215 | # 5 Cover 2 Zone 2490 216 | # 6 Cover 3 Zone 7312 217 | # 7 Cover 4 Zone 2079 218 | # 8 Cover 6 Zone 1458 219 | # 9 Prevent 110 220 | -------------------------------------------------------------------------------- /data-raw/model_stuff/plot_random_plays.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(torch) 3 | source("R/cleaning_functions.R") 4 | source("R/plotting_functions.R") 5 | set.seed(2013) 6 | 7 | # if you just want to keep one frame from each play 8 | # frame_number is (frame_number - 10) seconds into the play 9 | one_frame = FALSE 10 | 11 | # all frames will be in this range 12 | # cut play off here (frame_number - 10) frames after snap 13 | start_frame_number <- 0 14 | end_frame_number <- 45 15 | 16 | keep_frames <- c(1:40) 17 | 18 | # pull week 1 through this week: 19 | final_week <- 1 20 | 21 | labels <- readRDS("data-raw/coverage_labels.rds") %>% mutate(play = paste0(game_id, "_", play_id)) 22 | 23 | get_bdb <- function(w) { 24 | df <- read_csv(glue::glue("../nfl-big-data-bowl-2021/input/week{w}.csv")) %>% 25 | 26 | # do all the cleaning 27 | wrapper() %>% 28 | 29 | cut_plays(throw_frame = 20) %>% 30 | 31 | mutate(play = paste0(game_id, "_", play_id)) 32 | 33 | return(df) 34 | 35 | } 36 | 37 | df <- map_df(1:final_week, get_bdb) 38 | 39 | df 40 | 41 | library(patchwork) 42 | 43 | 44 | ex <- sample(df$play, 4) 45 | 46 | plots <- map(ex, ~{ 47 | lab <- labels %>% filter(play == .x) %>% dplyr::pull(coverage) 48 | plot <- df %>% 49 | filter(frame_id == 28, play == .x) %>% 50 | plot_play( 51 | animated = FALSE, 52 | segment_length = 4, 53 | segment_size = 2, 54 | dot_size = 4 55 | 56 | ) 57 | 58 | plot + 59 | labs(title = lab) + 60 | theme(plot.title = element_blank(), 61 | plot.caption = element_blank(), 62 | plot.margin = unit(c(0, 0, 0, 0), "cm") 63 | ) 64 | }) 65 | 66 | (plots[[1]] + plots[[2]] )/ (plots[[3]] + plots[[4]] ) 67 | 68 | 69 | ggsave("plot.png", width = 10, height = 5, units = "in", dpi = 300) 70 | 71 | -------------------------------------------------------------------------------- /data-raw/nflfastr_plays.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guga31bb/ngscleanR/9cdb48368d904ec64c8abf52ec942b541b2037aa/data-raw/nflfastr_plays.rds -------------------------------------------------------------------------------- /data-raw/sample_bdb_2019.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guga31bb/ngscleanR/9cdb48368d904ec64c8abf52ec942b541b2037aa/data-raw/sample_bdb_2019.rds -------------------------------------------------------------------------------- /data-raw/sample_bdb_2020.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guga31bb/ngscleanR/9cdb48368d904ec64c8abf52ec942b541b2037aa/data-raw/sample_bdb_2020.rds -------------------------------------------------------------------------------- /data-raw/sample_bdb_2021.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guga31bb/ngscleanR/9cdb48368d904ec64c8abf52ec942b541b2037aa/data-raw/sample_bdb_2021.rds -------------------------------------------------------------------------------- /data-raw/sample_ngs.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guga31bb/ngscleanR/9cdb48368d904ec64c8abf52ec942b541b2037aa/data-raw/sample_ngs.rds -------------------------------------------------------------------------------- /man/clean_and_rotate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cleaning_functions.R 3 | \name{clean_and_rotate} 4 | \alias{clean_and_rotate} 5 | \title{Standardize direction and add play information} 6 | \usage{ 7 | clean_and_rotate(df) 8 | } 9 | \arguments{ 10 | \item{df}{A dataframe of player tracking data obtained from a Big Data Bowl 11 | or NGS highlights} 12 | } 13 | \value{ 14 | The original data with the columns below appended. Note that all returned columns will have 15 | cleaned names, including the original columns in \code{df} (e.g. play_id rather than playId), to end the tyranny of weird Big Data Bowl 16 | column names. 17 | \describe{ 18 | \item{team_name}{Values of home team (eg "SEA"), away team (eg "GB"), or "football"} 19 | \item{defense}{Whether player is on defense (football has 0 here)} 20 | \item{play}{Unique play identifier in format "gameid_playid" with gameid old GSIS format. Ex: "2018091000_1101".} 21 | \item{nflfastr_game_id}{Game ID in nflfastR format. Ex: "2018_01_ATL_PHI"} 22 | \item{week}{Week of season} 23 | \item{posteam}{Possession team} 24 | \item{home_team}{Home team (e.g. "PHI")} 25 | \item{away_team}{Away team (e.g. "ATL")} 26 | \item{down}{Down} 27 | \item{ydstogo}{Yards to go} 28 | \item{yardline_100}{Distance from opponent end zone} 29 | \item{qtr}{Quarter} 30 | \item{epa}{Expected Points Added gained on play from nflfastR} 31 | \item{yards_gained}{Yards gained on play} 32 | \item{air_yards}{Air yards (when applicable)} 33 | \item{desc}{Play description} 34 | \item{pass}{Was it a dropback? From nflfastR} 35 | \item{rush}{Was it a designed rush attempt? From nflfastR} 36 | \item{play_type_nfl}{Play type from NFL data. E.g. "PASS", "PENALTY", "RUSH", "SACK", "PUNT", etc.} 37 | \item{team_color}{Primary team color. Useful for play animations} 38 | \item{team_color2}{Secondary team color. Useful for play animations} 39 | \item{team_logo_espn}{URL of team logo} 40 | \item{los_x}{x location of line of scrimmage (e.g. 20 means own 10 yard line)} 41 | \item{dist_from_los}{Distance of player from line of scirmmage in x direction} 42 | \item{o_x}{Orientation of player in x direction} 43 | \item{o_y}{Orientation of player in y direction} 44 | \item{dir_x}{Direction of player in x direction} 45 | \item{dir_y}{Direction of player in y direction} 46 | \item{s_x}{Speed of player in x direction} 47 | \item{s_y}{Speed of player in y direction} 48 | \item{a_x}{Acceleration of player in x direction} 49 | \item{a_y}{Acceleration of player in y direction} 50 | } 51 | } 52 | \description{ 53 | Standardize direction and add play information. 54 | } 55 | -------------------------------------------------------------------------------- /man/compute_o_diff.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cleaning_functions.R 3 | \name{compute_o_diff} 4 | \alias{compute_o_diff} 5 | \title{Compute orientation difference} 6 | \usage{ 7 | compute_o_diff(df, prefix = "qb") 8 | } 9 | \arguments{ 10 | \item{df}{A dataframe containing x, y, o, "prefix"_x, and "prefix"_y} 11 | 12 | \item{prefix}{(default = "qb"). Columns prefix_x and prefix_y must be contained in \code{df}. These columns 13 | contain the x and y locations that will be used to calculate orientation difference.} 14 | } 15 | \value{ 16 | Original dataframe with o_to_"prefix" added, which is the difference in orientation 17 | in degrees between the way the player is facing and where the "prefix" player is (0 is facing 18 | directly at the "prefix" player, 180 is directly away). 19 | } 20 | \description{ 21 | Compute difference in orientation between direction player is currently facing and 22 | orientation if player were facing towards a given x and y location. 23 | } 24 | \examples{ 25 | df <- tibble::tibble("x" = 20, "y" = 30, "o" = 270, "qb_x" = 10, "qb_y" = 25) 26 | df <- compute_o_diff(df) 27 | str(df) 28 | } 29 | -------------------------------------------------------------------------------- /man/cut_plays.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cleaning_functions.R 3 | \name{cut_plays} 4 | \alias{cut_plays} 5 | \title{Trim plays based on events} 6 | \usage{ 7 | cut_plays( 8 | df, 9 | end_events = c("pass_forward", "qb_sack", "qb_strip_sack", "qb_spike", "tackle", 10 | "pass_shovel"), 11 | time_after_event = 0, 12 | throw_frame = 25 13 | ) 14 | } 15 | \arguments{ 16 | \item{df}{A dataframe containing player tracking data with \code{event}, \code{frame_id}, and \code{play} with the latter uniquely identifying plays.} 17 | 18 | \item{end_events}{Events designated as play end events. Defaults are when a pass is thrown or QB's involvement ends in some 19 | other way (sack, strip sack, shovel pass, etc).} 20 | 21 | \item{time_after_event}{Number of frames to keep after the \code{end_events} (default: 0). 22 | Note that there are 10 frames in each second so providing 10 would keep one additional second after a pass was thrown 23 | when using the default end events.} 24 | 25 | \item{throw_frame}{If not NULL, for plays when one of the \code{end_events} happens before this frame, 26 | these plays will be removed from the returned df (default: 25, ie 1.5 seconds 27 | into the play). To not employ play dropping, provide throw_frame = NULL and all of the plays provided in original 28 | \code{df} will be returned.} 29 | } 30 | \value{ 31 | The original df with trimmed frames (and if throw_frame not NULL, the shorter plays removed). 32 | } 33 | \description{ 34 | Trim frames for a play and/or remove plays based on how quickly provided events happen in the play. 35 | } 36 | -------------------------------------------------------------------------------- /man/ngscleanR-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ngscleanR-package.R 3 | \docType{package} 4 | \name{ngscleanR-package} 5 | \alias{ngscleanR} 6 | \alias{ngscleanR-package} 7 | \title{ngscleanR: Helper Functions for Cleaning Player Tracking Data} 8 | \description{ 9 | What the package does (one paragraph). 10 | } 11 | \author{ 12 | \strong{Maintainer}: First Last \email{first.last@example.com} (\href{https://orcid.org/YOUR-ORCID-ID}{ORCID}) 13 | 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/plot_play.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotting_functions.R 3 | \name{plot_play} 4 | \alias{plot_play} 5 | \title{Plot a play} 6 | \usage{ 7 | plot_play( 8 | df_track, 9 | orientation = TRUE, 10 | dot_size = 6, 11 | segment_length = 2.5, 12 | segment_size = 1.5, 13 | numbers = TRUE, 14 | animated = TRUE, 15 | animated_h = 4, 16 | animated_w = 8, 17 | animated_res = 200, 18 | frame = NULL 19 | ) 20 | } 21 | \arguments{ 22 | \item{df_track}{A df of tracking data from one play.} 23 | 24 | \item{orientation}{Show lines representing where player is facing (default = T).} 25 | 26 | \item{dot_size}{Size of player dots (default = 6).} 27 | 28 | \item{segment_length}{Length of orientation segment lines (default = 2.5).} 29 | 30 | \item{segment_size}{Width of orientation segment lines (default = 1.5).} 31 | 32 | \item{numbers}{Show player jersey numbers (default = T).} 33 | 34 | \item{animated}{Whether play is animated, rather than a still frame (default = T).} 35 | 36 | \item{animated_h}{If animated, height of animated image (default = 4).} 37 | 38 | \item{animated_w}{If animated, width of animated image (default = 8).} 39 | 40 | \item{animated_res}{If animated, resolution of animated image (default = 200).} 41 | 42 | \item{frame}{frame_id to plot (default = NULL, ie plot all provided frames).} 43 | } 44 | \description{ 45 | Plot or animate a play. 46 | } 47 | -------------------------------------------------------------------------------- /man/prepare_bdb_week.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cleaning_functions.R 3 | \name{prepare_bdb_week} 4 | \alias{prepare_bdb_week} 5 | \title{Prepare a week of data from the 2021 Big Data Bowl} 6 | \usage{ 7 | prepare_bdb_week( 8 | week, 9 | dir = "../nfl-big-data-bowl-2021/input", 10 | trim_frame = 25, 11 | frames_after_throw = 10, 12 | keep_frames = NULL, 13 | drop_positions = c("QB") 14 | ) 15 | } 16 | \arguments{ 17 | \item{week}{Get and prepare this week of data (1-17)} 18 | 19 | \item{dir}{Location of directory where BDB data lives. Default is unzipped to adjacent directory 20 | (default = "../nfl-big-data-bowl-2021/input")} 21 | 22 | \item{trim_frame}{If a throw, sack, etc happens before this frame, drop the play (default = 25; i.e. before 23 | 1.5 seconds into the play).} 24 | 25 | \item{frames_after_throw}{If a frame happened more than this many frames after throw, drop the frame.} 26 | 27 | \item{keep_frames}{Keep these frames. Default: NULL (ie keep all frames).} 28 | 29 | \item{drop_positions}{Drop these positions from the returned data (default = "QB").} 30 | } 31 | \description{ 32 | Prepare a week of data from the 2021 Big Data Bowl (data from 2018 season). To use this, you'll need to have 33 | the BDB data saved and unzipped somewhere in a directory on your computer. 34 | } 35 | \details{ 36 | Loads raw .csvs from 2021 BDB, cleans, rotates, applies frame trimming, calculates orientation to QB, 37 | drops plays without at least 3 offensive and defensive players. 38 | } 39 | -------------------------------------------------------------------------------- /ngscleanR.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/getting-started.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Getting started" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{getting-started} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>", 14 | fig.width=9, 15 | fig.height=5, 16 | tidy = 'styler' 17 | ) 18 | ``` 19 | 20 | ```{r setup, message=FALSE} 21 | library(ngscleanR) 22 | library(tidyverse) 23 | library(patchwork) 24 | ``` 25 | 26 | ## Load sample week 27 | 28 | ```{r} 29 | tracking <- readRDS("../data-raw/sample_bdb_2021.rds") 30 | str(tracking) 31 | 32 | labs <- readRDS("../data-raw/coverages_week1.rds") %>% 33 | mutate(play = paste0(gameId, "_", playId)) %>% 34 | select(play, coverage) 35 | str(labs) 36 | ``` 37 | 38 | ## The main function 39 | 40 | This will clean up the data, attach some information associated with the play, and make everything face from left ot right. 41 | 42 | ```{r} 43 | cleaned <- tracking %>% 44 | clean_and_rotate() %>% 45 | inner_join(labs, by = "play") 46 | 47 | str(cleaned) 48 | ``` 49 | 50 | ## Play cutting function 51 | 52 | This discards any plays where the throw happens before frame 25 (i.e. 1.5 seconds after snap). 53 | 54 | ```{r} 55 | cleaned <- cleaned %>% 56 | cut_plays( 57 | # get rid of plays with throw before this frame 58 | throw_frame = 25, 59 | # get rid of frames that happen after this many frames after pass released 60 | time_after_event = 10 61 | ) 62 | ``` 63 | 64 | ## Plot some sample plays 65 | 66 | ```{r plots, warning = FALSE, message = FALSE, results = 'hide', fig.keep = 'all', dpi = 400, layout="l-body-outset"} 67 | ex <- sample(cleaned$play, 4) 68 | 69 | plots <- map(ex, ~{ 70 | lab <- cleaned %>% filter(play == .x) %>% dplyr::pull(coverage) 71 | plot <- cleaned %>% 72 | filter(frame_id == 28, play == .x) %>% 73 | plot_play( 74 | animated = FALSE, 75 | segment_length = 6, 76 | segment_size = 3, 77 | dot_size = 4 78 | 79 | ) 80 | 81 | plot + 82 | labs(title = lab) + 83 | theme(plot.title = element_blank(), 84 | plot.caption = element_blank(), 85 | plot.margin = unit(c(0, 0, 0, 0), "cm") 86 | ) 87 | }) 88 | 89 | (plots[[1]] + plots[[2]]) / (plots[[3]] + plots[[4]]) 90 | ``` 91 | 92 | ## The big cleaning function 93 | 94 | ```{r} 95 | prepare_bdb_week( 96 | week = 1, 97 | dir = "../../nfl-big-data-bowl-2021/input", 98 | trim_frame = 25, 99 | frames_after_throw = 10, 100 | keep_frames = c(30), 101 | drop_positions = c("QB") 102 | ) %>% 103 | str() 104 | ``` 105 | 106 | 107 | --------------------------------------------------------------------------------