├── DESCRIPTION ├── NAMESPACE ├── R ├── active_record_linkage.R ├── plus_minus.R ├── scrape_lineups.R └── scrape_sofifa.R ├── README.md ├── man ├── active_comparison.Rd ├── active_insert.Rd ├── add_red_card_info.Rd ├── block_comparison.Rd ├── clean_sofifa_table.Rd ├── create_design_matrix.Rd ├── create_segmentation.Rd ├── get_player_name.Rd ├── get_rpm.Rd ├── get_shot_attempt_by_segment.Rd ├── insert_fifa_name.Rd ├── scrape_lineup.Rd ├── scrape_sofifa_table.Rd └── split_lineup_when_no_stats.Rd ├── tests ├── testthat.R └── testthat │ └── test-sofifa.R └── vignettes └── my-vignette.Rmd /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: PlusMinusData 2 | Title: Downloads play-by-play data from espn.com and performs interactive record linkage 3 | Version: 0.0.0.9000 4 | Authors@R: person("Francesca", "Matano", email = "fmatano@andrew.cmu.edu", role = c("aut", "cre")) 5 | Description: This package allows data driven sports enthusiasts to easily download play-by-play data 6 | from ESPN. 
The functions within this package help getting the data to compute 7 | Plus Minus estimates in soccer 8 | Depends: R (>= 3.4.1) 9 | License: CC0 10 | Encoding: UTF-8 11 | LazyData: true 12 | Suggests: testthat, 13 | knitr, 14 | rmarkdown 15 | RoxygenNote: 6.1.0 16 | Imports: rvest, 17 | xml2 18 | VignetteBuilder: knitr 19 | URL: https://github.com/fmatano/PlusMinusData 20 | 21 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | -------------------------------------------------------------------------------- /R/active_record_linkage.R: --------------------------------------------------------------------------------
#' Block comparison
#'
#' Prints one espn player together with its fifa candidate rows and asks the
#' user which row is the most plausible match. The user types the row name of
#' the selected match, 'y' when a single row is shown and it is a match, or 0
#' when no row is a plausible match.
#' @param records table of records with espn, fifa name and some additional info
#' @return the raw answer typed by the user, as a character string
block_comparison <- function(records) {

  cat("Choose the row you think it's a match. Enter 0 if you think there is no match. \n")
  cat("If there is a single row shown, you can also enter 'y' for yes. \n")

  print(records)
  readline("Which row is a match? ")
}

#' Active comparison between records
#'
#' Shows, one espn player at a time, the candidate rows for that player and
#' keeps asking until the answer is "0" or one of the displayed row names.
#' @param data dataframe with an `espn_name` column, fifa names and possibly
#' additional information
#' @return character vector with one answer per unique espn player: the row
#' name of the chosen match, or "0" when there is no match
active_comparison <- function(data) {

  unique_players <- unique(data$espn_name)
  matches <- character(length(unique_players))

  # seq_along() keeps the loop from running (and prompting) on empty input
  for (ii in seq_along(unique_players)) {
    # drop = FALSE so a one-column input is still printed as a table
    candidates <- data[which(data$espn_name == unique_players[ii]), ,
                       drop = FALSE]
    valid_answers <- c("0", rownames(candidates))

    repeat {
      result <- block_comparison(candidates)

      # 'y' is shorthand for "the only row shown is the match"
      if (nrow(candidates) == 1 && identical(result, "y")) {
        result <- rownames(candidates)
      }
      if (result %in% valid_answers) break

      cat("ATTENTION: The row number you enter isn't valid, please check your answer again. \n")
    }
    matches[ii] <- result
  }

  matches
}


#' Insert fifa name
#'
#' Prints one espn record and asks the user to type the matching fifa name.
#' @param record is the espn name with possibly additional info on the player
#' @return the raw answer typed by the user, as a character string
insert_fifa_name <- function(record) {

  cat("Insert the fifa name for the record shown below or NA if uknown.\n \n")

  print(record)

  readline("Your answer: ")
}

#' Active insertion of fifa names
#'
#' This function allow the user to insert the fifa names for all unique espn players
#' in data
#' @param data dataframe with espn names and possibly additional
#' information
#' @return vector with all answers.
#' Either NA or the answer of the user
active_insert <- function(data) {

  if (!("espn_name" %in% names(data))) stop("espn_name must be in the data")

  unique_players <- unique(data$espn_name)
  matches <- character(length(unique_players))

  # seq_along() keeps the loop from running (and prompting) on empty input
  for (ii in seq_along(unique_players)) {
    player_rows <- which(data$espn_name == unique_players[ii])
    result <- insert_fifa_name(data[player_rows, , drop = FALSE])

    # Any capitalisation of "na" is stored as the string "NA"
    if (toupper(result) == "NA") result <- "NA"
    matches[ii] <- result
  }

  matches
}
-------------------------------------------------------------------------------- /R/plus_minus.R: --------------------------------------------------------------------------------
#' Accounting for red cards
#'
#' Converts the lineup entry/exit times to numbers ("start" becomes 0, "end"
#' becomes the largest `match_time_numeric` in the commentary) and moves the
#' exit time of every carded player to the minute of the card.
#' @param game_commentary from fcscrapR; the columns `card_type`,
#' `card_player` and `match_time_numeric` are read
#' @param game_lineup computed from the package; the columns `lineup`,
#' `time_of_sub_in` and `time_of_sub_out` are read
#' @param type_card is the type of card to consider. It has to be red,
#' left it flexible for testing purposes
#' @return `game_lineup` with numeric `time_of_sub_in` and `time_of_sub_out`
add_red_card_info <- function(game_commentary, game_lineup, type_card = "red") {

  card_rows <- which(game_commentary$card_type == type_card)

  # Transform the line-up start and end markers into numbers
  time_in <- as.character(game_lineup$time_of_sub_in)
  time_in[which(time_in == "start")] <- 0

  time_out <- as.character(game_lineup$time_of_sub_out)
  time_out[which(time_out == "end")] <-
    max(game_commentary$match_time_numeric, na.rm = TRUE)

  game_lineup$time_of_sub_in <- as.numeric(time_in)
  game_lineup$time_of_sub_out <- as.numeric(time_out)

  # A sent-off player leaves the pitch at the minute of the card
  for (card in card_rows) {
    sent_off <- which(game_lineup$lineup == game_commentary$card_player[card])
    if (length(sent_off) > 0) {
      # pmin(): a card shown to a player who already left must not push the
      # exit time later than the substitution
      game_lineup$time_of_sub_out[sent_off] <-
        pmin(game_lineup$time_of_sub_out[sent_off],
             game_commentary$match_time_numeric[card], na.rm = TRUE)
    }
  }

  game_lineup
}

#' Creating segmentation
#'
#' This function creates a new segment every time the configuration
#' on the pitch changes, i.e. at every distinct `time_of_sub_out`
#' @param game_lineup computed from the package, with numeric (or
#' numeric-like) `time_of_sub_out`
#' @return two-column matrix, one row per segment: start and end time. It has
#' zero rows when the lineup has no usable exit time
create_segmentation <- function(game_lineup) {

  out_times <- as.numeric(as.character(game_lineup$time_of_sub_out))

  # sort() also drops exit times that could not be parsed (NA), which would
  # otherwise create a segment with an NA end
  boundaries <- c(0, unique(sort(out_times)))

  # Consecutive boundaries are the start and the end of each segment
  unname(cbind(boundaries[-length(boundaries)], boundaries[-1]))
}

#' Creating the shot attempt by segmentation for both teams
#'
#' This function counts, for every segment and separately for the home and the
#' away team, shots by result and by location, goals, shots on goal, corners,
#' offsides, fouls and free kicks
#' @param game_commentary from fcscrapR
#' @param segmentation_matrix a matrix with each row a segment, each column
#' the start and the end of the segment
#' @return data frame with one row per segment: `ts`, `tf` and one `home_*` /
#' `away_*` column per statistic
get_shot_attempt_by_segment <- function(game_commentary, segmentation_matrix) {

  shot_locations_levels <- c(
    "outside_the_box", "the_centre_of_the_box",
    "right_shot_very_close_range", "the_right_side_of_the_box",
    "the_left_side_of_the_box", "the_left_side_of_the_six_yard_box",
    "the_right_side_of_the_six_yard_box",
    "right_shot_a_difficult_angle_and_long_range",
    "very_close_range", "left_shot_long_range", "left_shot_very_close_range",
    "left_shot_a_difficult_angle_and_long_range",
    "right_shot_long_range", "with_an_attempt_very_close_range"
  )
  # The commentary spells the same locations with spaces
  shot_locations_levels_commentary <- gsub("_", " ", shot_locations_levels,
                                           fixed = TRUE)

  # Creating all the columns: every home_ column first, then every away_ one
  stat_names <- c("shot_saved", "shot_missed", "shot_blocked",
                  shot_locations_levels,
                  "shots_on_goal", "goal", "corner",
                  "offside", "foul", "free_kick")
  other_cols <- c(paste0("home_", stat_names), paste0("away_", stat_names))

  n_segments <- nrow(segmentation_matrix)
  stat_by_segment <- as.data.frame(
    matrix(0, ncol = length(other_cols) + 2, nrow = n_segments)
  )
  colnames(stat_by_segment) <- c("ts", "tf", other_cols)
  stat_by_segment$ts <- segmentation_matrix[, 1]
  stat_by_segment$tf <- segmentation_matrix[, 2]

  # Team one is the home team, team two the away team
  teams <- c(as.character(unique(game_commentary$team_one)),
             as.character(unique(game_commentary$team_two)))

  # Number of events per team (home, away) among the given commentary rows
  count_by_team <- function(team_column, rows) {
    as.vector(table(factor(team_column[rows], levels = teams)))
  }

  event_time <- game_commentary$match_time_numeric

  # Loop through all the segments
  for (seg in seq_len(n_segments)) {
    ts <- stat_by_segment$ts[seg]
    tf <- stat_by_segment$tf[seg]

    # Commentary rows inside [ts, tf); a zero-length segment takes the events
    # recorded exactly at that time.
    # NOTE(review): events recorded exactly at the end of the last segment
    # fall in no segment - confirm this is intended.
    if (tf > ts) {
      rows <- which(event_time >= ts & event_time < tf)
    } else {
      rows <- which(event_time == tf)
    }
    if (length(rows) == 0) next

    # The shootout needs to be deleted or it alters the scoring rate: keep
    # only the rows before it. seq_len() keeps nothing when it comes first.
    shootout <- which(startsWith(as.character(game_commentary$commentary[rows]),
                                 "Penalty Shootout begins"))
    if (length(shootout) > 0) rows <- rows[seq_len(shootout[1] - 1)]

    shooting_team <- factor(game_commentary$shot_by_team[rows], levels = teams)

    # + Shot type
    shot_table <- table(factor(game_commentary$shot_result[rows],
                               levels = c("blocked", "goal", "missed", "saved")),
                        shooting_team)
    for (word in c("blocked", "missed", "saved")) {
      stat_by_segment[seg, paste0(c("home_shot_", "away_shot_"), word)] <-
        as.vector(shot_table[word, ])
    }
    stat_by_segment[seg, c("home_goal", "away_goal")] <-
      as.vector(shot_table["goal", ])

    # Shots on goal are the saved shots plus the goals
    stat_by_segment$home_shots_on_goal[seg] <-
      stat_by_segment$home_shot_saved[seg] + stat_by_segment$home_goal[seg]
    stat_by_segment$away_shots_on_goal[seg] <-
      stat_by_segment$away_shot_saved[seg] + stat_by_segment$away_goal[seg]

    # + Shot location: exact column names, so "very_close_range" cannot also
    # hit "left_shot_very_close_range" and friends
    location_table <- table(factor(game_commentary$shot_where[rows],
                                   levels = shot_locations_levels_commentary),
                            shooting_team)
    stat_by_segment[seg, paste0("home_", shot_locations_levels)] <-
      as.vector(location_table[, 1])
    stat_by_segment[seg, paste0("away_", shot_locations_levels)] <-
      as.vector(location_table[, 2])

    # + Corner, Offside, Foul
    stat_by_segment[seg, c("home_corner", "away_corner")] <-
      count_by_team(game_commentary$corner_team, rows)
    stat_by_segment[seg, c("home_offside", "away_offside")] <-
      count_by_team(game_commentary$offside_team, rows)
    stat_by_segment[seg, c("home_foul", "away_foul")] <-
      count_by_team(game_commentary$foul_by_team, rows)

    # + Free kick: the table is (half of the pitch) x (team); sum over the
    # halves to get one count per team. Assigning the 2 x 2 table directly
    # would store only the home team's two cells.
    free_kick_table <- table(factor(game_commentary$free_kick_where[rows],
                                    levels = c("attacking half", "defensive half")),
                             factor(game_commentary$free_kick_team[rows],
                                    levels = teams))
    stat_by_segment[seg, c("home_free_kick", "away_free_kick")] <-
      as.vector(colSums(free_kick_table))
  }

  stat_by_segment
}






#' Real Plus Minus matrix
#'
#' This function creates the design matrix for a given league, given the lineups
#' and the segmentation matrix: one column per player, 1 when a home player is
#' on the pitch for the whole segment, -1 for an away player, 0 otherwise
#' @param lineups all lineups by league stacked in a data frame with columns
#' `lineup`, `espn_id`, `team`, `time_of_sub_in` and `time_of_sub_out`
#' @param segments all segments by league stacked in a data frame with columns
#' `espn_id`, `ts` and `tf`
#' @return `segments` with one extra column per player (spaces in the player
#' name replaced by underscores)
create_design_matrix <- function(lineups, segments) {

  if (is.null(lineups$espn_id)) stop("The column with espn_id is missing")
  lineups <- lineups[!is.na(lineups$espn_id), ]

  player_names <- gsub(" ", "_", as.character(lineups$lineup))
  players <- unique(player_names)

  # Create the new segment matrix with a column for player
  new_segments <- cbind(segments, matrix(0, nrow(segments), length(players)))
  names(new_segments)[ncol(segments) + seq_along(players)] <- players

  for (i in seq_len(nrow(lineups))) {
    player <- player_names[i]

    # Espn has some error and empty names. If that is the case skip
    if (is.na(player) || nchar(player) <= 1) next

    # Segments of the same game that lie inside the player's time on the
    # pitch; which() ignores segments where a comparison is NA
    on_pitch <- which((new_segments$espn_id == lineups$espn_id[i]) &
                        (new_segments$ts >= lineups$time_of_sub_in[i]) &
                        (new_segments$tf <= lineups$time_of_sub_out[i]))
    new_segments[on_pitch, player] <- ifelse(lineups$team[i] == "home", 1, -1)
  }

  new_segments
}


#' Compute the real-plus minus
#'
#' This function fits a weighted ridge regression of the chosen response on the
#' player columns of the design matrix and returns the player coefficients,
#' keeping only the players above the minutes threshold
#' @param design_matrix 0, -1, 1 matrix with info about shots and goals. Player
#' columns are recognised by containing an upper-case letter
#' @param threshold for minutes, by default 0
#' @param minutes dataframe with minutes info by player, with columns
#' `tot_mins` and `name_underscore`
#' @param response character to chose as response, either goal, or shots; the
#' columns `home_<response>` and `away_<response>` are used
#' @param type one of "both" (home minus away response), "offense" (home
#' response) or "defense" (away response)
#' @return data frame with columns `betas` and `names`, sorted by decreasing
#' `betas`
get_rpm <- function(design_matrix, threshold = 0, minutes, response = "goal", type = "both") {

  if (!(type %in% c("both", "offense", "defense")))
    stop("type can be one of these three: both, offense, defense.")

  response_home <- paste0("home_", response)
  response_away <- paste0("away_", response)

  # Check the exact columns that are read below, not just a partial match
  needed <- switch(type,
                   both = c(response_home, response_away),
                   offense = response_home,
                   defense = response_away)
  if (!all(needed %in% colnames(design_matrix)))
    stop(paste0("The string ", response, " doesn't exist in the design matrix. ",
                "You might have misspelled the word, or used the plural."))

  # Identify where the players info is located in the matrix
  wwhich_design <- grep("[A-Z]", colnames(design_matrix))

  # Segments of zero (or unknown) length would get an infinite weight, so
  # they are left out of the fit
  duration <- design_matrix[, "tf"] - design_matrix[, "ts"]
  usable <- which(duration > 0)
  design_matrix <- design_matrix[usable, , drop = FALSE]
  weights <- 1 / duration[usable]

  # Determine y based on type of rpm
  y <- switch(type,
              both = as.numeric(design_matrix[, response_home] -
                                  design_matrix[, response_away]),
              offense = as.numeric(design_matrix[, response_home]),
              defense = as.numeric(design_matrix[, response_away]))

  x <- as.matrix(design_matrix[, wwhich_design, drop = FALSE])

  # Ridge regression
  model_cv <- glmnet::cv.glmnet(x = x, y = y, weights = weights, alpha = 0)
  model <- glmnet::glmnet(x = x, y = y, weights = weights, alpha = 0,
                          lambda = model_cv$lambda.min)
  coefs <- model$beta

  # Build the dataframe with coeffs. The sparse slot coefs@x holds only the
  # non-zero entries, so it is converted to a dense column first to stay
  # aligned with the names.
  df <- data.frame(betas = as.numeric(as.matrix(coefs)[, 1]),
                   names = coefs@Dimnames[[1]])

  # Subselect based on threshold
  more_than_thresh <- minutes[which(minutes$tot_mins > threshold), "name_underscore"]
  df_thresh <- df[which(df$names %in% as.character(more_than_thresh)), ]

  df_thresh[order(df_thresh$betas, decreasing = TRUE), ]
}
-------------------------------------------------------------------------------- /R/scrape_lineups.R: --------------------------------------------------------------------------------
#' Scrape lineups for ESPN Game ID
#'
#' This function scrapes lineup for a given game on espn
#'
#' @param game_id espn id
#'
#' @return list with the two line ups plus
#' subs info
#'
scrape_lineup <- function(game_id) {
  # Returns one data frame (home rows first, then away) with the columns
  # lineup, time_of_sub_in, time_of_sub_out and team, or NULL when the page
  # has fewer than two tables.
  if (length(game_id) > 1) {
    stop("game_id should be length one!")
  }
  team_info <- list()
  teams <- NULL
  n_players <- 11
  game_id <- as.character(game_id)

  # Compose the url
  url <- paste0("http://www.espn.com/soccer/lineups?gameId=", game_id)

  # All the substitution pattern are "#'-#, but extra time has pattern "90+#'-#
  # The vectors are built in the same order expand.grid() would produce
  # (minute varies fastest).
  stoppage_time_pattern <- function(minute) {
    paste0(rep(paste0(minute, "'\\+", 1:9, "'-"), times = 99),
           rep(1:99, each = 9))
  }
  substitution_pattern <- c(
    paste0(rep(1:120, times = 99), "'-", rep(1:99, each = 120)),
    stoppage_time_pattern(45),
    stoppage_time_pattern(90),
    stoppage_time_pattern(120)
  )

  # Both teams are on the same page: download and parse it once
  url_nodes <- rvest::html_nodes(xml2::read_html(url), "table")

  if (length(url_nodes) < 2) {
    cat("Game info", game_id, "- there is no line-up available for this game! \n")
    return(NULL)
  }

  # For each of the two teams
  for (team_j in 1:2) {

    # Extract the line up from the espn url
    team_lineup <- rvest::html_text(url_nodes[[team_j]])
    team_lineup <- stringr::str_replace_all(team_lineup, pattern = "\n",
                                            replacement = "-")
    team_lineup <- stringr::str_replace_all(team_lineup, pattern = "\t",
                                            replacement = "")

    team <- stringr::str_extract(team_lineup, pattern = "(?<=--).*(?=No.-Name)")
    team <- stringr::str_replace_all(team, pattern = "-", replacement = "")
    teams <- c(teams, team)
    team_lineup <- sub('.*\\.-Name', '', team_lineup)


    # PLAYER EXTRACTION DIFFERS BETWEEN GAMES
    # First remove buzzwords that confuse player name number pattern
    buzz_words <- c("Saves", "Goals", "Shots on Target", "Shots", "Fouls Committed",
                    "Fouls Against", "Assists", "Discipline", "Yellow", "Red", "Offsides")
    for (buzz_word in buzz_words) {
      team_lineup <- stringr::str_replace_all(team_lineup, buzz_word, "")
    }

    # Split players
    player_by_player_data <- unlist(strsplit(team_lineup, "-------------------"),
                                    use.names = FALSE)

    # There is no boxscore stats for some player, need to re-split
    # according to a different pattern
    if (length(player_by_player_data) < n_players) {
      player_by_player_data <- split_lineup_when_no_stats(team_lineup)
    }

    line_up <- time_of_sub_in <- time_of_sub_out <- c()
    count_sub <- 0
    for (ii in 1:n_players) {

      # Extract player name
      line_up[ii] <- get_player_name(player_by_player_data[ii])
      time_of_sub_in[ii] <- "start"
      time_of_sub_out[ii] <- "end"

      # If the player was sub out
      is_player_sub_out <- stringr::str_detect(player_by_player_data[ii],
                                               substitution_pattern)

      # At the beginning the player sub out is the ii-th of the starting lineup
      jj <- ii

      # Until we get sub out info we stay on the same line. A player could sub
      # in and be sub out. isTRUE() ends the loop when the string is missing.
      while (isTRUE(any(is_player_sub_out))) {

        count_sub <- count_sub + 1
        wwhich <- which(is_player_sub_out)

        # There could be multiple strings that match, the longest are the exact match
        pattern_length <- nchar(substitution_pattern[wwhich])
        wwhich <- wwhich[which(pattern_length == max(pattern_length))]

        # if there are multiple subs length wwhich > 1, you need to pick
        # the right pattern, which is the first that occurs. Only the first
        # occurrence of each pattern is compared, so a pattern that matches
        # twice cannot produce a ragged result.
        first_start <- vapply(
          stringr::str_locate_all(pattern = substitution_pattern[wwhich],
                                  player_by_player_data[ii]),
          function(x) x[1, "start"],
          numeric(1)
        )
        wwhich <- wwhich[which(first_start == min(first_start))[1]]

        # Get the time of substitution
        time_of_sub_out[jj] <- stringr::str_extract(substitution_pattern[wwhich],
                                                    "\\-*\\d+\\.*\\d*")
        time_of_sub_in[n_players + count_sub] <- time_of_sub_out[jj]
        time_of_sub_out[n_players + count_sub] <- "end"

        # Remove everything before the pattern, so that only the sub pattern remains
        where_sub_pattern <- stringr::str_locate(pattern = substitution_pattern[wwhich],
                                                 player_by_player_data[ii])
        substitution_string <- substring(player_by_player_data[ii],
                                         where_sub_pattern[2])
        # Compute the player name
        line_up[n_players + count_sub] <- get_player_name(substitution_string)

        # Update the player string
        player_by_player_data[ii] <- substitution_string
        is_player_sub_out <- stringr::str_detect(player_by_player_data[ii],
                                                 substitution_pattern)
        # Update the sub: now the possible sub out guy is the one just sub-in
        jj <- n_players + count_sub
      }
    }
    # Fill out the time of sub and provide the list
    team_info[[team_j]] <- data.frame(lineup = line_up,
                                      time_of_sub_in = time_of_sub_in,
                                      time_of_sub_out = time_of_sub_out,
                                      team = rep(ifelse(team_j == 1, "home", "away"),
                                                 length(line_up)))

  }

  cat("Game info", game_id, "-", teams[1], "-", teams[2], "\n")
  do.call(rbind, team_info)
}


#' Split the lineup when no stats are available for some players
#'
#' This function splits the lineup info when the stats are missing for some
#' players, cutting the text at every "-<number>----<Capital letter>" pattern
#' @param team_lineup team lineup extracted by espn removed of buzzwords and extra
#' spaces
#' @return character vector of length 11, one string per starting player; NA
#' for players that could not be found
split_lineup_when_no_stats <- function(team_lineup) {

  n_players <- 11

  # First eliminate multiple space pattern
  team_lineup <- gsub("-\\s-", "", team_lineup, perl = TRUE)

  # Find the new splitting point
  splitting_point <- unlist(gregexpr("-[0-9]+----[A-Z]", team_lineup),
                            use.names = FALSE)
  splitting_point <- splitting_point[!is.na(splitting_point) & splitting_point > 0]
  if (length(splitting_point) == 0) return(rep(NA_character_, n_players))

  # Each player runs up to the next splitting point; the last one found runs
  # to the end of the text instead of being lost
  piece_end <- c(splitting_point[-1], nchar(team_lineup))
  player_by_player_data <- substring(team_lineup, splitting_point + 1, piece_end)

  # Indexing past the end pads with NA when fewer than 11 players were found
  player_by_player_data[seq_len(n_players)]
}


#' Extract the player name given a string with player data info
#'
#' This function extracts the player name given a string with player data info:
#' the text from the first capital letter (A-Z) up to the next hyphen
#' @param player_by_player_string string with a single player info
#' @return the player name; when no hyphen follows the name, everything from
#' the first capital letter to the end of the string
get_player_name <- function(player_by_player_string) {

  # NOTE(review): "[A-Z]" is ASCII only, so a name starting with an accented
  # capital appears to be cut at its first ASCII capital - confirm.
  where_name_starts <- regexpr("[A-Z]", player_by_player_string)[1]
  player_name <- substring(player_by_player_string, where_name_starts)

  where_name_ends <- regexpr("-", player_name, fixed = TRUE)[1]
  # No hyphen after the name: the name is the whole remainder
  if (is.na(where_name_ends) || where_name_ends < 0) return(player_name)

  substring(player_name, 1, where_name_ends - 1)
}
-------------------------------------------------------------------------------- /R/scrape_sofifa.R: --------------------------------------------------------------------------------
#' Scrape sofifa tables
#'
#' Downloads a sofifa page and returns its first table together with the
#' `href` and `title` of every link found in the table cells
#' @param url url for the sofifa table
#' @return list with `sofifa_table` (the first table of the page as a data
#' frame) and `link_title_df` (data frame with columns `link` and `title`)
#'
scrape_sofifa_table <- function(url) {
  xml_url <- xml2::read_html(url)
  tables <- rvest::html_nodes(xml_url, "table")
  if (length(tables) == 0) stop("No table found at ", url)

  sofifa_table <- rvest::html_table(tables[[1]])

  # Collect the anchors once and read both attributes from them
  anchors <- rvest::html_nodes(rvest::html_nodes(x = tables, css = "td"), "a")
  link_title_df <- data.frame(
    link = as.character(rvest::html_attr(anchors, "href")),
    title = as.character(rvest::html_attr(anchors, "title"))
  )

  list(sofifa_table = sofifa_table, link_title_df = link_title_df)
}

#' Clean raw sofifa table
#'
#' Drops the first row of the raw table, keeps and renames the columns of
#' interest, and adds the full name and country (both read from the link
#' titles) and the position (split from the name column)
#' @param tab list returned by \code{scrape_sofifa_table}
#' @return data frame with columns name, age, overall, potential, team, value,
#' wage, full_name, country and position
#'
clean_sofifa_table <- function(tab) {
  # -1 drops the first row and stays correct for a one-row table
  sofifa_table <- tab$sofifa_table[-1, c(2, 3, 4, 5, 6, 8, 9)]
  names(sofifa_table) <- c("name", "age", "overall", "potential", "team", "value", "wage")

  # Splits each string at its first tab: text before it and text after it
  split_at_first_tab <- function(x) {
    x <- as.character(x)
    tab_position <- regexpr("\t", x, fixed = TRUE)
    has_tab <- tab_position > 0
    list(before = ifelse(has_tab, substr(x, 1, tab_position - 1), x),
         after = ifelse(has_tab, substr(x, tab_position + 1, nchar(x)), ""))
  }

  # Extract full player names from the links
  player_name_rows <- grep(pattern = "/player/", tab$link_title_df$link)
  player_names <- as.character(tab$link_title_df$title[player_name_rows])
  stopifnot(length(player_names) == nrow(sofifa_table))
  sofifa_table$full_name <- player_names

  # Extract player country from links: the link right before the player link
  # ERROR IN THIS LINE IN RUSSIA: grep(pattern = "/players\\?na=", x = tab$link_title_df$link)
  player_country_rows <- player_name_rows - 1
  player_country <- as.character(tab$link_title_df$title[player_country_rows])
  stopifnot(length(player_country) == nrow(sofifa_table))
  sofifa_table$country <- player_country

  # Split the first column into "name" and "position"
  name_pos_split <- split_at_first_tab(sofifa_table$name)
  sofifa_table$name <- name_pos_split$before
  sofifa_table$position <- gsub("\t", "", name_pos_split$after, fixed = TRUE)

  # Parse Team: keep only the text before the first tab
  sofifa_table$team <- split_at_first_tab(sofifa_table$team)$before

  sofifa_table
}
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PlusMinusData 2 | 3 | # Introduction 4 | Welcome to PlusMinusData! 5 | 6 | This package will allow you to download play by play data from espn.com and sofifa.com and perform active record linkage. 7 | 8 | ## Donwload the package and other dipendencies 9 | You can get started by downloading the package and some dependencies 10 | 11 | ```{r, eval = FALSE} 12 | devtools::install_github("ryurko/fcscrapR") 13 | devtools::install_github("fmatano/PlusMinusData") 14 | library(magrittr) 15 | library(fcscrapR) 16 | library(PlusMinusData) 17 | ``` 18 | 19 | ## Donwload play-by-play data, for a given game 20 | Here is an example on how to download play-by-play data for a given game 21 | 22 | We first choose league and season 23 | ```{r} 24 | league_selected <- "english premier league" 25 | years <- 2017 26 | ``` 27 | 28 | You can select a date of interest and scrape all the games that occurred that day 29 | 30 | ```{r, eval = FALSE} 31 | date_selected <- as.Date("2017-10-14") 32 | espn_games <- fcscrapR::scrape_scoreboard_ids(scoreboard_name=league_selected, game_date=date_selected) 33 | ``` 34 | You'll notice that there are games that don't belong to the league selected. This happens because espn always displays your current day's games on their page. 
You need to make sure to select the one game or all the games you are actually interested in, or remember to delete all the current day's games. 35 | 36 | For instance, let's download play-by-play data for the match Liverpool - Manchester United. 37 | 38 | We start by extracting the game lineup as follows 39 | 40 | ```{r, eval = FALSE} 41 | game_id <- "480831" 42 | lineup <- scrape_lineup(game_id=game_id) 43 | ``` 44 | Then we add minutes, events and red-card info from the commentary data 45 | 46 | ```{r, eval = FALSE} 47 | game_commentary <- fcscrapR::scrape_commentary(game_id=game_id) 48 | lineup <- add_red_card_info(game_commentary=game_commentary, game_lineup=lineup) 49 | 50 | lineup 51 | ``` 52 | Finally we compute the segments of the game and combine all the events that happened in each segment for the home and away team respectively 53 | ```{r, eval = FALSE} 54 | segmentation_mat <- create_segmentation(game_lineup=lineup) 55 | segmentation_mat <- get_shot_attempt_by_segment(game_commentary=game_commentary, segmentation_matrix=segmentation_mat) 56 | 57 | ``` 58 | 59 | 60 | The design matrix can now be created by passing the lineup and the segmentation matrix. Notice that this can handle a dataframe with lineups and segmentation for a series of games 61 | ```{r, eval = FALSE} 62 | lineup$espn_id <- game_id 63 | segmentation_mat$espn_id <- game_id 64 | design_matrix <- create_design_matrix(lineups=lineup, segments=segmentation_mat) 65 | 66 | ``` 67 | The design matrix contains all the information for the given match. 
68 | -------------------------------------------------------------------------------- /man/active_comparison.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/active_record_linkage.R 3 | \name{active_comparison} 4 | \alias{active_comparison} 5 | \title{Active comparison between records} 6 | \usage{ 7 | active_comparison(data) 8 | } 9 | \arguments{ 10 | \item{data}{dataframe with espn names and fifa names and possibly additional 11 | information} 12 | } 13 | \value{ 14 | vector with all answers 15 | } 16 | \description{ 17 | This function shows one at the time all the comparisons, 18 | for all unique espn players in data 19 | } 20 | -------------------------------------------------------------------------------- /man/active_insert.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/active_record_linkage.R 3 | \name{active_insert} 4 | \alias{active_insert} 5 | \title{Active insertion of fifa names} 6 | \usage{ 7 | active_insert(data) 8 | } 9 | \arguments{ 10 | \item{data}{dataframe with espn names and possibly additional 11 | information} 12 | } 13 | \value{ 14 | vector with all answers. 
Either NA or the answer of the user 15 | } 16 | \description{ 17 | This function allow the user to insert the fifa names for all unique espn players 18 | in data 19 | } 20 | -------------------------------------------------------------------------------- /man/add_red_card_info.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plus_minus.R 3 | \name{add_red_card_info} 4 | \alias{add_red_card_info} 5 | \title{Accounting for red cards} 6 | \usage{ 7 | add_red_card_info(game_commentary, game_lineup, type_card = "red") 8 | } 9 | \arguments{ 10 | \item{game_commentary}{from fcscrapR} 11 | 12 | \item{game_lineup}{computed from the package} 13 | 14 | \item{type_card}{is the type of card to consider. It has to be red, 15 | left it flexible for testing purposes} 16 | } 17 | \description{ 18 | This function adds the info about red card to the game_lineup 19 | } 20 | -------------------------------------------------------------------------------- /man/block_comparison.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/active_record_linkage.R 3 | \name{block_comparison} 4 | \alias{block_comparison} 5 | \title{Block comparison} 6 | \usage{ 7 | block_comparison(records) 8 | } 9 | \arguments{ 10 | \item{records}{table of records with espn, fifa name and some additional info} 11 | } 12 | \value{ 13 | the user answer 14 | } 15 | \description{ 16 | This function shows the espn name with one or more fifa candidates and ask the 17 | user to decide which one is the most plausible match. 
The user can input 18 | the row number of the selected match, or 'y' if the only row is shown and the 19 | user believes that is a match, 0 if the user doesn't see any plausible match 20 | } 21 | -------------------------------------------------------------------------------- /man/clean_sofifa_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scrape_sofifa.R 3 | \name{clean_sofifa_table} 4 | \alias{clean_sofifa_table} 5 | \title{Clean raw sofifa table} 6 | \usage{ 7 | clean_sofifa_table(tab) 8 | } 9 | \arguments{ 10 | \item{tab}{data-frame of sofifa table extracted from \code{scrape_sofifa_table}} 11 | } 12 | \description{ 13 | Clean raw sofifa table 14 | } 15 | -------------------------------------------------------------------------------- /man/create_design_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plus_minus.R 3 | \name{create_design_matrix} 4 | \alias{create_design_matrix} 5 | \title{Real Plus Minus matrix} 6 | \usage{ 7 | create_design_matrix(lineups, segments) 8 | } 9 | \arguments{ 10 | \item{lineups}{all lineups by league stacked in a matrix} 11 | 12 | \item{segments}{all segments by league stacked in a matrix} 13 | } 14 | \description{ 15 | This function creates the design matrix for a given league, given the lineups 16 | and the segmentation matrix 17 | } 18 | -------------------------------------------------------------------------------- /man/create_segmentation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plus_minus.R 3 | \name{create_segmentation} 4 | \alias{create_segmentation} 5 | \title{Creating segmentation} 6 | \usage{ 7 | create_segmentation(game_lineup) 8 | } 9 | 
\arguments{ 10 | \item{game_lineup}{computed from the package} 11 | 12 | \item{game_commentary}{from fcscrapR} 13 | } 14 | \description{ 15 | This function creates segmentation every time the configuration 16 | on the pitch changes 17 | } 18 | -------------------------------------------------------------------------------- /man/get_player_name.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scrape_lineups.R 3 | \name{get_player_name} 4 | \alias{get_player_name} 5 | \title{Extract the player name given a string with player data info} 6 | \usage{ 7 | get_player_name(player_by_player_string) 8 | } 9 | \arguments{ 10 | \item{player_by_player_string}{string with a single player info} 11 | } 12 | \description{ 13 | This function extracts the player name given a string with player data info 14 | } 15 | -------------------------------------------------------------------------------- /man/get_rpm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plus_minus.R 3 | \name{get_rpm} 4 | \alias{get_rpm} 5 | \title{Compute the real-plus minus} 6 | \usage{ 7 | get_rpm(design_matrix, threshold = 0, minutes, response = "goal", 8 | type = "both") 9 | } 10 | \arguments{ 11 | \item{design_matrix}{0, -1, 1 matrix with info about shots and goals} 12 | 13 | \item{threshold}{for minutes, by default 0} 14 | 15 | \item{minutes}{dataframe with minutes info by player} 16 | 17 | \item{response}{character to choose as response, either goal, or shots} 18 | 19 | \item{type}{can be offensive, defensive or both which takes the difference 20 | in the response} 21 | } 22 | \description{ 23 | This function computes real plus minus and threshold by minutes player 24 | if requested 25 | } 26 |
-------------------------------------------------------------------------------- /man/get_shot_attempt_by_segment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plus_minus.R 3 | \name{get_shot_attempt_by_segment} 4 | \alias{get_shot_attempt_by_segment} 5 | \title{Creating the shot attempt by segmentation for both teams} 6 | \usage{ 7 | get_shot_attempt_by_segment(game_commentary, segmentation_matrix) 8 | } 9 | \arguments{ 10 | \item{game_commentary}{from fcscrapR} 11 | 12 | \item{segmentation_matrix}{a matrix with each row a segment, each column 13 | the start and the end of the segment} 14 | } 15 | \description{ 16 | This function creates the shot attempt matrix by segmentation for both teams 17 | } 18 | -------------------------------------------------------------------------------- /man/insert_fifa_name.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/active_record_linkage.R 3 | \name{insert_fifa_name} 4 | \alias{insert_fifa_name} 5 | \title{Insert fifa name} 6 | \usage{ 7 | insert_fifa_name(record) 8 | } 9 | \arguments{ 10 | \item{record}{is the espn name with possibly additional info on the player} 11 | } 12 | \value{ 13 | the answer of the user 14 | } 15 | \description{ 16 | This function allows the user to insert the fifa name, given the espn name 17 | and some other info 18 | } 19 | -------------------------------------------------------------------------------- /man/scrape_lineup.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scrape_lineups.R 3 | \name{scrape_lineup} 4 | \alias{scrape_lineup} 5 | \title{Scrape lineups for ESPN Game ID} 6 | \usage{ 7 | scrape_lineup(game_id) 8 | } 9 |
\arguments{ 10 | \item{game_id}{espn id} 11 | } 12 | \value{ 13 | list with the two line ups plus subs info 14 | } 15 | \description{ 16 | This function scrapes lineup for a given game on espn 17 | } 18 | -------------------------------------------------------------------------------- /man/scrape_sofifa_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scrape_sofifa.R 3 | \name{scrape_sofifa_table} 4 | \alias{scrape_sofifa_table} 5 | \title{Scrape sofifa tables} 6 | \usage{ 7 | scrape_sofifa_table(url) 8 | } 9 | \arguments{ 10 | \item{url}{url for the sofifa table} 11 | } 12 | \description{ 13 | Scrape sofifa tables 14 | } 15 | -------------------------------------------------------------------------------- /man/split_lineup_when_no_stats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scrape_lineups.R 3 | \name{split_lineup_when_no_stats} 4 | \alias{split_lineup_when_no_stats} 5 | \title{Split the lineup when no stats are available for some players} 6 | \usage{ 7 | split_lineup_when_no_stats(team_lineup) 8 | } 9 | \arguments{ 10 | \item{team_lineup}{team lineup extracted by espn removed of buzzwords and extra 11 | spaces} 12 | } 13 | \description{ 14 | This function splits the lineup info when the stats are missing for some players 15 | } 16 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(apm) 3 | 4 | test_check("apm") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-sofifa.R: -------------------------------------------------------------------------------- 1 | context("sofifa.com scraper") 2 | 3 | 
test_that("sofifa scraper works", { 4 | example_url <- "https://sofifa.com/players/?v=07&offset=0" 5 | 6 | sotab1 <- scrape_sofifa_table(url=example_url) 7 | expect_true(nrow(sotab1$sofifa_table) == 81) 8 | expect_true(class(sotab1$link_title_df) == "data.frame") 9 | 10 | sotab1 <- clean_sofifa_table(tab=sotab1) 11 | expect_true(class(sotab1) == "data.frame") 12 | expect_true(nrow(sotab1) == 80) 13 | 14 | example_url2 <- "https://sofifa.com/players/?v=07&offset=80" 15 | sotab2 <- scrape_sofifa_table(url=example_url2) 16 | sotab2 <- clean_sofifa_table(tab=sotab2) 17 | 18 | expect_true( all(as.numeric(sotab1[80, "overall"]) >= as.numeric(sotab2$overall)) ) 19 | }) 20 | -------------------------------------------------------------------------------- /vignettes/my-vignette.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "PlusMinusData" 3 | author: "Francesca Matano" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Vignette Title} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | 13 | ```{r setup, include = FALSE} 14 | knitr::opts_chunk$set( 15 | collapse = TRUE, 16 | comment = "#>" 17 | ) 18 | ``` 19 | # Introduction 20 | Welcome to PlusMinusData! 21 | 22 | This vignette will guide you through the basic functionality of the PlusMinusData package. We will first explain how to download play-by-play data from espn.com, by league and season. We will then explain you how to download FIFA ratings for each player, from sofifa.com. Finally we will show you how to link player names between these two different sources of data, and use our interactive record linkage system. 
23 | 24 | 25 | ## Download the package and other dependencies 26 | 27 | ```{r, eval = FALSE} 28 | devtools::install_github("ryurko/fcscrapR") 29 | devtools::install_github("fmatano/PlusMinusData") 30 | library(magrittr) 31 | library(fcscrapR) 32 | library(PlusMinusData) 33 | ``` 34 | 35 | ```{r, include = FALSE} 36 | library(magrittr) 37 | library(fcscrapR) 38 | devtools::load_all() 39 | ``` 40 | 41 | ## Download play-by-play data, for a given game 42 | 43 | We first choose league and season 44 | ```{r} 45 | league_selected <- "english premier league" 46 | years <- 2017 47 | ``` 48 | 49 | A list of all possible leagues is given by running 50 | ```{r, eval = FALSE} 51 | # install.packages("pander") 52 | league_url_data %>% 53 | head() %>% 54 | pander::pander() 55 | ``` 56 | 57 | 58 | You can select a date of interest and scrape all the games that occurred that day 59 | 60 | ```{r} 61 | 62 | date_selected <- as.Date("2017-10-14") 63 | espn_games <- fcscrapR::scrape_scoreboard_ids(scoreboard_name=league_selected, game_date=date_selected) 64 | espn_games 65 | ``` 66 | You'll notice that there are games that don't belong to the league selected. This happens because espn always displays your current day's games on their page. You need to make sure to select the one game or all the games you are actually interested in, or remember to delete all the current day's games. 67 | 68 | Let's for instance use the match Liverpool - Manchester United. 69 | 70 | We can obtain their data by extracting this game id and plugging the game id into our function.
We first extract the game lineup 71 | 72 | ```{r} 73 | game_id <- "480831" 74 | lineup <- scrape_lineup(game_id = game_id) 75 | lineup 76 | ``` 77 | Then we add minutes, events and red-card info from the commentary data 78 | 79 | ```{r} 80 | game_commentary <- fcscrapR::scrape_commentary(game_id = game_id) 81 | lineup <- add_red_card_info(game_commentary = game_commentary, game_lineup = lineup) 82 | 83 | lineup 84 | ``` 85 | Finally we compute the segments of the game and combine all the events that happened in that segment for home and away team respectively 86 | ```{r} 87 | segmentation_mat <- create_segmentation(game_lineup = lineup) 88 | segmentation_mat <- get_shot_attempt_by_segment(game_commentary = game_commentary, segmentation_matrix = segmentation_mat) 89 | 90 | segmentation_mat[, 1:10] 91 | ``` 92 | 93 | 94 | The design matrix can now be created by passing the lineup and segmentation matrix as input. Notice that this can handle a dataframe with lineups and segmentation for a series of games 95 | ```{r, eval = FALSE} 96 | lineup$espn_id <- game_id 97 | segmentation_mat$espn_id <- game_id 98 | design_matrix <- create_design_matrix(lineups = lineup, segments = segmentation_mat) 99 | 100 | ``` 101 | 102 | 103 | ## Scrape sofifa.com 104 | The sofifa website displays ratings at various points of the season, so you first need to decide which one you want to scrape. 105 | Suppose you are interested in the fifa ratings at the beginning of the season 2017; then you can pass the link from the webpage directly 106 | 107 | ```{r} 108 | sofifa_2007 <- "https://sofifa.com/players?col=oa&sort=desc%3F&e=154818&set=true&v=07" 109 | 110 | sotab <- scrape_sofifa_table(url = sofifa_2007) 111 | sotab_clean <- clean_sofifa_table(tab = sotab) 112 | 113 | head(sotab_clean) 114 | ``` 115 | 116 | ## Interactive Record Linkage 117 | Sofifa doesn't provide information on the league, but only the team the players are in at the beginning of the season.
We can now match the players between sofifa and espn by using our interactive record linkage functionality. 118 | 119 | We can either perform an active comparison in which we evaluate our data and the system will 120 | ask us to pick the match from a list of possible matches 121 | ```{r, eval = FALSE} 122 | table_for_active_comparison <- data.frame(espn_name = lineup$lineup, fifa_name = sotab_clean$full_name, fifa_team = sotab_clean$team) 123 | rows_of_match <- active_comparison(data = table_for_active_comparison) 124 | ``` 125 | 126 | ```{r, echo = FALSE} 127 | unmatched_names <- data.frame(espn_name = c('David Silva', 'David Silva'), 128 | espn_team = c('Manchester City', 'Manchester City'), 129 | fifa_name = c('David Silva', 'David Josué Jiménez Silva'), 130 | fifa_team = c('CD Los Millionarios Bogota', 'Manchester City')) 131 | print("Choose the row you think it's a match. Enter 0 if you think there is no match.") 132 | print ("If there is a single row shown, you can also enter 'y' for yes.") 133 | print(unmatched_names) 134 | print('Which row is a match:') 135 | ``` 136 | 137 | 138 | Alternatively, we can enter the match directly, using the following active insert functionality 139 | 140 | ```{r, eval = FALSE} 141 | unmatched_names <- data.frame(espn_name = 'David Silva', team = 'Manchester City', fifa_name = NA) 142 | fifa_names <- active_insert(data = unmatched_names) 143 | ``` 144 | 145 | ```{r, echo = FALSE} 146 | unmatched_names <- data.frame(espn_name = 'David Silva', team = 'Manchester City') 147 | print("Insert the fifa name for the record shown below or NA if uknown.") 148 | 149 | print(unmatched_names) 150 | print('Your answer:') 151 | ``` 152 | --------------------------------------------------------------------------------