├── nflTeamBoxscoreStats.r ├── 2017_TNFMNF.csv ├── nflTeamBoxscoreFunctions.r ├── 2017_schedule.csv ├── app.r ├── nflTeamBoxscoreScrape.py ├── README.md ├── nflTeamBoxscoreAnalysis.r ├── 2018_schedule_single.csv ├── 2017_schedule_single.csv ├── nflTeamBoxscoreScrape_v3.py ├── nflPlayByPlayScrape_v1.py ├── nflTeamBoxscoreScrape_v2.py ├── nflBoxscoreScrape2018.csv └── nflSnapCount2018.csv /nflTeamBoxscoreStats.r: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/6ord/NFLfootball/HEAD/nflTeamBoxscoreStats.r -------------------------------------------------------------------------------- /2017_TNFMNF.csv: -------------------------------------------------------------------------------- 1 | 1,2017-09-07,KC,@NE 2 | 1,2017-09-11,NO,@MIN 3 | 1,2017-09-11,LAC,@DEN 4 | 2,2017-09-14,HOU,@CIN 5 | 2,2017-09-18,DET,@NYG 6 | 3,2017-09-21,LAR,@SF 7 | 3,2017-09-25,DAL,@ARI 8 | 4,2017-09-28,CHI,@GB 9 | 4,2017-10-02,WSH,@KC 10 | 5,2017-10-05,NE,@TB 11 | 5,2017-10-09,MIN,@CHI 12 | 6,2017-10-12,PHI,@CAR 13 | 6,2017-10-16,IND,@TEN 14 | 7,2017-10-19,KC,@OAK 15 | 7,2017-10-23,WSH,@PHI 16 | 8,2017-10-26,MIA,@BAL 17 | 8,2017-10-30,DEN,@KC 18 | 9,2017-11-02,BUF,@NYJ 19 | 9,2017-11-06,DET,@GB 20 | 10,2017-11-09,SEA,@ARI 21 | 10,2017-11-13,MIA,@CAR 22 | 11,2017-11-16,TEN,@PIT 23 | 11,2017-11-20,ATL,@SEA 24 | 12,2017-11-23,MIN,@DET 25 | 12,2017-11-23,LAC,@DAL 26 | 12,2017-11-23,NYG,@WSH 27 | 12,2017-11-27,HOU,@BAL 28 | 13,2017-11-30,WSH,@DAL 29 | 13,2017-12-04,PIT,@CIN 30 | 14,2017-12-07,NO,@ATL 31 | 14,2017-12-11,NE,@MIA 32 | 15,2017-12-14,DEN,@IND 33 | 15,2017-12-16,CHI,@DET 34 | 15,2017-12-16,LAC,@KC 35 | 15,2017-12-18,ATL,@TB 36 | 16,2017-12-23,IND,@BAL 37 | 16,2017-12-23,MIN,@GB 38 | 16,2017-12-25,PIT,@HOU 39 | 16,2017-12-25,OAK,@PHI 40 | -------------------------------------------------------------------------------- /nflTeamBoxscoreFunctions.r: -------------------------------------------------------------------------------- 
# Helper functions shared by the NFL team boxscore scripts.
#
# Example of the global settings list that aggPriorWks() falls back to when
# its numWks argument is not supplied:
# vars <- list(currWk=3,
#              numWks=2
#              )

# Expand a schedule into one row per team per game.
#
# x: data frame with columns Week, Road and Home; any '@' in Home is removed.
# Returns a data frame with columns Week, Team, Oppn: first every road team
# with its opponent, then every home team with its opponent.
matchups <- function(x) { # see line 140 for call
  home <- gsub('@', '', x$Home)
  rbind(
    data.frame(Week = x$Week, Team = x$Road, Oppn = home),
    data.frame(Week = x$Week, Team = home, Oppn = x$Road)
  )
}

# Build the Team/Oppn pairing table for a single week.
#
# week:     week number to select (compared against schedule$Week).
# schedule: data frame with a Week column; columns 3 and 4 hold the two teams
#           of each game (presumably road team and '@'-prefixed home team --
#           TODO confirm against the schedule file that is loaded).
# Returns a data frame with columns Week, Team, Oppn containing each game
# twice, once from each side.
# NOTE(review): unlike matchups(), the values are copied as-is, so any '@'
# prefix present in column 4 is kept.
buildThisWk <- function(week, schedule) {
  games <- schedule[which(schedule$Week == week), ]
  pair <- games[c(3, 4)]

  # Stack the pairs on top of the same pairs with the columns swapped.
  # CREDIT: https://stackoverflow.com/questions/19297475/simplest-way-to-get-rbind-to-ignore-column-names
  both <- rbind(pair, setNames(rev(pair), names(pair)))
  colnames(both) <- c('Team', 'Oppn')
  cbind(Week = week, both)
}

# Sum each team's boxscore columns over the most recent numWks weeks.
#
# weeklybox: data frame with a Week column; column 2 must be named Team and
#            columns 6:87 must be summable, because exactly those columns are
#            handed to aggregate(. ~ Team, FUN = sum).
# numWks:    number of trailing weeks (ending at max(weeklybox$Week)) to
#            include. Defaults to the global vars$numWks so existing calls
#            that omit it keep working.
# Returns the data frame produced by aggregate(): one row per Team.
aggPriorWks <- function(weeklybox, numWks = vars$numWks) {
  # A window shorter than one week would make the range below run backwards
  # (e.g. (max + 1):max) and silently select the wrong weeks.
  stopifnot(is.numeric(numWks), length(numWks) == 1, numWks >= 1)

  latest <- max(weeklybox$Week)
  # Use the numWks argument itself; the previous code ignored it and always
  # read vars$numWks.
  window <- seq(latest - numWks + 1, latest)
  recent <- weeklybox[weeklybox$Week %in% window, c(2, 6:87)]
  aggregate(. ~ Team, data = recent, FUN = sum)
}

# Look up one column of the aggregated boxscore for each requested team.
#
# weeklyBoxAgg: data frame with a Team column (e.g. the result of
#               aggPriorWks()).
# team:         vector of team codes to look up.
# colnum:       column index (or name) of the metric to extract.
# Returns the result of sapply() over team, each element being the selected
# column for the rows whose Team matches.
getMetric <- function(weeklyBoxAgg, team, colnum) {
  sapply(team, function(x) {
    weeklyBoxAgg[which(weeklyBoxAgg$Team == x), ][colnum]
  })
}

# Top snap-count rows for the given team(s) and position(s) in one week.
#
# Reads the global data frame snapcounts2018, which must have team and
# position columns.
# tm:   team code(s) to keep.
# pos:  position code(s) to keep.
# week: week number; the ranking column is taken at index week + 3
#       (assumes the weekly snap counts start at column 4 -- TODO confirm).
# num:  maximum number of rows to return.
# Returns the first num matching rows, highest snap count first.
topSnapCounts <- function(tm, pos=c('QB','RB','TE','WR','CB','DE','DT','LB','S'), week, num=5){
  keep <- snapcounts2018$team %in% tm & snapcounts2018$position %in% pos
  picked <- snapcounts2018[keep, , drop = FALSE]
  # Rank the filtered rows by their own values. The previous code ordered the
  # full table and applied those indices to the subset, which selected the
  # wrong (or NA) rows. [[ ]] extracts a plain vector for order().
  ranked <- picked[order(picked[[week + 3]], decreasing = TRUE), , drop = FALSE]
  head(ranked, num)
}

# Repository-dump residue that shared this line (separator and the first rows of 2017_schedule.csv), kept verbatim:
# -------------------------------------------------------------------------------- /2017_schedule.csv: -------------------------------------------------------------------------------- 1 | TEAM,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17 2 | ARI,@DET,@IND,DAL,SF,@PHI,TB,@LAR,BYE,@SF,SEA,@HOU,JAX,LAR,TEN,@WSH,NYG,@SEA 3 | ATL,@CHI,GB,@DET,BUF,BYE,MIA,@NE,@NYJ,@CAR,DAL,@SEA,TB,MIN,NO,@TB,@NO,CAR 4 | BAL,@CIN,CLE,@JAX,PIT,@OAK,CHI,@MIN,MIA,@TEN,BYE,@GB,HOU,DET,@PIT,@CLE,IND,CIN 5 | BUF,NYJ,@CAR,DEN,@ATL,@CIN,BYE,TB,OAK,@NYJ,NO,@LAC,@KC,NE,IND,MIA,@NE,@MIA 6 | CAR,@SF,BUF,NO,@NE,@DET,PHI,@CHI,@TB,ATL,MIA,BYE,@NYJ,@NO,MIN,GB,TB,@ATL 7 | 
CHI,ATL,@TB,PIT,@GB,MIN,@BAL,CAR,@NO,BYE,GB,DET,@PHI,SF,@CIN,@DET,CLE,@MIN 8 | CIN,BAL,HOU,@GB,@CLE,BUF,BYE,@PIT,IND,@JAX,@TEN,@DEN,CLE,PIT,CHI,@MIN,DET,@BAL 9 | CLE,PIT,@BAL,@IND,CIN,NYJ,@HOU,TEN,MIN,BYE,@DET,JAX,@CIN,@LAC,GB,BAL,@CHI,@PIT 10 | DAL,NYG,@DEN,@ARI,LAR,GB,BYE,@SF,@WSH,KC,@ATL,PHI,LAC,WSH,@NYG,@OAK,SEA,@PHI 11 | DEN,LAC,DAL,@BUF,OAK,BYE,NYG,@LAC,@KC,@PHI,NE,CIN,@OAK,@MIA,NYJ,@IND,@WSH,KC 12 | DET,ARI,@NYG,ATL,@MIN,CAR,@NO,BYE,PIT,@GB,CLE,@CHI,MIN,@BAL,@TB,CHI,@CIN,GB 13 | GB,SEA,@ATL,CIN,CHI,@DAL,@MIN,NO,BYE,DET,@CHI,BAL,@PIT,TB,@CLE,@CAR,MIN,@DET 14 | HOU,JAX,@CIN,@NE,TEN,KC,CLE,BYE,@SEA,IND,@LAR,ARI,@BAL,@TEN,SF,@JAX,PIT,@IND 15 | IND,@LAR,ARI,CLE,@SEA,SF,@TEN,JAX,@CIN,@HOU,PIT,BYE,TEN,@JAX,@BUF,DEN,@BAL,HOU 16 | JAX,@HOU,TEN,BAL,@NYJ,@PIT,LAR,@IND,BYE,CIN,LAC,@CLE,@ARI,IND,SEA,HOU,@SF,@TEN 17 | KC,@NE,PHI,@LAC,WSH,@HOU,PIT,@OAK,DEN,@DAL,BYE,@NYG,BUF,@NYJ,OAK,LAC,MIA,@DEN 18 | LAR,IND,WSH,@SF,@DAL,SEA,@JAX,ARI,BYE,@NYG,HOU,@MIN,NO,@ARI,PHI,@SEA,@TEN,SF 19 | LAC,@DEN,MIA,KC,PHI,@NYG,@OAK,DEN,@NE,BYE,@JAX,BUF,@DAL,CLE,WSH,@KC,@NYJ,OAK 20 | MIA,BYE,@LAC,@NYJ,NO,TEN,@ATL,NYJ,@BAL,OAK,@CAR,TB,@NE,DEN,NE,@BUF,@KC,BUF 21 | MIN,NO,@PIT,TB,DET,@CHI,GB,BAL,@CLE,BYE,@WSH,LAR,@DET,@ATL,@CAR,CIN,@GB,CHI 22 | NE,KC,@NO,HOU,CAR,@TB,@NYJ,ATL,LAC,BYE,@DEN,@OAK,MIA,@BUF,@MIA,@PIT,BUF,NYJ 23 | NO,@MIN,NE,@CAR,@MIA,BYE,DET,@GB,CHI,TB,@BUF,WSH,@LAR,CAR,@ATL,NYJ,ATL,@TB 24 | NYG,@DAL,DET,@PHI,@TB,LAC,@DEN,SEA,BYE,LAR,@SF,KC,@WSH,@OAK,DAL,PHI,@ARI,WSH 25 | NYJ,@BUF,@OAK,MIA,JAX,@CLE,NE,@MIA,ATL,BUF,@TB,BYE,CAR,KC,@DEN,@NO,LAC,@NE 26 | OAK,@TEN,NYJ,@WSH,@DEN,BAL,LAC,KC,@BUF,@MIA,BYE,NE,DEN,NYG,@KC,DAL,@PHI,@LAC 27 | PHI,@WSH,@KC,NYG,@LAC,ARI,@CAR,WSH,SF,DEN,BYE,@DAL,CHI,@SEA,@LAR,@NYG,OAK,DAL 28 | PIT,@CLE,MIN,@CHI,@BAL,JAX,@KC,CIN,@DET,BYE,@IND,TEN,GB,@CIN,BAL,NE,@HOU,CLE 29 | SF,CAR,@SEA,LAR,@ARI,@IND,@WSH,DAL,@PHI,ARI,NYG,BYE,SEA,@CHI,@HOU,TEN,JAX,@LAR 30 | SEA,@GB,SF,@TEN,IND,@LAR,BYE,@NYG,HOU,WSH,@ARI,ATL,@SF,PHI,@JAX,LAR,@DAL,ARI 31 | 
TB,BYE,CHI,@MIN,NYG,NE,@ARI,@BUF,CAR,@NO,NYJ,@MIA,@ATL,@GB,DET,ATL,@CAR,NO 32 | TEN,OAK,@JAX,SEA,@HOU,@MIA,IND,@CLE,BYE,BAL,CIN,@PIT,@IND,HOU,@ARI,@SF,LAR,JAX 33 | WSH,PHI,@LAR,OAK,@KC,BYE,SF,@PHI,DAL,@SEA,MIN,@NO,NYG,@DAL,@LAC,ARI,DEN,@NYG 34 | -------------------------------------------------------------------------------- /app.r: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | 3 | # vars <- list(currWk=3, 4 | # numWks=2 5 | # ) 6 | 7 | # setwd('D:/workbin/BigData/NFLScrape/') 8 | # source('D:/workbin/BigData/NFLScrape/nflTeamBoxscoreFunctions.r') 9 | # source('D:/workbin/BigData/NFLScrape/nflTeamBoxscoreStats.r') 10 | # source('D:/workbin/BigData/NFLScrape/nflTeamBoxscoreAnalysis.r') 11 | 12 | # numPastWks <- 4 13 | # currWk <- 8 14 | 15 | source('nflTeamBoxscoreFunctions.r') 16 | source('nflTeamBoxscoreStats.r') 17 | source('nflTeamBoxscoreAnalysis.r') 18 | 19 | 20 | 21 | #CREDIT: https://shiny.rstudio.com/gallery/ 22 | 23 | # Define UI for data download app ---- 24 | ui <- fluidPage( 25 | 26 | # App title ---- 27 | titlePanel("2019 NFL Regular Season Wk 3 Outlook based on past 2 weeks."), 28 | 29 | # Sidebar layout with input and output definitions ---- 30 | sidebarLayout( 31 | 32 | # Sidebar panel for inputs ---- 33 | sidebarPanel( 34 | 35 | # Input: Choose dataset ---- 36 | selectInput("dataset", "Choose a report:", 37 | choices = c("Boxscore Details", "All Ranking", 38 | "Avg Ranking - Passing", 39 | "Avg Ranking - Rushing", 40 | "Avg Ranking - Defense", 41 | "Avg Ranking - Defense BX", 42 | "Avg Ranking - Defense DB", 43 | "Avg Ranking - Kicker", 44 | "SnapCounts")), 45 | 46 | # Button 47 | downloadButton("downloadData", "Download") 48 | 49 | ), 50 | 51 | # Main panel for displaying outputs ---- 52 | mainPanel( 53 | 54 | tableOutput("table") 55 | 56 | ) 57 | ) 58 | ) 59 | 60 | # Define server logic to display and download selected file ---- 61 | server <- function(input, output) { 62 | 63 | # Reactive 
value for selected dataset ---- 64 | datasetInput <- reactive({ 65 | switch(input$dataset, 66 | "Boxscore Details" = priorWks, 67 | "All Ranking" = thisWkRank, 68 | "Avg Ranking - Passing"=thisWkRankAvg[order(thisWkRankAvg$QBWRTE),], 69 | "Avg Ranking - Rushing"=thisWkRankAvg[order(thisWkRankAvg$RB),], 70 | "Avg Ranking - Defense"=thisWkRankAvg[order(thisWkRankAvg$DEF),], 71 | "Avg Ranking - Defense BX"=thisWkRankAvg[order(thisWkRankAvg$DLLB),], 72 | "Avg Ranking - Defense DB"=thisWkRankAvg[order(thisWkRankAvg$DB),], 73 | "Avg Ranking - Kicker"=thisWkRankAvg[order(thisWkRankAvg$KR),], 74 | "SnapCounts"=snapcounts2018 75 | ) 76 | }) 77 | 78 | # Table of selected dataset ---- 79 | output$table <- renderTable({ 80 | datasetInput() 81 | }) 82 | 83 | # Downloadable csv of selected dataset ---- 84 | output$downloadData <- downloadHandler( 85 | filename = function() { 86 | paste(input$dataset, ".csv", sep = "") 87 | }, 88 | content = function(file) { 89 | write.csv(datasetInput(), file, row.names = FALSE) 90 | } 91 | ) 92 | 93 | } 94 | 95 | # Create Shiny app ---- 96 | shinyApp(ui, server) 97 | 98 | -------------------------------------------------------------------------------- /nflTeamBoxscoreScrape.py: -------------------------------------------------------------------------------- 1 | import re, requests, bs4, csv, datetime 2 | 3 | def nflBoxscoreScrape(): 4 | print(datetime.datetime.now()) 5 | #Ex, https://www.cbssports.com/nfl/gametracker/boxscore/NFL_20171210_IND@BUF 6 | url_base = 'https://www.cbssports.com/nfl/gametracker/boxscore/NFL_' 7 | url_game = [] 8 | schedule = tuple(csv.reader(open('./2017_schedule_single.csv'))) 9 | soupOutput = [] 10 | tagDump = open('nflBoxscoreScrape(Tags).csv','w',newline='') 11 | tagCSVWriter = csv.writer(tagDump,delimiter=',',lineterminator='\n') 12 | 13 | realOutput = [] 14 | realDump = open('nflBoxscoreScrape.csv','w',newline='') 15 | realCSVWriter = csv.writer(realDump,delimiter=',',lineterminator='\n') 16 | 17 | for i in 
range(1,257): 18 | url_game.append(schedule[i][1][0:4]+ 19 | schedule[i][1][5:7]+ 20 | schedule[i][1][8:10]+ 21 | '_'+ 22 | schedule[i][2]+ 23 | schedule[i][3]) 24 | 25 | tagPick_lines = '.team-stats tr' 26 | #tagPick_stat_feld/valu = '.team-stats td' 27 | print('\n'+str(len(url_game))+'\n') 28 | print(url_game[-1]) 29 | #for i in range(256): 30 | for i in range(len(url_game)): 31 | #for i in range(2): 32 | url=str(url_base+url_game[i]) 33 | boxSoup = bs4.BeautifulSoup(requests.get(url).text,'html.parser') 34 | 35 | # TESTING: Make sure each game has 36 stats(from bs4 Object length) 36 | print('Gm '+str(i)+' '+url+' '+str(len(boxSoup.select(tagPick_lines)))) 37 | print(datetime.datetime.now()) 38 | soupOutput.append([url[55:len(url)],boxSoup.select(tagPick_lines)[0].getText()]) 39 | 40 | for j in range(1,len(boxSoup.select(tagPick_lines))): 41 | soupOutput[i].append(boxSoup.select(tagPick_lines)[j].getText()) 42 | 43 | tagCSVWriter.writerow(soupOutput[i]) 44 | 45 | tagDump.close() 46 | 47 | print('NFL Boxscore from CBS Scrape ended.') 48 | #print('Example row: '+str(soupOutput[0])) #TO SEE \n 's 49 | #print('Example row: '+soupOutput[0][35]) 50 | print(datetime.datetime.now()) 51 | 52 | 53 | nxt_rcrd = 0 54 | realOutput.append(['Gm','Date','Team']) 55 | for h in range(1,len(soupOutput[0])): 56 | realOutput[nxt_rcrd].append(soupOutput[0][h].splitlines()[0]) 57 | nxt_rcrd += 1 58 | 59 | for i in range(len(soupOutput)): 60 | url=str(url_base+url_game[i]) 61 | realOutput.append([url[55:len(url)], 62 | url[55:63], 63 | url[url.rfind('_')+1:url.find('@')] #away team 64 | ]) 65 | for j in range(1,len(soupOutput[i])): 66 | realOutput[nxt_rcrd].append(soupOutput[i][j].splitlines()[1]) 67 | nxt_rcrd += 1 68 | realOutput.append([url[55:len(url)], 69 | url[55:63], 70 | url[url.find('@')+1:len(url)] #home team 71 | ]) 72 | for k in range(1,len(soupOutput[i])): #Do something different if j=34,35 73 | #RZ and GoalToGo success rate had 74 | #FIVE lines (\n) to include %. 
75 | if not 33 h3') 7 | 8 | def RegSeason2017BoxscoreScrape(): 9 | print(datetime.datetime.now()) 10 | #Ex, https://www.cbssports.com/nfl/gametracker/boxscore/NFL_20171210_IND@BUF 11 | url_base = 'https://www.cbssports.com/nfl/gametracker/boxscore/NFL_' 12 | url_game = [] 13 | schedule = tuple(csv.reader(open('./2017_schedule_single.csv'))) 14 | soupOutput = [] 15 | tagDump = open('nflBoxscoreScrape(Tags).csv','w',newline='') 16 | tagCSVWriter = csv.writer(tagDump,delimiter=',',lineterminator='\n') 17 | 18 | realOutput = [] 19 | realDump = open('nflBoxscoreScrape.csv','w',newline='',encoding='utf-8') 20 | realCSVWriter = csv.writer(realDump,delimiter=',',lineterminator='\n') 21 | 22 | for i in range(1,257): 23 | url_game.append(schedule[i][1][0:4]+ 24 | schedule[i][1][5:7]+ 25 | schedule[i][1][8:10]+ 26 | '_'+ 27 | schedule[i][2]+ 28 | schedule[i][3]) 29 | 30 | tagPick_lines = '.team-stats tr' 31 | #tagPick_stat_feld/valu = '.team-stats td' 32 | print('\n'+str(len(url_game))+'\n') 33 | print(url_game[-1]) 34 | #for i in range(256): 35 | for i in range(len(url_game)): 36 | #for i in range(2): 37 | url=str(url_base+url_game[i]) 38 | boxSoup = bs4.BeautifulSoup(requests.get(url).text,'html.parser') 39 | 40 | # TESTING: Make sure each game has 36 stats(from bs4 Object length) 41 | print('Gm '+str(i)+' '+url+' '+str(len(boxSoup.select(tagPick_lines)))) 42 | print(datetime.datetime.now()) 43 | soupOutput.append([url[55:len(url)],boxSoup.select(tagPick_lines)[0].getText()]) 44 | 45 | for j in range(1,len(boxSoup.select(tagPick_lines))): 46 | soupOutput[i].append(boxSoup.select(tagPick_lines)[j].getText()) 47 | 48 | tagCSVWriter.writerow(soupOutput[i]) 49 | 50 | tagDump.close() 51 | 52 | print('NFL Boxscore from CBS Scrape ended.') 53 | #print('Example row: '+str(soupOutput[0])) #TO SEE \n 's 54 | #print('Example row: '+soupOutput[0][35]) 55 | print(datetime.datetime.now()) 56 | 57 | 58 | nxt_rcrd = 0 59 | realOutput.append(['Gm','Date','Team']) 60 | for h in 
range(1,len(soupOutput[0])): 61 | realOutput[nxt_rcrd].append(soupOutput[0][h].splitlines()[0]) 62 | nxt_rcrd += 1 63 | 64 | for i in range(len(soupOutput)): 65 | url=str(url_base+url_game[i]) 66 | realOutput.append([url[55:len(url)], 67 | url[55:63], 68 | url[url.rfind('_')+1:url.find('@')] #away team 69 | ]) 70 | for j in range(1,len(soupOutput[i])): 71 | realOutput[nxt_rcrd].append(soupOutput[i][j].splitlines()[1]) 72 | nxt_rcrd += 1 73 | realOutput.append([url[55:len(url)], 74 | url[55:63], 75 | url[url.find('@')+1:len(url)] #home team 76 | ]) 77 | for k in range(1,len(soupOutput[i])): #Do something different if j=34,35 78 | #RZ and GoalToGo success rate had 79 | #FIVE lines (\n) to include %. 80 | if not 33